diff --git a/.gitattributes b/.gitattributes
index cd4359ba3..35df6ca2f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,42 +1,58 @@
-.git*       export-ignore
-
 *           text=auto whitespace=trailing-space,space-before-tab,-indent-with-non-tab,tab-in-indent,tabwidth=4
 
-*.py        text
-*.cpp       text
-*.hpp       text
-*.cxx       text
-*.hxx       text
-*.mm        text
-*.c         text
-*.h         text
-*.i         text
-*.js        text
-*.java      text
-*.scala     text
-*.cu        text
-*.cl        text
-*.css_t     text
-*.qrc       text
-*.qss       text
-*.S         text
-*.rst       text
-*.tex       text
-*.sty       text
+.git*       text export-ignore
 
-*.aidl      text
-*.mk        text
+*.aidl         text
+*.appxmanifest text
+*.bib          text
+*.c            text
+*.cl           text
+*.conf         text
+*.cpp          text
+*.css_t        text
+*.cu           text
+*.cxx          text
+*.def          text
+*.filelist     text
+*.h            text
+*.hpp          text
+*.htm          text
+*.html         text
+*.hxx          text
+*.i            text
+*.idl          text
+*.java         text
+*.js           text
+*.mk           text
+*.mm           text
+*.plist        text
+*.properties   text
+*.py           text
+*.qrc          text
+*.qss          text
+*.S            text
+*.sbt          text
+*.scala        text
+*.sty          text
+*.tex          text
+*.txt          text
+*.xaml         text
+
+# reST underlines/overlines can look like conflict markers
+*.rst          text conflict-marker-size=80
 
 *.cmake         text whitespace=tabwidth=2
 *.cmakein       text whitespace=tabwidth=2
 *.in            text whitespace=tabwidth=2
 CMakeLists.txt  text whitespace=tabwidth=2
 
-*.png       binary
-*.jpeg      binary
-*.jpg       binary
+*.avi       binary
+*.bmp       binary
 *.exr       binary
 *.ico       binary
+*.jpeg      binary
+*.jpg       binary
+*.png       binary
 
 *.a         binary
 *.so        binary
@@ -47,6 +63,7 @@ CMakeLists.txt  text whitespace=tabwidth=2
 *.pbxproj   binary
 *.vec       binary
 *.doc       binary
+*.dia       binary
 
 *.xml                      -text whitespace=cr-at-eol
 *.yml                      -text whitespace=cr-at-eol
@@ -55,9 +72,12 @@ CMakeLists.txt  text whitespace=tabwidth=2
 .cproject                  -text whitespace=cr-at-eol merge=union
 org.eclipse.jdt.core.prefs -text whitespace=cr-at-eol merge=union
 
-*.vcproj    text eol=crlf merge=union
 *.bat       text eol=crlf
 *.cmd       text eol=crlf
 *.cmd.tmpl  text eol=crlf
+*.dsp       text eol=crlf -whitespace
+*.sln       text eol=crlf -whitespace
+*.vcproj    text eol=crlf -whitespace merge=union
+*.vcxproj   text eol=crlf -whitespace merge=union
 
-*.sh        text eol=lf
\ No newline at end of file
+*.sh        text eol=lf
diff --git a/.gitignore b/.gitignore
index 0a19f3cee..de0707a93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,10 @@
 .DS_Store
 refman.rst
 OpenCV4Tegra/
+tegra/
 *.user
 .sw[a-z]
 .*.swp
 tags
+Thumbs.db
+*.autosave
diff --git a/3rdparty/.gitattributes b/3rdparty/.gitattributes
new file mode 100644
index 000000000..562b12e16
--- /dev/null
+++ b/3rdparty/.gitattributes
@@ -0,0 +1 @@
+* -whitespace
diff --git a/3rdparty/ffmpeg/ffmpeg_version.cmake b/3rdparty/ffmpeg/ffmpeg_version.cmake
index 3cbcb394d..7bb8bab3f 100644
--- a/3rdparty/ffmpeg/ffmpeg_version.cmake
+++ b/3rdparty/ffmpeg/ffmpeg_version.cmake
@@ -1,5 +1,4 @@
 set(HAVE_FFMPEG 1)
-set(NEW_FFMPEG 1)
 set(HAVE_FFMPEG_CODEC 1)
 set(HAVE_FFMPEG_FORMAT 1)
 set(HAVE_FFMPEG_UTIL 1)
@@ -9,4 +8,4 @@ set(HAVE_GENTOO_FFMPEG 1)
 set(ALIASOF_libavcodec_VERSION 53.61.100)
 set(ALIASOF_libavformat_VERSION 53.32.100)
 set(ALIASOF_libavutil_VERSION 51.35.100)
-set(ALIASOF_libswscale_VERSION 2.1.100)
\ No newline at end of file
+set(ALIASOF_libswscale_VERSION 2.1.100)
diff --git a/3rdparty/ffmpeg/make.bat b/3rdparty/ffmpeg/make.bat
index 9f11b52d6..2323af9ab 100644
--- a/3rdparty/ffmpeg/make.bat
+++ b/3rdparty/ffmpeg/make.bat
@@ -1,2 +1,2 @@
 set path=c:\dev\msys32\bin;%path% & gcc -Wall -shared -o opencv_ffmpeg.dll -O2 -x c++ -I../include -I../include/ffmpeg_ -I../../modules/highgui/src ffopencv.c -L../lib -lavformat -lavcodec -lavdevice -lswscale -lavutil -lwsock32
-set path=c:\dev\msys64\bin;%path% & gcc -m64 -Wall -shared -o opencv_ffmpeg_64.dll -O2 -x c++ -I../include -I../include/ffmpeg_ -I../../modules/highgui/src ffopencv.c -L../lib -lavformat64 -lavcodec64 -lavdevice64 -lswscale64 -lavutil64 -lavcore64 -lwsock32 -lws2_32
\ No newline at end of file
+set path=c:\dev\msys64\bin;%path% & gcc -m64 -Wall -shared -o opencv_ffmpeg_64.dll -O2 -x c++ -I../include -I../include/ffmpeg_ -I../../modules/highgui/src ffopencv.c -L../lib -lavformat64 -lavcodec64 -lavdevice64 -lswscale64 -lavutil64 -lavcore64 -lwsock32 -lws2_32
diff --git a/3rdparty/ffmpeg/readme.txt b/3rdparty/ffmpeg/readme.txt
index 1928a5303..e98b28520 100644
--- a/3rdparty/ffmpeg/readme.txt
+++ b/3rdparty/ffmpeg/readme.txt
@@ -40,5 +40,3 @@ How to update opencv_ffmpeg.dll and opencv_ffmpeg_64.dll when a new version of F
 
 8. Then, go to <opencv>\3rdparty\ffmpeg, edit make.bat
    (change paths to the actual paths to your msys32 and msys64 distributions) and then run make.bat
-
-
diff --git a/3rdparty/lib/armeabi-v7a/libnative_camera_r2.2.0.so b/3rdparty/lib/armeabi-v7a/libnative_camera_r2.2.0.so
index f62e5961f..6f28f2c55 100755
Binary files a/3rdparty/lib/armeabi-v7a/libnative_camera_r2.2.0.so and b/3rdparty/lib/armeabi-v7a/libnative_camera_r2.2.0.so differ
diff --git a/3rdparty/lib/armeabi-v7a/libnative_camera_r2.3.3.so b/3rdparty/lib/armeabi-v7a/libnative_camera_r2.3.3.so
index c0237c1d0..010641ed1 100755
Binary files a/3rdparty/lib/armeabi-v7a/libnative_camera_r2.3.3.so and b/3rdparty/lib/armeabi-v7a/libnative_camera_r2.3.3.so differ
diff --git a/3rdparty/lib/armeabi-v7a/libnative_camera_r3.0.1.so b/3rdparty/lib/armeabi-v7a/libnative_camera_r3.0.1.so
index 2c235d824..5a145b25b 100755
Binary files a/3rdparty/lib/armeabi-v7a/libnative_camera_r3.0.1.so and b/3rdparty/lib/armeabi-v7a/libnative_camera_r3.0.1.so differ
diff --git a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.0.so b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.0.so
index 487258889..a524b743f 100755
Binary files a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.0.so and b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.0.so differ
diff --git a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.3.so b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.3.so
index 169d97e77..a1802f1ff 100755
Binary files a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.3.so and b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.0.3.so differ
diff --git a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.1.1.so b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.1.1.so
index bdd09fb06..089c7e9e2 100755
Binary files a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.1.1.so and b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.1.1.so differ
diff --git a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.2.0.so b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.2.0.so
index 74bfdea5e..a9ffa4b0c 100755
Binary files a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.2.0.so and b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.2.0.so differ
diff --git a/3rdparty/lib/armeabi-v7a/libnative_camera_r4.3.0.so b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.3.0.so
new file mode 100755
index 000000000..8ff7177ad
Binary files /dev/null and b/3rdparty/lib/armeabi-v7a/libnative_camera_r4.3.0.so differ
diff --git a/3rdparty/lib/armeabi/libnative_camera_r2.2.0.so b/3rdparty/lib/armeabi/libnative_camera_r2.2.0.so
index 7c061d9ee..b6ce0d5a5 100755
Binary files a/3rdparty/lib/armeabi/libnative_camera_r2.2.0.so and b/3rdparty/lib/armeabi/libnative_camera_r2.2.0.so differ
diff --git a/3rdparty/lib/armeabi/libnative_camera_r2.3.3.so b/3rdparty/lib/armeabi/libnative_camera_r2.3.3.so
index 686bdfb61..635ce681a 100755
Binary files a/3rdparty/lib/armeabi/libnative_camera_r2.3.3.so and b/3rdparty/lib/armeabi/libnative_camera_r2.3.3.so differ
diff --git a/3rdparty/lib/armeabi/libnative_camera_r3.0.1.so b/3rdparty/lib/armeabi/libnative_camera_r3.0.1.so
index 5a5c23173..caacf39d4 100755
Binary files a/3rdparty/lib/armeabi/libnative_camera_r3.0.1.so and b/3rdparty/lib/armeabi/libnative_camera_r3.0.1.so differ
diff --git a/3rdparty/lib/armeabi/libnative_camera_r4.0.0.so b/3rdparty/lib/armeabi/libnative_camera_r4.0.0.so
index 694cfb80d..fff4a8069 100755
Binary files a/3rdparty/lib/armeabi/libnative_camera_r4.0.0.so and b/3rdparty/lib/armeabi/libnative_camera_r4.0.0.so differ
diff --git a/3rdparty/lib/armeabi/libnative_camera_r4.0.3.so b/3rdparty/lib/armeabi/libnative_camera_r4.0.3.so
index c6cc8ab5f..311926555 100755
Binary files a/3rdparty/lib/armeabi/libnative_camera_r4.0.3.so and b/3rdparty/lib/armeabi/libnative_camera_r4.0.3.so differ
diff --git a/3rdparty/lib/armeabi/libnative_camera_r4.1.1.so b/3rdparty/lib/armeabi/libnative_camera_r4.1.1.so
index 94966c82e..7c18baf01 100755
Binary files a/3rdparty/lib/armeabi/libnative_camera_r4.1.1.so and b/3rdparty/lib/armeabi/libnative_camera_r4.1.1.so differ
diff --git a/3rdparty/lib/armeabi/libnative_camera_r4.2.0.so b/3rdparty/lib/armeabi/libnative_camera_r4.2.0.so
index 8251510c4..8bb093a3d 100755
Binary files a/3rdparty/lib/armeabi/libnative_camera_r4.2.0.so and b/3rdparty/lib/armeabi/libnative_camera_r4.2.0.so differ
diff --git a/3rdparty/lib/armeabi/libnative_camera_r4.3.0.so b/3rdparty/lib/armeabi/libnative_camera_r4.3.0.so
new file mode 100755
index 000000000..a05f17917
Binary files /dev/null and b/3rdparty/lib/armeabi/libnative_camera_r4.3.0.so differ
diff --git a/3rdparty/lib/mips/libnative_camera_r4.0.3.so b/3rdparty/lib/mips/libnative_camera_r4.0.3.so
index c8c9e2c57..b9500441a 100755
Binary files a/3rdparty/lib/mips/libnative_camera_r4.0.3.so and b/3rdparty/lib/mips/libnative_camera_r4.0.3.so differ
diff --git a/3rdparty/lib/mips/libnative_camera_r4.1.1.so b/3rdparty/lib/mips/libnative_camera_r4.1.1.so
index 6845d715d..d11dcf036 100755
Binary files a/3rdparty/lib/mips/libnative_camera_r4.1.1.so and b/3rdparty/lib/mips/libnative_camera_r4.1.1.so differ
diff --git a/3rdparty/lib/mips/libnative_camera_r4.2.0.so b/3rdparty/lib/mips/libnative_camera_r4.2.0.so
index b148d1621..b06a6819f 100755
Binary files a/3rdparty/lib/mips/libnative_camera_r4.2.0.so and b/3rdparty/lib/mips/libnative_camera_r4.2.0.so differ
diff --git a/3rdparty/lib/mips/libnative_camera_r4.3.0.so b/3rdparty/lib/mips/libnative_camera_r4.3.0.so
new file mode 100755
index 000000000..844b806b9
Binary files /dev/null and b/3rdparty/lib/mips/libnative_camera_r4.3.0.so differ
diff --git a/3rdparty/lib/x86/libnative_camera_r2.3.3.so b/3rdparty/lib/x86/libnative_camera_r2.3.3.so
index d9400638b..0dd8904ac 100755
Binary files a/3rdparty/lib/x86/libnative_camera_r2.3.3.so and b/3rdparty/lib/x86/libnative_camera_r2.3.3.so differ
diff --git a/3rdparty/lib/x86/libnative_camera_r3.0.1.so b/3rdparty/lib/x86/libnative_camera_r3.0.1.so
index cf2e9908c..105a19d0c 100755
Binary files a/3rdparty/lib/x86/libnative_camera_r3.0.1.so and b/3rdparty/lib/x86/libnative_camera_r3.0.1.so differ
diff --git a/3rdparty/lib/x86/libnative_camera_r4.0.3.so b/3rdparty/lib/x86/libnative_camera_r4.0.3.so
index 420ec818f..b01a4bd28 100755
Binary files a/3rdparty/lib/x86/libnative_camera_r4.0.3.so and b/3rdparty/lib/x86/libnative_camera_r4.0.3.so differ
diff --git a/3rdparty/lib/x86/libnative_camera_r4.1.1.so b/3rdparty/lib/x86/libnative_camera_r4.1.1.so
index 5468d206c..a59ae39b3 100755
Binary files a/3rdparty/lib/x86/libnative_camera_r4.1.1.so and b/3rdparty/lib/x86/libnative_camera_r4.1.1.so differ
diff --git a/3rdparty/lib/x86/libnative_camera_r4.2.0.so b/3rdparty/lib/x86/libnative_camera_r4.2.0.so
index 992331032..b90b82644 100755
Binary files a/3rdparty/lib/x86/libnative_camera_r4.2.0.so and b/3rdparty/lib/x86/libnative_camera_r4.2.0.so differ
diff --git a/3rdparty/lib/x86/libnative_camera_r4.3.0.so b/3rdparty/lib/x86/libnative_camera_r4.3.0.so
new file mode 100755
index 000000000..6607e5da8
Binary files /dev/null and b/3rdparty/lib/x86/libnative_camera_r4.3.0.so differ
diff --git a/3rdparty/libpng/CMakeLists.txt b/3rdparty/libpng/CMakeLists.txt
index d47dd53fc..141c9d151 100644
--- a/3rdparty/libpng/CMakeLists.txt
+++ b/3rdparty/libpng/CMakeLists.txt
@@ -9,7 +9,7 @@ else()
   project(${PNG_LIBRARY})
 endif()
 
-ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" ${ZLIB_INCLUDE_DIR})
+ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" ${ZLIB_INCLUDE_DIRS})
 
 file(GLOB lib_srcs *.c)
 file(GLOB lib_hdrs *.h)
@@ -29,8 +29,12 @@ if(MSVC)
   add_definitions(-D_CRT_SECURE_NO_DEPRECATE)
 endif(MSVC)
 
+if (HAVE_WINRT)
+  add_definitions(-DHAVE_WINRT)
+endif()
+
 add_library(${PNG_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs})
-target_link_libraries(${PNG_LIBRARY} ${ZLIB_LIBRARY})
+target_link_libraries(${PNG_LIBRARY} ${ZLIB_LIBRARIES})
 
 if(UNIX)
   if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
diff --git a/3rdparty/libpng/opencv-libpng.patch b/3rdparty/libpng/opencv-libpng.patch
new file mode 100644
index 000000000..6ca96392a
--- /dev/null
+++ b/3rdparty/libpng/opencv-libpng.patch
@@ -0,0 +1,22 @@
+diff --git a/3rdparty/libpng/pngpriv.h b/3rdparty/libpng/pngpriv.h
+index 07b2b0b..e7824b8 100644
+--- a/3rdparty/libpng/pngpriv.h
++++ b/3rdparty/libpng/pngpriv.h
+@@ -360,7 +360,7 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
+ 
+ /* Memory model/platform independent fns */
+ #ifndef PNG_ABORT
+-#  ifdef _WINDOWS_
++#  if defined(_WINDOWS_) && !defined(HAVE_WINRT)
+ #    define PNG_ABORT() ExitProcess(0)
+ #  else
+ #    define PNG_ABORT() abort()
+@@ -378,7 +378,7 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
+ #  define png_memcpy  _fmemcpy
+ #  define png_memset  _fmemset
+ #else
+-#  ifdef _WINDOWS_  /* Favor Windows over C runtime fns */
++#  if defined(_WINDOWS_) && !defined(HAVE_WINRT)  /* Favor Windows over C runtime fns */
+ #    define CVT_PTR(ptr)         (ptr)
+ #    define CVT_PTR_NOCHECK(ptr) (ptr)
+ #    define png_strlen  lstrlenA
diff --git a/3rdparty/libpng/pngpriv.h b/3rdparty/libpng/pngpriv.h
index 07b2b0b05..e7824b839 100644
--- a/3rdparty/libpng/pngpriv.h
+++ b/3rdparty/libpng/pngpriv.h
@@ -360,7 +360,7 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
 
 /* Memory model/platform independent fns */
 #ifndef PNG_ABORT
-#  ifdef _WINDOWS_
+#  if defined(_WINDOWS_) && !defined(HAVE_WINRT)
 #    define PNG_ABORT() ExitProcess(0)
 #  else
 #    define PNG_ABORT() abort()
@@ -378,7 +378,7 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
 #  define png_memcpy  _fmemcpy
 #  define png_memset  _fmemset
 #else
-#  ifdef _WINDOWS_  /* Favor Windows over C runtime fns */
+#  if defined(_WINDOWS_) && !defined(HAVE_WINRT)  /* Favor Windows over C runtime fns */
 #    define CVT_PTR(ptr)         (ptr)
 #    define CVT_PTR_NOCHECK(ptr) (ptr)
 #    define png_strlen  lstrlenA
diff --git a/3rdparty/libtiff/CMakeLists.txt b/3rdparty/libtiff/CMakeLists.txt
index 46fef61c7..cea2f906f 100644
--- a/3rdparty/libtiff/CMakeLists.txt
+++ b/3rdparty/libtiff/CMakeLists.txt
@@ -17,14 +17,14 @@ check_include_file(string.h HAVE_STRING_H)
 check_include_file(sys/types.h HAVE_SYS_TYPES_H)
 check_include_file(unistd.h HAVE_UNISTD_H)
 
-if(WIN32)
+if(WIN32 AND NOT HAVE_WINRT)
   set(USE_WIN32_FILEIO 1)
 endif()
 
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tif_config.h.cmakein"
                "${CMAKE_CURRENT_BINARY_DIR}/tif_config.h" @ONLY)
 
-ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" ${ZLIB_INCLUDE_DIR})
+ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" ${ZLIB_INCLUDE_DIRS})
 
 set(lib_srcs
     tif_aux.c
@@ -79,14 +79,12 @@ set(lib_srcs
     "${CMAKE_CURRENT_BINARY_DIR}/tif_config.h"
     )
 
-if(UNIX)
+if(WIN32 AND NOT HAVE_WINRT)
+  list(APPEND lib_srcs tif_win32.c)
+else()
   list(APPEND lib_srcs tif_unix.c)
 endif()
-
-if(WIN32)
-  list(APPEND lib_srcs tif_win32.c)
-endif(WIN32)
-
+  
 ocv_warnings_disable(CMAKE_C_FLAGS -Wno-unused-but-set-variable -Wmissing-prototypes -Wmissing-declarations -Wundef -Wunused -Wsign-compare
                                    -Wcast-align -Wshadow -Wno-maybe-uninitialized -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast)
 ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang
diff --git a/3rdparty/libtiff/tif_config.h.cmakein b/3rdparty/libtiff/tif_config.h.cmakein
index 1e6bc0454..182f2833d 100644
--- a/3rdparty/libtiff/tif_config.h.cmakein
+++ b/3rdparty/libtiff/tif_config.h.cmakein
@@ -168,4 +168,3 @@
 
 /* Support Deflate compression */
 #define ZIP_SUPPORT 1
-
diff --git a/3rdparty/libwebp/dec/alpha.c b/3rdparty/libwebp/dec/alpha.c
index 5c9cdd6ae..b5e68919c 100644
--- a/3rdparty/libwebp/dec/alpha.c
+++ b/3rdparty/libwebp/dec/alpha.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Alpha-plane decompression.
@@ -20,20 +22,10 @@
 extern "C" {
 #endif
 
-// TODO(skal): move to dsp/ ?
-static void CopyPlane(const uint8_t* src, int src_stride,
-                      uint8_t* dst, int dst_stride, int width, int height) {
-  while (height-- > 0) {
-    memcpy(dst, src, width);
-    src += src_stride;
-    dst += dst_stride;
-  }
-}
-
 //------------------------------------------------------------------------------
 // Decodes the compressed data 'data' of size 'data_size' into the 'output'.
 // The 'output' buffer should be pre-allocated and must be of the same
-// dimension 'height'x'stride', as that of the image.
+// dimension 'height'x'width', as that of the image.
 //
 // Returns 1 on successfully decoding the compressed alpha and
 //         0 if either:
@@ -41,16 +33,16 @@ static void CopyPlane(const uint8_t* src, int src_stride,
 //           error returned by appropriate compression method.
 
 static int DecodeAlpha(const uint8_t* data, size_t data_size,
-                       int width, int height, int stride, uint8_t* output) {
-  uint8_t* decoded_data = NULL;
-  const size_t decoded_size = height * width;
+                       int width, int height, uint8_t* output) {
   WEBP_FILTER_TYPE filter;
   int pre_processing;
   int rsrv;
   int ok = 0;
   int method;
+  const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
+  const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
 
-  assert(width > 0 && height > 0 && stride >= width);
+  assert(width > 0 && height > 0);
   assert(data != NULL && output != NULL);
 
   if (data_size <= ALPHA_HEADER_LEN) {
@@ -70,15 +62,12 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
   }
 
   if (method == ALPHA_NO_COMPRESSION) {
-    ok = (data_size >= decoded_size);
-    decoded_data = (uint8_t*)data + ALPHA_HEADER_LEN;
+    const size_t alpha_decoded_size = height * width;
+    ok = (alpha_data_size >= alpha_decoded_size);
+    if (ok) memcpy(output, alpha_data, alpha_decoded_size);
   } else {
-    decoded_data = (uint8_t*)malloc(decoded_size);
-    if (decoded_data == NULL) return 0;
-    ok = VP8LDecodeAlphaImageStream(width, height,
-                                    data + ALPHA_HEADER_LEN,
-                                    data_size - ALPHA_HEADER_LEN,
-                                    decoded_data);
+    ok = VP8LDecodeAlphaImageStream(width, height, alpha_data, alpha_data_size,
+                                    output);
   }
 
   if (ok) {
@@ -86,18 +75,13 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
     if (unfilter_func != NULL) {
       // TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
       // and apply filter per image-row.
-      unfilter_func(width, height, width, decoded_data);
+      unfilter_func(width, height, width, output);
     }
-    // Construct raw_data (height x stride) from alpha data (height x width).
-    CopyPlane(decoded_data, width, output, stride, width, height);
     if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
-      ok = DequantizeLevels(decoded_data, width, height);
+      ok = DequantizeLevels(output, width, height);
     }
   }
 
-  if (method != ALPHA_NO_COMPRESSION) {
-    free(decoded_data);
-  }
   return ok;
 }
 
@@ -105,23 +89,25 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
 
 const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
                                       int row, int num_rows) {
-  const int stride = dec->pic_hdr_.width_;
+  const int width = dec->pic_hdr_.width_;
+  const int height = dec->pic_hdr_.height_;
 
-  if (row < 0 || num_rows < 0 || row + num_rows > dec->pic_hdr_.height_) {
+  if (row < 0 || num_rows < 0 || row + num_rows > height) {
     return NULL;    // sanity check.
   }
 
   if (row == 0) {
     // Decode everything during the first call.
+    assert(!dec->is_alpha_decoded_);
     if (!DecodeAlpha(dec->alpha_data_, (size_t)dec->alpha_data_size_,
-                     dec->pic_hdr_.width_, dec->pic_hdr_.height_, stride,
-                     dec->alpha_plane_)) {
+                     width, height, dec->alpha_plane_)) {
       return NULL;  // Error.
     }
+    dec->is_alpha_decoded_ = 1;
   }
 
   // Return a pointer to the current decoded row.
-  return dec->alpha_plane_ + row * stride;
+  return dec->alpha_plane_ + row * width;
 }
 
 #if defined(__cplusplus) || defined(c_plusplus)
diff --git a/3rdparty/libwebp/dec/buffer.c b/3rdparty/libwebp/dec/buffer.c
index c159f6f24..385571523 100644
--- a/3rdparty/libwebp/dec/buffer.c
+++ b/3rdparty/libwebp/dec/buffer.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Everything about WebPDecBuffer
diff --git a/3rdparty/libwebp/dec/decode_vp8.h b/3rdparty/libwebp/dec/decode_vp8.h
index 12c77bcbf..acdb15aaa 100644
--- a/3rdparty/libwebp/dec/decode_vp8.h
+++ b/3rdparty/libwebp/dec/decode_vp8.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  Low-level API for VP8 decoder
diff --git a/3rdparty/libwebp/dec/frame.c b/3rdparty/libwebp/dec/frame.c
index 911c7ffc5..5f6a7d982 100644
--- a/3rdparty/libwebp/dec/frame.c
+++ b/3rdparty/libwebp/dec/frame.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Frame-reconstruction function. Memory allocation.
diff --git a/3rdparty/libwebp/dec/idec.c b/3rdparty/libwebp/dec/idec.c
index 17810c838..5fbf49aaf 100644
--- a/3rdparty/libwebp/dec/idec.c
+++ b/3rdparty/libwebp/dec/idec.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Incremental decoding
@@ -97,6 +99,23 @@ static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
   return (mem->end_ - mem->start_);
 }
 
+// Check if we need to preserve the compressed alpha data, as it may not have
+// been decoded yet.
+static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
+  if (idec->state_ == STATE_PRE_VP8) {
+    // We haven't parsed the headers yet, so we don't know whether the image is
+    // lossy or lossless. This also means that we haven't parsed the ALPH chunk.
+    return 0;
+  }
+  if (idec->is_lossless_) {
+    return 0;  // ALPH chunk is not present for lossless images.
+  } else {
+    const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+    assert(dec != NULL);  // Must be true as idec->state_ != STATE_PRE_VP8.
+    return (dec->alpha_data_ != NULL) && !dec->is_alpha_decoded_;
+  }
+}
+
 static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
   MemBuffer* const mem = &idec->mem_;
   const uint8_t* const new_base = mem->buf_ + mem->start_;
@@ -122,6 +141,7 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
       }
       assert(last_part >= 0);
       dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_;
+      if (NeedCompressedAlpha(idec)) dec->alpha_data_ += offset;
     } else {    // Resize lossless bitreader
       VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
       VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
@@ -133,8 +153,12 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
 // size if required and also updates VP8BitReader's if new memory is allocated.
 static int AppendToMemBuffer(WebPIDecoder* const idec,
                              const uint8_t* const data, size_t data_size) {
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
   MemBuffer* const mem = &idec->mem_;
-  const uint8_t* const old_base = mem->buf_ + mem->start_;
+  const int need_compressed_alpha = NeedCompressedAlpha(idec);
+  const uint8_t* const old_start = mem->buf_ + mem->start_;
+  const uint8_t* const old_base =
+      need_compressed_alpha ? dec->alpha_data_ : old_start;
   assert(mem->mode_ == MEM_MODE_APPEND);
   if (data_size > MAX_CHUNK_PAYLOAD) {
     // security safeguard: trying to allocate more than what the format
@@ -143,7 +167,8 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
   }
 
   if (mem->end_ + data_size > mem->buf_size_) {  // Need some free memory
-    const size_t current_size = MemDataSize(mem);
+    const size_t new_mem_start = old_start - old_base;
+    const size_t current_size = MemDataSize(mem) + new_mem_start;
     const uint64_t new_size = (uint64_t)current_size + data_size;
     const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
     uint8_t* const new_buf =
@@ -153,7 +178,7 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
     free(mem->buf_);
     mem->buf_ = new_buf;
     mem->buf_size_ = (size_t)extra_size;
-    mem->start_ = 0;
+    mem->start_ = new_mem_start;
     mem->end_ = current_size;
   }
 
@@ -161,14 +186,15 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
   mem->end_ += data_size;
   assert(mem->end_ <= mem->buf_size_);
 
-  DoRemap(idec, mem->buf_ + mem->start_ - old_base);
+  DoRemap(idec, mem->buf_ + mem->start_ - old_start);
   return 1;
 }
 
 static int RemapMemBuffer(WebPIDecoder* const idec,
                           const uint8_t* const data, size_t data_size) {
   MemBuffer* const mem = &idec->mem_;
-  const uint8_t* const old_base = mem->buf_ + mem->start_;
+  const uint8_t* const old_buf = mem->buf_;
+  const uint8_t* const old_start = old_buf + mem->start_;
   assert(mem->mode_ == MEM_MODE_MAP);
 
   if (data_size < mem->buf_size_) return 0;  // can't remap to a shorter buffer!
@@ -176,7 +202,7 @@ static int RemapMemBuffer(WebPIDecoder* const idec,
   mem->buf_ = (uint8_t*)data;
   mem->end_ = mem->buf_size_ = data_size;
 
-  DoRemap(idec, mem->buf_ + mem->start_ - old_base);
+  DoRemap(idec, mem->buf_ + mem->start_ - old_start);
   return 1;
 }
 
diff --git a/3rdparty/libwebp/dec/io.c b/3rdparty/libwebp/dec/io.c
index 594804c2e..63810b447 100644
--- a/3rdparty/libwebp/dec/io.c
+++ b/3rdparty/libwebp/dec/io.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // functions for sample output.
diff --git a/3rdparty/libwebp/dec/layer.c b/3rdparty/libwebp/dec/layer.c
index a3a5bdcfe..9a4b2d903 100644
--- a/3rdparty/libwebp/dec/layer.c
+++ b/3rdparty/libwebp/dec/layer.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Enhancement layer (for YUV444/422)
diff --git a/3rdparty/libwebp/dec/quant.c b/3rdparty/libwebp/dec/quant.c
index d54097af0..a4cc693db 100644
--- a/3rdparty/libwebp/dec/quant.c
+++ b/3rdparty/libwebp/dec/quant.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Quantizer initialization
diff --git a/3rdparty/libwebp/dec/tree.c b/3rdparty/libwebp/dec/tree.c
index 82484e4c5..3f02efe43 100644
--- a/3rdparty/libwebp/dec/tree.c
+++ b/3rdparty/libwebp/dec/tree.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Coding trees and probas
diff --git a/3rdparty/libwebp/dec/vp8.c b/3rdparty/libwebp/dec/vp8.c
index 253cb6b62..8632e48e5 100644
--- a/3rdparty/libwebp/dec/vp8.c
+++ b/3rdparty/libwebp/dec/vp8.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // main entry for the decoder
diff --git a/3rdparty/libwebp/dec/vp8i.h b/3rdparty/libwebp/dec/vp8i.h
index 1aa92385a..1d0d4077e 100644
--- a/3rdparty/libwebp/dec/vp8i.h
+++ b/3rdparty/libwebp/dec/vp8i.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // VP8 decoder: internal header.
@@ -28,7 +30,7 @@ extern "C" {
 // version numbers
 #define DEC_MAJ_VERSION 0
 #define DEC_MIN_VERSION 3
-#define DEC_REV_VERSION 0
+#define DEC_REV_VERSION 1
 
 #define ONLY_KEYFRAME_CODE      // to remove any code related to P-Frames
 
@@ -276,6 +278,7 @@ struct VP8Decoder {
   // extensions
   const uint8_t* alpha_data_;   // compressed alpha data (if present)
   size_t alpha_data_size_;
+  int is_alpha_decoded_;  // true if alpha_data_ is decoded in alpha_plane_
   uint8_t* alpha_plane_;        // output. Persistent, contains the whole data.
 
   int layer_colorspace_;
diff --git a/3rdparty/libwebp/dec/vp8l.c b/3rdparty/libwebp/dec/vp8l.c
index 1665fe174..89b5b4bf6 100644
--- a/3rdparty/libwebp/dec/vp8l.c
+++ b/3rdparty/libwebp/dec/vp8l.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // main entry for the decoder
@@ -625,10 +627,24 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
   }
 }
 
+// Special method for paletted alpha data.
+static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows,
+                                        const uint8_t* const rows) {
+  const int start_row = dec->last_row_;
+  const int end_row = start_row + num_rows;
+  const uint8_t* rows_in = rows;
+  uint8_t* rows_out = (uint8_t*)dec->io_->opaque + dec->io_->width * start_row;
+  VP8LTransform* const transform = &dec->transforms_[0];
+  assert(dec->next_transform_ == 1);
+  assert(transform->type_ == COLOR_INDEXING_TRANSFORM);
+  VP8LColorIndexInverseTransformAlpha(transform, start_row, end_row, rows_in,
+                                      rows_out);
+}
+
 // Processes (transforms, scales & color-converts) the rows decoded after the
 // last call.
 static void ProcessRows(VP8LDecoder* const dec, int row) {
-  const uint32_t* const rows = dec->argb_ + dec->width_ * dec->last_row_;
+  const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_;
   const int num_rows = row - dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
@@ -667,121 +683,135 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
   assert(dec->last_row_ <= dec->height_);
 }
 
-static int DecodeImageData(VP8LDecoder* const dec,
-                           uint32_t* const data, int width, int height,
-                           ProcessRowsFunc process_func) {
-  int ok = 1;
-  int col = 0, row = 0;
-  VP8LBitReader* const br = &dec->br_;
-  VP8LMetadata* const hdr = &dec->hdr_;
-  HTreeGroup* htree_group = hdr->htree_groups_;
-  uint32_t* src = data;
-  uint32_t* last_cached = data;
-  uint32_t* const src_end = data + width * height;
-  const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
-  const int color_cache_limit = len_code_limit + hdr->color_cache_size_;
-  VP8LColorCache* const color_cache =
-      (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
-  const int mask = hdr->huffman_mask_;
-
-  assert(htree_group != NULL);
-
-  while (!br->eos_ && src < src_end) {
-    int code;
-    // Only update when changing tile. Note we could use the following test:
-    //   if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
-    // but that's actually slower and requires storing the previous col/row
-    if ((col & mask) == 0) {
-      htree_group = GetHtreeGroupForPos(hdr, col, row);
-    }
-    VP8LFillBitWindow(br);
-    code = ReadSymbol(&htree_group->htrees_[GREEN], br);
-    if (code < NUM_LITERAL_CODES) {   // Literal.
-      int red, green, blue, alpha;
-      red = ReadSymbol(&htree_group->htrees_[RED], br);
-      green = code;
-      VP8LFillBitWindow(br);
-      blue = ReadSymbol(&htree_group->htrees_[BLUE], br);
-      alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br);
-      *src = (alpha << 24) + (red << 16) + (green << 8) + blue;
- AdvanceByOne:
-      ++src;
-      ++col;
-      if (col >= width) {
-        col = 0;
-        ++row;
-        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
-          process_func(dec, row);
-        }
-        if (color_cache != NULL) {
-          while (last_cached < src) {
-            VP8LColorCacheInsert(color_cache, *last_cached++);
-          }
-        }
-      }
-    } else if (code < len_code_limit) {           // Backward reference
-      int dist_code, dist;
-      const int length_sym = code - NUM_LITERAL_CODES;
-      const int length = GetCopyLength(length_sym, br);
-      const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);
-      VP8LFillBitWindow(br);
-      dist_code = GetCopyDistance(dist_symbol, br);
-      dist = PlaneCodeToDistance(width, dist_code);
-      if (src - data < dist || src_end - src < length) {
-        ok = 0;
-        goto End;
-      }
-      {
-        int i;
-        for (i = 0; i < length; ++i) src[i] = src[i - dist];
-        src += length;
-      }
-      col += length;
-      while (col >= width) {
-        col -= width;
-        ++row;
-        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
-          process_func(dec, row);
-        }
-      }
-      if (src < src_end) {
-        htree_group = GetHtreeGroupForPos(hdr, col, row);
-        if (color_cache != NULL) {
-          while (last_cached < src) {
-            VP8LColorCacheInsert(color_cache, *last_cached++);
-          }
-        }
-      }
-    } else if (code < color_cache_limit) {    // Color cache.
-      const int key = code - len_code_limit;
-      assert(color_cache != NULL);
-      while (last_cached < src) {
-        VP8LColorCacheInsert(color_cache, *last_cached++);
-      }
-      *src = VP8LColorCacheLookup(color_cache, key);
-      goto AdvanceByOne;
-    } else {    // Not reached.
-      ok = 0;
-      goto End;
-    }
-    ok = !br->error_;
-    if (!ok) goto End;
-  }
-  // Process the remaining rows corresponding to last row-block.
-  if (process_func != NULL) process_func(dec, row);
-
- End:
-  if (br->error_ || !ok || (br->eos_ && src < src_end)) {
-    ok = 0;
-    dec->status_ = (!br->eos_) ?
-        VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED;
-  } else if (src == src_end) {
-    dec->state_ = READ_DATA;
-  }
-
-  return ok;
+#define DECODE_DATA_FUNC(FUNC_NAME, TYPE, STORE_PIXEL)                         \
+static int FUNC_NAME(VP8LDecoder* const dec, TYPE* const data, int width,      \
+                     int height, ProcessRowsFunc process_func) {               \
+  int ok = 1;                                                                  \
+  int col = 0, row = 0;                                                        \
+  VP8LBitReader* const br = &dec->br_;                                         \
+  VP8LMetadata* const hdr = &dec->hdr_;                                        \
+  HTreeGroup* htree_group = hdr->htree_groups_;                                \
+  TYPE* src = data;                                                            \
+  TYPE* last_cached = data;                                                    \
+  TYPE* const src_end = data + width * height;                                 \
+  const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;             \
+  const int color_cache_limit = len_code_limit + hdr->color_cache_size_;       \
+  VP8LColorCache* const color_cache =                                          \
+      (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;                \
+  const int mask = hdr->huffman_mask_;                                         \
+  assert(htree_group != NULL);                                                 \
+  while (!br->eos_ && src < src_end) {                                         \
+    int code;                                                                  \
+    /* Only update when changing tile. Note we could use this test:        */  \
+    /* if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed */  \
+    /* but that's actually slower and needs storing the previous col/row.  */  \
+    if ((col & mask) == 0) {                                                   \
+      htree_group = GetHtreeGroupForPos(hdr, col, row);                        \
+    }                                                                          \
+    VP8LFillBitWindow(br);                                                     \
+    code = ReadSymbol(&htree_group->htrees_[GREEN], br);                       \
+    if (code < NUM_LITERAL_CODES) {  /* Literal*/                              \
+      int red, green, blue, alpha;                                             \
+      red = ReadSymbol(&htree_group->htrees_[RED], br);                        \
+      green = code;                                                            \
+      VP8LFillBitWindow(br);                                                   \
+      blue = ReadSymbol(&htree_group->htrees_[BLUE], br);                      \
+      alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br);                    \
+      *src = STORE_PIXEL(alpha, red, green, blue);                             \
+    AdvanceByOne:                                                              \
+      ++src;                                                                   \
+      ++col;                                                                   \
+      if (col >= width) {                                                      \
+        col = 0;                                                               \
+        ++row;                                                                 \
+        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {      \
+          process_func(dec, row);                                              \
+        }                                                                      \
+        if (color_cache != NULL) {                                             \
+          while (last_cached < src) {                                          \
+            VP8LColorCacheInsert(color_cache, *last_cached++);                 \
+          }                                                                    \
+        }                                                                      \
+      }                                                                        \
+    } else if (code < len_code_limit) {  /* Backward reference */              \
+      int dist_code, dist;                                                     \
+      const int length_sym = code - NUM_LITERAL_CODES;                         \
+      const int length = GetCopyLength(length_sym, br);                        \
+      const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);     \
+      VP8LFillBitWindow(br);                                                   \
+      dist_code = GetCopyDistance(dist_symbol, br);                            \
+      dist = PlaneCodeToDistance(width, dist_code);                            \
+      if (src - data < dist || src_end - src < length) {                       \
+        ok = 0;                                                                \
+        goto End;                                                              \
+      }                                                                        \
+      {                                                                        \
+        int i;                                                                 \
+        for (i = 0; i < length; ++i) src[i] = src[i - dist];                   \
+        src += length;                                                         \
+      }                                                                        \
+      col += length;                                                           \
+      while (col >= width) {                                                   \
+        col -= width;                                                          \
+        ++row;                                                                 \
+        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {      \
+          process_func(dec, row);                                              \
+        }                                                                      \
+      }                                                                        \
+      if (src < src_end) {                                                     \
+        htree_group = GetHtreeGroupForPos(hdr, col, row);                      \
+        if (color_cache != NULL) {                                             \
+          while (last_cached < src) {                                          \
+            VP8LColorCacheInsert(color_cache, *last_cached++);                 \
+          }                                                                    \
+        }                                                                      \
+      }                                                                        \
+    } else if (code < color_cache_limit) {  /* Color cache */                  \
+      const int key = code - len_code_limit;                                   \
+      assert(color_cache != NULL);                                             \
+      while (last_cached < src) {                                              \
+        VP8LColorCacheInsert(color_cache, *last_cached++);                     \
+      }                                                                        \
+      *src = VP8LColorCacheLookup(color_cache, key);                           \
+      goto AdvanceByOne;                                                       \
+    } else {  /* Not reached */                                                \
+      ok = 0;                                                                  \
+      goto End;                                                                \
+    }                                                                          \
+    ok = !br->error_;                                                          \
+    if (!ok) goto End;                                                         \
+  }                                                                            \
+  /* Process the remaining rows corresponding to last row-block. */            \
+  if (process_func != NULL) process_func(dec, row);                            \
+End:                                                                           \
+  if (br->error_ || !ok || (br->eos_ && src < src_end)) {                      \
+    ok = 0;                                                                    \
+    dec->status_ =                                                             \
+        (!br->eos_) ? VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED;       \
+  } else if (src == src_end) {                                                 \
+    dec->state_ = READ_DATA;                                                   \
+  }                                                                            \
+  return ok;                                                                   \
 }
 
+static WEBP_INLINE uint32_t GetARGBPixel(int alpha, int red, int green,
+                                         int blue) {
+  return (alpha << 24) | (red << 16) | (green << 8) | blue;
+}
+
+static WEBP_INLINE uint8_t GetAlphaPixel(int alpha, int red, int green,
+                                         int blue) {
+  (void)alpha;
+  (void)red;
+  (void)blue;
+  return green;  // Alpha value is stored in green channel.
+}
+
+DECODE_DATA_FUNC(DecodeImageData, uint32_t, GetARGBPixel)
+DECODE_DATA_FUNC(DecodeAlphaData, uint8_t, GetAlphaPixel)
+
+#undef DECODE_DATA_FUNC
+
 // -----------------------------------------------------------------------------
 // VP8LTransform
 
@@ -903,8 +933,8 @@ void VP8LClear(VP8LDecoder* const dec) {
   if (dec == NULL) return;
   ClearMetadata(&dec->hdr_);
 
-  free(dec->argb_);
-  dec->argb_ = NULL;
+  free(dec->pixels_);
+  dec->pixels_ = NULL;
   for (i = 0; i < dec->next_transform_; ++i) {
     ClearTransform(&dec->transforms_[i]);
   }
@@ -1028,35 +1058,39 @@ static int DecodeImageStream(int xsize, int ysize,
 }
 
 //------------------------------------------------------------------------------
-// Allocate dec->argb_ and dec->argb_cache_ using dec->width_ and dec->height_
-
-static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) {
+// Allocate internal buffers dec->pixels_ and dec->argb_cache_.
+static int AllocateInternalBuffers(VP8LDecoder* const dec, int final_width,
+                                   size_t bytes_per_pixel) {
+  const int argb_cache_needed = (bytes_per_pixel == sizeof(uint32_t));
   const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_;
   // Scratch buffer corresponding to top-prediction row for transforming the
-  // first row in the row-blocks.
-  const uint64_t cache_top_pixels = final_width;
-  // Scratch buffer for temporary BGRA storage.
-  const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
+  // first row in the row-blocks. Not needed for paletted alpha.
+  const uint64_t cache_top_pixels =
+      argb_cache_needed ? (uint16_t)final_width : 0ULL;
+  // Scratch buffer for temporary BGRA storage. Not needed for paletted alpha.
+  const uint64_t cache_pixels =
+      argb_cache_needed ? (uint64_t)final_width * NUM_ARGB_CACHE_ROWS : 0ULL;
   const uint64_t total_num_pixels =
       num_pixels + cache_top_pixels + cache_pixels;
 
   assert(dec->width_ <= final_width);
-  dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_));
-  if (dec->argb_ == NULL) {
+  dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, bytes_per_pixel);
+  if (dec->pixels_ == NULL) {
     dec->argb_cache_ = NULL;    // for sanity check
     dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
     return 0;
   }
-  dec->argb_cache_ = dec->argb_ + num_pixels + cache_top_pixels;
+  dec->argb_cache_ =
+      argb_cache_needed ? dec->pixels_ + num_pixels + cache_top_pixels : NULL;
   return 1;
 }
 
 //------------------------------------------------------------------------------
-// Special row-processing that only stores the alpha data.
 
+// Special row-processing that only stores the alpha data.
 static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
   const int num_rows = row - dec->last_row_;
-  const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_;
+  const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
   ApplyInverseTransforms(dec, num_rows, in);
@@ -1070,7 +1104,17 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
     int i;
     for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
   }
+  dec->last_row_ = dec->last_out_row_ = row;
+}
 
+// Row-processing for the special case when alpha data contains only one
+// transform: color indexing.
+static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
+  const int num_rows = row - dec->last_row_;
+  const uint8_t* const in =
+      (uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_;
+  if (num_rows <= 0) return;  // Nothing to be done.
+  ApplyInverseTransformsAlpha(dec, num_rows, in);
   dec->last_row_ = dec->last_out_row_ = row;
 }
 
@@ -1079,6 +1123,7 @@ int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
   VP8Io io;
   int ok = 0;
   VP8LDecoder* const dec = VP8LNew();
+  size_t bytes_per_pixel = sizeof(uint32_t);  // Default: BGRA mode.
   if (dec == NULL) return 0;
 
   dec->width_ = width;
@@ -1097,13 +1142,25 @@ int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
   dec->action_ = READ_HDR;
   if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err;
 
-  // Allocate output (note that dec->width_ may have changed here).
-  if (!AllocateARGBBuffers(dec, width)) goto Err;
+  // Special case: if alpha data uses only the color indexing transform and
+  // doesn't use color cache (a frequent case), we will use DecodeAlphaData()
+  // method that only needs allocation of 1 byte per pixel (alpha channel).
+  if (dec->next_transform_ == 1 &&
+      dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM &&
+      dec->hdr_.color_cache_size_ == 0) {
+    bytes_per_pixel = sizeof(uint8_t);
+  }
+
+  // Allocate internal buffers (note that dec->width_ may have changed here).
+  if (!AllocateInternalBuffers(dec, width, bytes_per_pixel)) goto Err;
 
   // Decode (with special row processing).
   dec->action_ = READ_DATA;
-  ok = DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
-                       ExtractAlphaRows);
+  ok = (bytes_per_pixel == sizeof(uint8_t)) ?
+      DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
+                      ExtractPalettedAlphaRows) :
+      DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
+                      ExtractAlphaRows);
 
  Err:
   VP8LDelete(dec);
@@ -1143,6 +1200,7 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
 }
 
 int VP8LDecodeImage(VP8LDecoder* const dec) {
+  const size_t bytes_per_pixel = sizeof(uint32_t);
   VP8Io* io = NULL;
   WebPDecParams* params = NULL;
 
@@ -1162,13 +1220,13 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
     goto Err;
   }
 
-  if (!AllocateARGBBuffers(dec, io->width)) goto Err;
+  if (!AllocateInternalBuffers(dec, io->width, bytes_per_pixel)) goto Err;
 
   if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
 
   // Decode.
   dec->action_ = READ_DATA;
-  if (!DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
+  if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
                        ProcessRows)) {
     goto Err;
   }
diff --git a/3rdparty/libwebp/dec/vp8li.h b/3rdparty/libwebp/dec/vp8li.h
index ee29eb5fa..543a76759 100644
--- a/3rdparty/libwebp/dec/vp8li.h
+++ b/3rdparty/libwebp/dec/vp8li.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Lossless decoder: internal header.
@@ -63,7 +65,8 @@ typedef struct {
 
   const WebPDecBuffer *output_;    // shortcut to io->opaque->output
 
-  uint32_t        *argb_;          // Internal data: always in BGRA color mode.
+  uint32_t        *pixels_;        // Internal data: either uint8_t* for alpha
+                                   // or uint32_t* for BGRA.
   uint32_t        *argb_cache_;    // Scratch buffer for temporary BGRA storage.
 
   VP8LBitReader    br_;
diff --git a/3rdparty/libwebp/dec/webp.c b/3rdparty/libwebp/dec/webp.c
index 39d901888..97e79b64d 100644
--- a/3rdparty/libwebp/dec/webp.c
+++ b/3rdparty/libwebp/dec/webp.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Main decoding functions for WEBP images.
@@ -192,6 +194,15 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
       return VP8_STATUS_BITSTREAM_ERROR;          // Not a valid chunk size.
     }
 
+    // Start of a (possibly incomplete) VP8/VP8L chunk implies that we have
+    // parsed all the optional chunks.
+    // Note: This check must occur before the check 'buf_size < disk_chunk_size'
+    // below to allow incomplete VP8/VP8L chunks.
+    if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
+        !memcmp(buf, "VP8L", TAG_SIZE)) {
+      return VP8_STATUS_OK;
+    }
+
     if (buf_size < disk_chunk_size) {             // Insufficient data.
       return VP8_STATUS_NOT_ENOUGH_DATA;
     }
@@ -199,9 +210,6 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
     if (!memcmp(buf, "ALPH", TAG_SIZE)) {         // A valid ALPH header.
       *alpha_data = buf + CHUNK_HEADER_SIZE;
       *alpha_size = chunk_size;
-    } else if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
-               !memcmp(buf, "VP8L", TAG_SIZE)) {  // A valid VP8/VP8L header.
-      return VP8_STATUS_OK;  // Found.
     }
 
     // We have a full and valid chunk; skip it.
diff --git a/3rdparty/libwebp/dec/webpi.h b/3rdparty/libwebp/dec/webpi.h
index 9349fcc7d..4ae0bfc5a 100644
--- a/3rdparty/libwebp/dec/webpi.h
+++ b/3rdparty/libwebp/dec/webpi.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Internal header: WebP decoding parameters and custom IO on buffer
diff --git a/3rdparty/libwebp/demux/demux.c b/3rdparty/libwebp/demux/demux.c
index 5d4dcf545..bd17ff7f6 100644
--- a/3rdparty/libwebp/demux/demux.c
+++ b/3rdparty/libwebp/demux/demux.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  WebP container demux.
@@ -27,7 +29,7 @@ extern "C" {
 
 #define DMUX_MAJ_VERSION 0
 #define DMUX_MIN_VERSION 1
-#define DMUX_REV_VERSION 0
+#define DMUX_REV_VERSION 1
 
 typedef struct {
   size_t start_;        // start location of the data
@@ -317,6 +319,7 @@ static ParseStatus ParseAnimationFrame(
   frame->duration_       = ReadLE24s(mem);
   frame->dispose_method_ = (WebPMuxAnimDispose)(ReadByte(mem) & 1);
   if (frame->width_ * (uint64_t)frame->height_ >= MAX_IMAGE_AREA) {
+    free(frame);
     return PARSE_ERROR;
   }
 
diff --git a/3rdparty/libwebp/dsp/cpu.c b/3rdparty/libwebp/dsp/cpu.c
index 022873445..179901e10 100644
--- a/3rdparty/libwebp/dsp/cpu.c
+++ b/3rdparty/libwebp/dsp/cpu.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // CPU detection
diff --git a/3rdparty/libwebp/dsp/dec.c b/3rdparty/libwebp/dsp/dec.c
index 758c6a572..2fbd6b1ac 100644
--- a/3rdparty/libwebp/dsp/dec.c
+++ b/3rdparty/libwebp/dsp/dec.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Speed-critical decoding functions.
diff --git a/3rdparty/libwebp/dsp/dec_neon.c b/3rdparty/libwebp/dsp/dec_neon.c
index 5aff0d3fc..5dcd3b7e5 100644
--- a/3rdparty/libwebp/dsp/dec_neon.c
+++ b/3rdparty/libwebp/dsp/dec_neon.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // ARM NEON version of dsp functions and loop filtering.
diff --git a/3rdparty/libwebp/dsp/dec_sse2.c b/3rdparty/libwebp/dsp/dec_sse2.c
index 1cac1b843..6be94678e 100644
--- a/3rdparty/libwebp/dsp/dec_sse2.c
+++ b/3rdparty/libwebp/dsp/dec_sse2.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // SSE2 version of some decoding functions (idct, loop filtering).
diff --git a/3rdparty/libwebp/dsp/dsp.h b/3rdparty/libwebp/dsp/dsp.h
index 933df60b4..01a95891f 100644
--- a/3rdparty/libwebp/dsp/dsp.h
+++ b/3rdparty/libwebp/dsp/dsp.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   Speed-critical functions.
diff --git a/3rdparty/libwebp/dsp/enc.c b/3rdparty/libwebp/dsp/enc.c
index a6f05a5bb..552807ad9 100644
--- a/3rdparty/libwebp/dsp/enc.c
+++ b/3rdparty/libwebp/dsp/enc.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Speed-critical encoding functions.
@@ -142,9 +144,9 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
     const int a1 = (d1 + d2);
     const int a2 = (d1 - d2);
     const int a3 = (d0 - d3);
-    tmp[0 + i * 4] = (a0 + a1) << 3;  // 14b                      [-8160,8160]
+    tmp[0 + i * 4] = (a0 + a1) * 8;   // 14b                      [-8160,8160]
     tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9;      // [-7536,7542]
-    tmp[2 + i * 4] = (a0 - a1) << 3;
+    tmp[2 + i * 4] = (a0 - a1) * 8;
     tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  937) >> 9;
   }
   for (i = 0; i < 4; ++i) {
@@ -187,31 +189,32 @@ static void ITransformWHT(const int16_t* in, int16_t* out) {
 }
 
 static void FTransformWHT(const int16_t* in, int16_t* out) {
-  int tmp[16];
+  // input is 12b signed
+  int16_t tmp[16];
   int i;
   for (i = 0; i < 4; ++i, in += 64) {
-    const int a0 = (in[0 * 16] + in[2 * 16]) << 2;
-    const int a1 = (in[1 * 16] + in[3 * 16]) << 2;
-    const int a2 = (in[1 * 16] - in[3 * 16]) << 2;
-    const int a3 = (in[0 * 16] - in[2 * 16]) << 2;
-    tmp[0 + i * 4] = (a0 + a1) + (a0 != 0);
+    const int a0 = (in[0 * 16] + in[2 * 16]);  // 13b
+    const int a1 = (in[1 * 16] + in[3 * 16]);
+    const int a2 = (in[1 * 16] - in[3 * 16]);
+    const int a3 = (in[0 * 16] - in[2 * 16]);
+    tmp[0 + i * 4] = a0 + a1;   // 14b
     tmp[1 + i * 4] = a3 + a2;
     tmp[2 + i * 4] = a3 - a2;
     tmp[3 + i * 4] = a0 - a1;
   }
   for (i = 0; i < 4; ++i) {
-    const int a0 = (tmp[0 + i] + tmp[8 + i]);
+    const int a0 = (tmp[0 + i] + tmp[8 + i]);  // 15b
     const int a1 = (tmp[4 + i] + tmp[12+ i]);
     const int a2 = (tmp[4 + i] - tmp[12+ i]);
     const int a3 = (tmp[0 + i] - tmp[8 + i]);
-    const int b0 = a0 + a1;
+    const int b0 = a0 + a1;    // 16b
     const int b1 = a3 + a2;
     const int b2 = a3 - a2;
     const int b3 = a0 - a1;
-    out[ 0 + i] = (b0 + (b0 > 0) + 3) >> 3;
-    out[ 4 + i] = (b1 + (b1 > 0) + 3) >> 3;
-    out[ 8 + i] = (b2 + (b2 > 0) + 3) >> 3;
-    out[12 + i] = (b3 + (b3 > 0) + 3) >> 3;
+    out[ 0 + i] = b0 >> 1;     // 15b
+    out[ 4 + i] = b1 >> 1;
+    out[ 8 + i] = b2 >> 1;
+    out[12 + i] = b3 >> 1;
   }
 }
 
diff --git a/3rdparty/libwebp/dsp/enc_neon.c b/3rdparty/libwebp/dsp/enc_neon.c
index 06b6b09cf..eb256e681 100644
--- a/3rdparty/libwebp/dsp/enc_neon.c
+++ b/3rdparty/libwebp/dsp/enc_neon.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // ARM NEON version of speed-critical encoding functions.
@@ -322,7 +324,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref,
     "vmlal.s16       q11, d6, d17             \n" // c1*2217 + d1*5352 + 12000
     "vmlsl.s16       q12, d6, d16             \n" // d1*2217 - c1*5352 + 51000
 
-    "vmvn.s16        d4, d4                   \n"
+    "vmvn            d4, d4                   \n" // !(d1 == 0)
     // op[4] = (c1*2217 + d1*5352 + 12000)>>16
     "vshrn.s32       d1, q11, #16             \n"
     // op[4] += (d1!=0)
@@ -363,19 +365,12 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
     "vld1.16         d2[3], [%[in]], %[kStep]   \n"
     "vld1.16         d3[3], [%[in]], %[kStep]   \n"
 
-    "vaddl.s16       q2, d0, d2                 \n"
-    "vshl.s32        q2, q2, #2                 \n" // a0=(in[0*16]+in[2*16])<<2
-    "vaddl.s16       q3, d1, d3                 \n"
-    "vshl.s32        q3, q3, #2                 \n" // a1=(in[1*16]+in[3*16])<<2
-    "vsubl.s16       q4, d1, d3                 \n"
-    "vshl.s32        q4, q4, #2                 \n" // a2=(in[1*16]-in[3*16])<<2
-    "vsubl.s16       q5, d0, d2                 \n"
-    "vshl.s32        q5, q5, #2                 \n" // a3=(in[0*16]-in[2*16])<<2
+    "vaddl.s16       q2, d0, d2                 \n" // a0=(in[0*16]+in[2*16])
+    "vaddl.s16       q3, d1, d3                 \n" // a1=(in[1*16]+in[3*16])
+    "vsubl.s16       q4, d1, d3                 \n" // a2=(in[1*16]-in[3*16])
+    "vsubl.s16       q5, d0, d2                 \n" // a3=(in[0*16]-in[2*16])
 
-    "vceq.s32        q10, q2, #0                \n"
-    "vmvn.s32        q10, q10                   \n" // (a0 != 0)
-    "vqadd.s32       q6, q2, q3                 \n" // (a0 + a1)
-    "vqsub.s32       q6, q6, q10                \n" // (a0 + a1) + (a0 != 0)
+    "vqadd.s32       q6, q2, q3                 \n" // a0 + a1
     "vqadd.s32       q7, q5, q4                 \n" // a3 + a2
     "vqsub.s32       q8, q5, q4                 \n" // a3 - a2
     "vqsub.s32       q9, q2, q3                 \n" // a0 - a1
@@ -398,27 +393,10 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
     "vqsub.s32       q6, q3, q2                 \n" // b2 = a3 - a2
     "vqsub.s32       q7, q0, q1                 \n" // b3 = a0 - a1
 
-    "vmov.s32         q0, #3                    \n" // q0 = 3
-
-    "vcgt.s32        q1, q4, #0                 \n" // (b0>0)
-    "vqsub.s32       q2, q4, q1                 \n" // (b0+(b0>0))
-    "vqadd.s32       q3, q2, q0                 \n" // (b0+(b0>0)+3)
-    "vshrn.s32       d18, q3, #3                \n" // (b0+(b0>0)+3) >> 3
-
-    "vcgt.s32        q1, q5, #0                 \n" // (b1>0)
-    "vqsub.s32       q2, q5, q1                 \n" // (b1+(b1>0))
-    "vqadd.s32       q3, q2, q0                 \n" // (b1+(b1>0)+3)
-    "vshrn.s32       d19, q3, #3                \n" // (b1+(b1>0)+3) >> 3
-
-    "vcgt.s32        q1, q6, #0                 \n" // (b2>0)
-    "vqsub.s32       q2, q6, q1                 \n" // (b2+(b2>0))
-    "vqadd.s32       q3, q2, q0                 \n" // (b2+(b2>0)+3)
-    "vshrn.s32       d20, q3, #3                \n" // (b2+(b2>0)+3) >> 3
-
-    "vcgt.s32        q1, q7, #0                 \n" // (b3>0)
-    "vqsub.s32       q2, q7, q1                 \n" // (b3+(b3>0))
-    "vqadd.s32       q3, q2, q0                 \n" // (b3+(b3>0)+3)
-    "vshrn.s32       d21, q3, #3                \n" // (b3+(b3>0)+3) >> 3
+    "vshrn.s32       d18, q4, #1                \n" // b0 >> 1
+    "vshrn.s32       d19, q5, #1                \n" // b1 >> 1
+    "vshrn.s32       d20, q6, #1                \n" // b2 >> 1
+    "vshrn.s32       d21, q7, #1                \n" // b3 >> 1
 
     "vst1.16         {q9, q10}, [%[out]]        \n"
 
diff --git a/3rdparty/libwebp/dsp/enc_sse2.c b/3rdparty/libwebp/dsp/enc_sse2.c
index 619e6c5ce..032e99076 100644
--- a/3rdparty/libwebp/dsp/enc_sse2.c
+++ b/3rdparty/libwebp/dsp/enc_sse2.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // SSE2 version of speed-critical encoding functions.
@@ -453,6 +455,39 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
   }
 }
 
+static void FTransformWHTSSE2(const int16_t* in, int16_t* out) {
+  int16_t tmp[16];
+  int i;
+  for (i = 0; i < 4; ++i, in += 64) {
+    const int a0 = (in[0 * 16] + in[2 * 16]);
+    const int a1 = (in[1 * 16] + in[3 * 16]);
+    const int a2 = (in[1 * 16] - in[3 * 16]);
+    const int a3 = (in[0 * 16] - in[2 * 16]);
+    tmp[0 + i * 4] = a0 + a1;
+    tmp[1 + i * 4] = a3 + a2;
+    tmp[2 + i * 4] = a3 - a2;
+    tmp[3 + i * 4] = a0 - a1;
+  }
+  {
+    const __m128i src0 = _mm_loadl_epi64((__m128i*)&tmp[0]);
+    const __m128i src1 = _mm_loadl_epi64((__m128i*)&tmp[4]);
+    const __m128i src2 = _mm_loadl_epi64((__m128i*)&tmp[8]);
+    const __m128i src3 = _mm_loadl_epi64((__m128i*)&tmp[12]);
+    const __m128i a0 = _mm_add_epi16(src0, src2);
+    const __m128i a1 = _mm_add_epi16(src1, src3);
+    const __m128i a2 = _mm_sub_epi16(src1, src3);
+    const __m128i a3 = _mm_sub_epi16(src0, src2);
+    const __m128i b0 = _mm_srai_epi16(_mm_adds_epi16(a0, a1), 1);
+    const __m128i b1 = _mm_srai_epi16(_mm_adds_epi16(a3, a2), 1);
+    const __m128i b2 = _mm_srai_epi16(_mm_subs_epi16(a3, a2), 1);
+    const __m128i b3 = _mm_srai_epi16(_mm_subs_epi16(a0, a1), 1);
+    _mm_storel_epi64((__m128i*)&out[ 0], b0);
+    _mm_storel_epi64((__m128i*)&out[ 4], b1);
+    _mm_storel_epi64((__m128i*)&out[ 8], b2);
+    _mm_storel_epi64((__m128i*)&out[12], b3);
+  }
+}
+
 //------------------------------------------------------------------------------
 // Metric
 
@@ -919,6 +954,7 @@ void VP8EncDspInitSSE2(void) {
   VP8EncQuantizeBlock = QuantizeBlockSSE2;
   VP8ITransform = ITransformSSE2;
   VP8FTransform = FTransformSSE2;
+  VP8FTransformWHT = FTransformWHTSSE2;
   VP8SSE16x16 = SSE16x16SSE2;
   VP8SSE16x8 = SSE16x8SSE2;
   VP8SSE8x8 = SSE8x8SSE2;
diff --git a/3rdparty/libwebp/dsp/lossless.c b/3rdparty/libwebp/dsp/lossless.c
index 080b3e632..e445924ed 100644
--- a/3rdparty/libwebp/dsp/lossless.c
+++ b/3rdparty/libwebp/dsp/lossless.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Image transforms and color space conversion methods for lossless decoder.
@@ -1093,39 +1095,64 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
 }
 
 // Separate out pixels packed together using pixel-bundling.
-static void ColorIndexInverseTransform(
-    const VP8LTransform* const transform,
-    int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
-  int y;
-  const int bits_per_pixel = 8 >> transform->bits_;
-  const int width = transform->xsize_;
-  const uint32_t* const color_map = transform->data_;
-  if (bits_per_pixel < 8) {
-    const int pixels_per_byte = 1 << transform->bits_;
-    const int count_mask = pixels_per_byte - 1;
-    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
-    for (y = y_start; y < y_end; ++y) {
-      uint32_t packed_pixels = 0;
-      int x;
-      for (x = 0; x < width; ++x) {
-        // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
-        // increments of x. Fortunately, pixels_per_byte is a power of 2, so
-        // can just use a mask for that, instead of decrementing a counter.
-        if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
-        *dst++ = color_map[packed_pixels & bit_mask];
-        packed_pixels >>= bits_per_pixel;
-      }
-    }
-  } else {
-    for (y = y_start; y < y_end; ++y) {
-      int x;
-      for (x = 0; x < width; ++x) {
-        *dst++ = color_map[((*src++) >> 8) & 0xff];
-      }
-    }
-  }
+// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
+#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)             \
+void FUNC_NAME(const VP8LTransform* const transform,                           \
+               int y_start, int y_end, const TYPE* src, TYPE* dst) {           \
+  int y;                                                                       \
+  const int bits_per_pixel = 8 >> transform->bits_;                            \
+  const int width = transform->xsize_;                                         \
+  const uint32_t* const color_map = transform->data_;                          \
+  if (bits_per_pixel < 8) {                                                    \
+    const int pixels_per_byte = 1 << transform->bits_;                         \
+    const int count_mask = pixels_per_byte - 1;                                \
+    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
+    for (y = y_start; y < y_end; ++y) {                                        \
+      uint32_t packed_pixels = 0;                                              \
+      int x;                                                                   \
+      for (x = 0; x < width; ++x) {                                            \
+        /* We need to load fresh 'packed_pixels' once every                */  \
+        /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
+        /* is a power of 2, so can just use a mask for that, instead of    */  \
+        /* decrementing a counter.                                         */  \
+        if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
+        *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
+        packed_pixels >>= bits_per_pixel;                                      \
+      }                                                                        \
+    }                                                                          \
+  } else {                                                                     \
+    for (y = y_start; y < y_end; ++y) {                                        \
+      int x;                                                                   \
+      for (x = 0; x < width; ++x) {                                            \
+        *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                      \
+      }                                                                        \
+    }                                                                          \
+  }                                                                            \
 }
 
+static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
+  return (idx >> 8) & 0xff;
+}
+
+static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
+  return idx;
+}
+
+static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
+  return val;
+}
+
+static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
+  return (val >> 8) & 0xff;
+}
+
+static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
+                           GetARGBValue)
+COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
+                    GetAlphaValue)
+
+#undef COLOR_INDEX_INVERSE
+
 void VP8LInverseTransform(const VP8LTransform* const transform,
                           int row_start, int row_end,
                           const uint32_t* const in, uint32_t* const out) {
@@ -1254,11 +1281,12 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
     while (src < src_end) {
       uint32_t argb = *src++;
 
+#if !defined(__BIG_ENDIAN__)
 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
-#if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
+#if defined(__i386__) || defined(__x86_64__)
       __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
       *(uint32_t*)dst = argb;
-#elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
+#elif defined(_MSC_VER)
       argb = _byteswap_ulong(argb);
       *(uint32_t*)dst = argb;
 #else
@@ -1267,11 +1295,17 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
       dst[2] = (argb >>  8) & 0xff;
       dst[3] = (argb >>  0) & 0xff;
 #endif
-#else   // WEBP_REFERENCE_IMPLEMENTATION
+#else  // WEBP_REFERENCE_IMPLEMENTATION
       dst[0] = (argb >> 24) & 0xff;
       dst[1] = (argb >> 16) & 0xff;
       dst[2] = (argb >>  8) & 0xff;
       dst[3] = (argb >>  0) & 0xff;
+#endif
+#else  // __BIG_ENDIAN__
+      dst[0] = (argb >>  0) & 0xff;
+      dst[1] = (argb >>  8) & 0xff;
+      dst[2] = (argb >> 16) & 0xff;
+      dst[3] = (argb >> 24) & 0xff;
 #endif
       dst += sizeof(argb);
     }
@@ -1325,6 +1359,27 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
   }
 }
 
+// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
+void VP8LBundleColorMap(const uint8_t* const row, int width,
+                        int xbits, uint32_t* const dst) {
+  int x;
+  if (xbits > 0) {
+    const int bit_depth = 1 << (3 - xbits);
+    const int mask = (1 << xbits) - 1;
+    uint32_t code = 0xff000000;
+    for (x = 0; x < width; ++x) {
+      const int xsub = x & mask;
+      if (xsub == 0) {
+        code = 0xff000000;
+      }
+      code |= row[x] << (8 + bit_depth * xsub);
+      dst[x >> xbits] = code;
+    }
+  } else {
+    for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
+  }
+}
+
 //------------------------------------------------------------------------------
 
 #if defined(__cplusplus) || defined(c_plusplus)
diff --git a/3rdparty/libwebp/dsp/lossless.h b/3rdparty/libwebp/dsp/lossless.h
index 0ac4ecb84..7490ec8ef 100644
--- a/3rdparty/libwebp/dsp/lossless.h
+++ b/3rdparty/libwebp/dsp/lossless.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Image transforms and color space conversion methods for lossless decoder.
@@ -33,6 +35,13 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
                           int row_start, int row_end,
                           const uint32_t* const in, uint32_t* const out);
 
+// Similar to the static method ColorIndexInverseTransform() that is part of
+// lossless.c, but used only for alpha decoding. It takes uint8_t (rather than
+// uint32_t) arguments for 'src' and 'dst'.
+void VP8LColorIndexInverseTransformAlpha(
+    const struct VP8LTransform* const transform, int y_start, int y_end,
+    const uint8_t* src, uint8_t* dst);
+
 // Subtracts green from blue and red channels.
 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs);
 
@@ -83,6 +92,9 @@ static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
   return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
 }
 
+void VP8LBundleColorMap(const uint8_t* const row, int width,
+                        int xbits, uint32_t* const dst);
+
 //------------------------------------------------------------------------------
 
 #if defined(__cplusplus) || defined(c_plusplus)
diff --git a/3rdparty/libwebp/dsp/upsampling.c b/3rdparty/libwebp/dsp/upsampling.c
index aea4964b6..80ba4f8a5 100644
--- a/3rdparty/libwebp/dsp/upsampling.c
+++ b/3rdparty/libwebp/dsp/upsampling.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // YUV to RGB upsampling functions.
diff --git a/3rdparty/libwebp/dsp/upsampling_neon.c b/3rdparty/libwebp/dsp/upsampling_neon.c
index 00e2f8928..d11889590 100644
--- a/3rdparty/libwebp/dsp/upsampling_neon.c
+++ b/3rdparty/libwebp/dsp/upsampling_neon.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // NEON version of YUV to RGB upsampling functions.
diff --git a/3rdparty/libwebp/dsp/upsampling_sse2.c b/3rdparty/libwebp/dsp/upsampling_sse2.c
index ba075d11e..f31d04845 100644
--- a/3rdparty/libwebp/dsp/upsampling_sse2.c
+++ b/3rdparty/libwebp/dsp/upsampling_sse2.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // SSE2 version of YUV to RGB upsampling functions.
diff --git a/3rdparty/libwebp/dsp/yuv.c b/3rdparty/libwebp/dsp/yuv.c
index f8988bae1..1a59f7449 100644
--- a/3rdparty/libwebp/dsp/yuv.c
+++ b/3rdparty/libwebp/dsp/yuv.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // YUV->RGB conversion function
diff --git a/3rdparty/libwebp/dsp/yuv.h b/3rdparty/libwebp/dsp/yuv.h
index 126404b66..3844d8cab 100644
--- a/3rdparty/libwebp/dsp/yuv.h
+++ b/3rdparty/libwebp/dsp/yuv.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // inline YUV<->RGB conversion function
diff --git a/3rdparty/libwebp/enc/alpha.c b/3rdparty/libwebp/enc/alpha.c
index aadf88fef..e636c9672 100644
--- a/3rdparty/libwebp/enc/alpha.c
+++ b/3rdparty/libwebp/enc/alpha.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Alpha-plane compression.
@@ -80,7 +82,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
   config.lossless = 1;
   config.method = effort_level;  // impact is very small
   // Set a moderate default quality setting for alpha.
-  config.quality = 5.f * effort_level;
+  config.quality = 10.f * effort_level;
   assert(config.quality >= 0 && config.quality <= 100.f);
 
   ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
@@ -156,6 +158,25 @@ static void CopyPlane(const uint8_t* src, int src_stride,
   }
 }
 
+static int GetNumColors(const uint8_t* data, int width, int height,
+                        int stride) {
+  int j;
+  int colors = 0;
+  uint8_t color[256] = { 0 };
+
+  for (j = 0; j < height; ++j) {
+    int i;
+    const uint8_t* const p = data + j * stride;
+    for (i = 0; i < width; ++i) {
+      color[p[i]] = 1;
+    }
+  }
+  for (j = 0; j < 256; ++j) {
+    if (color[j] > 0) ++colors;
+  }
+  return colors;
+}
+
 static int EncodeAlpha(VP8Encoder* const enc,
                        int quality, int method, int filter,
                        int effort_level,
@@ -207,18 +228,32 @@ static int EncodeAlpha(VP8Encoder* const enc,
     VP8BitWriter bw;
     int test_filter;
     uint8_t* filtered_alpha = NULL;
+    int try_filter_none = (effort_level > 3);
 
-    // We always test WEBP_FILTER_NONE first.
-    ok = EncodeAlphaInternal(quant_alpha, width, height,
-                             method, WEBP_FILTER_NONE, reduce_levels,
-                             effort_level, NULL, &bw, pic->stats);
-    if (!ok) {
-      VP8BitWriterWipeOut(&bw);
-      goto End;
+    if (filter == WEBP_FILTER_FAST) {  // Quick estimate of the best candidate.
+      const int kMinColorsForFilterNone = 16;
+      const int kMaxColorsForFilterNone = 192;
+      const int num_colors = GetNumColors(quant_alpha, width, height, width);
+      // For low number of colors, NONE yeilds better compression.
+      filter = (num_colors <= kMinColorsForFilterNone) ? WEBP_FILTER_NONE :
+               EstimateBestFilter(quant_alpha, width, height, width);
+      // For large number of colors, try FILTER_NONE in addition to the best
+      // filter as well.
+      if (num_colors > kMaxColorsForFilterNone) {
+        try_filter_none = 1;
+      }
     }
 
-    if (filter == WEBP_FILTER_FAST) {  // Quick estimate of a second candidate?
-      filter = EstimateBestFilter(quant_alpha, width, height, width);
+    // Test for WEBP_FILTER_NONE for higher effort levels.
+    if (try_filter_none || filter == WEBP_FILTER_NONE) {
+      ok = EncodeAlphaInternal(quant_alpha, width, height,
+                               method, WEBP_FILTER_NONE, reduce_levels,
+                               effort_level, NULL, &bw, pic->stats);
+
+      if (!ok) {
+        VP8BitWriterWipeOut(&bw);
+        goto End;
+      }
     }
     // Stop?
     if (filter == WEBP_FILTER_NONE) {
@@ -234,11 +269,14 @@ static int EncodeAlpha(VP8Encoder* const enc,
     // Try the other mode(s).
     {
       WebPAuxStats best_stats;
-      size_t best_score = VP8BitWriterSize(&bw);
+      size_t best_score = try_filter_none ?
+                          VP8BitWriterSize(&bw) : (size_t)~0U;
+      int wipe_tmp_bw = try_filter_none;
 
       memset(&best_stats, 0, sizeof(best_stats));  // prevent spurious warning
       if (pic->stats != NULL) best_stats = *pic->stats;
-      for (test_filter = WEBP_FILTER_HORIZONTAL;
+      for (test_filter =
+           try_filter_none ? WEBP_FILTER_HORIZONTAL : WEBP_FILTER_NONE;
            ok && (test_filter <= WEBP_FILTER_GRADIENT);
            ++test_filter) {
         VP8BitWriter tmp_bw;
@@ -262,7 +300,10 @@ static int EncodeAlpha(VP8Encoder* const enc,
         } else {
           VP8BitWriterWipeOut(&bw);
         }
-        VP8BitWriterWipeOut(&tmp_bw);
+        if (wipe_tmp_bw) {
+          VP8BitWriterWipeOut(&tmp_bw);
+        }
+        wipe_tmp_bw = 1;  // For next filter trial for WEBP_FILTER_BEST.
       }
       if (pic->stats != NULL) *pic->stats = best_stats;
     }
diff --git a/3rdparty/libwebp/enc/analysis.c b/3rdparty/libwebp/enc/analysis.c
index 221e9d064..4ff3edd2a 100644
--- a/3rdparty/libwebp/enc/analysis.c
+++ b/3rdparty/libwebp/enc/analysis.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Macroblock analysis
diff --git a/3rdparty/libwebp/enc/backward_references.c b/3rdparty/libwebp/enc/backward_references.c
index cf0278751..db4f430df 100644
--- a/3rdparty/libwebp/enc/backward_references.c
+++ b/3rdparty/libwebp/enc/backward_references.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
@@ -142,9 +144,10 @@ static void HashChainInsert(HashChain* const p,
 }
 
 static void GetParamsForHashChainFindCopy(int quality, int xsize,
-                                          int* window_size, int* iter_pos,
-                                          int* iter_limit) {
+                                          int cache_bits, int* window_size,
+                                          int* iter_pos, int* iter_limit) {
   const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
+  const int iter_neg = -iter_mult * (quality >> 1);
   // Limit the backward-ref window size for lower qualities.
   const int max_window_size = (quality > 50) ? WINDOW_SIZE
                             : (quality > 25) ? (xsize << 8)
@@ -152,77 +155,74 @@ static void GetParamsForHashChainFindCopy(int quality, int xsize,
   assert(xsize > 0);
   *window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE
                : max_window_size;
-  *iter_pos = 5 + (quality >> 3);
-  *iter_limit = -quality * iter_mult;
+  *iter_pos = 8 + (quality >> 3);
+  // For lower entropy images, the rigourous search loop in HashChainFindCopy
+  // can be relaxed.
+  *iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2;
 }
 
 static int HashChainFindCopy(const HashChain* const p,
-                             int base_position, int xsize,
+                             int base_position, int xsize_signed,
                              const uint32_t* const argb, int maxlen,
                              int window_size, int iter_pos, int iter_limit,
                              int* const distance_ptr,
                              int* const length_ptr) {
-  const uint64_t hash_code = GetPixPairHash64(&argb[base_position]);
-  int prev_length = 0;
-  int64_t best_val = 0;
-  int best_length = 0;
-  int best_distance = 0;
   const uint32_t* const argb_start = argb + base_position;
+  uint64_t best_val = 0;
+  uint32_t best_length = 1;
+  uint32_t best_distance = 0;
+  const uint32_t xsize = (uint32_t)xsize_signed;
   const int min_pos =
       (base_position > window_size) ? base_position - window_size : 0;
   int pos;
-
   assert(xsize > 0);
-  for (pos = p->hash_to_first_index_[hash_code];
+  for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
        pos >= min_pos;
        pos = p->chain_[pos]) {
-    int64_t val;
-    int curr_length;
+    uint64_t val;
+    uint32_t curr_length;
+    uint32_t distance;
     if (iter_pos < 0) {
       if (iter_pos < iter_limit || best_val >= 0xff0000) {
         break;
       }
     }
     --iter_pos;
-    if (best_length != 0 &&
-        argb[pos + best_length - 1] != argb_start[best_length - 1]) {
+    if (argb[pos + best_length - 1] != argb_start[best_length - 1]) {
       continue;
     }
     curr_length = FindMatchLength(argb + pos, argb_start, maxlen);
-    if (curr_length < prev_length) {
+    if (curr_length < best_length) {
       continue;
     }
-    val = 65536 * curr_length;
+    distance = (uint32_t)(base_position - pos);
+    val = curr_length << 16;
     // Favoring 2d locality here gives savings for certain images.
-    if (base_position - pos < 9 * xsize) {
-      const int y = (base_position - pos) / xsize;
-      int x = (base_position - pos) % xsize;
-      if (x > xsize / 2) {
+    if (distance < 9 * xsize) {
+      const uint32_t y = distance / xsize;
+      uint32_t x = distance % xsize;
+      if (x > (xsize >> 1)) {
         x = xsize - x;
       }
-      if (x <= 7 && x >= -8) {
+      if (x <= 7) {
+        val += 9 * 9 + 9 * 9;
         val -= y * y + x * x;
-      } else {
-        val -= 9 * 9 + 9 * 9;
       }
-    } else {
-      val -= 9 * 9 + 9 * 9;
     }
     if (best_val < val) {
-      prev_length = curr_length;
       best_val = val;
       best_length = curr_length;
-      best_distance = base_position - pos;
+      best_distance = distance;
       if (curr_length >= MAX_LENGTH) {
         break;
       }
-      if ((best_distance == 1 || best_distance == xsize) &&
+      if ((best_distance == 1 || distance == xsize) &&
           best_length >= 128) {
         break;
       }
     }
   }
-  *distance_ptr = best_distance;
+  *distance_ptr = (int)best_distance;
   *length_ptr = best_length;
   return (best_length >= MIN_LENGTH);
 }
@@ -284,8 +284,8 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
   if (!HashChainInit(hash_chain, pix_count)) goto Error;
 
   refs->size = 0;
-  GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos,
-                                &iter_limit);
+  GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
+                                &window_size, &iter_pos, &iter_limit);
   for (i = 0; i < pix_count; ) {
     // Alternative#1: Code the pixels starting at 'i' using backward reference.
     int offset = 0;
@@ -510,8 +510,8 @@ static int BackwardReferencesHashChainDistanceOnly(
   // We loop one pixel at a time, but store all currently best points to
   // non-processed locations from this point.
   dist_array[0] = 0;
-  GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos,
-                                &iter_limit);
+  GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
+                                &window_size, &iter_pos, &iter_limit);
   for (i = 0; i < pix_count; ++i) {
     double prev_cost = 0.0;
     int shortmax;
@@ -645,8 +645,8 @@ static int BackwardReferencesHashChainFollowChosenPath(
   }
 
   refs->size = 0;
-  GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos,
-                                &iter_limit);
+  GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
+                                &window_size, &iter_pos, &iter_limit);
   for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
     int offset = 0;
     int len = 0;
@@ -785,7 +785,9 @@ int VP8LGetBackwardReferences(int width, int height,
     *best = refs_lz77;   // default guess: lz77 is better
     VP8LClearBackwardRefs(&refs_rle);
     if (try_lz77_trace_backwards) {
-      const int recursion_level = (num_pix < 320 * 200) ? 1 : 0;
+      // Set recursion level for large images using a color cache.
+      const int recursion_level =
+          (num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0;
       VP8LBackwardRefs refs_trace;
       if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) {
         goto End;
diff --git a/3rdparty/libwebp/enc/backward_references.h b/3rdparty/libwebp/enc/backward_references.h
index 8cb1a7a6d..b0d18135f 100644
--- a/3rdparty/libwebp/enc/backward_references.h
+++ b/3rdparty/libwebp/enc/backward_references.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
diff --git a/3rdparty/libwebp/enc/config.c b/3rdparty/libwebp/enc/config.c
index bb88111bc..acf96b0f3 100644
--- a/3rdparty/libwebp/enc/config.c
+++ b/3rdparty/libwebp/enc/config.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Coding tools configuration
diff --git a/3rdparty/libwebp/enc/cost.c b/3rdparty/libwebp/enc/cost.c
index 89b60ba61..d4916d7cd 100644
--- a/3rdparty/libwebp/enc/cost.c
+++ b/3rdparty/libwebp/enc/cost.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Cost tables for level and modes
diff --git a/3rdparty/libwebp/enc/cost.h b/3rdparty/libwebp/enc/cost.h
index e264d3213..7d7c2c79a 100644
--- a/3rdparty/libwebp/enc/cost.h
+++ b/3rdparty/libwebp/enc/cost.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Cost tables for level and modes.
diff --git a/3rdparty/libwebp/enc/filter.c b/3rdparty/libwebp/enc/filter.c
index 7fb78a394..aae2723d1 100644
--- a/3rdparty/libwebp/enc/filter.c
+++ b/3rdparty/libwebp/enc/filter.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Selecting filter level
diff --git a/3rdparty/libwebp/enc/frame.c b/3rdparty/libwebp/enc/frame.c
index 95206185b..c56abed75 100644
--- a/3rdparty/libwebp/enc/frame.c
+++ b/3rdparty/libwebp/enc/frame.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   frame coding and analysis
diff --git a/3rdparty/libwebp/enc/histogram.c b/3rdparty/libwebp/enc/histogram.c
index 69e5fa36e..787ea5d18 100644
--- a/3rdparty/libwebp/enc/histogram.c
+++ b/3rdparty/libwebp/enc/histogram.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
diff --git a/3rdparty/libwebp/enc/histogram.h b/3rdparty/libwebp/enc/histogram.h
index fe7cea6ff..583b5a4f8 100644
--- a/3rdparty/libwebp/enc/histogram.h
+++ b/3rdparty/libwebp/enc/histogram.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
diff --git a/3rdparty/libwebp/enc/iterator.c b/3rdparty/libwebp/enc/iterator.c
index 86e473bcf..074665904 100644
--- a/3rdparty/libwebp/enc/iterator.c
+++ b/3rdparty/libwebp/enc/iterator.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // VP8Iterator: block iterator
diff --git a/3rdparty/libwebp/enc/layer.c b/3rdparty/libwebp/enc/layer.c
index 423127df6..fa8966099 100644
--- a/3rdparty/libwebp/enc/layer.c
+++ b/3rdparty/libwebp/enc/layer.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Enhancement layer (for YUV444/422)
diff --git a/3rdparty/libwebp/enc/picture.c b/3rdparty/libwebp/enc/picture.c
index 1e51a8dc6..5aaa385d4 100644
--- a/3rdparty/libwebp/enc/picture.c
+++ b/3rdparty/libwebp/enc/picture.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // WebPPicture utils: colorspace conversion, crop, ...
@@ -709,7 +711,7 @@ static int Import(WebPPicture* const picture,
     for (y = 0; y < height; ++y) {
       for (x = 0; x < width; ++x) {
         const int offset = step * x + y * rgb_stride;
-        const uint32_t argb = (a_ptr[offset] << 24) |
+        const uint32_t argb = ((uint32_t)a_ptr[offset] << 24) |
                               (r_ptr[offset] << 16) |
                               (g_ptr[offset] <<  8) |
                               (b_ptr[offset]);
@@ -809,7 +811,7 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
         const uint8_t* const src = picture->a + y * picture->a_stride;
         int x;
         for (x = 0; x < width; ++x) {
-          argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | (src[x] << 24);
+          argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24);
         }
       }
     }
diff --git a/3rdparty/libwebp/enc/quant.c b/3rdparty/libwebp/enc/quant.c
index dcfd4d16d..462d4e9e6 100644
--- a/3rdparty/libwebp/enc/quant.c
+++ b/3rdparty/libwebp/enc/quant.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   Quantization
diff --git a/3rdparty/libwebp/enc/syntax.c b/3rdparty/libwebp/enc/syntax.c
index e81fa2bed..b0f7676b4 100644
--- a/3rdparty/libwebp/enc/syntax.c
+++ b/3rdparty/libwebp/enc/syntax.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Header syntax writing
diff --git a/3rdparty/libwebp/enc/token.c b/3rdparty/libwebp/enc/token.c
index 4e2f6c00a..6a63371f7 100644
--- a/3rdparty/libwebp/enc/token.c
+++ b/3rdparty/libwebp/enc/token.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Paginated token buffer
diff --git a/3rdparty/libwebp/enc/tree.c b/3rdparty/libwebp/enc/tree.c
index 8b25e5e48..ecd8fb910 100644
--- a/3rdparty/libwebp/enc/tree.c
+++ b/3rdparty/libwebp/enc/tree.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Token probabilities
diff --git a/3rdparty/libwebp/enc/vp8enci.h b/3rdparty/libwebp/enc/vp8enci.h
index 6aa3f436a..61d56be55 100644
--- a/3rdparty/libwebp/enc/vp8enci.h
+++ b/3rdparty/libwebp/enc/vp8enci.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   WebP encoder: internal header.
@@ -28,7 +30,7 @@ extern "C" {
 // version numbers
 #define ENC_MAJ_VERSION 0
 #define ENC_MIN_VERSION 3
-#define ENC_REV_VERSION 0
+#define ENC_REV_VERSION 1
 
 // intra prediction modes
 enum { B_DC_PRED = 0,   // 4x4 modes
diff --git a/3rdparty/libwebp/enc/vp8l.c b/3rdparty/libwebp/enc/vp8l.c
index 5077167be..945870ca4 100644
--- a/3rdparty/libwebp/enc/vp8l.c
+++ b/3rdparty/libwebp/enc/vp8l.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // main entry for the lossless encoder.
@@ -86,7 +88,7 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
     argb += pic->argb_stride;
   }
 
-  // TODO(skal): could we reuse in_use[] to speed up ApplyPalette()?
+  // TODO(skal): could we reuse in_use[] to speed up EncodePalette()?
   num_colors = 0;
   for (i = 0; i < (int)(sizeof(in_use) / sizeof(in_use[0])); ++i) {
     if (in_use[i]) {
@@ -811,34 +813,66 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
   return err;
 }
 
-// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
-static void BundleColorMap(const uint8_t* const row, int width,
-                           int xbits, uint32_t* const dst) {
-  int x;
-  if (xbits > 0) {
-    const int bit_depth = 1 << (3 - xbits);
-    const int mask = (1 << xbits) - 1;
-    uint32_t code = 0xff000000;
-    for (x = 0; x < width; ++x) {
-      const int xsub = x & mask;
-      if (xsub == 0) {
-        code = 0xff000000;
+static void ApplyPalette(uint32_t* src, uint32_t* dst,
+                         uint32_t src_stride, uint32_t dst_stride,
+                         const uint32_t* palette, int palette_size,
+                         int width, int height, int xbits, uint8_t* row) {
+  int i, x, y;
+  int use_LUT = 1;
+  for (i = 0; i < palette_size; ++i) {
+    if ((palette[i] & 0xffff00ffu) != 0) {
+      use_LUT = 0;
+      break;
+    }
+  }
+
+  if (use_LUT) {
+    int inv_palette[MAX_PALETTE_SIZE] = { 0 };
+    for (i = 0; i < palette_size; ++i) {
+      const int color = (palette[i] >> 8) & 0xff;
+      inv_palette[color] = i;
+    }
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        const int color = (src[x] >> 8) & 0xff;
+        row[x] = inv_palette[color];
       }
-      code |= row[x] << (8 + bit_depth * xsub);
-      dst[x >> xbits] = code;
+      VP8LBundleColorMap(row, width, xbits, dst);
+      src += src_stride;
+      dst += dst_stride;
     }
   } else {
-    for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
+    // Use 1 pixel cache for ARGB pixels.
+    uint32_t last_pix = palette[0];
+    int last_idx = 0;
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        const uint32_t pix = src[x];
+        if (pix != last_pix) {
+          for (i = 0; i < palette_size; ++i) {
+            if (pix == palette[i]) {
+              last_idx = i;
+              last_pix = pix;
+              break;
+            }
+          }
+        }
+        row[x] = last_idx;
+      }
+      VP8LBundleColorMap(row, width, xbits, dst);
+      src += src_stride;
+      dst += dst_stride;
+    }
   }
 }
 
 // Note: Expects "enc->palette_" to be set properly.
 // Also, "enc->palette_" will be modified after this call and should not be used
 // later.
-static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
-                                      VP8LEncoder* const enc, int quality) {
+static WebPEncodingError EncodePalette(VP8LBitWriter* const bw,
+                                       VP8LEncoder* const enc, int quality) {
   WebPEncodingError err = VP8_ENC_OK;
-  int i, x, y;
+  int i;
   const WebPPicture* const pic = enc->pic_;
   uint32_t* src = pic->argb;
   uint32_t* dst;
@@ -864,20 +898,8 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
   row = WebPSafeMalloc((uint64_t)width, sizeof(*row));
   if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
 
-  for (y = 0; y < height; ++y) {
-    for (x = 0; x < width; ++x) {
-      const uint32_t pix = src[x];
-      for (i = 0; i < palette_size; ++i) {
-        if (pix == palette[i]) {
-          row[x] = i;
-          break;
-        }
-      }
-    }
-    BundleColorMap(row, width, xbits, dst);
-    src += pic->argb_stride;
-    dst += enc->current_width_;
-  }
+  ApplyPalette(src, dst, pic->argb_stride, enc->current_width_,
+               palette, palette_size, width, height, xbits, row);
 
   // Save palette to bitstream.
   VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
@@ -899,13 +921,10 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
 
 // -----------------------------------------------------------------------------
 
-static int GetHistoBits(const WebPConfig* const config,
-                        const WebPPicture* const pic) {
-  const int width = pic->width;
-  const int height = pic->height;
+static int GetHistoBits(int method, int use_palette, int width, int height) {
   const uint64_t hist_size = sizeof(VP8LHistogram);
   // Make tile size a function of encoding method (Range: 0 to 6).
-  int histo_bits = 7 - config->method;
+  int histo_bits = (use_palette ? 9 : 7) - method;
   while (1) {
     const uint64_t huff_image_size = VP8LSubSampleSize(width, histo_bits) *
                                      VP8LSubSampleSize(height, histo_bits) *
@@ -917,13 +936,14 @@ static int GetHistoBits(const WebPConfig* const config,
          (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
 }
 
-static void InitEncParams(VP8LEncoder* const enc) {
+static void FinishEncParams(VP8LEncoder* const enc) {
   const WebPConfig* const config = enc->config_;
-  const WebPPicture* const picture = enc->pic_;
+  const WebPPicture* const pic = enc->pic_;
   const int method = config->method;
   const float quality = config->quality;
+  const int use_palette = enc->use_palette_;
   enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4;
-  enc->histo_bits_ = GetHistoBits(config, picture);
+  enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height);
   enc->cache_bits_ = (quality <= 25.f) ? 0 : 7;
 }
 
@@ -965,8 +985,6 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
     goto Error;
   }
 
-  InitEncParams(enc);
-
   // ---------------------------------------------------------------------------
   // Analyze image (entropy, num_palettes etc)
 
@@ -975,8 +993,10 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
     goto Error;
   }
 
+  FinishEncParams(enc);
+
   if (enc->use_palette_) {
-    err = ApplyPalette(bw, enc, quality);
+    err = EncodePalette(bw, enc, quality);
     if (err != VP8_ENC_OK) goto Error;
     // Color cache is disabled for palette.
     enc->cache_bits_ = 0;
diff --git a/3rdparty/libwebp/enc/vp8li.h b/3rdparty/libwebp/enc/vp8li.h
index eae90dd61..01f01f57e 100644
--- a/3rdparty/libwebp/enc/vp8li.h
+++ b/3rdparty/libwebp/enc/vp8li.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Lossless encoder: internal header.
diff --git a/3rdparty/libwebp/enc/webpenc.c b/3rdparty/libwebp/enc/webpenc.c
index 20fbac4d0..d420d063e 100644
--- a/3rdparty/libwebp/enc/webpenc.c
+++ b/3rdparty/libwebp/enc/webpenc.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // WebP encoder: main entry point
@@ -386,9 +388,9 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
     // Analysis is done, proceed to actual coding.
     ok = ok && VP8EncStartAlpha(enc);   // possibly done in parallel
     if (!enc->use_tokens_) {
-      ok = VP8EncLoop(enc);
+      ok = ok && VP8EncLoop(enc);
     } else {
-      ok = VP8EncTokenLoop(enc);
+      ok = ok && VP8EncTokenLoop(enc);
     }
     ok = ok && VP8EncFinishAlpha(enc);
 #ifdef WEBP_EXPERIMENTAL_FEATURES
diff --git a/3rdparty/libwebp/mux/muxedit.c b/3rdparty/libwebp/mux/muxedit.c
index a486229c2..2d25a14d0 100644
--- a/3rdparty/libwebp/mux/muxedit.c
+++ b/3rdparty/libwebp/mux/muxedit.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Set and delete APIs for mux.
@@ -475,7 +477,7 @@ static WebPMuxError GetImageCanvasWidthHeight(
     int64_t image_area = 0;
     // Aggregate the bounding box for animation frames & fragmented images.
     for (; wpi != NULL; wpi = wpi->next_) {
-      int x_offset, y_offset, duration, w, h;
+      int x_offset = 0, y_offset = 0, duration = 0, w = 0, h = 0;
       const WebPMuxError err = GetImageInfo(wpi, &x_offset, &y_offset,
                                             &duration, &w, &h);
       const int max_x_pos = x_offset + w;
diff --git a/3rdparty/libwebp/mux/muxi.h b/3rdparty/libwebp/mux/muxi.h
index 97b7f43dd..eaed558b4 100644
--- a/3rdparty/libwebp/mux/muxi.h
+++ b/3rdparty/libwebp/mux/muxi.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Internal header for mux library.
@@ -26,7 +28,7 @@ extern "C" {
 
 #define MUX_MAJ_VERSION 0
 #define MUX_MIN_VERSION 1
-#define MUX_REV_VERSION 0
+#define MUX_REV_VERSION 1
 
 // Chunk object.
 typedef struct WebPChunk WebPChunk;
diff --git a/3rdparty/libwebp/mux/muxinternal.c b/3rdparty/libwebp/mux/muxinternal.c
index 3fa91f7d8..9543c0ee5 100644
--- a/3rdparty/libwebp/mux/muxinternal.c
+++ b/3rdparty/libwebp/mux/muxinternal.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Internal objects and utils for mux.
diff --git a/3rdparty/libwebp/mux/muxread.c b/3rdparty/libwebp/mux/muxread.c
index 0e074fb2a..2179ccb79 100644
--- a/3rdparty/libwebp/mux/muxread.c
+++ b/3rdparty/libwebp/mux/muxread.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Read APIs for mux.
diff --git a/3rdparty/libwebp/utils/bit_reader.c b/3rdparty/libwebp/utils/bit_reader.c
index d6cfd8648..ab7a8273e 100644
--- a/3rdparty/libwebp/utils/bit_reader.c
+++ b/3rdparty/libwebp/utils/bit_reader.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Boolean decoder
diff --git a/3rdparty/libwebp/utils/bit_reader.h b/3rdparty/libwebp/utils/bit_reader.h
index ccf450c5d..588222b3b 100644
--- a/3rdparty/libwebp/utils/bit_reader.h
+++ b/3rdparty/libwebp/utils/bit_reader.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Boolean decoder
@@ -194,6 +196,7 @@ static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
 #endif
 #else    // BIG_ENDIAN
     bits = (bit_t)in_bits;
+    if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS);
 #endif
 #ifndef USE_RIGHT_JUSTIFY
     br->value_ |= bits << (-br->bits_);
diff --git a/3rdparty/libwebp/utils/bit_writer.c b/3rdparty/libwebp/utils/bit_writer.c
index 671159cac..3827a13ad 100644
--- a/3rdparty/libwebp/utils/bit_writer.c
+++ b/3rdparty/libwebp/utils/bit_writer.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Bit writing and boolean coder
diff --git a/3rdparty/libwebp/utils/bit_writer.h b/3rdparty/libwebp/utils/bit_writer.h
index f7ca08497..cbb095c1b 100644
--- a/3rdparty/libwebp/utils/bit_writer.h
+++ b/3rdparty/libwebp/utils/bit_writer.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Bit writing and boolean coder
diff --git a/3rdparty/libwebp/utils/color_cache.c b/3rdparty/libwebp/utils/color_cache.c
index 560f81db1..749db6128 100644
--- a/3rdparty/libwebp/utils/color_cache.c
+++ b/3rdparty/libwebp/utils/color_cache.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Color Cache for WebP Lossless
diff --git a/3rdparty/libwebp/utils/color_cache.h b/3rdparty/libwebp/utils/color_cache.h
index 13be629f3..e5a0bd6fd 100644
--- a/3rdparty/libwebp/utils/color_cache.h
+++ b/3rdparty/libwebp/utils/color_cache.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Color Cache for WebP Lossless
diff --git a/3rdparty/libwebp/utils/filters.c b/3rdparty/libwebp/utils/filters.c
index 9486355ab..eb5bb34f4 100644
--- a/3rdparty/libwebp/utils/filters.c
+++ b/3rdparty/libwebp/utils/filters.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Spatial prediction using various filters
@@ -154,8 +156,7 @@ static void GradientUnfilter(int width, int height, int stride, uint8_t* data) {
 #undef SANITY_CHECK
 
 // -----------------------------------------------------------------------------
-// Quick estimate of a potentially interesting filter mode to try, in addition
-// to the default NONE.
+// Quick estimate of a potentially interesting filter mode to try.
 
 #define SMAX 16
 #define SDIFF(a, b) (abs((a) - (b)) >> 4)   // Scoring diff, in [0..SMAX)
@@ -165,6 +166,7 @@ WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
   int i, j;
   int bins[WEBP_FILTER_LAST][SMAX];
   memset(bins, 0, sizeof(bins));
+
   // We only sample every other pixels. That's enough.
   for (j = 2; j < height - 1; j += 2) {
     const uint8_t* const p = data + j * stride;
diff --git a/3rdparty/libwebp/utils/filters.h b/3rdparty/libwebp/utils/filters.h
index 898252329..1f5fa164f 100644
--- a/3rdparty/libwebp/utils/filters.h
+++ b/3rdparty/libwebp/utils/filters.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Spatial prediction using various filters
diff --git a/3rdparty/libwebp/utils/huffman.c b/3rdparty/libwebp/utils/huffman.c
index 41529cc9d..0ba9d05cf 100644
--- a/3rdparty/libwebp/utils/huffman.c
+++ b/3rdparty/libwebp/utils/huffman.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Utilities for building and looking up Huffman trees.
diff --git a/3rdparty/libwebp/utils/huffman.h b/3rdparty/libwebp/utils/huffman.h
index 70220a67f..83a517ee6 100644
--- a/3rdparty/libwebp/utils/huffman.h
+++ b/3rdparty/libwebp/utils/huffman.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Utilities for building and looking up Huffman trees.
diff --git a/3rdparty/libwebp/utils/huffman_encode.c b/3rdparty/libwebp/utils/huffman_encode.c
index 2d680e3ec..96086663d 100644
--- a/3rdparty/libwebp/utils/huffman_encode.c
+++ b/3rdparty/libwebp/utils/huffman_encode.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
diff --git a/3rdparty/libwebp/utils/huffman_encode.h b/3rdparty/libwebp/utils/huffman_encode.h
index cc3b38d33..0b81f470b 100644
--- a/3rdparty/libwebp/utils/huffman_encode.h
+++ b/3rdparty/libwebp/utils/huffman_encode.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
diff --git a/3rdparty/libwebp/utils/quant_levels.c b/3rdparty/libwebp/utils/quant_levels.c
index 649aae655..42c7245d3 100644
--- a/3rdparty/libwebp/utils/quant_levels.c
+++ b/3rdparty/libwebp/utils/quant_levels.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Quantize levels for specified number of quantization-levels ([2, 256]).
diff --git a/3rdparty/libwebp/utils/quant_levels.h b/3rdparty/libwebp/utils/quant_levels.h
index 9f85f34bf..2d90828df 100644
--- a/3rdparty/libwebp/utils/quant_levels.h
+++ b/3rdparty/libwebp/utils/quant_levels.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Alpha plane quantization utility
diff --git a/3rdparty/libwebp/utils/quant_levels_dec.c b/3rdparty/libwebp/utils/quant_levels_dec.c
index 95142b1b1..d93594b3b 100644
--- a/3rdparty/libwebp/utils/quant_levels_dec.c
+++ b/3rdparty/libwebp/utils/quant_levels_dec.c
@@ -1,8 +1,10 @@
 // Copyright 2013 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // TODO(skal): implement gradient smoothing.
diff --git a/3rdparty/libwebp/utils/quant_levels_dec.h b/3rdparty/libwebp/utils/quant_levels_dec.h
index 470cf4790..589106765 100644
--- a/3rdparty/libwebp/utils/quant_levels_dec.h
+++ b/3rdparty/libwebp/utils/quant_levels_dec.h
@@ -1,8 +1,10 @@
 // Copyright 2013 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Alpha plane de-quantization utility
diff --git a/3rdparty/libwebp/utils/rescaler.c b/3rdparty/libwebp/utils/rescaler.c
index 61530cfef..e5ddc296a 100644
--- a/3rdparty/libwebp/utils/rescaler.c
+++ b/3rdparty/libwebp/utils/rescaler.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Rescaling functions
diff --git a/3rdparty/libwebp/utils/rescaler.h b/3rdparty/libwebp/utils/rescaler.h
index ef93d465f..aedce4625 100644
--- a/3rdparty/libwebp/utils/rescaler.h
+++ b/3rdparty/libwebp/utils/rescaler.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Rescaling functions
diff --git a/3rdparty/libwebp/utils/thread.c b/3rdparty/libwebp/utils/thread.c
index a14af559d..b1615d0fb 100644
--- a/3rdparty/libwebp/utils/thread.c
+++ b/3rdparty/libwebp/utils/thread.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Multi-threaded worker
diff --git a/3rdparty/libwebp/utils/thread.h b/3rdparty/libwebp/utils/thread.h
index 9afe09679..13a61a4c8 100644
--- a/3rdparty/libwebp/utils/thread.h
+++ b/3rdparty/libwebp/utils/thread.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Multi-threaded worker
diff --git a/3rdparty/libwebp/utils/utils.c b/3rdparty/libwebp/utils/utils.c
index b1db2f9d6..7eb06105b 100644
--- a/3rdparty/libwebp/utils/utils.c
+++ b/3rdparty/libwebp/utils/utils.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Misc. common utility functions
diff --git a/3rdparty/libwebp/utils/utils.h b/3rdparty/libwebp/utils/utils.h
index e5d6d6309..e10aeeb9d 100644
--- a/3rdparty/libwebp/utils/utils.h
+++ b/3rdparty/libwebp/utils/utils.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Misc. common utility functions
diff --git a/3rdparty/libwebp/webp/decode.h b/3rdparty/libwebp/webp/decode.h
index 181eb1860..141f8618f 100644
--- a/3rdparty/libwebp/webp/decode.h
+++ b/3rdparty/libwebp/webp/decode.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  Main decoding functions for WebP images.
@@ -20,13 +22,13 @@ extern "C" {
 
 #define WEBP_DECODER_ABI_VERSION 0x0201    // MAJOR(8b) + MINOR(8b)
 
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum VP8StatusCode VP8StatusCode;
+// typedef enum WEBP_CSP_MODE WEBP_CSP_MODE;
 typedef struct WebPRGBABuffer WebPRGBABuffer;
 typedef struct WebPYUVABuffer WebPYUVABuffer;
 typedef struct WebPDecBuffer WebPDecBuffer;
-#if !(defined(__cplusplus) || defined(c_plusplus))
-typedef enum VP8StatusCode VP8StatusCode;
-typedef enum WEBP_CSP_MODE WEBP_CSP_MODE;
-#endif
 typedef struct WebPIDecoder WebPIDecoder;
 typedef struct WebPBitstreamFeatures WebPBitstreamFeatures;
 typedef struct WebPDecoderOptions WebPDecoderOptions;
@@ -138,7 +140,7 @@ WEBP_EXTERN(uint8_t*) WebPDecodeYUVInto(
 // RGBA-4444: [b3 b2 b1 b0 a3 a2 a1 a0], [r3 r2 r1 r0 g3 g2 g1 g0], ...
 // RGB-565: [g2 g1 g0 b4 b3 b2 b1 b0], [r4 r3 r2 r1 r0 g5 g4 g3], ...
 
-enum WEBP_CSP_MODE {
+typedef enum WEBP_CSP_MODE {
   MODE_RGB = 0, MODE_RGBA = 1,
   MODE_BGR = 2, MODE_BGRA = 3,
   MODE_ARGB = 4, MODE_RGBA_4444 = 5,
@@ -151,7 +153,7 @@ enum WEBP_CSP_MODE {
   // YUV modes must come after RGB ones.
   MODE_YUV = 11, MODE_YUVA = 12,  // yuv 4:2:0
   MODE_LAST = 13
-};
+} WEBP_CSP_MODE;
 
 // Some useful macros:
 static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) {
@@ -220,7 +222,7 @@ WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer);
 //------------------------------------------------------------------------------
 // Enumeration of the status codes
 
-enum VP8StatusCode {
+typedef enum VP8StatusCode {
   VP8_STATUS_OK = 0,
   VP8_STATUS_OUT_OF_MEMORY,
   VP8_STATUS_INVALID_PARAM,
@@ -229,7 +231,7 @@ enum VP8StatusCode {
   VP8_STATUS_SUSPENDED,
   VP8_STATUS_USER_ABORT,
   VP8_STATUS_NOT_ENOUGH_DATA
-};
+} VP8StatusCode;
 
 //------------------------------------------------------------------------------
 // Incremental decoding
@@ -262,6 +264,12 @@ enum VP8StatusCode {
 // is used (with MODE_RGB). Otherwise, an internal reference to 'output_buffer'
 // is kept, which means that the lifespan of 'output_buffer' must be larger than
 // that of the returned WebPIDecoder object.
+// The supplied 'output_buffer' content MUST NOT be changed between calls to
+// WebPIAppend() or WebPIUpdate() unless 'output_buffer.is_external_memory' is
+// set to 1. In such a case, it is allowed to modify the pointers, size and
+// stride of output_buffer.u.RGBA or output_buffer.u.YUVA, provided they remain
+// within valid bounds.
+// All other fields of WebPDecBuffer MUST remain constant between calls.
 // Returns NULL if the allocation failed.
 WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
 
diff --git a/3rdparty/libwebp/webp/demux.h b/3rdparty/libwebp/webp/demux.h
index cfb4fdfe7..c7cd5d66d 100644
--- a/3rdparty/libwebp/webp/demux.h
+++ b/3rdparty/libwebp/webp/demux.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Demux API.
@@ -53,11 +55,11 @@ extern "C" {
 
 #define WEBP_DEMUX_ABI_VERSION 0x0100    // MAJOR(8b) + MINOR(8b)
 
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPDemuxState WebPDemuxState;
+// typedef enum WebPFormatFeature WebPFormatFeature;
 typedef struct WebPDemuxer WebPDemuxer;
-#if !(defined(__cplusplus) || defined(c_plusplus))
-typedef enum WebPDemuxState WebPDemuxState;
-typedef enum WebPFormatFeature WebPFormatFeature;
-#endif
 typedef struct WebPIterator WebPIterator;
 typedef struct WebPChunkIterator WebPChunkIterator;
 
@@ -70,11 +72,11 @@ WEBP_EXTERN(int) WebPGetDemuxVersion(void);
 //------------------------------------------------------------------------------
 // Life of a Demux object
 
-enum WebPDemuxState {
+typedef enum WebPDemuxState {
   WEBP_DEMUX_PARSING_HEADER,  // Not enough data to parse full header.
   WEBP_DEMUX_PARSED_HEADER,   // Header parsing complete, data may be available.
   WEBP_DEMUX_DONE             // Entire file has been parsed.
-};
+} WebPDemuxState;
 
 // Internal, version-checked, entry point
 WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(
@@ -100,7 +102,7 @@ WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
 //------------------------------------------------------------------------------
 // Data/information extraction.
 
-enum WebPFormatFeature {
+typedef enum WebPFormatFeature {
   WEBP_FF_FORMAT_FLAGS,  // Extended format flags present in the 'VP8X' chunk.
   WEBP_FF_CANVAS_WIDTH,
   WEBP_FF_CANVAS_HEIGHT,
@@ -110,7 +112,7 @@ enum WebPFormatFeature {
                          // In case of a partial demux, this is the number of
                          // frames seen so far, with the last frame possibly
                          // being partial.
-};
+} WebPFormatFeature;
 
 // Get the 'feature' value from the 'dmux'.
 // NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
diff --git a/3rdparty/libwebp/webp/encode.h b/3rdparty/libwebp/webp/encode.h
index fea8ee428..726992f57 100644
--- a/3rdparty/libwebp/webp/encode.h
+++ b/3rdparty/libwebp/webp/encode.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   WebP encoder: main interface
@@ -20,12 +22,12 @@ extern "C" {
 
 #define WEBP_ENCODER_ABI_VERSION 0x0201    // MAJOR(8b) + MINOR(8b)
 
-#if !(defined(__cplusplus) || defined(c_plusplus))
-typedef enum WebPImageHint WebPImageHint;
-typedef enum WebPEncCSP WebPEncCSP;
-typedef enum WebPPreset WebPPreset;
-typedef enum WebPEncodingError WebPEncodingError;
-#endif
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPImageHint WebPImageHint;
+// typedef enum WebPEncCSP WebPEncCSP;
+// typedef enum WebPPreset WebPPreset;
+// typedef enum WebPEncodingError WebPEncodingError;
 typedef struct WebPConfig WebPConfig;
 typedef struct WebPPicture WebPPicture;   // main structure for I/O
 typedef struct WebPAuxStats WebPAuxStats;
@@ -77,13 +79,13 @@ WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra,
 // Coding parameters
 
 // Image characteristics hint for the underlying encoder.
-enum WebPImageHint {
+typedef enum WebPImageHint {
   WEBP_HINT_DEFAULT = 0,  // default preset.
   WEBP_HINT_PICTURE,      // digital picture, like portrait, inner shot
   WEBP_HINT_PHOTO,        // outdoor photograph, with natural lighting
   WEBP_HINT_GRAPH,        // Discrete tone image (graph, map-tile etc).
   WEBP_HINT_LAST
-};
+} WebPImageHint;
 
 // Compression parameters.
 struct WebPConfig {
@@ -133,14 +135,14 @@ struct WebPConfig {
 
 // Enumerate some predefined settings for WebPConfig, depending on the type
 // of source picture. These presets are used when calling WebPConfigPreset().
-enum WebPPreset {
+typedef enum WebPPreset {
   WEBP_PRESET_DEFAULT = 0,  // default preset.
   WEBP_PRESET_PICTURE,      // digital picture, like portrait, inner shot
   WEBP_PRESET_PHOTO,        // outdoor photograph, with natural lighting
   WEBP_PRESET_DRAWING,      // hand or line drawing, with high-contrast details
   WEBP_PRESET_ICON,         // small-sized colorful images
   WEBP_PRESET_TEXT          // text-like
-};
+} WebPPreset;
 
 // Internal, version-checked, entry point
 WEBP_EXTERN(int) WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
@@ -230,7 +232,7 @@ WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
 typedef int (*WebPProgressHook)(int percent, const WebPPicture* picture);
 
 // Color spaces.
-enum WebPEncCSP {
+typedef enum WebPEncCSP {
   // chroma sampling
   WEBP_YUV420 = 0,   // 4:2:0
   WEBP_YUV422 = 1,   // 4:2:2
@@ -243,10 +245,10 @@ enum WebPEncCSP {
   WEBP_YUV444A = 6,
   WEBP_YUV400A = 7,   // grayscale + alpha
   WEBP_CSP_ALPHA_BIT = 4   // bit that is set if alpha is present
-};
+} WebPEncCSP;
 
 // Encoding error conditions.
-enum WebPEncodingError {
+typedef enum WebPEncodingError {
   VP8_ENC_OK = 0,
   VP8_ENC_ERROR_OUT_OF_MEMORY,            // memory error allocating objects
   VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY,  // memory error while flushing bits
@@ -259,7 +261,7 @@ enum WebPEncodingError {
   VP8_ENC_ERROR_FILE_TOO_BIG,             // file is bigger than 4G
   VP8_ENC_ERROR_USER_ABORT,               // abort request by user
   VP8_ENC_ERROR_LAST                      // list terminator. always last.
-};
+} WebPEncodingError;
 
 // maximum width/height allowed (inclusive), in pixels
 #define WEBP_MAX_DIMENSION 16383
@@ -360,8 +362,9 @@ WEBP_EXTERN(int) WebPPictureAlloc(WebPPicture* picture);
 // preserved.
 WEBP_EXTERN(void) WebPPictureFree(WebPPicture* picture);
 
-// Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return,
-// *dst will fully own the copied pixels (this is not a view).
+// Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return, *dst
+// will fully own the copied pixels (this is not a view). The 'dst' picture need
+// not be initialized as its content is overwritten.
 // Returns false in case of memory allocation error.
 WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
 
@@ -392,7 +395,9 @@ WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture,
 // the top and left coordinates will be snapped to even values.
 // Picture 'src' must out-live 'dst' picture. Self-extraction of view is allowed
 // ('src' equal to 'dst') as a mean of fast-cropping (but note that doing so,
-// the original dimension will be lost).
+// the original dimension will be lost). Picture 'dst' need not be initialized
+// with WebPPictureInit() if it is different from 'src', since its content will
+// be overwritten.
 // Returns false in case of memory allocation error or invalid parameters.
 WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src,
                                  int left, int top, int width, int height,
diff --git a/3rdparty/libwebp/webp/format_constants.h b/3rdparty/libwebp/webp/format_constants.h
index a6f76d8d5..4c04b50c6 100644
--- a/3rdparty/libwebp/webp/format_constants.h
+++ b/3rdparty/libwebp/webp/format_constants.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  Internal header for constants related to WebP file format.
diff --git a/3rdparty/libwebp/webp/mux.h b/3rdparty/libwebp/webp/mux.h
index 85a892270..b8c7dc62e 100644
--- a/3rdparty/libwebp/webp/mux.h
+++ b/3rdparty/libwebp/webp/mux.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  RIFF container manipulation for WEBP images.
@@ -53,26 +55,26 @@ extern "C" {
 
 #define WEBP_MUX_ABI_VERSION 0x0100        // MAJOR(8b) + MINOR(8b)
 
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPMuxError WebPMuxError;
+// typedef enum WebPChunkId WebPChunkId;
 typedef struct WebPMux WebPMux;   // main opaque object.
-#if !(defined(__cplusplus) || defined(c_plusplus))
-typedef enum WebPMuxError WebPMuxError;
-typedef enum WebPChunkId WebPChunkId;
-#endif
 typedef struct WebPMuxFrameInfo WebPMuxFrameInfo;
 typedef struct WebPMuxAnimParams WebPMuxAnimParams;
 
 // Error codes
-enum WebPMuxError {
+typedef enum WebPMuxError {
   WEBP_MUX_OK                 =  1,
   WEBP_MUX_NOT_FOUND          =  0,
   WEBP_MUX_INVALID_ARGUMENT   = -1,
   WEBP_MUX_BAD_DATA           = -2,
   WEBP_MUX_MEMORY_ERROR       = -3,
   WEBP_MUX_NOT_ENOUGH_DATA    = -4
-};
+} WebPMuxError;
 
 // IDs for different types of chunks.
-enum WebPChunkId {
+typedef enum WebPChunkId {
   WEBP_CHUNK_VP8X,     // VP8X
   WEBP_CHUNK_ICCP,     // ICCP
   WEBP_CHUNK_ANIM,     // ANIM
@@ -84,7 +86,7 @@ enum WebPChunkId {
   WEBP_CHUNK_XMP,      // XMP
   WEBP_CHUNK_UNKNOWN,  // Other chunks.
   WEBP_CHUNK_NIL
-};
+} WebPChunkId;
 
 //------------------------------------------------------------------------------
 
diff --git a/3rdparty/libwebp/webp/mux_types.h b/3rdparty/libwebp/webp/mux_types.h
index 4006a5409..b8bce363e 100644
--- a/3rdparty/libwebp/webp/mux_types.h
+++ b/3rdparty/libwebp/webp/mux_types.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Data-types common to the mux and demux libraries.
@@ -20,31 +22,31 @@
 extern "C" {
 #endif
 
-#if !(defined(__cplusplus) || defined(c_plusplus))
-typedef enum WebPFeatureFlags WebPFeatureFlags;
-typedef enum WebPMuxAnimDispose WebPMuxAnimDispose;
-#endif
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPFeatureFlags WebPFeatureFlags;
+// typedef enum WebPMuxAnimDispose WebPMuxAnimDispose;
+typedef struct WebPData WebPData;
 
 // VP8X Feature Flags.
-enum WebPFeatureFlags {
+typedef enum WebPFeatureFlags {
   FRAGMENTS_FLAG  = 0x00000001,
   ANIMATION_FLAG  = 0x00000002,
   XMP_FLAG        = 0x00000004,
   EXIF_FLAG       = 0x00000008,
   ALPHA_FLAG      = 0x00000010,
   ICCP_FLAG       = 0x00000020
-};
+} WebPFeatureFlags;
 
 // Dispose method (animation only). Indicates how the area used by the current
 // frame is to be treated before rendering the next frame on the canvas.
-enum WebPMuxAnimDispose {
+typedef enum WebPMuxAnimDispose {
   WEBP_MUX_DISPOSE_NONE,       // Do not dispose.
   WEBP_MUX_DISPOSE_BACKGROUND  // Dispose to background color.
-};
+} WebPMuxAnimDispose;
 
 // Data type used to describe 'raw' data, e.g., chunk data
 // (ICC profile, metadata) and WebP compressed image data.
-typedef struct WebPData WebPData;
 struct WebPData {
   const uint8_t* bytes;
   size_t size;
diff --git a/3rdparty/libwebp/webp/types.h b/3rdparty/libwebp/webp/types.h
index 3e27190be..568d1f263 100644
--- a/3rdparty/libwebp/webp/types.h
+++ b/3rdparty/libwebp/webp/types.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  Common types
diff --git a/3rdparty/openexr/CMakeLists.txt b/3rdparty/openexr/CMakeLists.txt
index 3d4cc689f..e10b94007 100644
--- a/3rdparty/openexr/CMakeLists.txt
+++ b/3rdparty/openexr/CMakeLists.txt
@@ -22,7 +22,7 @@ set(OPENEXR_INCLUDE_PATHS "${CMAKE_CURRENT_SOURCE_DIR}/Half"
                           "${CMAKE_CURRENT_SOURCE_DIR}/Imath"
                           "${CMAKE_CURRENT_SOURCE_DIR}/IlmImf")
 
-ocv_include_directories("${CMAKE_CURRENT_BINARY_DIR}" ${ZLIB_INCLUDE_DIR} ${OPENEXR_INCLUDE_PATHS})
+ocv_include_directories("${CMAKE_CURRENT_BINARY_DIR}" ${ZLIB_INCLUDE_DIRS} ${OPENEXR_INCLUDE_PATHS})
 
 file(GLOB lib_srcs Half/half.cpp Iex/*.cpp IlmThread/*.cpp Imath/*.cpp IlmImf/*.cpp)
 file(GLOB lib_hdrs Half/*.h Iex/Iex*.h IlmThread/IlmThread*.h Imath/Imath*.h IlmImf/*.h)
diff --git a/3rdparty/tbb/.gitignore b/3rdparty/tbb/.gitignore
index 601e1b265..69ce9ceb9 100644
--- a/3rdparty/tbb/.gitignore
+++ b/3rdparty/tbb/.gitignore
@@ -1 +1 @@
-tbb*.tgz
\ No newline at end of file
+tbb*.tgz
diff --git a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt
index af1581349..4e510ffa3 100644
--- a/3rdparty/tbb/CMakeLists.txt
+++ b/3rdparty/tbb/CMakeLists.txt
@@ -1,12 +1,39 @@
 #Cross compile TBB from source
 project(tbb)
 
+if (WIN32 AND NOT ARM)
+  message(FATAL_ERROR "BUILD_TBB option supports Windows on ARM only!\nUse regular official TBB build instead of the BUILD_TBB option!")
+endif()
+
+if (WIN32 AND ARM)
+  # 4.1 update 4 - The first release that supports Windows RT. Hangs on some Android devices
+  set(tbb_ver "tbb41_20130613oss")
+  set(tbb_url "http://threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb41_20130613oss_src.tgz")
+  set(tbb_md5 "108c8c1e481b0aaea61878289eb28b6a")
+  set(tbb_version_file "version_string.ver")
+  ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4702)
+else()
+  # 4.1 update 2 - works fine
+  set(tbb_ver "tbb41_20130116oss")
+  set(tbb_url "http://threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb41_20130116oss_src.tgz")
+  set(tbb_md5 "3809790e1001a1b32d59c9fee590ee85")
+  set(tbb_version_file "version_string.ver")
+  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
+endif()
+
+# 4.1 update 3 dev - Hangs on some Android devices
+#set(tbb_ver "tbb41_20130401oss")
+#set(tbb_url "http://threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb41_20130401oss_src.tgz")
+#set(tbb_md5 "f2f591a0d2ca8f801e221ce7d9ea84bb")
+#set(tbb_version_file "version_string.ver")
+#ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
+
 # 4.1 update 2 - works fine
-set(tbb_ver "tbb41_20130116oss")
-set(tbb_url "http://threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb41_20130116oss_src.tgz")
-set(tbb_md5 "3809790e1001a1b32d59c9fee590ee85")
-set(tbb_version_file "version_string.ver")
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
+#set(tbb_ver "tbb41_20130116oss")
+#set(tbb_url "http://threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb41_20130116oss_src.tgz")
+#set(tbb_md5 "3809790e1001a1b32d59c9fee590ee85")
+#set(tbb_version_file "version_string.ver")
+#ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
 
 # 4.1 update 1 - works fine
 #set(tbb_ver "tbb41_20121003oss")
@@ -55,24 +82,8 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
 set(tbb_tarball "${CMAKE_CURRENT_SOURCE_DIR}/${tbb_ver}_src.tgz")
 set(tbb_src_dir "${CMAKE_CURRENT_BINARY_DIR}/${tbb_ver}")
 
-macro(getMD5 filename varname)
-  if(CMAKE_VERSION VERSION_GREATER 2.8.6)
-    file(MD5 "${filename}" ${varname})
-  else()
-    execute_process(COMMAND ${CMAKE_COMMAND} -E md5sum "${filename}"
-              RESULT_VARIABLE getMD5_RESULT
-              OUTPUT_VARIABLE getMD5_OUTPUT
-              OUTPUT_STRIP_TRAILING_WHITESPACE)
-    if(getMD5_RESULT EQUAL 0)
-      string(REGEX MATCH "^[a-z0-9]+" ${varname} "${getMD5_OUTPUT}")
-    else()
-      set(${varname} "invalid_md5")
-    endif()
-  endif()
-endmacro()
-
 if(EXISTS "${tbb_tarball}")
-  getMD5("${tbb_tarball}" tbb_local_md5)
+  file(MD5 "${tbb_tarball}" tbb_local_md5)
   if(NOT tbb_local_md5 STREQUAL tbb_md5)
     message(WARNING "Local copy of TBB source tarball has invalid MD5 hash: ${tbb_local_md5} (expected: ${tbb_md5})")
     file(REMOVE "${tbb_tarball}")
@@ -88,7 +99,7 @@ if(NOT EXISTS "${tbb_tarball}")
   if(NOT __statvar EQUAL 0)
     message(FATAL_ERROR "Failed to download TBB sources: ${tbb_url}")
   endif()
-  getMD5("${tbb_tarball}" tbb_local_md5)
+  file(MD5 "${tbb_tarball}" tbb_local_md5)
   if(NOT tbb_local_md5 STREQUAL tbb_md5)
     file(REMOVE "${tbb_tarball}")
     message(FATAL_ERROR "Downloaded TBB source tarball has invalid MD5 hash: ${tbb_local_md5} (expected: ${tbb_md5})")
@@ -107,7 +118,7 @@ if(NOT EXISTS "${tbb_src_dir}")
               RESULT_VARIABLE tbb_untar_RESULT)
 
   if(NOT tbb_untar_RESULT EQUAL 0 OR NOT EXISTS "${tbb_src_dir}")
-    message(FATAL_ERROR "Failed to unpack TBB sources")
+    message(FATAL_ERROR "Failed to unpack TBB sources from ${tbb_tarball} to ${tbb_src_dir} with error ${tbb_untar_RESULT}")
   endif()
 endif()
 
@@ -123,13 +134,22 @@ file(GLOB lib_hdrs "${tbb_src_dir}/src/tbb/*.h")
 list(APPEND lib_srcs "${tbb_src_dir}/src/rml/client/rml_tbb.cpp")
 
 if (WIN32)
-  add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0
-                -D__TBB_BUILD=1
-                -D_UNICODE
-                -DUNICODE
-                -DWINAPI_FAMILY=WINAPI_FAMILY_APP
-                -DDO_ITT_NOTIFY=0
+  add_definitions(/D__TBB_DYNAMIC_LOAD_ENABLED=0
+                  /D__TBB_BUILD=1
+                  /DTBB_NO_LEGACY=1
+                  /D_UNICODE
+                  /DUNICODE
+                  /DWINAPI_FAMILY=WINAPI_FAMILY_APP
+                  /DDO_ITT_NOTIFY=0
+                  /DUSE_WINTHREAD
                ) # defines were copied from windows.cl.inc
+
+  if (ARM)
+    add_definitions(/D_WIN32_WINNT=0x0602
+                    /D__TBB_WIN32_USE_CL_BUILTINS
+                   )
+  endif()
+
 set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} /APPCONTAINER")
 else()
   add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0         #required
@@ -173,15 +193,38 @@ endif()
 set(TBB_SOURCE_FILES ${TBB_SOURCE_FILES} "${CMAKE_CURRENT_SOURCE_DIR}/${tbb_version_file}")
 
 add_library(tbb ${TBB_SOURCE_FILES})
-target_link_libraries(tbb c m dl)
+
+if (WIN32)
+  if (ARM)
+    set(platform_macro /D_M_ARM=1)
+  endif()
+
+  add_custom_command(TARGET tbb
+                     PRE_BUILD
+                     COMMAND ${CMAKE_C_COMPILER} /nologo /TC /EP ${tbb_src_dir}\\src\\tbb\\win32-tbb-export.def /DTBB_NO_LEGACY=1 /D_CRT_SECURE_NO_DEPRECATE /D__TBB_BUILD=1 ${platform_macro} /I${tbb_src_dir}\\src /I${tbb_src_dir}\\include > "${tbb_src_dir}\\src\\tbb\\tbb.def"
+                     WORKING_DIRECTORY ${tbb_src_dir}\\src\\tbb
+                     COMMENT "Generating tbb.def file" VERBATIM
+                    )
+
+  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DEF:${tbb_src_dir}/src/tbb/tbb.def /DLL /MAP /fixed:no /INCREMENTAL:NO")
+else()
+  target_link_libraries(tbb c m dl)
+endif()
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 string(REPLACE "-Werror=non-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 
+if (WIN32)
+  set(tbb_debug_postfix "_debug") # to fit pragmas in _windef.h inside TBB
+else()
+  set(tbb_debug_postfix ${OPENCV_DEBUG_POSTFIX})
+endif()
+
 set_target_properties(tbb
   PROPERTIES OUTPUT_NAME tbb
-  DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
+  DEBUG_POSTFIX "${tbb_debug_postfix}"
   ARCHIVE_OUTPUT_DIRECTORY ${3P_LIBRARY_OUTPUT_PATH}
+  RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
   )
 
 if(ENABLE_SOLUTION_FOLDERS)
diff --git a/3rdparty/zlib/ChangeLog b/3rdparty/zlib/ChangeLog
index c2c643a1a..f22aabaef 100644
--- a/3rdparty/zlib/ChangeLog
+++ b/3rdparty/zlib/ChangeLog
@@ -1,6 +1,69 @@
 
                 ChangeLog file for zlib
 
+Changes in 1.2.8 (28 Apr 2013)
+- Update contrib/minizip/iowin32.c for Windows RT [Vollant]
+- Do not force Z_CONST for C++
+- Clean up contrib/vstudio [Ro�]
+- Correct spelling error in zlib.h
+- Fix mixed line endings in contrib/vstudio
+
+Changes in 1.2.7.3 (13 Apr 2013)
+- Fix version numbers and DLL names in contrib/vstudio/*/zlib.rc
+
+Changes in 1.2.7.2 (13 Apr 2013)
+- Change check for a four-byte type back to hexadecimal
+- Fix typo in win32/Makefile.msc
+- Add casts in gzwrite.c for pointer differences
+
+Changes in 1.2.7.1 (24 Mar 2013)
+- Replace use of unsafe string functions with snprintf if available
+- Avoid including stddef.h on Windows for Z_SOLO compile [Niessink]
+- Fix gzgetc undefine when Z_PREFIX set [Turk]
+- Eliminate use of mktemp in Makefile (not always available)
+- Fix bug in 'F' mode for gzopen()
+- Add inflateGetDictionary() function
+- Correct comment in deflate.h
+- Use _snprintf for snprintf in Microsoft C
+- On Darwin, only use /usr/bin/libtool if libtool is not Apple
+- Delete "--version" file if created by "ar --version" [Richard G.]
+- Fix configure check for veracity of compiler error return codes
+- Fix CMake compilation of static lib for MSVC2010 x64
+- Remove unused variable in infback9.c
+- Fix argument checks in gzlog_compress() and gzlog_write()
+- Clean up the usage of z_const and respect const usage within zlib
+- Clean up examples/gzlog.[ch] comparisons of different types
+- Avoid shift equal to bits in type (caused endless loop)
+- Fix unintialized value bug in gzputc() introduced by const patches
+- Fix memory allocation error in examples/zran.c [Nor]
+- Fix bug where gzopen(), gzclose() would write an empty file
+- Fix bug in gzclose() when gzwrite() runs out of memory
+- Check for input buffer malloc failure in examples/gzappend.c
+- Add note to contrib/blast to use binary mode in stdio
+- Fix comparisons of differently signed integers in contrib/blast
+- Check for invalid code length codes in contrib/puff
+- Fix serious but very rare decompression bug in inftrees.c
+- Update inflateBack() comments, since inflate() can be faster
+- Use underscored I/O function names for WINAPI_FAMILY
+- Add _tr_flush_bits to the external symbols prefixed by --zprefix
+- Add contrib/vstudio/vc10 pre-build step for static only
+- Quote --version-script argument in CMakeLists.txt
+- Don't specify --version-script on Apple platforms in CMakeLists.txt
+- Fix casting error in contrib/testzlib/testzlib.c
+- Fix types in contrib/minizip to match result of get_crc_table()
+- Simplify contrib/vstudio/vc10 with 'd' suffix
+- Add TOP support to win32/Makefile.msc
+- Suport i686 and amd64 assembler builds in CMakeLists.txt
+- Fix typos in the use of _LARGEFILE64_SOURCE in zconf.h
+- Add vc11 and vc12 build files to contrib/vstudio
+- Add gzvprintf() as an undocumented function in zlib
+- Fix configure for Sun shell
+- Remove runtime check in configure for four-byte integer type
+- Add casts and consts to ease user conversion to C++
+- Add man pages for minizip and miniunzip
+- In Makefile uninstall, don't rm if preceding cd fails
+- Do not return Z_BUF_ERROR if deflateParam() has nothing to write
+
 Changes in 1.2.7 (2 May 2012)
 - Replace use of memmove() with a simple copy for portability
 - Test for existence of strerror
diff --git a/3rdparty/zlib/README b/3rdparty/zlib/README
index 6f1255ffe..5ca9d127e 100644
--- a/3rdparty/zlib/README
+++ b/3rdparty/zlib/README
@@ -1,6 +1,6 @@
 ZLIB DATA COMPRESSION LIBRARY
 
-zlib 1.2.7 is a general purpose data compression library.  All the code is
+zlib 1.2.8 is a general purpose data compression library.  All the code is
 thread safe.  The data format used by the zlib library is described by RFCs
 (Request for Comments) 1950 to 1952 in the files
 http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
@@ -31,7 +31,7 @@ Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan.  1997
 issue of Dr.  Dobb's Journal; a copy of the article is available at
 http://marknelson.us/1997/01/01/zlib-engine/ .
 
-The changes made in version 1.2.7 are documented in the file ChangeLog.
+The changes made in version 1.2.8 are documented in the file ChangeLog.
 
 Unsupported third party contributions are provided in directory contrib/ .
 
@@ -84,7 +84,7 @@ Acknowledgments:
 
 Copyright notice:
 
- (C) 1995-2012 Jean-loup Gailly and Mark Adler
+ (C) 1995-2013 Jean-loup Gailly and Mark Adler
 
   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
diff --git a/3rdparty/zlib/compress.c b/3rdparty/zlib/compress.c
index ea4dfbe9d..6e9762676 100644
--- a/3rdparty/zlib/compress.c
+++ b/3rdparty/zlib/compress.c
@@ -29,7 +29,7 @@ int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
     z_stream stream;
     int err;
 
-    stream.next_in = (Bytef*)source;
+    stream.next_in = (z_const Bytef *)source;
     stream.avail_in = (uInt)sourceLen;
 #ifdef MAXSEG_64K
     /* Check for source > 64K on 16-bit machine: */
diff --git a/3rdparty/zlib/deflate.c b/3rdparty/zlib/deflate.c
index 9e4c2cbc8..696957705 100644
--- a/3rdparty/zlib/deflate.c
+++ b/3rdparty/zlib/deflate.c
@@ -1,5 +1,5 @@
 /* deflate.c -- compress data using the deflation algorithm
- * Copyright (C) 1995-2012 Jean-loup Gailly and Mark Adler
+ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -52,7 +52,7 @@
 #include "deflate.h"
 
 const char deflate_copyright[] =
-   " deflate 1.2.7 Copyright 1995-2012 Jean-loup Gailly and Mark Adler ";
+   " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler ";
 /*
   If you use the zlib library in a product, an acknowledgment is welcome
   in the documentation of your product. If for some reason you cannot
@@ -305,7 +305,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
         s->pending_buf == Z_NULL) {
         s->status = FINISH_STATE;
-        strm->msg = (char*)ERR_MSG(Z_MEM_ERROR);
+        strm->msg = ERR_MSG(Z_MEM_ERROR);
         deflateEnd (strm);
         return Z_MEM_ERROR;
     }
@@ -329,7 +329,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
     uInt str, n;
     int wrap;
     unsigned avail;
-    unsigned char *next;
+    z_const unsigned char *next;
 
     if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL)
         return Z_STREAM_ERROR;
@@ -359,7 +359,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
     avail = strm->avail_in;
     next = strm->next_in;
     strm->avail_in = dictLength;
-    strm->next_in = (Bytef *)dictionary;
+    strm->next_in = (z_const Bytef *)dictionary;
     fill_window(s);
     while (s->lookahead >= MIN_MATCH) {
         str = s->strstart;
@@ -513,6 +513,8 @@ int ZEXPORT deflateParams(strm, level, strategy)
         strm->total_in != 0) {
         /* Flush the last buffer: */
         err = deflate(strm, Z_BLOCK);
+        if (err == Z_BUF_ERROR && s->pending == 0)
+            err = Z_OK;
     }
     if (s->level != level) {
         s->level = level;
diff --git a/3rdparty/zlib/deflate.h b/3rdparty/zlib/deflate.h
index fbac44d90..ce0299edd 100644
--- a/3rdparty/zlib/deflate.h
+++ b/3rdparty/zlib/deflate.h
@@ -104,7 +104,7 @@ typedef struct internal_state {
     int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
     gz_headerp  gzhead;  /* gzip header information to write */
     uInt   gzindex;      /* where in extra, name, or comment */
-    Byte  method;        /* STORED (for zip only) or DEFLATED */
+    Byte  method;        /* can only be DEFLATED */
     int   last_flush;    /* value of flush param for previous deflate call */
 
                 /* used by deflate.c: */
diff --git a/3rdparty/zlib/gzguts.h b/3rdparty/zlib/gzguts.h
index ee3f281aa..d87659d03 100644
--- a/3rdparty/zlib/gzguts.h
+++ b/3rdparty/zlib/gzguts.h
@@ -1,5 +1,5 @@
 /* gzguts.h -- zlib internal header definitions for gz* operations
- * Copyright (C) 2004, 2005, 2010, 2011, 2012 Mark Adler
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -35,6 +35,13 @@
 #  include <io.h>
 #endif
 
+#ifdef WINAPI_FAMILY
+#  define open _open
+#  define read _read
+#  define write _write
+#  define close _close
+#endif
+
 #ifdef NO_DEFLATE       /* for compatibility with old definition */
 #  define NO_GZCOMPRESS
 #endif
@@ -60,7 +67,7 @@
 #ifndef HAVE_VSNPRINTF
 #  ifdef MSDOS
 /* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
- but for now we just assume it doesn't. */
+   but for now we just assume it doesn't. */
 #    define NO_vsnprintf
 #  endif
 #  ifdef __TURBOC__
@@ -88,6 +95,14 @@
 #  endif
 #endif
 
+/* unlike snprintf (which is required in C99, yet still not supported by
+   Microsoft more than a decade later!), _snprintf does not guarantee null
+   termination of the result -- however this is only used in gzlib.c where
+   the result is assured to fit in the space provided */
+#ifdef _MSC_VER
+#  define snprintf _snprintf
+#endif
+
 #ifndef local
 #  define local static
 #endif
@@ -127,7 +142,8 @@
 #  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
 #endif
 
-/* default i/o buffer size -- double this for output when reading */
+/* default i/o buffer size -- double this for output when reading (this and
+   twice this must be able to fit in an unsigned type) */
 #define GZBUFSIZE 8192
 
 /* gzip modes, also provide a little integrity check on the passed structure */
diff --git a/3rdparty/zlib/gzlib.c b/3rdparty/zlib/gzlib.c
index ca55c6ea9..fae202ef8 100644
--- a/3rdparty/zlib/gzlib.c
+++ b/3rdparty/zlib/gzlib.c
@@ -1,5 +1,5 @@
 /* gzlib.c -- zlib functions common to reading and writing gzip files
- * Copyright (C) 2004, 2010, 2011, 2012 Mark Adler
+ * Copyright (C) 2004, 2010, 2011, 2012, 2013 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -108,7 +108,7 @@ local gzFile gz_open(path, fd, mode)
         return NULL;
 
     /* allocate gzFile structure to return */
-    state = malloc(sizeof(gz_state));
+    state = (gz_statep)malloc(sizeof(gz_state));
     if (state == NULL)
         return NULL;
     state->size = 0;            /* no buffers allocated yet */
@@ -162,8 +162,10 @@ local gzFile gz_open(path, fd, mode)
                 break;
             case 'F':
                 state->strategy = Z_FIXED;
+                break;
             case 'T':
                 state->direct = 1;
+                break;
             default:        /* could consider as an error, but just ignore */
                 ;
             }
@@ -194,8 +196,8 @@ local gzFile gz_open(path, fd, mode)
     }
     else
 #endif
-        len = strlen(path);
-    state->path = malloc(len + 1);
+        len = strlen((const char *)path);
+    state->path = (char *)malloc(len + 1);
     if (state->path == NULL) {
         free(state);
         return NULL;
@@ -208,7 +210,11 @@ local gzFile gz_open(path, fd, mode)
             *(state->path) = 0;
     else
 #endif
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+        snprintf(state->path, len + 1, "%s", (const char *)path);
+#else
         strcpy(state->path, path);
+#endif
 
     /* compute the flags for open() */
     oflag =
@@ -236,7 +242,7 @@ local gzFile gz_open(path, fd, mode)
 #ifdef _WIN32
         fd == -2 ? _wopen(path, oflag, 0666) :
 #endif
-        open(path, oflag, 0666));
+        open((const char *)path, oflag, 0666));
     if (state->fd == -1) {
         free(state->path);
         free(state);
@@ -282,9 +288,13 @@ gzFile ZEXPORT gzdopen(fd, mode)
     char *path;         /* identifier for error messages */
     gzFile gz;
 
-    if (fd == -1 || (path = malloc(7 + 3 * sizeof(int))) == NULL)
+    if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL)
         return NULL;
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+    snprintf(path, 7 + 3 * sizeof(int), "<fd:%d>", fd); /* for debugging */
+#else
     sprintf(path, "<fd:%d>", fd);   /* for debugging */
+#endif
     gz = gz_open(path, fd, mode);
     free(path);
     return gz;
@@ -531,7 +541,8 @@ const char * ZEXPORT gzerror(file, errnum)
     /* return error information */
     if (errnum != NULL)
         *errnum = state->err;
-    return state->msg == NULL ? "" : state->msg;
+    return state->err == Z_MEM_ERROR ? "out of memory" :
+                                       (state->msg == NULL ? "" : state->msg);
 }
 
 /* -- see zlib.h -- */
@@ -582,21 +593,24 @@ void ZLIB_INTERNAL gz_error(state, err, msg)
     if (msg == NULL)
         return;
 
-    /* for an out of memory error, save as static string */
-    if (err == Z_MEM_ERROR) {
-        state->msg = (char *)msg;
+    /* for an out of memory error, return literal string when requested */
+    if (err == Z_MEM_ERROR)
         return;
-    }
 
     /* construct error message with path */
-    if ((state->msg = malloc(strlen(state->path) + strlen(msg) + 3)) == NULL) {
+    if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) ==
+            NULL) {
         state->err = Z_MEM_ERROR;
-        state->msg = (char *)"out of memory";
         return;
     }
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+    snprintf(state->msg, strlen(state->path) + strlen(msg) + 3,
+             "%s%s%s", state->path, ": ", msg);
+#else
     strcpy(state->msg, state->path);
     strcat(state->msg, ": ");
     strcat(state->msg, msg);
+#endif
     return;
 }
 
diff --git a/3rdparty/zlib/gzread.c b/3rdparty/zlib/gzread.c
index 3493d34d4..bf4538eb2 100644
--- a/3rdparty/zlib/gzread.c
+++ b/3rdparty/zlib/gzread.c
@@ -1,5 +1,5 @@
 /* gzread.c -- zlib functions for reading gzip files
- * Copyright (C) 2004, 2005, 2010, 2011, 2012 Mark Adler
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -58,7 +58,8 @@ local int gz_avail(state)
         return -1;
     if (state->eof == 0) {
         if (strm->avail_in) {       /* copy what's there to the start */
-            unsigned char *p = state->in, *q = strm->next_in;
+            unsigned char *p = state->in;
+            unsigned const char *q = strm->next_in;
             unsigned n = strm->avail_in;
             do {
                 *p++ = *q++;
@@ -90,8 +91,8 @@ local int gz_look(state)
     /* allocate read buffers and inflate memory */
     if (state->size == 0) {
         /* allocate buffers */
-        state->in = malloc(state->want);
-        state->out = malloc(state->want << 1);
+        state->in = (unsigned char *)malloc(state->want);
+        state->out = (unsigned char *)malloc(state->want << 1);
         if (state->in == NULL || state->out == NULL) {
             if (state->out != NULL)
                 free(state->out);
@@ -352,14 +353,14 @@ int ZEXPORT gzread(file, buf, len)
 
         /* large len -- read directly into user buffer */
         else if (state->how == COPY) {      /* read directly */
-            if (gz_load(state, buf, len, &n) == -1)
+            if (gz_load(state, (unsigned char *)buf, len, &n) == -1)
                 return -1;
         }
 
         /* large len -- decompress directly into user buffer */
         else {  /* state->how == GZIP */
             strm->avail_out = len;
-            strm->next_out = buf;
+            strm->next_out = (unsigned char *)buf;
             if (gz_decomp(state) == -1)
                 return -1;
             n = state->x.have;
@@ -378,7 +379,11 @@ int ZEXPORT gzread(file, buf, len)
 }
 
 /* -- see zlib.h -- */
-#undef gzgetc
+#ifdef Z_PREFIX_SET
+#  undef z_gzgetc
+#else
+#  undef gzgetc
+#endif
 int ZEXPORT gzgetc(file)
     gzFile file;
 {
@@ -518,7 +523,7 @@ char * ZEXPORT gzgets(file, buf, len)
 
         /* look for end-of-line in current output buffer */
         n = state->x.have > left ? left : state->x.have;
-        eol = memchr(state->x.next, '\n', n);
+        eol = (unsigned char *)memchr(state->x.next, '\n', n);
         if (eol != NULL)
             n = (unsigned)(eol - state->x.next) + 1;
 
diff --git a/3rdparty/zlib/gzwrite.c b/3rdparty/zlib/gzwrite.c
index 27cb3428e..aa767fbf6 100644
--- a/3rdparty/zlib/gzwrite.c
+++ b/3rdparty/zlib/gzwrite.c
@@ -1,5 +1,5 @@
 /* gzwrite.c -- zlib functions for writing gzip files
- * Copyright (C) 2004, 2005, 2010, 2011, 2012 Mark Adler
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -19,7 +19,7 @@ local int gz_init(state)
     z_streamp strm = &(state->strm);
 
     /* allocate input buffer */
-    state->in = malloc(state->want);
+    state->in = (unsigned char *)malloc(state->want);
     if (state->in == NULL) {
         gz_error(state, Z_MEM_ERROR, "out of memory");
         return -1;
@@ -28,7 +28,7 @@ local int gz_init(state)
     /* only need output buffer and deflate state if compressing */
     if (!state->direct) {
         /* allocate output buffer */
-        state->out = malloc(state->want);
+        state->out = (unsigned char *)malloc(state->want);
         if (state->out == NULL) {
             free(state->in);
             gz_error(state, Z_MEM_ERROR, "out of memory");
@@ -168,7 +168,6 @@ int ZEXPORT gzwrite(file, buf, len)
     unsigned len;
 {
     unsigned put = len;
-    unsigned n;
     gz_statep state;
     z_streamp strm;
 
@@ -208,16 +207,19 @@ int ZEXPORT gzwrite(file, buf, len)
     if (len < state->size) {
         /* copy to input buffer, compress when full */
         do {
+            unsigned have, copy;
+
             if (strm->avail_in == 0)
                 strm->next_in = state->in;
-            n = state->size - strm->avail_in;
-            if (n > len)
-                n = len;
-            memcpy(strm->next_in + strm->avail_in, buf, n);
-            strm->avail_in += n;
-            state->x.pos += n;
-            buf = (char *)buf + n;
-            len -= n;
+            have = (unsigned)((strm->next_in + strm->avail_in) - state->in);
+            copy = state->size - have;
+            if (copy > len)
+                copy = len;
+            memcpy(state->in + have, buf, copy);
+            strm->avail_in += copy;
+            state->x.pos += copy;
+            buf = (const char *)buf + copy;
+            len -= copy;
             if (len && gz_comp(state, Z_NO_FLUSH) == -1)
                 return 0;
         } while (len);
@@ -229,7 +231,7 @@ int ZEXPORT gzwrite(file, buf, len)
 
         /* directly compress user buffer to file */
         strm->avail_in = len;
-        strm->next_in = (voidp)buf;
+        strm->next_in = (z_const Bytef *)buf;
         state->x.pos += len;
         if (gz_comp(state, Z_NO_FLUSH) == -1)
             return 0;
@@ -244,6 +246,7 @@ int ZEXPORT gzputc(file, c)
     gzFile file;
     int c;
 {
+    unsigned have;
     unsigned char buf[1];
     gz_statep state;
     z_streamp strm;
@@ -267,12 +270,16 @@ int ZEXPORT gzputc(file, c)
 
     /* try writing to input buffer for speed (state->size == 0 if buffer not
        initialized) */
-    if (strm->avail_in < state->size) {
+    if (state->size) {
         if (strm->avail_in == 0)
             strm->next_in = state->in;
-        strm->next_in[strm->avail_in++] = c;
-        state->x.pos++;
-        return c & 0xff;
+        have = (unsigned)((strm->next_in + strm->avail_in) - state->in);
+        if (have < state->size) {
+            state->in[have] = c;
+            strm->avail_in++;
+            state->x.pos++;
+            return c & 0xff;
+        }
     }
 
     /* no room in buffer or not initialized, use gz_write() */
@@ -300,12 +307,11 @@ int ZEXPORT gzputs(file, str)
 #include <stdarg.h>
 
 /* -- see zlib.h -- */
-int ZEXPORTVA gzprintf (gzFile file, const char *format, ...)
+int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
 {
     int size, len;
     gz_statep state;
     z_streamp strm;
-    va_list va;
 
     /* get internal structure */
     if (file == NULL)
@@ -335,25 +341,20 @@ int ZEXPORTVA gzprintf (gzFile file, const char *format, ...)
     /* do the printf() into the input buffer, put length in len */
     size = (int)(state->size);
     state->in[size - 1] = 0;
-    va_start(va, format);
 #ifdef NO_vsnprintf
 #  ifdef HAS_vsprintf_void
     (void)vsprintf((char *)(state->in), format, va);
-    va_end(va);
     for (len = 0; len < size; len++)
         if (state->in[len] == 0) break;
 #  else
     len = vsprintf((char *)(state->in), format, va);
-    va_end(va);
 #  endif
 #else
 #  ifdef HAS_vsnprintf_void
     (void)vsnprintf((char *)(state->in), size, format, va);
-    va_end(va);
     len = strlen((char *)(state->in));
 #  else
     len = vsnprintf((char *)(state->in), size, format, va);
-    va_end(va);
 #  endif
 #endif
 
@@ -368,6 +369,17 @@ int ZEXPORTVA gzprintf (gzFile file, const char *format, ...)
     return len;
 }
 
+int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
+{
+    va_list va;
+    int ret;
+
+    va_start(va, format);
+    ret = gzvprintf(file, format, va);
+    va_end(va);
+    return ret;
+}
+
 #else /* !STDC && !Z_HAVE_STDARG_H */
 
 /* -- see zlib.h -- */
@@ -547,9 +559,9 @@ int ZEXPORT gzclose_w(file)
     }
 
     /* flush, free memory, and close file */
+    if (gz_comp(state, Z_FINISH) == -1)
+        ret = state->err;
     if (state->size) {
-        if (gz_comp(state, Z_FINISH) == -1)
-            ret = state->err;
         if (!state->direct) {
             (void)deflateEnd(&(state->strm));
             free(state->out);
diff --git a/3rdparty/zlib/infback.c b/3rdparty/zlib/infback.c
index 981aff17c..f3833c2e4 100644
--- a/3rdparty/zlib/infback.c
+++ b/3rdparty/zlib/infback.c
@@ -255,7 +255,7 @@ out_func out;
 void FAR *out_desc;
 {
     struct inflate_state FAR *state;
-    unsigned char FAR *next;    /* next input */
+    z_const unsigned char FAR *next;    /* next input */
     unsigned char FAR *put;     /* next output */
     unsigned have, left;        /* available input and output */
     unsigned long hold;         /* bit buffer */
diff --git a/3rdparty/zlib/inffast.c b/3rdparty/zlib/inffast.c
index 2f1d60b43..bda59ceb6 100644
--- a/3rdparty/zlib/inffast.c
+++ b/3rdparty/zlib/inffast.c
@@ -1,5 +1,5 @@
 /* inffast.c -- fast decoding
- * Copyright (C) 1995-2008, 2010 Mark Adler
+ * Copyright (C) 1995-2008, 2010, 2013 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -69,8 +69,8 @@ z_streamp strm;
 unsigned start;         /* inflate()'s starting value for strm->avail_out */
 {
     struct inflate_state FAR *state;
-    unsigned char FAR *in;      /* local strm->next_in */
-    unsigned char FAR *last;    /* while in < last, enough input available */
+    z_const unsigned char FAR *in;      /* local strm->next_in */
+    z_const unsigned char FAR *last;    /* have enough input while in < last */
     unsigned char FAR *out;     /* local strm->next_out */
     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
     unsigned char FAR *end;     /* while out < end, enough space available */
diff --git a/3rdparty/zlib/inflate.c b/3rdparty/zlib/inflate.c
index 47418a1e1..870f89bb4 100644
--- a/3rdparty/zlib/inflate.c
+++ b/3rdparty/zlib/inflate.c
@@ -93,11 +93,12 @@
 
 /* function prototypes */
 local void fixedtables OF((struct inflate_state FAR *state));
-local int updatewindow OF((z_streamp strm, unsigned out));
+local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
+                           unsigned copy));
 #ifdef BUILDFIXED
    void makefixed OF((void));
 #endif
-local unsigned syncsearch OF((unsigned FAR *have, unsigned char FAR *buf,
+local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
                               unsigned len));
 
 int ZEXPORT inflateResetKeep(strm)
@@ -375,12 +376,13 @@ void makefixed()
    output will fall in the output data, making match copies simpler and faster.
    The advantage may be dependent on the size of the processor's data caches.
  */
-local int updatewindow(strm, out)
+local int updatewindow(strm, end, copy)
 z_streamp strm;
-unsigned out;
+const Bytef *end;
+unsigned copy;
 {
     struct inflate_state FAR *state;
-    unsigned copy, dist;
+    unsigned dist;
 
     state = (struct inflate_state FAR *)strm->state;
 
@@ -400,19 +402,18 @@ unsigned out;
     }
 
     /* copy state->wsize or less output bytes into the circular window */
-    copy = out - strm->avail_out;
     if (copy >= state->wsize) {
-        zmemcpy(state->window, strm->next_out - state->wsize, state->wsize);
+        zmemcpy(state->window, end - state->wsize, state->wsize);
         state->wnext = 0;
         state->whave = state->wsize;
     }
     else {
         dist = state->wsize - state->wnext;
         if (dist > copy) dist = copy;
-        zmemcpy(state->window + state->wnext, strm->next_out - copy, dist);
+        zmemcpy(state->window + state->wnext, end - copy, dist);
         copy -= dist;
         if (copy) {
-            zmemcpy(state->window, strm->next_out - copy, copy);
+            zmemcpy(state->window, end - copy, copy);
             state->wnext = copy;
             state->whave = state->wsize;
         }
@@ -606,7 +607,7 @@ z_streamp strm;
 int flush;
 {
     struct inflate_state FAR *state;
-    unsigned char FAR *next;    /* next input */
+    z_const unsigned char FAR *next;    /* next input */
     unsigned char FAR *put;     /* next output */
     unsigned have, left;        /* available input and output */
     unsigned long hold;         /* bit buffer */
@@ -920,7 +921,7 @@ int flush;
             while (state->have < 19)
                 state->lens[order[state->have++]] = 0;
             state->next = state->codes;
-            state->lencode = (code const FAR *)(state->next);
+            state->lencode = (const code FAR *)(state->next);
             state->lenbits = 7;
             ret = inflate_table(CODES, state->lens, 19, &(state->next),
                                 &(state->lenbits), state->work);
@@ -994,7 +995,7 @@ int flush;
                values here (9 and 6) without reading the comments in inftrees.h
                concerning the ENOUGH constants, which depend on those values */
             state->next = state->codes;
-            state->lencode = (code const FAR *)(state->next);
+            state->lencode = (const code FAR *)(state->next);
             state->lenbits = 9;
             ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
                                 &(state->lenbits), state->work);
@@ -1003,7 +1004,7 @@ int flush;
                 state->mode = BAD;
                 break;
             }
-            state->distcode = (code const FAR *)(state->next);
+            state->distcode = (const code FAR *)(state->next);
             state->distbits = 6;
             ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
                             &(state->next), &(state->distbits), state->work);
@@ -1230,7 +1231,7 @@ int flush;
     RESTORE();
     if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
             (state->mode < CHECK || flush != Z_FINISH)))
-        if (updatewindow(strm, out)) {
+        if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
             state->mode = MEM;
             return Z_MEM_ERROR;
         }
@@ -1264,6 +1265,29 @@ z_streamp strm;
     return Z_OK;
 }
 
+int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength)
+z_streamp strm;
+Bytef *dictionary;
+uInt *dictLength;
+{
+    struct inflate_state FAR *state;
+
+    /* check state */
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* copy dictionary */
+    if (state->whave && dictionary != Z_NULL) {
+        zmemcpy(dictionary, state->window + state->wnext,
+                state->whave - state->wnext);
+        zmemcpy(dictionary + state->whave - state->wnext,
+                state->window, state->wnext);
+    }
+    if (dictLength != Z_NULL)
+        *dictLength = state->whave;
+    return Z_OK;
+}
+
 int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
 z_streamp strm;
 const Bytef *dictionary;
@@ -1271,8 +1295,6 @@ uInt dictLength;
 {
     struct inflate_state FAR *state;
     unsigned long dictid;
-    unsigned char *next;
-    unsigned avail;
     int ret;
 
     /* check state */
@@ -1291,13 +1313,7 @@ uInt dictLength;
 
     /* copy dictionary to window using updatewindow(), which will amend the
        existing dictionary if appropriate */
-    next = strm->next_out;
-    avail = strm->avail_out;
-    strm->next_out = (Bytef *)dictionary + dictLength;
-    strm->avail_out = 0;
-    ret = updatewindow(strm, dictLength);
-    strm->avail_out = avail;
-    strm->next_out = next;
+    ret = updatewindow(strm, dictionary + dictLength, dictLength);
     if (ret) {
         state->mode = MEM;
         return Z_MEM_ERROR;
@@ -1337,7 +1353,7 @@ gz_headerp head;
  */
 local unsigned syncsearch(have, buf, len)
 unsigned FAR *have;
-unsigned char FAR *buf;
+const unsigned char FAR *buf;
 unsigned len;
 {
     unsigned got;
diff --git a/3rdparty/zlib/inftrees.c b/3rdparty/zlib/inftrees.c
index abcd7c45e..44d89cf24 100644
--- a/3rdparty/zlib/inftrees.c
+++ b/3rdparty/zlib/inftrees.c
@@ -1,5 +1,5 @@
 /* inftrees.c -- generate Huffman trees for efficient decoding
- * Copyright (C) 1995-2012 Mark Adler
+ * Copyright (C) 1995-2013 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -9,7 +9,7 @@
 #define MAXBITS 15
 
 const char inflate_copyright[] =
-   " inflate 1.2.7 Copyright 1995-2012 Mark Adler ";
+   " inflate 1.2.8 Copyright 1995-2013 Mark Adler ";
 /*
   If you use the zlib library in a product, an acknowledgment is welcome
   in the documentation of your product. If for some reason you cannot
@@ -62,7 +62,7 @@ unsigned short FAR *work;
         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
-        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 78, 68};
+        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 72, 78};
     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
@@ -208,8 +208,8 @@ unsigned short FAR *work;
     mask = used - 1;            /* mask for comparing low */
 
     /* check available table space */
-    if ((type == LENS && used >= ENOUGH_LENS) ||
-        (type == DISTS && used >= ENOUGH_DISTS))
+    if ((type == LENS && used > ENOUGH_LENS) ||
+        (type == DISTS && used > ENOUGH_DISTS))
         return 1;
 
     /* process all codes and make table entries */
@@ -277,8 +277,8 @@ unsigned short FAR *work;
 
             /* check for enough space */
             used += 1U << curr;
-            if ((type == LENS && used >= ENOUGH_LENS) ||
-                (type == DISTS && used >= ENOUGH_DISTS))
+            if ((type == LENS && used > ENOUGH_LENS) ||
+                (type == DISTS && used > ENOUGH_DISTS))
                 return 1;
 
             /* point entry in root table to sub-table */
diff --git a/3rdparty/zlib/trees.c b/3rdparty/zlib/trees.c
index 8c32b214b..1fd7759ef 100644
--- a/3rdparty/zlib/trees.c
+++ b/3rdparty/zlib/trees.c
@@ -146,8 +146,8 @@ local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
 local int  build_bl_tree  OF((deflate_state *s));
 local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
                               int blcodes));
-local void compress_block OF((deflate_state *s, ct_data *ltree,
-                              ct_data *dtree));
+local void compress_block OF((deflate_state *s, const ct_data *ltree,
+                              const ct_data *dtree));
 local int  detect_data_type OF((deflate_state *s));
 local unsigned bi_reverse OF((unsigned value, int length));
 local void bi_windup      OF((deflate_state *s));
@@ -972,7 +972,8 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
     } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
 #endif
         send_bits(s, (STATIC_TREES<<1)+last, 3);
-        compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
+        compress_block(s, (const ct_data *)static_ltree,
+                       (const ct_data *)static_dtree);
 #ifdef DEBUG
         s->compressed_len += 3 + s->static_len;
 #endif
@@ -980,7 +981,8 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
         send_bits(s, (DYN_TREES<<1)+last, 3);
         send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
                        max_blindex+1);
-        compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
+        compress_block(s, (const ct_data *)s->dyn_ltree,
+                       (const ct_data *)s->dyn_dtree);
 #ifdef DEBUG
         s->compressed_len += 3 + s->opt_len;
 #endif
@@ -1057,8 +1059,8 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
  */
 local void compress_block(s, ltree, dtree)
     deflate_state *s;
-    ct_data *ltree; /* literal tree */
-    ct_data *dtree; /* distance tree */
+    const ct_data *ltree; /* literal tree */
+    const ct_data *dtree; /* distance tree */
 {
     unsigned dist;      /* distance of matched string */
     int lc;             /* match length or unmatched char (if dist == 0) */
diff --git a/3rdparty/zlib/uncompr.c b/3rdparty/zlib/uncompr.c
index ad98be3a5..242e9493d 100644
--- a/3rdparty/zlib/uncompr.c
+++ b/3rdparty/zlib/uncompr.c
@@ -30,7 +30,7 @@ int ZEXPORT uncompress (dest, destLen, source, sourceLen)
     z_stream stream;
     int err;
 
-    stream.next_in = (Bytef*)source;
+    stream.next_in = (z_const Bytef *)source;
     stream.avail_in = (uInt)sourceLen;
     /* Check for source > 64K on 16-bit machine: */
     if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
diff --git a/3rdparty/zlib/zconf.h.cmakein b/3rdparty/zlib/zconf.h.cmakein
index 1e4e14045..6528fb4df 100644
--- a/3rdparty/zlib/zconf.h.cmakein
+++ b/3rdparty/zlib/zconf.h.cmakein
@@ -1,5 +1,5 @@
 /* zconf.h -- configuration of the zlib compression library
- * Copyright (C) 1995-2012 Jean-loup Gailly.
+ * Copyright (C) 1995-2013 Jean-loup Gailly.
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -23,6 +23,7 @@
 #  define _dist_code            z__dist_code
 #  define _length_code          z__length_code
 #  define _tr_align             z__tr_align
+#  define _tr_flush_bits        z__tr_flush_bits
 #  define _tr_flush_block       z__tr_flush_block
 #  define _tr_init              z__tr_init
 #  define _tr_stored_block      z__tr_stored_block
@@ -79,6 +80,7 @@
 #      define gzopen_w              z_gzopen_w
 #    endif
 #    define gzprintf              z_gzprintf
+#    define gzvprintf             z_gzvprintf
 #    define gzputc                z_gzputc
 #    define gzputs                z_gzputs
 #    define gzread                z_gzread
@@ -105,6 +107,7 @@
 #  define inflateReset          z_inflateReset
 #  define inflateReset2         z_inflateReset2
 #  define inflateSetDictionary  z_inflateSetDictionary
+#  define inflateGetDictionary  z_inflateGetDictionary
 #  define inflateSync           z_inflateSync
 #  define inflateSyncPoint      z_inflateSyncPoint
 #  define inflateUndermine      z_inflateUndermine
@@ -390,20 +393,14 @@ typedef uLong FAR uLongf;
    typedef Byte       *voidp;
 #endif
 
-/* ./configure may #define Z_U4 here */
-
 #if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
 #  include <limits.h>
 #  if (UINT_MAX == 0xffffffffUL)
 #    define Z_U4 unsigned
-#  else
-#    if (ULONG_MAX == 0xffffffffUL)
-#      define Z_U4 unsigned long
-#    else
-#      if (USHRT_MAX == 0xffffffffUL)
-#        define Z_U4 unsigned short
-#      endif
-#    endif
+#  elif (ULONG_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned long
+#  elif (USHRT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned short
 #  endif
 #endif
 
@@ -427,8 +424,16 @@ typedef uLong FAR uLongf;
 #  endif
 #endif
 
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+#    include <stdarg.h>         /* for va_list */
+#  endif
+#endif
+
 #ifdef _WIN32
-#  include <stddef.h>           /* for wchar_t */
+#  ifndef Z_SOLO
+#    include <stddef.h>         /* for wchar_t */
+#  endif
 #endif
 
 /* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
@@ -437,7 +442,7 @@ typedef uLong FAR uLongf;
  * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
  * equivalently requesting no 64-bit operations
  */
-#if defined(LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
 #  undef _LARGEFILE64_SOURCE
 #endif
 
@@ -445,7 +450,7 @@ typedef uLong FAR uLongf;
 #  define Z_HAVE_UNISTD_H
 #endif
 #ifndef Z_SOLO
-#  if defined(Z_HAVE_UNISTD_H) || defined(LARGEFILE64_SOURCE)
+#  if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE)
 #    include <unistd.h>         /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
 #    ifdef VMS
 #      include <unixio.h>       /* for off_t */
diff --git a/3rdparty/zlib/zlib.h b/3rdparty/zlib/zlib.h
index 3edf3acdb..3e0c7672a 100644
--- a/3rdparty/zlib/zlib.h
+++ b/3rdparty/zlib/zlib.h
@@ -1,7 +1,7 @@
 /* zlib.h -- interface of the 'zlib' general purpose compression library
-  version 1.2.7, May 2nd, 2012
+  version 1.2.8, April 28th, 2013
 
-  Copyright (C) 1995-2012 Jean-loup Gailly and Mark Adler
+  Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
 
   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
@@ -37,11 +37,11 @@
 extern "C" {
 #endif
 
-#define ZLIB_VERSION "1.2.7"
-#define ZLIB_VERNUM 0x1270
+#define ZLIB_VERSION "1.2.8"
+#define ZLIB_VERNUM 0x1280
 #define ZLIB_VER_MAJOR 1
 #define ZLIB_VER_MINOR 2
-#define ZLIB_VER_REVISION 7
+#define ZLIB_VER_REVISION 8
 #define ZLIB_VER_SUBREVISION 0
 
 /*
@@ -839,6 +839,21 @@ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
    inflate().
 */
 
+ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
+                                             Bytef *dictionary,
+                                             uInt  *dictLength));
+/*
+     Returns the sliding dictionary being maintained by inflate.  dictLength is
+   set to the number of bytes in the dictionary, and that many bytes are copied
+   to dictionary.  dictionary must have enough space, where 32768 bytes is
+   always enough.  If inflateGetDictionary() is called with dictionary equal to
+   Z_NULL, then only the dictionary length is returned, and nothing is copied.
+   Similary, if dictLength is Z_NULL, then it is not set.
+
+     inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+   stream state is inconsistent.
+*/
+
 ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
 /*
      Skips invalid compressed data until a possible full flush point (see above
@@ -846,7 +861,7 @@ ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
    available input is skipped.  No output is provided.
 
      inflateSync searches for a 00 00 FF FF pattern in the compressed data.
-   All full flush points have this pattern, but not all occurences of this
+   All full flush points have this pattern, but not all occurrences of this
    pattern are full flush points.
 
      inflateSync returns Z_OK if a possible full flush point has been found,
@@ -1007,7 +1022,8 @@ ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
    the version of the header file.
 */
 
-typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *));
+typedef unsigned (*in_func) OF((void FAR *,
+                                z_const unsigned char FAR * FAR *));
 typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
 
 ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
@@ -1015,11 +1031,12 @@ ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
                                     out_func out, void FAR *out_desc));
 /*
      inflateBack() does a raw inflate with a single call using a call-back
-   interface for input and output.  This is more efficient than inflate() for
-   file i/o applications in that it avoids copying between the output and the
-   sliding window by simply making the window itself the output buffer.  This
-   function trusts the application to not change the output buffer passed by
-   the output function, at least until inflateBack() returns.
+   interface for input and output.  This is potentially more efficient than
+   inflate() for file i/o applications, in that it avoids copying between the
+   output and the sliding window by simply making the window itself the output
+   buffer.  inflate() can be faster on modern CPUs when used with large
+   buffers.  inflateBack() trusts the application to not change the output
+   buffer passed by the output function, at least until inflateBack() returns.
 
      inflateBackInit() must be called first to allocate the internal state
    and to initialize the state with the user-provided window buffer.
@@ -1736,6 +1753,13 @@ ZEXTERN int            ZEXPORT deflateResetKeep OF((z_streamp));
 ZEXTERN gzFile         ZEXPORT gzopen_w OF((const wchar_t *path,
                                             const char *mode));
 #endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+ZEXTERN int            ZEXPORTVA gzvprintf Z_ARG((gzFile file,
+                                                  const char *format,
+                                                  va_list va));
+#  endif
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/3rdparty/zlib/zutil.c b/3rdparty/zlib/zutil.c
index 65e0d3b72..23d2ebef0 100644
--- a/3rdparty/zlib/zutil.c
+++ b/3rdparty/zlib/zutil.c
@@ -14,7 +14,7 @@
 struct internal_state      {int dummy;}; /* for buggy compilers */
 #endif
 
-const char * const z_errmsg[10] = {
+z_const char * const z_errmsg[10] = {
 "need dictionary",     /* Z_NEED_DICT       2  */
 "stream end",          /* Z_STREAM_END      1  */
 "",                    /* Z_OK              0  */
diff --git a/3rdparty/zlib/zutil.h b/3rdparty/zlib/zutil.h
index 4e3dcc6ae..24ab06b1c 100644
--- a/3rdparty/zlib/zutil.h
+++ b/3rdparty/zlib/zutil.h
@@ -1,5 +1,5 @@
 /* zutil.h -- internal interface and configuration of the compression library
- * Copyright (C) 1995-2012 Jean-loup Gailly.
+ * Copyright (C) 1995-2013 Jean-loup Gailly.
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -44,13 +44,13 @@ typedef unsigned short ush;
 typedef ush FAR ushf;
 typedef unsigned long  ulg;
 
-extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
+extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 /* (size given to avoid silly warnings with Visual C++) */
 
 #define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
 
 #define ERR_RETURN(strm,err) \
-  return (strm->msg = (char*)ERR_MSG(err), (err))
+  return (strm->msg = ERR_MSG(err), (err))
 /* To be used only when the state is known to be valid */
 
         /* common constants */
@@ -168,7 +168,8 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #endif
 
 /* provide prototypes for these when building zlib without LFS */
-#if !defined(_WIN32) && (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
+#if !defined(_WIN32) && \
+    (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
     ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
     ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
 #endif
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9cb3be0c3..c795ad3ff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,20 +4,14 @@
 #    From the off-tree build directory, invoke:
 #      $ cmake <PATH_TO_OPENCV_ROOT>
 #
-#
-#   - OCT-2008: Initial version <joseluisblancoc@gmail.com>
-#
 # ----------------------------------------------------------------------------
 
-set(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS true)
+include(cmake/OpenCVMinDepVersions.cmake)
 
-# --------------------------------------------------------------
-# Indicate CMake 2.7 and above that we don't want to mix relative
-#  and absolute paths in linker lib lists.
-# Run "cmake --help-policy CMP0003" for more information.
-# --------------------------------------------------------------
-if(COMMAND cmake_policy)
-  cmake_policy(SET CMP0003 NEW)
+if(CMAKE_GENERATOR MATCHES Xcode AND XCODE_VERSION VERSION_GREATER 4.3)
+  cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR)
+else()
+  cmake_minimum_required(VERSION "${MIN_VER_CMAKE}" FATAL_ERROR)
 endif()
 
 # Following block can broke build in case of cross-compilng
@@ -41,20 +35,10 @@ else(NOT CMAKE_TOOLCHAIN_FILE)
   set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install" CACHE PATH "Installation Directory")
 endif(NOT CMAKE_TOOLCHAIN_FILE)
 
-# --------------------------------------------------------------
-# Top level OpenCV project
-# --------------------------------------------------------------
-if(CMAKE_GENERATOR MATCHES Xcode AND XCODE_VERSION VERSION_GREATER 4.3)
-  cmake_minimum_required(VERSION 2.8.8)
-elseif(IOS)
-  cmake_minimum_required(VERSION 2.8.0)
-else()
-  cmake_minimum_required(VERSION 2.6.3)
-endif()
 
 # must go before the project command
 set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "Configs" FORCE)
-if(DEFINED CMAKE_BUILD_TYPE AND CMAKE_VERSION VERSION_GREATER "2.8")
+if(DEFINED CMAKE_BUILD_TYPE)
   set_property( CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${CMAKE_CONFIGURATION_TYPES} )
 endif()
 
@@ -126,10 +110,10 @@ endif()
 OCV_OPTION(WITH_1394           "Include IEEE1394 support"                    ON   IF (NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_AVFOUNDATION   "Use AVFoundation for Video I/O"              ON   IF IOS)
 OCV_OPTION(WITH_CARBON         "Use Carbon for UI instead of Cocoa"          OFF  IF APPLE )
-OCV_OPTION(WITH_CUDA           "Include NVidia Cuda Runtime support"         ON   IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS) )
-OCV_OPTION(WITH_CUFFT          "Include NVidia Cuda Fast Fourier Transform (FFT) library support"            ON  IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS) )
-OCV_OPTION(WITH_CUBLAS         "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support" OFF IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS) )
-OCV_OPTION(WITH_NVCUVID        "Include NVidia Video Decoding library support"                               OFF IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS AND NOT APPLE) )
+OCV_OPTION(WITH_CUDA           "Include NVidia Cuda Runtime support"                                         ON  IF (NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_CUFFT          "Include NVidia Cuda Fast Fourier Transform (FFT) library support"            ON  IF (NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_CUBLAS         "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support" OFF IF (NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_NVCUVID        "Include NVidia Video Decoding library support"                               OFF IF (NOT ANDROID AND NOT IOS AND NOT APPLE) )
 OCV_OPTION(WITH_EIGEN          "Include Eigen2/Eigen3 support"               ON)
 OCV_OPTION(WITH_VFW            "Include Video for Windows support"           ON   IF WIN32 )
 OCV_OPTION(WITH_FFMPEG         "Include FFMPEG support"                      ON   IF (NOT ANDROID AND NOT IOS))
@@ -200,7 +184,7 @@ OCV_OPTION(INSTALL_TO_MANGLED_PATHS "Enables mangled install paths, that help wi
 # OpenCV build options
 # ===================================================
 OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers"                                  ON   IF (NOT IOS) )
-OCV_OPTION(ENABLE_SOLUTION_FOLDERS    "Solution folder in Visual Studio or in other IDEs"        (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) IF (CMAKE_VERSION VERSION_GREATER "2.8.0") )
+OCV_OPTION(ENABLE_SOLUTION_FOLDERS    "Solution folder in Visual Studio or in other IDEs"        (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) )
 OCV_OPTION(ENABLE_PROFILING           "Enable profiling in the GCC compiler (Add flags: -g -pg)" OFF  IF CMAKE_COMPILER_IS_GNUCXX )
 OCV_OPTION(ENABLE_OMIT_FRAME_POINTER  "Enable -fomit-frame-pointer for GCC"                      ON   IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
 OCV_OPTION(ENABLE_POWERPC             "Enable PowerPC for GCC"                                   ON   IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
@@ -214,15 +198,7 @@ OCV_OPTION(ENABLE_SSE42               "Enable SSE4.2 instructions"
 OCV_OPTION(ENABLE_AVX                 "Enable AVX instructions"                                  OFF  IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_NOISY_WARNINGS      "Show all warnings even if they are too noisy"             OFF )
 OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors"                                 OFF )
-
-
-# uncategorized options
-# ===================================================
-OCV_OPTION(CMAKE_VERBOSE "Verbose mode" OFF )
-
-# backward compatibility
-# ===================================================
-include(cmake/OpenCVLegacyOptions.cmake OPTIONAL)
+OCV_OPTION(ENABLE_WINRT_MODE          "Build with Windows Runtime support"                       OFF  IF WIN32 )
 
 
 # ----------------------------------------------------------------------------
@@ -289,10 +265,6 @@ if(DEFINED CMAKE_DEBUG_POSTFIX)
   set(OPENCV_DEBUG_POSTFIX "${CMAKE_DEBUG_POSTFIX}")
 endif()
 
-if(CMAKE_VERBOSE)
-  set(CMAKE_VERBOSE_MAKEFILE 1)
-endif()
-
 
 # ----------------------------------------------------------------------------
 #  Path for build/platform -specific headers
@@ -308,21 +280,10 @@ set(OPENCV_EXTRA_MODULES_PATH "" CACHE PATH "Where to look for additional OpenCV
 # ----------------------------------------------------------------------------
 #  Autodetect if we are in a GIT repository
 # ----------------------------------------------------------------------------
+find_host_package(Git QUIET)
 
-# don't use FindGit because it requires CMake 2.8.2
-set(git_names git eg) # eg = easy git
-# Prefer .cmd variants on Windows unless running in a Makefile in the MSYS shell
-if(CMAKE_HOST_WIN32)
-  if(NOT CMAKE_GENERATOR MATCHES "MSYS")
-    set(git_names git.cmd git eg.cmd eg)
-  endif()
-endif()
-
-find_host_program(GIT_EXECUTABLE NAMES ${git_names} PATH_SUFFIXES Git/cmd Git/bin DOC "git command line client")
-mark_as_advanced(GIT_EXECUTABLE)
-
-if(GIT_EXECUTABLE)
-  execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --always --dirty --match "2.[0-9].[0-9]*"
+if(GIT_FOUND)
+  execute_process(COMMAND "${GIT_EXECUTABLE}" describe --tags --always --dirty --match "2.[0-9].[0-9]*"
     WORKING_DIRECTORY "${OpenCV_SOURCE_DIR}"
     OUTPUT_VARIABLE OPENCV_VCSVERSION
     RESULT_VARIABLE GIT_RESULT
@@ -366,14 +327,11 @@ endif(WIN32 AND NOT MINGW)
 #       CHECK FOR SYSTEM LIBRARIES, OPTIONS, ETC..
 # ----------------------------------------------------------------------------
 if(UNIX)
-  include(cmake/OpenCVFindPkgConfig.cmake OPTIONAL)
+  find_package(PkgConfig QUIET)
   include(CheckFunctionExists)
   include(CheckIncludeFile)
 
   if(NOT APPLE)
-    CHECK_INCLUDE_FILE(alloca.h HAVE_ALLOCA_H)
-    CHECK_FUNCTION_EXISTS(alloca HAVE_ALLOCA)
-    CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
     CHECK_INCLUDE_FILE(pthread.h HAVE_LIBPTHREAD)
     if(ANDROID)
       set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m log)
@@ -383,7 +341,7 @@ if(UNIX)
       set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m pthread rt)
     endif()
   else()
-    add_definitions(-DHAVE_ALLOCA -DHAVE_ALLOCA_H -DHAVE_LIBPTHREAD -DHAVE_UNISTD_H)
+    set(HAVE_LIBPTHREAD YES)
   endif()
 endif()
 
@@ -502,6 +460,8 @@ include(cmake/OpenCVGenAndroidMK.cmake)
 # Generate OpenCVСonfig.cmake and OpenCVConfig-version.cmake for cmake projects
 include(cmake/OpenCVGenConfig.cmake)
 
+# Generate Info.plist for the IOS framework
+include(cmake/OpenCVGenInfoPlist.cmake)
 
 # ----------------------------------------------------------------------------
 # Summary:
@@ -608,6 +568,16 @@ if(ANDROID)
   status("    Android examples:"    BUILD_ANDROID_EXAMPLES AND CAN_BUILD_ANDROID_PROJECTS         THEN YES ELSE NO)
 endif()
 
+# ================== Windows RT features ==================
+if(WIN32)
+status("")
+    status("  Windows RT support:" HAVE_WINRT THEN YES ELSE NO)
+    if (ENABLE_WINRT_MODE)
+      status("    Windows SDK v8.0:" ${WINDOWS_SDK_PATH})
+      status("    Visual Studio 2012:" ${VISUAL_STUDIO_PATH})
+    endif()
+endif(WIN32)
+
 # ========================== GUI ==========================
 status("")
 status("  GUI: ")
@@ -644,7 +614,7 @@ status("    OpenGL support:" HAVE_OPENGL THEN "YES (${OPENGL_LIBRARIES})" ELSE N
 # ========================== MEDIA IO ==========================
 status("")
 status("  Media I/O: ")
-status("    ZLib:"         BUILD_ZLIB    THEN "build (ver ${ZLIB_VERSION_STRING})"               ELSE "${ZLIB_LIBRARY} (ver ${ZLIB_VERSION_STRING})")
+status("    ZLib:"         BUILD_ZLIB    THEN "build (ver ${ZLIB_VERSION_STRING})"               ELSE "${ZLIB_LIBRARIES} (ver ${ZLIB_VERSION_STRING})")
 
 if(WITH_JPEG)
   status("    JPEG:"       JPEG_FOUND    THEN "${JPEG_LIBRARY} (ver ${JPEG_LIB_VERSION})"        ELSE "build (ver ${JPEG_LIB_VERSION})")
@@ -749,8 +719,8 @@ if(DEFINED WITH_GIGEAPI)
 endif(DEFINED WITH_GIGEAPI)
 
 if(DEFINED WITH_QUICKTIME)
-  status("    QuickTime:"      WITH_QUICKTIME      THEN YES                                        ELSE NO)
-  status("    QTKit:"          WITH_QUICKTIME      THEN NO                                         ELSE YES)
+  status("    QuickTime:"      HAVE_QUICKTIME      THEN YES                                        ELSE NO)
+  status("    QTKit:"          HAVE_QTKIT          THEN YES                                        ELSE NO)
 endif(DEFINED WITH_QUICKTIME)
 
 if(DEFINED WITH_UNICAP)
@@ -839,15 +809,15 @@ endif()
 # ========================== python ==========================
 status("")
 status("  Python:")
-status("    Interpreter:"     PYTHON_EXECUTABLE   THEN "${PYTHON_EXECUTABLE} (ver ${PYTHON_VERSION_FULL})"         ELSE NO)
+status("    Interpreter:"     PYTHONINTERP_FOUND  THEN "${PYTHON_EXECUTABLE} (ver ${PYTHON_VERSION_STRING})"       ELSE NO)
 if(BUILD_opencv_python)
   if(PYTHONLIBS_VERSION_STRING)
     status("    Libraries:"   HAVE_opencv_python  THEN  "${PYTHON_LIBRARIES} (ver ${PYTHONLIBS_VERSION_STRING})"   ELSE NO)
   else()
-    status("    Libraries:"   HAVE_opencv_python  THEN  ${PYTHON_LIBRARIES}                                        ELSE NO)
+    status("    Libraries:"   HAVE_opencv_python  THEN  "${PYTHON_LIBRARIES}"                                      ELSE NO)
   endif()
-  status("    numpy:"         PYTHON_NUMPY_INCLUDE_DIR THEN "${PYTHON_NUMPY_INCLUDE_DIR} (ver ${PYTHON_NUMPY_VERSION})" ELSE "NO (Python wrappers can not be generated)")
-  status("    packages path:" PYTHON_EXECUTABLE        THEN "${PYTHON_PACKAGES_PATH}"                                   ELSE "-")
+  status("    numpy:"         PYTHON_NUMPY_INCLUDE_DIRS THEN "${PYTHON_NUMPY_INCLUDE_DIRS} (ver ${PYTHON_NUMPY_VERSION})" ELSE "NO (Python wrappers can not be generated)")
+  status("    packages path:" PYTHON_EXECUTABLE         THEN "${PYTHON_PACKAGES_PATH}"                                    ELSE "-")
 endif()
 
 # ========================== java ==========================
@@ -870,6 +840,7 @@ if(BUILD_DOCS)
   endif()
   status("    Sphinx:"              HAVE_SPHINX              THEN "${SPHINX_BUILD} (ver ${SPHINX_VERSION})" ELSE NO)
   status("    PdfLaTeX compiler:"   PDFLATEX_COMPILER        THEN "${PDFLATEX_COMPILER}" ELSE NO)
+  status("    PlantUML:"            PLANTUML                 THEN "${PLANTUML}" ELSE NO)
 endif()
 
 # ========================== samples and tests ==========================
@@ -895,4 +866,3 @@ ocv_finalize_status()
 if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
   message(WARNING "The source directory is the same as binary directory. \"make clean\" may damage the source tree")
 endif()
-
diff --git a/README b/README
deleted file mode 100644
index 0799dff89..000000000
--- a/README
+++ /dev/null
@@ -1,17 +0,0 @@
-OpenCV: open source computer vision library
-
-Homepage:    http://opencv.org
-Online docs: http://docs.opencv.org
-Q&A forum:   http://answers.opencv.org
-Dev zone:    http://code.opencv.org
-
-Please read before starting work on a pull request:
-  http://code.opencv.org/projects/opencv/wiki/How_to_contribute
-
-Summary of guidelines:
-
-* One pull request per issue;
-* Choose the right base branch;
-* Include tests and documentation;
-* Clean up "oops" commits before submitting;
-* Follow the coding style guide.
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..403f118ee
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
+### OpenCV: Open Source Computer Vision Library
+
+#### Resources
+
+* Homepage: <http://opencv.org>
+* Docs: <http://docs.opencv.org>
+* Q&A forum: <http://answers.opencv.org>
+* Issue tracking: <http://code.opencv.org>
+
+#### Contributing
+
+Please read before starting work on a pull request: <http://code.opencv.org/projects/opencv/wiki/How_to_contribute>
+
+Summary of guidelines:
+
+* One pull request per issue;
+* Choose the right base branch;
+* Include tests and documentation;
+* Clean up "oops" commits before submitting;
+* Follow the coding style guide.
+
+[![Donate OpenCV project](http://opencv.org/wp-content/uploads/2013/07/gittip1.png)](https://www.gittip.com/OpenCV/)
+[![Donate OpenCV project](http://opencv.org/wp-content/uploads/2013/07/paypal-donate-button.png)](https://www.paypal.com/cgi-bin/webscr?item_name=Donation+to+OpenCV&cmd=_donations&business=accountant%40opencv.org)
\ No newline at end of file
diff --git a/android/android.toolchain.cmake b/android/android.toolchain.cmake
deleted file mode 100644
index 9db174a13..000000000
--- a/android/android.toolchain.cmake
+++ /dev/null
@@ -1,1747 +0,0 @@
-message(STATUS "Android toolchain was moved to platfroms/android!")
-message(STATUS "This file is depricated and will be removed!")
-
-# Copyright (c) 2010-2011, Ethan Rublee
-# Copyright (c) 2011-2013, Andrey Kamaev
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1.  Redistributions of source code must retain the above copyright notice,
-#     this list of conditions and the following disclaimer.
-#
-# 2.  Redistributions in binary form must reproduce the above copyright notice,
-#     this list of conditions and the following disclaimer in the documentation
-#     and/or other materials provided with the distribution.
-#
-# 3.  The name of the copyright holders may be used to endorse or promote
-#     products derived from this software without specific prior written
-#     permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-# ------------------------------------------------------------------------------
-#  Android CMake toolchain file, for use with the Android NDK r5-r8
-#  Requires cmake 2.6.3 or newer (2.8.5 or newer is recommended).
-#  See home page: https://github.com/taka-no-me/android-cmake
-#
-#  The file is mantained by the OpenCV project. The latest version can be get at
-#  http://code.opencv.org/projects/opencv/repository/revisions/master/changes/android/android.toolchain.cmake
-#
-#  Usage Linux:
-#   $ export ANDROID_NDK=/absolute/path/to/the/android-ndk
-#   $ mkdir build && cd build
-#   $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake ..
-#   $ make -j8
-#
-#  Usage Linux (using standalone toolchain):
-#   $ export ANDROID_STANDALONE_TOOLCHAIN=/absolute/path/to/android-toolchain
-#   $ mkdir build && cd build
-#   $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake ..
-#   $ make -j8
-#
-#  Usage Windows:
-#     You need native port of make to build your project.
-#     Android NDK r7 (or newer) already has make.exe on board.
-#     For older NDK you have to install it separately.
-#     For example, this one: http://gnuwin32.sourceforge.net/packages/make.htm
-#
-#   $ SET ANDROID_NDK=C:\absolute\path\to\the\android-ndk
-#   $ mkdir build && cd build
-#   $ cmake.exe -G"MinGW Makefiles"
-#       -DCMAKE_TOOLCHAIN_FILE=path\to\the\android.toolchain.cmake
-#       -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%\prebuilt\windows\bin\make.exe" ..
-#   $ cmake.exe --build .
-#
-#
-#  Options (can be set as cmake parameters: -D<option_name>=<value>):
-#    ANDROID_NDK=/opt/android-ndk - path to the NDK root.
-#      Can be set as environment variable. Can be set only at first cmake run.
-#
-#    ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain - path to the
-#      standalone toolchain. This option is not used if full NDK is found
-#      (ignored if ANDROID_NDK is set).
-#      Can be set as environment variable. Can be set only at first cmake run.
-#
-#    ANDROID_ABI=armeabi-v7a - specifies the target Application Binary
-#      Interface (ABI). This option nearly matches to the APP_ABI variable
-#      used by ndk-build tool from Android NDK.
-#
-#      Possible targets are:
-#        "armeabi" - matches to the NDK ABI with the same name.
-#           See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
-#        "armeabi-v7a" - matches to the NDK ABI with the same name.
-#           See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
-#        "armeabi-v7a with NEON" - same as armeabi-v7a, but
-#            sets NEON as floating-point unit
-#        "armeabi-v7a with VFPV3" - same as armeabi-v7a, but
-#            sets VFPV3 as floating-point unit (has 32 registers instead of 16).
-#        "armeabi-v6 with VFP" - tuned for ARMv6 processors having VFP.
-#        "x86" - matches to the NDK ABI with the same name.
-#            See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
-#        "mips" - matches to the NDK ABI with the same name
-#            (It is not tested on real devices by the authos of this toolchain)
-#            See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
-#
-#    ANDROID_NATIVE_API_LEVEL=android-8 - level of Android API compile for.
-#      Option is read-only when standalone toolchain is used.
-#
-#    ANDROID_TOOLCHAIN_NAME=arm-linux-androideabi-4.6 - the name of compiler
-#      toolchain to be used. The list of possible values depends on the NDK
-#      version. For NDK r8c the possible values are:
-#
-#        * arm-linux-androideabi-4.4.3
-#        * arm-linux-androideabi-4.6
-#        * arm-linux-androideabi-clang3.1
-#        * mipsel-linux-android-4.4.3
-#        * mipsel-linux-android-4.6
-#        * mipsel-linux-android-clang3.1
-#        * x86-4.4.3
-#        * x86-4.6
-#        * x86-clang3.1
-#
-#    ANDROID_FORCE_ARM_BUILD=OFF - set ON to generate 32-bit ARM instructions
-#      instead of Thumb. Is not available for "x86" (inapplicable) and
-#      "armeabi-v6 with VFP" (is forced to be ON) ABIs.
-#
-#    ANDROID_NO_UNDEFINED=ON - set ON to show all undefined symbols as linker
-#      errors even if they are not used.
-#
-#    ANDROID_SO_UNDEFINED=OFF - set ON to allow undefined symbols in shared
-#      libraries. Automatically turned for NDK r5x and r6x due to GLESv2
-#      problems.
-#
-#    LIBRARY_OUTPUT_PATH_ROOT=${CMAKE_SOURCE_DIR} - where to output binary
-#      files. See additional details below.
-#
-#    ANDROID_SET_OBSOLETE_VARIABLES=ON - if set, then toolchain defines some
-#      obsolete variables which were used by previous versions of this file for
-#      backward compatibility.
-#
-#    ANDROID_STL=gnustl_static - specify the runtime to use.
-#
-#      Possible values are:
-#        none           -> Do not configure the runtime.
-#        system         -> Use the default minimal system C++ runtime library.
-#                          Implies -fno-rtti -fno-exceptions.
-#                          Is not available for standalone toolchain.
-#        system_re      -> Use the default minimal system C++ runtime library.
-#                          Implies -frtti -fexceptions.
-#                          Is not available for standalone toolchain.
-#        gabi++_static  -> Use the GAbi++ runtime as a static library.
-#                          Implies -frtti -fno-exceptions.
-#                          Available for NDK r7 and newer.
-#                          Is not available for standalone toolchain.
-#        gabi++_shared  -> Use the GAbi++ runtime as a shared library.
-#                          Implies -frtti -fno-exceptions.
-#                          Available for NDK r7 and newer.
-#                          Is not available for standalone toolchain.
-#        stlport_static -> Use the STLport runtime as a static library.
-#                          Implies -fno-rtti -fno-exceptions for NDK before r7.
-#                          Implies -frtti -fno-exceptions for NDK r7 and newer.
-#                          Is not available for standalone toolchain.
-#        stlport_shared -> Use the STLport runtime as a shared library.
-#                          Implies -fno-rtti -fno-exceptions for NDK before r7.
-#                          Implies -frtti -fno-exceptions for NDK r7 and newer.
-#                          Is not available for standalone toolchain.
-#        gnustl_static  -> Use the GNU STL as a static library.
-#                          Implies -frtti -fexceptions.
-#        gnustl_shared  -> Use the GNU STL as a shared library.
-#                          Implies -frtti -fno-exceptions.
-#                          Available for NDK r7b and newer.
-#                          Silently degrades to gnustl_static if not available.
-#
-#    ANDROID_STL_FORCE_FEATURES=ON - turn rtti and exceptions support based on
-#      chosen runtime. If disabled, then the user is responsible for settings
-#      these options.
-#
-#  What?:
-#    android-cmake toolchain searches for NDK/toolchain in the following order:
-#      ANDROID_NDK - cmake parameter
-#      ANDROID_NDK - environment variable
-#      ANDROID_STANDALONE_TOOLCHAIN - cmake parameter
-#      ANDROID_STANDALONE_TOOLCHAIN - environment variable
-#      ANDROID_NDK - default locations
-#      ANDROID_STANDALONE_TOOLCHAIN - default locations
-#
-#    Make sure to do the following in your scripts:
-#      SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${my_cxx_flags}" )
-#      SET( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${my_cxx_flags}" )
-#    The flags will be prepopulated with critical flags, so don't loose them.
-#    Also be aware that toolchain also sets configuration-specific compiler
-#    flags and linker flags.
-#
-#    ANDROID and BUILD_ANDROID will be set to true, you may test any of these
-#    variables to make necessary Android-specific configuration changes.
-#
-#    Also ARMEABI or ARMEABI_V7A or X86 or MIPS will be set true, mutually
-#    exclusive. NEON option will be set true if VFP is set to NEON.
-#
-#    LIBRARY_OUTPUT_PATH_ROOT should be set in cache to determine where Android
-#    libraries will be installed.
-#    Default is ${CMAKE_SOURCE_DIR}, and the android libs will always be
-#    under the ${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}
-#    (depending on the target ABI). This is convenient for Android packaging.
-#
-#  Change Log:
-#   - initial version December 2010
-#   - April 2011
-#     [+] added possibility to build with NDK (without standalone toolchain)
-#     [+] support cross-compilation on Windows (native, no cygwin support)
-#     [+] added compiler option to force "char" type to be signed
-#     [+] added toolchain option to compile to 32-bit ARM instructions
-#     [+] added toolchain option to disable SWIG search
-#     [+] added platform "armeabi-v7a with VFPV3"
-#     [~] ARM_TARGETS renamed to ARM_TARGET
-#     [+] EXECUTABLE_OUTPUT_PATH is set by toolchain (required on Windows)
-#     [~] Fixed bug with ANDROID_API_LEVEL variable
-#     [~] turn off SWIG search if it is not found first time
-#   - May 2011
-#     [~] ANDROID_LEVEL is renamed to ANDROID_API_LEVEL
-#     [+] ANDROID_API_LEVEL is detected by toolchain if not specified
-#     [~] added guard to prevent changing of output directories on the first
-#         cmake pass
-#     [~] toolchain exits with error if ARM_TARGET is not recognized
-#   - June 2011
-#     [~] default NDK path is updated for version r5c
-#     [+] variable CMAKE_SYSTEM_PROCESSOR is set based on ARM_TARGET
-#     [~] toolchain install directory is added to linker paths
-#     [-] removed SWIG-related stuff from toolchain
-#     [+] added macro find_host_package, find_host_program to search
-#         packages/programs on the host system
-#     [~] fixed path to STL library
-#   - July 2011
-#     [~] fixed options caching
-#     [~] search for all supported NDK versions
-#     [~] allowed spaces in NDK path
-#   - September 2011
-#     [~] updated for NDK r6b
-#   - November 2011
-#     [*] rewritten for NDK r7
-#     [+] x86 toolchain support (experimental)
-#     [+] added "armeabi-v6 with VFP" ABI for ARMv6 processors.
-#     [~] improved compiler and linker flags management
-#     [+] support different build flags for Release and Debug configurations
-#     [~] by default compiler flags the same as used by ndk-build (but only
-#         where reasonable)
-#     [~] ANDROID_NDK_TOOLCHAIN_ROOT is splitted to ANDROID_STANDALONE_TOOLCHAIN
-#         and ANDROID_TOOLCHAIN_ROOT
-#     [~] ARM_TARGET is renamed to ANDROID_ABI
-#     [~] ARMEABI_NDK_NAME is renamed to ANDROID_NDK_ABI_NAME
-#     [~] ANDROID_API_LEVEL is renamed to ANDROID_NATIVE_API_LEVEL
-#   - January 2012
-#     [+] added stlport_static support (experimental)
-#     [+] added special check for cygwin
-#     [+] filtered out hidden files (starting with .) while globbing inside NDK
-#     [+] automatically applied GLESv2 linkage fix for NDK revisions 5-6
-#     [+] added ANDROID_GET_ABI_RAWNAME to get NDK ABI names by CMake flags
-#   - February 2012
-#     [+] updated for NDK r7b
-#     [~] fixed cmake try_compile() command
-#     [~] Fix for missing install_name_tool on OS X
-#   - March 2012
-#     [~] fixed incorrect C compiler flags
-#     [~] fixed CMAKE_SYSTEM_PROCESSOR change on ANDROID_ABI change
-#     [+] improved toolchain loading speed
-#     [+] added assembler language support (.S)
-#     [+] allowed preset search paths and extra search suffixes
-#   - April 2012
-#     [+] updated for NDK r7c
-#     [~] fixed most of problems with compiler/linker flags and caching
-#     [+] added option ANDROID_FUNCTION_LEVEL_LINKING
-#   - May 2012
-#     [+] updated for NDK r8
-#     [+] added mips architecture support
-#   - August 2012
-#     [+] updated for NDK r8b
-#     [~] all intermediate files generated by toolchain are moved to CMakeFiles
-#     [~] libstdc++ and libsupc are removed from explicit link libraries
-#     [+] added CCache support (via NDK_CCACHE environment or cmake variable)
-#     [+] added gold linker support for NDK r8b
-#     [~] fixed mips linker flags for NDK r8b
-#   - September 2012
-#     [+] added NDK release name detection (see ANDROID_NDK_RELEASE)
-#     [+] added support for all C++ runtimes from NDK
-#         (system, gabi++, stlport, gnustl)
-#     [+] improved warnings on known issues of NDKs
-#     [~] use gold linker as default if available (NDK r8b)
-#     [~] globally turned off rpath
-#     [~] compiler options are aligned with NDK r8b
-#   - October 2012
-#     [~] fixed C++ linking: explicitly link with math library (OpenCV #2426)
-#   - November 2012
-#     [+] updated for NDK r8c
-#     [+] added support for clang compiler
-#   - December 2012
-#     [+] suppress warning about unused CMAKE_TOOLCHAIN_FILE variable
-#     [+] adjust API level to closest compatible as NDK does
-#     [~] fixed ccache full path search
-#     [+] updated for NDK r8d
-#     [~] compiler options are aligned with NDK r8d
-#   - March 2013
-#     [+] updated for NDK r8e (x86 version)
-#     [+] support x86_64 version of NDK
-#   - April 2013
-#     [+] support non-release NDK layouts (from Linaro git and Android git)
-#     [~] automatically detect if explicit link to crtbegin_*.o is needed
-# ------------------------------------------------------------------------------
-
-cmake_minimum_required( VERSION 2.6.3 )
-
-if( DEFINED CMAKE_CROSSCOMPILING )
- # subsequent toolchain loading is not really needed
- return()
-endif()
-
-if( CMAKE_TOOLCHAIN_FILE )
- # touch toolchain variable only to suppress "unused variable" warning
-endif()
-
-get_property( _CMAKE_IN_TRY_COMPILE GLOBAL PROPERTY IN_TRY_COMPILE )
-if( _CMAKE_IN_TRY_COMPILE )
- include( "${CMAKE_CURRENT_SOURCE_DIR}/../android.toolchain.config.cmake" OPTIONAL )
-endif()
-
-# this one is important
-set( CMAKE_SYSTEM_NAME Linux )
-# this one not so much
-set( CMAKE_SYSTEM_VERSION 1 )
-
-# rpath makes low sence for Android
-set( CMAKE_SKIP_RPATH TRUE CACHE BOOL "If set, runtime paths are not added when using shared libraries." )
-
-set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" )
-if(NOT DEFINED ANDROID_NDK_SEARCH_PATHS)
- if( CMAKE_HOST_WIN32 )
-  file( TO_CMAKE_PATH "$ENV{PROGRAMFILES}" ANDROID_NDK_SEARCH_PATHS )
-  set( ANDROID_NDK_SEARCH_PATHS "${ANDROID_NDK_SEARCH_PATHS}/android-ndk" "$ENV{SystemDrive}/NVPACK/android-ndk" )
- else()
-  file( TO_CMAKE_PATH "$ENV{HOME}" ANDROID_NDK_SEARCH_PATHS )
-  set( ANDROID_NDK_SEARCH_PATHS /opt/android-ndk "${ANDROID_NDK_SEARCH_PATHS}/NVPACK/android-ndk" )
- endif()
-endif()
-if(NOT DEFINED ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH)
- set( ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH /opt/android-toolchain )
-endif()
-
-set( ANDROID_SUPPORTED_ABIS_arm "armeabi-v7a;armeabi;armeabi-v7a with NEON;armeabi-v7a with VFPV3;armeabi-v6 with VFP" )
-set( ANDROID_SUPPORTED_ABIS_x86 "x86" )
-set( ANDROID_SUPPORTED_ABIS_mipsel "mips" )
-
-set( ANDROID_DEFAULT_NDK_API_LEVEL 8 )
-set( ANDROID_DEFAULT_NDK_API_LEVEL_x86 9 )
-set( ANDROID_DEFAULT_NDK_API_LEVEL_mips 9 )
-
-
-macro( __LIST_FILTER listvar regex )
- if( ${listvar} )
-  foreach( __val ${${listvar}} )
-   if( __val MATCHES "${regex}" )
-    list( REMOVE_ITEM ${listvar} "${__val}" )
-   endif()
-  endforeach()
- endif()
-endmacro()
-
-macro( __INIT_VARIABLE var_name )
- set( __test_path 0 )
- foreach( __var ${ARGN} )
-  if( __var STREQUAL "PATH" )
-   set( __test_path 1 )
-   break()
-  endif()
- endforeach()
- if( __test_path AND NOT EXISTS "${${var_name}}" )
-  unset( ${var_name} CACHE )
- endif()
- if( "${${var_name}}" STREQUAL "" )
-  set( __values 0 )
-  foreach( __var ${ARGN} )
-   if( __var STREQUAL "VALUES" )
-    set( __values 1 )
-   elseif( NOT __var STREQUAL "PATH" )
-    set( __obsolete 0 )
-    if( __var MATCHES "^OBSOLETE_.*$" )
-     string( REPLACE "OBSOLETE_" "" __var "${__var}" )
-     set( __obsolete 1 )
-    endif()
-    if( __var MATCHES "^ENV_.*$" )
-     string( REPLACE "ENV_" "" __var "${__var}" )
-     set( __value "$ENV{${__var}}" )
-    elseif( DEFINED ${__var} )
-     set( __value "${${__var}}" )
-    else()
-     if( __values )
-      set( __value "${__var}" )
-     else()
-      set( __value "" )
-     endif()
-    endif()
-    if( NOT "${__value}" STREQUAL "" )
-     if( __test_path )
-      if( EXISTS "${__value}" )
-       file( TO_CMAKE_PATH "${__value}" ${var_name} )
-       if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE )
-        message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." )
-       endif()
-       break()
-      endif()
-     else()
-      set( ${var_name} "${__value}" )
-       if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE )
-        message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." )
-       endif()
-      break()
-     endif()
-    endif()
-   endif()
-  endforeach()
-  unset( __value )
-  unset( __values )
-  unset( __obsolete )
- elseif( __test_path )
-  file( TO_CMAKE_PATH "${${var_name}}" ${var_name} )
- endif()
- unset( __test_path )
-endmacro()
-
-macro( __DETECT_NATIVE_API_LEVEL _var _path )
- SET( __ndkApiLevelRegex "^[\t ]*#define[\t ]+__ANDROID_API__[\t ]+([0-9]+)[\t ]*$" )
- FILE( STRINGS ${_path} __apiFileContent REGEX "${__ndkApiLevelRegex}" )
- if( NOT __apiFileContent )
-  message( SEND_ERROR "Could not get Android native API level. Probably you have specified invalid level value, or your copy of NDK/toolchain is broken." )
- endif()
- string( REGEX REPLACE "${__ndkApiLevelRegex}" "\\1" ${_var} "${__apiFileContent}" )
- unset( __apiFileContent )
- unset( __ndkApiLevelRegex )
-endmacro()
-
-macro( __DETECT_TOOLCHAIN_MACHINE_NAME _var _root )
- if( EXISTS "${_root}" )
-  file( GLOB __gccExePath RELATIVE "${_root}/bin/" "${_root}/bin/*-gcc${TOOL_OS_SUFFIX}" )
-  __LIST_FILTER( __gccExePath "^[.].*" )
-  list( LENGTH __gccExePath __gccExePathsCount )
-  if( NOT __gccExePathsCount EQUAL 1  AND NOT _CMAKE_IN_TRY_COMPILE )
-   message( WARNING "Could not determine machine name for compiler from ${_root}" )
-   set( ${_var} "" )
-  else()
-   get_filename_component( __gccExeName "${__gccExePath}" NAME_WE )
-   string( REPLACE "-gcc" "" ${_var} "${__gccExeName}" )
-  endif()
-  unset( __gccExePath )
-  unset( __gccExePathsCount )
-  unset( __gccExeName )
- else()
-  set( ${_var} "" )
- endif()
-endmacro()
-
-
-# fight against cygwin
-set( ANDROID_FORBID_SYGWIN TRUE CACHE BOOL "Prevent cmake from working under cygwin and using cygwin tools")
-mark_as_advanced( ANDROID_FORBID_SYGWIN )
-if( ANDROID_FORBID_SYGWIN )
- if( CYGWIN )
-  message( FATAL_ERROR "Android NDK and android-cmake toolchain are not welcome Cygwin. It is unlikely that this cmake toolchain will work under cygwin. But if you want to try then you can set cmake variable ANDROID_FORBID_SYGWIN to FALSE and rerun cmake." )
- endif()
-
- if( CMAKE_HOST_WIN32 )
-  # remove cygwin from PATH
-  set( __new_path "$ENV{PATH}")
-  __LIST_FILTER( __new_path "cygwin" )
-  set(ENV{PATH} "${__new_path}")
-  unset(__new_path)
- endif()
-endif()
-
-
-# detect current host platform
-if( NOT DEFINED ANDROID_NDK_HOST_X64 AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64|AMD64")
- set( ANDROID_NDK_HOST_X64 1 CACHE BOOL "Try to use 64-bit compiler toolchain" )
- mark_as_advanced( ANDROID_NDK_HOST_X64 )
-endif()
-
-set( TOOL_OS_SUFFIX "" )
-if( CMAKE_HOST_APPLE )
- set( ANDROID_NDK_HOST_SYSTEM_NAME "darwin-x86_64" )
- set( ANDROID_NDK_HOST_SYSTEM_NAME2 "darwin-x86" )
-elseif( CMAKE_HOST_WIN32 )
- set( ANDROID_NDK_HOST_SYSTEM_NAME "windows-x86_64" )
- set( ANDROID_NDK_HOST_SYSTEM_NAME2 "windows" )
- set( TOOL_OS_SUFFIX ".exe" )
-elseif( CMAKE_HOST_UNIX )
- set( ANDROID_NDK_HOST_SYSTEM_NAME "linux-x86_64" )
- set( ANDROID_NDK_HOST_SYSTEM_NAME2 "linux-x86" )
-else()
- message( FATAL_ERROR "Cross-compilation on your platform is not supported by this cmake toolchain" )
-endif()
-
-if( NOT ANDROID_NDK_HOST_X64 )
- set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
-endif()
-
-# see if we have path to Android NDK
-__INIT_VARIABLE( ANDROID_NDK PATH ENV_ANDROID_NDK )
-if( NOT ANDROID_NDK )
- # see if we have path to Android standalone toolchain
- __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ENV_ANDROID_STANDALONE_TOOLCHAIN OBSOLETE_ANDROID_NDK_TOOLCHAIN_ROOT OBSOLETE_ENV_ANDROID_NDK_TOOLCHAIN_ROOT )
-
- if( NOT ANDROID_STANDALONE_TOOLCHAIN )
-  #try to find Android NDK in one of the the default locations
-  set( __ndkSearchPaths )
-  foreach( __ndkSearchPath ${ANDROID_NDK_SEARCH_PATHS} )
-   foreach( suffix ${ANDROID_SUPPORTED_NDK_VERSIONS} )
-    list( APPEND __ndkSearchPaths "${__ndkSearchPath}${suffix}" )
-   endforeach()
-  endforeach()
-  __INIT_VARIABLE( ANDROID_NDK PATH VALUES ${__ndkSearchPaths} )
-  unset( __ndkSearchPaths )
-
-  if( ANDROID_NDK )
-   message( STATUS "Using default path for Android NDK: ${ANDROID_NDK}" )
-   message( STATUS "  If you prefer to use a different location, please define a cmake or environment variable: ANDROID_NDK" )
-  else()
-   #try to find Android standalone toolchain in one of the the default locations
-   __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH )
-
-   if( ANDROID_STANDALONE_TOOLCHAIN )
-    message( STATUS "Using default path for standalone toolchain ${ANDROID_STANDALONE_TOOLCHAIN}" )
-    message( STATUS "  If you prefer to use a different location, please define the variable: ANDROID_STANDALONE_TOOLCHAIN" )
-   endif( ANDROID_STANDALONE_TOOLCHAIN )
-  endif( ANDROID_NDK )
- endif( NOT ANDROID_STANDALONE_TOOLCHAIN )
-endif( NOT ANDROID_NDK )
-
-# remember found paths
-if( ANDROID_NDK )
- get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE )
- set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE )
- set( BUILD_WITH_ANDROID_NDK True )
- if( EXISTS "${ANDROID_NDK}/RELEASE.TXT" )
-  file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? )
-  string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" )
- else()
-  set( ANDROID_NDK_RELEASE "r1x" )
-  set( ANDROID_NDK_RELEASE_FULL "unreleased" )
- endif()
-elseif( ANDROID_STANDALONE_TOOLCHAIN )
- get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE )
- # try to detect change
- if( CMAKE_AR )
-  string( LENGTH "${ANDROID_STANDALONE_TOOLCHAIN}" __length )
-  string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidStandaloneToolchainPreviousPath )
-  if( NOT __androidStandaloneToolchainPreviousPath STREQUAL ANDROID_STANDALONE_TOOLCHAIN )
-   message( FATAL_ERROR "It is not possible to change path to the Android standalone toolchain on subsequent run." )
-  endif()
-  unset( __androidStandaloneToolchainPreviousPath )
-  unset( __length )
- endif()
- set( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" CACHE INTERNAL "Path of the Android standalone toolchain" FORCE )
- set( BUILD_WITH_STANDALONE_TOOLCHAIN True )
-else()
- list(GET ANDROID_NDK_SEARCH_PATHS 0 ANDROID_NDK_SEARCH_PATH)
- message( FATAL_ERROR "Could not find neither Android NDK nor Android standalone toolchain.
-    You should either set an environment variable:
-      export ANDROID_NDK=~/my-android-ndk
-    or
-      export ANDROID_STANDALONE_TOOLCHAIN=~/my-android-toolchain
-    or put the toolchain or NDK in the default path:
-      sudo ln -s ~/my-android-ndk ${ANDROID_NDK_SEARCH_PATH}
-      sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" )
-endif()
-
-# android NDK layout
-if( BUILD_WITH_ANDROID_NDK )
- if( NOT DEFINED ANDROID_NDK_LAYOUT )
-  # try to automatically detect the layout
-  if( EXISTS "${ANDROID_NDK}/RELEASE.TXT")
-   set( ANDROID_NDK_LAYOUT "RELEASE" )
-  elseif( EXISTS "${ANDROID_NDK}/../../linux-x86/toolchain/" )
-   set( ANDROID_NDK_LAYOUT "LINARO" )
-  elseif( EXISTS "${ANDROID_NDK}/../../gcc/" )
-   set( ANDROID_NDK_LAYOUT "ANDROID" )
-  endif()
- endif()
- set( ANDROID_NDK_LAYOUT "${ANDROID_NDK_LAYOUT}" CACHE STRING "The inner layout of NDK" )
- mark_as_advanced( ANDROID_NDK_LAYOUT )
- if( ANDROID_NDK_LAYOUT STREQUAL "LINARO" )
-  set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
-  set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../${ANDROID_NDK_HOST_SYSTEM_NAME}/toolchain" )
-  set( ANDROID_NDK_TOOLCHAINS_SUBPATH  "" )
-  set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
- elseif( ANDROID_NDK_LAYOUT STREQUAL "ANDROID" )
-  set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
-  set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../gcc/${ANDROID_NDK_HOST_SYSTEM_NAME}/arm" )
-  set( ANDROID_NDK_TOOLCHAINS_SUBPATH  "" )
-  set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
- else() # ANDROID_NDK_LAYOUT STREQUAL "RELEASE"
-  set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/toolchains" )
-  set( ANDROID_NDK_TOOLCHAINS_SUBPATH  "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
-  set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME2}" )
- endif()
- get_filename_component( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK_TOOLCHAINS_PATH}" ABSOLUTE )
-
- # try to detect change of NDK
- if( CMAKE_AR )
-  string( LENGTH "${ANDROID_NDK_TOOLCHAINS_PATH}" __length )
-  string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath )
-  if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK_TOOLCHAINS_PATH )
-   message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first.
-   " )
-  endif()
-  unset( __androidNdkPreviousPath )
-  unset( __length )
- endif()
-endif()
-
-
-# get all the details about standalone toolchain
-if( BUILD_WITH_STANDALONE_TOOLCHAIN )
- __DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" )
- set( ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
- set( __availableToolchains "standalone" )
- __DETECT_TOOLCHAIN_MACHINE_NAME( __availableToolchainMachines "${ANDROID_STANDALONE_TOOLCHAIN}" )
- if( NOT __availableToolchainMachines )
-  message( FATAL_ERROR "Could not determine machine name of your toolchain. Probably your Android standalone toolchain is broken." )
- endif()
- if( __availableToolchainMachines MATCHES i686 )
-  set( __availableToolchainArchs "x86" )
- elseif( __availableToolchainMachines MATCHES arm )
-  set( __availableToolchainArchs "arm" )
- elseif( __availableToolchainMachines MATCHES mipsel )
-  set( __availableToolchainArchs "mipsel" )
- endif()
- execute_process( COMMAND "${ANDROID_STANDALONE_TOOLCHAIN}/bin/${__availableToolchainMachines}-gcc${TOOL_OS_SUFFIX}" -dumpversion
-                  OUTPUT_VARIABLE __availableToolchainCompilerVersions OUTPUT_STRIP_TRAILING_WHITESPACE )
- string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?" __availableToolchainCompilerVersions "${__availableToolchainCompilerVersions}" )
- if( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/bin/clang${TOOL_OS_SUFFIX}" )
-  list( APPEND __availableToolchains "standalone-clang" )
-  list( APPEND __availableToolchainMachines ${__availableToolchainMachines} )
-  list( APPEND __availableToolchainArchs ${__availableToolchainArchs} )
-  list( APPEND __availableToolchainCompilerVersions ${__availableToolchainCompilerVersions} )
- endif()
-endif()
-
-macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __toolchain_subpath )
- foreach( __toolchain ${${__availableToolchainsLst}} )
-  if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${__toolchain}${__toolchain_subpath}" )
-   string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" )
-  else()
-   set( __gcc_toolchain "${__toolchain}" )
-  endif()
-  __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK_TOOLCHAINS_PATH}/${__gcc_toolchain}${__toolchain_subpath}" )
-  if( __machine )
-   string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9x]+)?$" __version "${__gcc_toolchain}" )
-   if( __machine MATCHES i686 )
-    set( __arch "x86" )
-   elseif( __machine MATCHES arm )
-    set( __arch "arm" )
-   elseif( __machine MATCHES mipsel )
-    set( __arch "mipsel" )
-   endif()
-   list( APPEND __availableToolchainMachines "${__machine}" )
-   list( APPEND __availableToolchainArchs "${__arch}" )
-   list( APPEND __availableToolchainCompilerVersions "${__version}" )
-   list( APPEND ${__availableToolchainsVar} "${__toolchain}" )
-  endif()
-  unset( __gcc_toolchain )
- endforeach()
-endmacro()
-
-# get all the details about NDK
-if( BUILD_WITH_ANDROID_NDK )
- file( GLOB ANDROID_SUPPORTED_NATIVE_API_LEVELS RELATIVE "${ANDROID_NDK}/platforms" "${ANDROID_NDK}/platforms/android-*" )
- string( REPLACE "android-" "" ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_SUPPORTED_NATIVE_API_LEVELS}" )
- set( __availableToolchains "" )
- set( __availableToolchainMachines "" )
- set( __availableToolchainArchs "" )
- set( __availableToolchainCompilerVersions "" )
- if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_TOOLCHAIN_NAME}/" )
-  # do not go through all toolchains if we know the name
-  set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" )
-  __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
-  if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
-   __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
-   if( __availableToolchains )
-    set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
-   endif()
-  endif()
- endif()
- if( NOT __availableToolchains )
-  file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK_TOOLCHAINS_PATH}" "${ANDROID_NDK_TOOLCHAINS_PATH}/*" )
-  if( __availableToolchains )
-   list(SORT __availableToolchainsLst) # we need clang to go after gcc
-  endif()
-  __LIST_FILTER( __availableToolchainsLst "^[.]" )
-  __LIST_FILTER( __availableToolchainsLst "llvm" )
-  __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
-  if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
-   __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
-   if( __availableToolchains )
-    set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
-   endif()
-  endif()
- endif()
- if( NOT __availableToolchains )
-  message( FATAL_ERROR "Could not find any working toolchain in the NDK. Probably your Android NDK is broken." )
- endif()
-endif()
-
-# build list of available ABIs
-set( ANDROID_SUPPORTED_ABIS "" )
-set( __uniqToolchainArchNames ${__availableToolchainArchs} )
-list( REMOVE_DUPLICATES __uniqToolchainArchNames )
-list( SORT __uniqToolchainArchNames )
-foreach( __arch ${__uniqToolchainArchNames} )
- list( APPEND ANDROID_SUPPORTED_ABIS ${ANDROID_SUPPORTED_ABIS_${__arch}} )
-endforeach()
-unset( __uniqToolchainArchNames )
-if( NOT ANDROID_SUPPORTED_ABIS )
- message( FATAL_ERROR "No one of known Android ABIs is supported by this cmake toolchain." )
-endif()
-
-# choose target ABI
-__INIT_VARIABLE( ANDROID_ABI OBSOLETE_ARM_TARGET OBSOLETE_ARM_TARGETS VALUES ${ANDROID_SUPPORTED_ABIS} )
-# verify that target ABI is supported
-list( FIND ANDROID_SUPPORTED_ABIS "${ANDROID_ABI}" __androidAbiIdx )
-if( __androidAbiIdx EQUAL -1 )
- string( REPLACE ";" "\", \"", PRINTABLE_ANDROID_SUPPORTED_ABIS  "${ANDROID_SUPPORTED_ABIS}" )
- message( FATAL_ERROR "Specified ANDROID_ABI = \"${ANDROID_ABI}\" is not supported by this cmake toolchain or your NDK/toolchain.
-   Supported values are: \"${PRINTABLE_ANDROID_SUPPORTED_ABIS}\"
-   " )
-endif()
-unset( __androidAbiIdx )
-
-# set target ABI options
-if( ANDROID_ABI STREQUAL "x86" )
- set( X86 true )
- set( ANDROID_NDK_ABI_NAME "x86" )
- set( ANDROID_ARCH_NAME "x86" )
- set( ANDROID_ARCH_FULLNAME "x86" )
- set( ANDROID_LLVM_TRIPLE "i686-none-linux-android" )
- set( CMAKE_SYSTEM_PROCESSOR "i686" )
-elseif( ANDROID_ABI STREQUAL "mips" )
- set( MIPS true )
- set( ANDROID_NDK_ABI_NAME "mips" )
- set( ANDROID_ARCH_NAME "mips" )
- set( ANDROID_ARCH_FULLNAME "mipsel" )
- set( ANDROID_LLVM_TRIPLE "mipsel-none-linux-android" )
- set( CMAKE_SYSTEM_PROCESSOR "mips" )
-elseif( ANDROID_ABI STREQUAL "armeabi" )
- set( ARMEABI true )
- set( ANDROID_NDK_ABI_NAME "armeabi" )
- set( ANDROID_ARCH_NAME "arm" )
- set( ANDROID_ARCH_FULLNAME "arm" )
- set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" )
- set( CMAKE_SYSTEM_PROCESSOR "armv5te" )
-elseif( ANDROID_ABI STREQUAL "armeabi-v6 with VFP" )
- set( ARMEABI_V6 true )
- set( ANDROID_NDK_ABI_NAME "armeabi" )
- set( ANDROID_ARCH_NAME "arm" )
- set( ANDROID_ARCH_FULLNAME "arm" )
- set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" )
- set( CMAKE_SYSTEM_PROCESSOR "armv6" )
- # need always fallback to older platform
- set( ARMEABI true )
-elseif( ANDROID_ABI STREQUAL "armeabi-v7a")
- set( ARMEABI_V7A true )
- set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
- set( ANDROID_ARCH_NAME "arm" )
- set( ANDROID_ARCH_FULLNAME "arm" )
- set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
- set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
-elseif( ANDROID_ABI STREQUAL "armeabi-v7a with VFPV3" )
- set( ARMEABI_V7A true )
- set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
- set( ANDROID_ARCH_NAME "arm" )
- set( ANDROID_ARCH_FULLNAME "arm" )
- set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
- set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
- set( VFPV3 true )
-elseif( ANDROID_ABI STREQUAL "armeabi-v7a with NEON" )
- set( ARMEABI_V7A true )
- set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
- set( ANDROID_ARCH_NAME "arm" )
- set( ANDROID_ARCH_FULLNAME "arm" )
- set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
- set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
- set( VFPV3 true )
- set( NEON true )
-else()
- message( SEND_ERROR "Unknown ANDROID_ABI=\"${ANDROID_ABI}\" is specified." )
-endif()
-
-if( CMAKE_BINARY_DIR AND EXISTS "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" )
- # really dirty hack
- # it is not possible to change CMAKE_SYSTEM_PROCESSOR after the first run...
- file( APPEND "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" "SET(CMAKE_SYSTEM_PROCESSOR \"${CMAKE_SYSTEM_PROCESSOR}\")\n" )
-endif()
-
-if( ANDROID_ARCH_NAME STREQUAL "arm" AND NOT ARMEABI_V6 )
- __INIT_VARIABLE( ANDROID_FORCE_ARM_BUILD OBSOLETE_FORCE_ARM VALUES OFF )
- set( ANDROID_FORCE_ARM_BUILD ${ANDROID_FORCE_ARM_BUILD} CACHE BOOL "Use 32-bit ARM instructions instead of Thumb-1" FORCE )
- mark_as_advanced( ANDROID_FORCE_ARM_BUILD )
-else()
- unset( ANDROID_FORCE_ARM_BUILD CACHE )
-endif()
-
-# choose toolchain
-if( ANDROID_TOOLCHAIN_NAME )
- list( FIND __availableToolchains "${ANDROID_TOOLCHAIN_NAME}" __toolchainIdx )
- if( __toolchainIdx EQUAL -1 )
-  list( SORT __availableToolchains )
-  string( REPLACE ";" "\n  * " toolchains_list "${__availableToolchains}" )
-  set( toolchains_list "  * ${toolchains_list}")
-  message( FATAL_ERROR "Specified toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is missing in your NDK or broken. Please verify that your NDK is working or select another compiler toolchain.
-To configure the toolchain set CMake variable ANDROID_TOOLCHAIN_NAME to one of the following values:\n${toolchains_list}\n" )
- endif()
- list( GET __availableToolchainArchs ${__toolchainIdx} __toolchainArch )
- if( NOT __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
-  message( SEND_ERROR "Selected toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is not able to compile binaries for the \"${ANDROID_ARCH_NAME}\" platform." )
- endif()
-else()
- set( __toolchainIdx -1 )
- set( __applicableToolchains "" )
- set( __toolchainMaxVersion "0.0.0" )
- list( LENGTH __availableToolchains __availableToolchainsCount )
- math( EXPR __availableToolchainsCount "${__availableToolchainsCount}-1" )
- foreach( __idx RANGE ${__availableToolchainsCount} )
-  list( GET __availableToolchainArchs ${__idx} __toolchainArch )
-  if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
-   list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion )
-   string( REPLACE "x" "99" __toolchainVersion "${__toolchainVersion}")
-   if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion )
-    set( __toolchainMaxVersion "${__toolchainVersion}" )
-    set( __toolchainIdx ${__idx} )
-   endif()
-  endif()
- endforeach()
- unset( __availableToolchainsCount )
- unset( __toolchainMaxVersion )
- unset( __toolchainVersion )
-endif()
-unset( __toolchainArch )
-if( __toolchainIdx EQUAL -1 )
- message( FATAL_ERROR "No one of available compiler toolchains is able to compile for ${ANDROID_ARCH_NAME} platform." )
-endif()
-list( GET __availableToolchains ${__toolchainIdx} ANDROID_TOOLCHAIN_NAME )
-list( GET __availableToolchainMachines ${__toolchainIdx} ANDROID_TOOLCHAIN_MACHINE_NAME )
-list( GET __availableToolchainCompilerVersions ${__toolchainIdx} ANDROID_COMPILER_VERSION )
-
-unset( __toolchainIdx )
-unset( __availableToolchains )
-unset( __availableToolchainMachines )
-unset( __availableToolchainArchs )
-unset( __availableToolchainCompilerVersions )
-
-# choose native API level
-__INIT_VARIABLE( ANDROID_NATIVE_API_LEVEL ENV_ANDROID_NATIVE_API_LEVEL ANDROID_API_LEVEL ENV_ANDROID_API_LEVEL ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME} ANDROID_DEFAULT_NDK_API_LEVEL )
-string( REGEX MATCH "[0-9]+" ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" )
-# adjust API level
-set( __real_api_level ${ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME}} )
-foreach( __level ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
- if( NOT __level GREATER ANDROID_NATIVE_API_LEVEL AND NOT __level LESS __real_api_level )
-  set( __real_api_level ${__level} )
- endif()
-endforeach()
-if( __real_api_level AND NOT ANDROID_NATIVE_API_LEVEL EQUAL __real_api_level )
- message( STATUS "Adjusting Android API level 'android-${ANDROID_NATIVE_API_LEVEL}' to 'android-${__real_api_level}'")
- set( ANDROID_NATIVE_API_LEVEL ${__real_api_level} )
-endif()
-unset(__real_api_level)
-# validate
-list( FIND ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_NATIVE_API_LEVEL}" __levelIdx )
-if( __levelIdx EQUAL -1 )
- message( SEND_ERROR "Specified Android native API level 'android-${ANDROID_NATIVE_API_LEVEL}' is not supported by your NDK/toolchain." )
-else()
- if( BUILD_WITH_ANDROID_NDK )
-  __DETECT_NATIVE_API_LEVEL( __realApiLevel "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}/usr/include/android/api-level.h" )
-  if( NOT __realApiLevel EQUAL ANDROID_NATIVE_API_LEVEL )
-   message( SEND_ERROR "Specified Android API level (${ANDROID_NATIVE_API_LEVEL}) does not match to the level found (${__realApiLevel}). Probably your copy of NDK is broken." )
-  endif()
-  unset( __realApiLevel )
- endif()
- set( ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" CACHE STRING "Android API level for native code" FORCE )
- if( CMAKE_VERSION VERSION_GREATER "2.8" )
-  list( SORT ANDROID_SUPPORTED_NATIVE_API_LEVELS )
-  set_property( CACHE ANDROID_NATIVE_API_LEVEL PROPERTY STRINGS ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
- endif()
-endif()
-unset( __levelIdx )
-
-
-# remember target ABI
-set( ANDROID_ABI "${ANDROID_ABI}" CACHE STRING "The target ABI for Android. If arm, then armeabi-v7a is recommended for hardware floating point." FORCE )
-if( CMAKE_VERSION VERSION_GREATER "2.8" )
- list( SORT ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME} )
- set_property( CACHE ANDROID_ABI PROPERTY STRINGS ${ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME}} )
-endif()
-
-
-# runtime choice (STL, rtti, exceptions)
-if( NOT ANDROID_STL )
- # honor legacy ANDROID_USE_STLPORT
- if( DEFINED ANDROID_USE_STLPORT )
-  if( ANDROID_USE_STLPORT )
-   set( ANDROID_STL stlport_static )
-  endif()
-  message( WARNING "You are using an obsolete variable ANDROID_USE_STLPORT to select the STL variant. Use -DANDROID_STL=stlport_static instead." )
- endif()
- if( NOT ANDROID_STL )
-  set( ANDROID_STL gnustl_static )
- endif()
-endif()
-set( ANDROID_STL "${ANDROID_STL}" CACHE STRING "C++ runtime" )
-set( ANDROID_STL_FORCE_FEATURES ON CACHE BOOL "automatically configure rtti and exceptions support based on C++ runtime" )
-mark_as_advanced( ANDROID_STL ANDROID_STL_FORCE_FEATURES )
-
-if( BUILD_WITH_ANDROID_NDK )
- if( NOT "${ANDROID_STL}" MATCHES "^(none|system|system_re|gabi\\+\\+_static|gabi\\+\\+_shared|stlport_static|stlport_shared|gnustl_static|gnustl_shared)$")
-  message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\".
-The possible values are:
-  none           -> Do not configure the runtime.
-  system         -> Use the default minimal system C++ runtime library.
-  system_re      -> Same as system but with rtti and exceptions.
-  gabi++_static  -> Use the GAbi++ runtime as a static library.
-  gabi++_shared  -> Use the GAbi++ runtime as a shared library.
-  stlport_static -> Use the STLport runtime as a static library.
-  stlport_shared -> Use the STLport runtime as a shared library.
-  gnustl_static  -> (default) Use the GNU STL as a static library.
-  gnustl_shared  -> Use the GNU STL as a shared library.
-" )
- endif()
-elseif( BUILD_WITH_STANDALONE_TOOLCHAIN )
- if( NOT "${ANDROID_STL}" MATCHES "^(none|gnustl_static|gnustl_shared)$")
-  message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\".
-The possible values are:
-  none           -> Do not configure the runtime.
-  gnustl_static  -> (default) Use the GNU STL as a static library.
-  gnustl_shared  -> Use the GNU STL as a shared library.
-" )
- endif()
-endif()
-
-unset( ANDROID_RTTI )
-unset( ANDROID_EXCEPTIONS )
-unset( ANDROID_STL_INCLUDE_DIRS )
-unset( __libstl )
-unset( __libsupcxx )
-
-if( NOT _CMAKE_IN_TRY_COMPILE AND ANDROID_NDK_RELEASE STREQUAL "r7b" AND ARMEABI_V7A AND NOT VFPV3 AND ANDROID_STL MATCHES "gnustl" )
- message( WARNING  "The GNU STL armeabi-v7a binaries from NDK r7b can crash non-NEON devices. The files provided with NDK r7b were not configured properly, resulting in crashes on Tegra2-based devices and others when trying to use certain floating-point functions (e.g., cosf, sinf, expf).
-You are strongly recommended to switch to another NDK release.
-" )
-endif()
-
-if( NOT _CMAKE_IN_TRY_COMPILE AND X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" )
-  message( WARNING  "The x86 system header file from NDK r6 has incorrect definition for ptrdiff_t. You are recommended to upgrade to a newer NDK release or manually patch the header:
-See https://android.googlesource.com/platform/development.git f907f4f9d4e56ccc8093df6fee54454b8bcab6c2
-  diff --git a/ndk/platforms/android-9/arch-x86/include/machine/_types.h b/ndk/platforms/android-9/arch-x86/include/machine/_types.h
-  index 5e28c64..65892a1 100644
-  --- a/ndk/platforms/android-9/arch-x86/include/machine/_types.h
-  +++ b/ndk/platforms/android-9/arch-x86/include/machine/_types.h
-  @@ -51,7 +51,11 @@ typedef long int       ssize_t;
-   #endif
-   #ifndef _PTRDIFF_T
-   #define _PTRDIFF_T
-  -typedef long           ptrdiff_t;
-  +#  ifdef __ANDROID__
-  +     typedef int            ptrdiff_t;
-  +#  else
-  +     typedef long           ptrdiff_t;
-  +#  endif
-   #endif
-" )
-endif()
-
-
-# setup paths and STL for standalone toolchain
-if( BUILD_WITH_STANDALONE_TOOLCHAIN )
- set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" )
- set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" )
- set( ANDROID_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot" )
-
- if( NOT ANDROID_STL STREQUAL "none" )
-  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/include/c++/${ANDROID_COMPILER_VERSION}" )
-  if( ARMEABI_V7A AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}/bits" )
-   list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}" )
-  elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb/bits" )
-   list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb" )
-  else()
-   list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" )
-  endif()
-  # always search static GNU STL to get the location of libsupc++.a
-  if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" )
-   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb" )
-  elseif( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" )
-   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}" )
-  elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libstdc++.a" )
-   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb" )
-  elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libstdc++.a" )
-   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib" )
-  endif()
-  if( __libstl )
-   set( __libsupcxx "${__libstl}/libsupc++.a" )
-   set( __libstl    "${__libstl}/libstdc++.a" )
-  endif()
-  if( NOT EXISTS "${__libsupcxx}" )
-   message( FATAL_ERROR "The required libstdsupc++.a is missing in your standalone toolchain.
- Usually it happens because of bug in make-standalone-toolchain.sh script from NDK r7, r7b and r7c.
- You need to either upgrade to newer NDK or manually copy
-     $ANDROID_NDK/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a
- to
-     ${__libsupcxx}
-   " )
-  endif()
-  if( ANDROID_STL STREQUAL "gnustl_shared" )
-   if( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" )
-    set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" )
-   elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" )
-    set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" )
-   elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" )
-    set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" )
-   endif()
-  endif()
- endif()
-endif()
-
-# clang
-if( "${ANDROID_TOOLCHAIN_NAME}" STREQUAL "standalone-clang" )
- set( ANDROID_COMPILER_IS_CLANG 1 )
- execute_process( COMMAND "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/clang${TOOL_OS_SUFFIX}" --version OUTPUT_VARIABLE ANDROID_CLANG_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE )
- string( REGEX MATCH "[0-9]+[.][0-9]+" ANDROID_CLANG_VERSION "${ANDROID_CLANG_VERSION}")
-elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" )
- string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}")
- string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
- if( NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}/bin/clang${TOOL_OS_SUFFIX}" )
-  message( FATAL_ERROR "Could not find the Clang compiler driver" )
- endif()
- set( ANDROID_COMPILER_IS_CLANG 1 )
- set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
-else()
- set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
- unset( ANDROID_COMPILER_IS_CLANG CACHE )
-endif()
-
-string( REPLACE "." "" _clang_name "clang${ANDROID_CLANG_VERSION}" )
-if( NOT EXISTS "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" )
- set( _clang_name "clang" )
-endif()
-
-
-# setup paths and STL for NDK
-if( BUILD_WITH_ANDROID_NDK )
- set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
- set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" )
-
- if( ANDROID_STL STREQUAL "none" )
-  # do nothing
- elseif( ANDROID_STL STREQUAL "system" )
-  set( ANDROID_RTTI             OFF )
-  set( ANDROID_EXCEPTIONS       OFF )
-  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" )
- elseif( ANDROID_STL STREQUAL "system_re" )
-  set( ANDROID_RTTI             ON )
-  set( ANDROID_EXCEPTIONS       ON )
-  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" )
- elseif( ANDROID_STL MATCHES "gabi" )
-  if( ANDROID_NDK_RELEASE STRLESS "r7" )
-   message( FATAL_ERROR "gabi++ is not awailable in your NDK. You have to upgrade to NDK r7 or newer to use gabi++.")
-  endif()
-  set( ANDROID_RTTI             ON )
-  set( ANDROID_EXCEPTIONS       OFF )
-  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/gabi++/include" )
-  set( __libstl                 "${ANDROID_NDK}/sources/cxx-stl/gabi++/libs/${ANDROID_NDK_ABI_NAME}/libgabi++_static.a" )
- elseif( ANDROID_STL MATCHES "stlport" )
-  if( NOT ANDROID_NDK_RELEASE STRLESS "r8d" )
-   set( ANDROID_EXCEPTIONS       ON )
-  else()
-   set( ANDROID_EXCEPTIONS       OFF )
-  endif()
-  if( ANDROID_NDK_RELEASE STRLESS "r7" )
-   set( ANDROID_RTTI            OFF )
-  else()
-   set( ANDROID_RTTI            ON )
-  endif()
-  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/stlport/stlport" )
-  set( __libstl                 "${ANDROID_NDK}/sources/cxx-stl/stlport/libs/${ANDROID_NDK_ABI_NAME}/libstlport_static.a" )
- elseif( ANDROID_STL MATCHES "gnustl" )
-  set( ANDROID_EXCEPTIONS       ON )
-  set( ANDROID_RTTI             ON )
-  if( EXISTS "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" )
-   if( ARMEABI_V7A AND ANDROID_COMPILER_VERSION VERSION_EQUAL "4.7" AND ANDROID_NDK_RELEASE STREQUAL "r8d" )
-    # gnustl binary for 4.7 compiler is buggy :(
-    # TODO: look for right fix
-    set( __libstl                "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/4.6" )
-   else()
-    set( __libstl                "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" )
-   endif()
-  else()
-   set( __libstl                "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++" )
-  endif()
-  set( ANDROID_STL_INCLUDE_DIRS "${__libstl}/include" "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/include" )
-  if( EXISTS "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" )
-   set( __libstl                "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" )
-  else()
-   set( __libstl                "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
-  endif()
- else()
-  message( FATAL_ERROR "Unknown runtime: ${ANDROID_STL}" )
- endif()
- # find libsupc++.a - rtti & exceptions
- if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" )
-  set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r8b or newer
-  if( NOT EXISTS "${__libsupcxx}" )
-   set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r7-r8
-  endif()
-  if( NOT EXISTS "${__libsupcxx}" ) # before r7
-   if( ARMEABI_V7A )
-    if( ANDROID_FORCE_ARM_BUILD )
-     set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" )
-    else()
-     set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" )
-    endif()
-   elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD )
-    set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" )
-   else()
-    set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" )
-   endif()
-  endif()
-  if( NOT EXISTS "${__libsupcxx}")
-   message( ERROR "Could not find libsupc++.a for a chosen platform. Either your NDK is not supported or is broken.")
-  endif()
- endif()
-endif()
-
-
-# case of shared STL linkage
-if( ANDROID_STL MATCHES "shared" AND DEFINED __libstl )
- string( REPLACE "_static.a" "_shared.so" __libstl "${__libstl}" )
- if( NOT _CMAKE_IN_TRY_COMPILE AND __libstl MATCHES "[.]so$" )
-  get_filename_component( __libstlname "${__libstl}" NAME )
-  execute_process( COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__libstl}" "${LIBRARY_OUTPUT_PATH}/${__libstlname}" RESULT_VARIABLE __fileCopyProcess )
-  if( NOT __fileCopyProcess EQUAL 0 OR NOT EXISTS "${LIBRARY_OUTPUT_PATH}/${__libstlname}")
-   message( SEND_ERROR "Failed copying of ${__libstl} to the ${LIBRARY_OUTPUT_PATH}/${__libstlname}" )
-  endif()
-  unset( __fileCopyProcess )
-  unset( __libstlname )
- endif()
-endif()
-
-
-# ccache support
-__INIT_VARIABLE( _ndk_ccache NDK_CCACHE ENV_NDK_CCACHE )
-if( _ndk_ccache )
- if( DEFINED NDK_CCACHE AND NOT EXISTS NDK_CCACHE )
-  unset( NDK_CCACHE CACHE )
- endif()
- find_program( NDK_CCACHE "${_ndk_ccache}" DOC "The path to ccache binary")
-else()
- unset( NDK_CCACHE CACHE )
-endif()
-unset( _ndk_ccache )
-
-
-# setup the cross-compiler
-if( NOT CMAKE_C_COMPILER )
- if( NDK_CCACHE AND NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
-  set( CMAKE_C_COMPILER   "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" )
-  set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" )
-  if( ANDROID_COMPILER_IS_CLANG )
-   set( CMAKE_C_COMPILER_ARG1   "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}"   CACHE PATH "C compiler")
-   set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
-  else()
-   set( CMAKE_C_COMPILER_ARG1   "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "C compiler")
-   set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
-  endif()
- else()
-  if( ANDROID_COMPILER_IS_CLANG )
-   set( CMAKE_C_COMPILER   "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}"   CACHE PATH "C compiler")
-   set( CMAKE_CXX_COMPILER "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
-  else()
-   set( CMAKE_C_COMPILER   "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}"    CACHE PATH "C compiler" )
-   set( CMAKE_CXX_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}"    CACHE PATH "C++ compiler" )
-  endif()
- endif()
- set( CMAKE_ASM_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}"     CACHE PATH "assembler" )
- set( CMAKE_STRIP        "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-strip${TOOL_OS_SUFFIX}"   CACHE PATH "strip" )
- set( CMAKE_AR           "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ar${TOOL_OS_SUFFIX}"      CACHE PATH "archive" )
- set( CMAKE_LINKER       "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ld${TOOL_OS_SUFFIX}"      CACHE PATH "linker" )
- set( CMAKE_NM           "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-nm${TOOL_OS_SUFFIX}"      CACHE PATH "nm" )
- set( CMAKE_OBJCOPY      "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objcopy${TOOL_OS_SUFFIX}" CACHE PATH "objcopy" )
- set( CMAKE_OBJDUMP      "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objdump${TOOL_OS_SUFFIX}" CACHE PATH "objdump" )
- set( CMAKE_RANLIB       "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ranlib${TOOL_OS_SUFFIX}"  CACHE PATH "ranlib" )
-endif()
-
-set( _CMAKE_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_MACHINE_NAME}-" )
-if( CMAKE_VERSION VERSION_LESS 2.8.5 )
- set( CMAKE_ASM_COMPILER_ARG1 "-c" )
-endif()
-if( APPLE )
- find_program( CMAKE_INSTALL_NAME_TOOL NAMES install_name_tool )
- if( NOT CMAKE_INSTALL_NAME_TOOL )
-  message( FATAL_ERROR "Could not find install_name_tool, please check your installation." )
- endif()
- mark_as_advanced( CMAKE_INSTALL_NAME_TOOL )
-endif()
-
-# Force set compilers because standard identification works badly for us
-include( CMakeForceCompiler )
-CMAKE_FORCE_C_COMPILER( "${CMAKE_C_COMPILER}" GNU )
-if( ANDROID_COMPILER_IS_CLANG )
- set( CMAKE_C_COMPILER_ID Clang)
-endif()
-set( CMAKE_C_PLATFORM_ID Linux )
-set( CMAKE_C_SIZEOF_DATA_PTR 4 )
-set( CMAKE_C_HAS_ISYSROOT 1 )
-set( CMAKE_C_COMPILER_ABI ELF )
-CMAKE_FORCE_CXX_COMPILER( "${CMAKE_CXX_COMPILER}" GNU )
-if( ANDROID_COMPILER_IS_CLANG )
- set( CMAKE_CXX_COMPILER_ID Clang)
-endif()
-set( CMAKE_CXX_PLATFORM_ID Linux )
-set( CMAKE_CXX_SIZEOF_DATA_PTR 4 )
-set( CMAKE_CXX_HAS_ISYSROOT 1 )
-set( CMAKE_CXX_COMPILER_ABI ELF )
-set( CMAKE_CXX_SOURCE_FILE_EXTENSIONS cc cp cxx cpp CPP c++ C )
-# force ASM compiler (required for CMake < 2.8.5)
-set( CMAKE_ASM_COMPILER_ID_RUN TRUE )
-set( CMAKE_ASM_COMPILER_ID GNU )
-set( CMAKE_ASM_COMPILER_WORKS TRUE )
-set( CMAKE_ASM_COMPILER_FORCED TRUE )
-set( CMAKE_COMPILER_IS_GNUASM 1)
-set( CMAKE_ASM_SOURCE_FILE_EXTENSIONS s S asm )
-
-# flags and definitions
-remove_definitions( -DANDROID )
-add_definitions( -DANDROID )
-
-if( ANDROID_SYSROOT MATCHES "[ ;\"]" )
- if( CMAKE_HOST_WIN32 )
-  # try to convert path to 8.3 form
-  file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "@echo %~s1" )
-  execute_process( COMMAND "$ENV{ComSpec}" /c "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "${ANDROID_SYSROOT}"
-                   OUTPUT_VARIABLE __path OUTPUT_STRIP_TRAILING_WHITESPACE
-                   RESULT_VARIABLE __result ERROR_QUIET )
-  if( __result EQUAL 0 )
-   file( TO_CMAKE_PATH "${__path}" ANDROID_SYSROOT )
-   set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
-  else()
-   set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" )
-  endif()
- else()
-  set( ANDROID_CXX_FLAGS "'--sysroot=${ANDROID_SYSROOT}'" )
- endif()
- if( NOT _CMAKE_IN_TRY_COMPILE )
-  # quotes can break try_compile and compiler identification
-  message(WARNING "Path to your Android NDK (or toolchain) has non-alphanumeric symbols.\nThe build might be broken.\n")
- endif()
-else()
- set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
-endif()
-
-# NDK flags
-if( ARMEABI OR ARMEABI_V7A )
- set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -funwind-tables" )
- if( NOT ANDROID_FORCE_ARM_BUILD AND NOT ARMEABI_V6 )
-  set( ANDROID_CXX_FLAGS_RELEASE "-mthumb -fomit-frame-pointer -fno-strict-aliasing" )
-  set( ANDROID_CXX_FLAGS_DEBUG   "-marm -fno-omit-frame-pointer -fno-strict-aliasing" )
-  if( NOT ANDROID_COMPILER_IS_CLANG )
-   set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -finline-limit=64" )
-  endif()
- else()
-  # always compile ARMEABI_V6 in arm mode; otherwise there is no difference from ARMEABI
-  set( ANDROID_CXX_FLAGS_RELEASE "-marm -fomit-frame-pointer -fstrict-aliasing" )
-  set( ANDROID_CXX_FLAGS_DEBUG   "-marm -fno-omit-frame-pointer -fno-strict-aliasing" )
-  if( NOT ANDROID_COMPILER_IS_CLANG )
-   set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" )
-  endif()
- endif()
-elseif( X86 )
- set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funwind-tables" )
- if( NOT ANDROID_COMPILER_IS_CLANG )
-  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" )
- else()
-  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fPIC" )
- endif()
- set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer -fstrict-aliasing" )
- set( ANDROID_CXX_FLAGS_DEBUG   "-fno-omit-frame-pointer -fno-strict-aliasing" )
-elseif( MIPS )
- set( ANDROID_CXX_FLAGS         "${ANDROID_CXX_FLAGS} -fpic -fno-strict-aliasing -finline-functions -ffunction-sections -funwind-tables -fmessage-length=0" )
- set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer" )
- set( ANDROID_CXX_FLAGS_DEBUG   "-fno-omit-frame-pointer" )
- if( NOT ANDROID_COMPILER_IS_CLANG )
-  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fno-inline-functions-called-once -fgcse-after-reload -frerun-cse-after-loop -frename-registers" )
-  set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} -funswitch-loops -finline-limit=300" )
- endif()
-elseif()
- set( ANDROID_CXX_FLAGS_RELEASE "" )
- set( ANDROID_CXX_FLAGS_DEBUG   "" )
-endif()
-
-set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fsigned-char" ) # good/necessary when porting desktop libraries
-
-if( NOT X86 AND NOT ANDROID_COMPILER_IS_CLANG )
- set( ANDROID_CXX_FLAGS "-Wno-psabi ${ANDROID_CXX_FLAGS}" )
-endif()
-
-if( NOT ANDROID_COMPILER_VERSION VERSION_LESS "4.6" )
- set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -no-canonical-prefixes" ) # see https://android-review.googlesource.com/#/c/47564/
-endif()
-
-# ABI-specific flags
-if( ARMEABI_V7A )
- set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv7-a -mfloat-abi=softfp" )
- if( NEON )
-  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=neon" )
- elseif( VFPV3 )
-  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=vfpv3" )
- else()
-  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=vfpv3-d16" )
- endif()
-elseif( ARMEABI_V6 )
- set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv6 -mfloat-abi=softfp -mfpu=vfp" ) # vfp == vfpv2
-elseif( ARMEABI )
- set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" )
-endif()
-
-if( ANDROID_STL MATCHES "gnustl" AND (EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}") )
- set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
- set( CMAKE_CXX_CREATE_SHARED_MODULE  "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
- set( CMAKE_CXX_LINK_EXECUTABLE       "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
-else()
- set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
- set( CMAKE_CXX_CREATE_SHARED_MODULE  "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
- set( CMAKE_CXX_LINK_EXECUTABLE       "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
-endif()
-
-# STL
-if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" )
- if( EXISTS "${__libstl}" )
-  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" )
-  set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" )
-  set( CMAKE_CXX_LINK_EXECUTABLE       "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libstl}\"" )
- endif()
- if( EXISTS "${__libsupcxx}" )
-  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" )
-  set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" )
-  set( CMAKE_CXX_LINK_EXECUTABLE       "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
-  # C objects:
-  set( CMAKE_C_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
-  set( CMAKE_C_CREATE_SHARED_MODULE  "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
-  set( CMAKE_C_LINK_EXECUTABLE       "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
-  set( CMAKE_C_CREATE_SHARED_LIBRARY "${CMAKE_C_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" )
-  set( CMAKE_C_CREATE_SHARED_MODULE  "${CMAKE_C_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" )
-  set( CMAKE_C_LINK_EXECUTABLE       "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
- endif()
- if( ANDROID_STL MATCHES "gnustl" )
-  if( NOT EXISTS "${ANDROID_LIBM_PATH}" )
-   set( ANDROID_LIBM_PATH -lm )
-  endif()
-  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} ${ANDROID_LIBM_PATH}" )
-  set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} ${ANDROID_LIBM_PATH}" )
-  set( CMAKE_CXX_LINK_EXECUTABLE       "${CMAKE_CXX_LINK_EXECUTABLE} ${ANDROID_LIBM_PATH}" )
- endif()
-endif()
-
-# variables controlling optional build flags
-if (ANDROID_NDK_RELEASE STRLESS "r7")
- # libGLESv2.so in NDK's prior to r7 refers to missing external symbols.
- # So this flag option is required for all projects using OpenGL from native.
- __INIT_VARIABLE( ANDROID_SO_UNDEFINED                      VALUES ON )
-else()
- __INIT_VARIABLE( ANDROID_SO_UNDEFINED                      VALUES OFF )
-endif()
-__INIT_VARIABLE( ANDROID_NO_UNDEFINED OBSOLETE_NO_UNDEFINED VALUES ON )
-__INIT_VARIABLE( ANDROID_FUNCTION_LEVEL_LINKING             VALUES ON )
-__INIT_VARIABLE( ANDROID_GOLD_LINKER                        VALUES ON )
-__INIT_VARIABLE( ANDROID_NOEXECSTACK                        VALUES ON )
-__INIT_VARIABLE( ANDROID_RELRO                              VALUES ON )
-
-set( ANDROID_NO_UNDEFINED           ${ANDROID_NO_UNDEFINED}           CACHE BOOL "Show all undefined symbols as linker errors" )
-set( ANDROID_SO_UNDEFINED           ${ANDROID_SO_UNDEFINED}           CACHE BOOL "Allows or disallows undefined symbols in shared libraries" )
-set( ANDROID_FUNCTION_LEVEL_LINKING ${ANDROID_FUNCTION_LEVEL_LINKING} CACHE BOOL "Allows or disallows undefined symbols in shared libraries" )
-set( ANDROID_GOLD_LINKER            ${ANDROID_GOLD_LINKER}            CACHE BOOL "Enables gold linker (only avaialble for NDK r8b for ARM and x86 architectures on linux-86 and darwin-x86 hosts)" )
-set( ANDROID_NOEXECSTACK            ${ANDROID_NOEXECSTACK}            CACHE BOOL "Allows or disallows undefined symbols in shared libraries" )
-set( ANDROID_RELRO                  ${ANDROID_RELRO}                  CACHE BOOL "Enables RELRO - a memory corruption mitigation technique" )
-mark_as_advanced( ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_FUNCTION_LEVEL_LINKING ANDROID_GOLD_LINKER ANDROID_NOEXECSTACK ANDROID_RELRO )
-
-# linker flags
-set( ANDROID_LINKER_FLAGS "" )
-
-if( ARMEABI_V7A )
- # this is *required* to use the following linker flags that routes around
- # a CPU bug in some Cortex-A8 implementations:
- set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--fix-cortex-a8" )
-endif()
-
-if( ANDROID_NO_UNDEFINED )
- if( MIPS )
-  # there is some sysroot-related problem in mips linker...
-  if( NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
-   set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined -Wl,-rpath-link,${ANDROID_SYSROOT}/usr/lib" )
-  endif()
- else()
-  set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" )
- endif()
-endif()
-
-if( ANDROID_SO_UNDEFINED )
- set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-allow-shlib-undefined" )
-endif()
-
-if( ANDROID_FUNCTION_LEVEL_LINKING )
- set( ANDROID_CXX_FLAGS    "${ANDROID_CXX_FLAGS} -fdata-sections -ffunction-sections" )
- set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--gc-sections" )
-endif()
-
-if( ANDROID_COMPILER_VERSION VERSION_EQUAL "4.6" )
- if( ANDROID_GOLD_LINKER AND (CMAKE_HOST_UNIX OR ANDROID_NDK_RELEASE STRGREATER "r8b") AND (ARMEABI OR ARMEABI_V7A OR X86) )
-  set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=gold" )
- elseif( ANDROID_NDK_RELEASE STRGREATER "r8b")
-  set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=bfd" )
- elseif( ANDROID_NDK_RELEASE STREQUAL "r8b" AND ARMEABI AND NOT _CMAKE_IN_TRY_COMPILE )
-  message( WARNING "The default bfd linker from arm GCC 4.6 toolchain can fail with 'unresolvable R_ARM_THM_CALL relocation' error message. See https://code.google.com/p/android/issues/detail?id=35342
-  On Linux and OS X host platform you can workaround this problem using gold linker (default).
-  Rerun cmake with -DANDROID_GOLD_LINKER=ON option in case of problems.
-" )
- endif()
-endif() # version 4.6
-
-if( ANDROID_NOEXECSTACK )
- if( ANDROID_COMPILER_IS_CLANG )
-  set( ANDROID_CXX_FLAGS    "${ANDROID_CXX_FLAGS} -Xclang -mnoexecstack" )
- else()
-  set( ANDROID_CXX_FLAGS    "${ANDROID_CXX_FLAGS} -Wa,--noexecstack" )
- endif()
- set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,noexecstack" )
-endif()
-
-if( ANDROID_RELRO )
- set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,relro -Wl,-z,now" )
-endif()
-
-if( ANDROID_COMPILER_IS_CLANG )
- set( ANDROID_CXX_FLAGS "-Qunused-arguments ${ANDROID_CXX_FLAGS}" )
- if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD )
-  set( ANDROID_CXX_FLAGS_RELEASE "-target thumbv7-none-linux-androideabi ${ANDROID_CXX_FLAGS_RELEASE}" )
-  set( ANDROID_CXX_FLAGS_DEBUG   "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS_DEBUG}" )
- else()
-  set( ANDROID_CXX_FLAGS "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS}" )
- endif()
- if( BUILD_WITH_ANDROID_NDK )
-  set( ANDROID_CXX_FLAGS "-gcc-toolchain ${ANDROID_TOOLCHAIN_ROOT} ${ANDROID_CXX_FLAGS}" )
- endif()
-endif()
-
-# cache flags
-set( CMAKE_CXX_FLAGS           ""                        CACHE STRING "c++ flags" )
-set( CMAKE_C_FLAGS             ""                        CACHE STRING "c flags" )
-set( CMAKE_CXX_FLAGS_RELEASE   "-O3 -DNDEBUG"            CACHE STRING "c++ Release flags" )
-set( CMAKE_C_FLAGS_RELEASE     "-O3 -DNDEBUG"            CACHE STRING "c Release flags" )
-set( CMAKE_CXX_FLAGS_DEBUG     "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c++ Debug flags" )
-set( CMAKE_C_FLAGS_DEBUG       "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c Debug flags" )
-set( CMAKE_SHARED_LINKER_FLAGS ""                        CACHE STRING "shared linker flags" )
-set( CMAKE_MODULE_LINKER_FLAGS ""                        CACHE STRING "module linker flags" )
-set( CMAKE_EXE_LINKER_FLAGS    "-Wl,-z,nocopyreloc"      CACHE STRING "executable linker flags" )
-
-# put flags to cache (for debug purpose only)
-set( ANDROID_CXX_FLAGS         "${ANDROID_CXX_FLAGS}"         CACHE INTERNAL "Android specific c/c++ flags" )
-set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE}" CACHE INTERNAL "Android specific c/c++ Release flags" )
-set( ANDROID_CXX_FLAGS_DEBUG   "${ANDROID_CXX_FLAGS_DEBUG}"   CACHE INTERNAL "Android specific c/c++ Debug flags" )
-set( ANDROID_LINKER_FLAGS      "${ANDROID_LINKER_FLAGS}"      CACHE INTERNAL "Android specific c/c++ linker flags" )
-
-# finish flags
-set( CMAKE_CXX_FLAGS           "${ANDROID_CXX_FLAGS} ${CMAKE_CXX_FLAGS}" )
-set( CMAKE_C_FLAGS             "${ANDROID_CXX_FLAGS} ${CMAKE_C_FLAGS}" )
-set( CMAKE_CXX_FLAGS_RELEASE   "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_RELEASE}" )
-set( CMAKE_C_FLAGS_RELEASE     "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_C_FLAGS_RELEASE}" )
-set( CMAKE_CXX_FLAGS_DEBUG     "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_CXX_FLAGS_DEBUG}" )
-set( CMAKE_C_FLAGS_DEBUG       "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_C_FLAGS_DEBUG}" )
-set( CMAKE_SHARED_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}" )
-set( CMAKE_MODULE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS}" )
-set( CMAKE_EXE_LINKER_FLAGS    "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" )
-
-if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" )
- set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" )
- set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" )
- set( CMAKE_EXE_LINKER_FLAGS    "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" )
-endif()
-
-# configure rtti
-if( DEFINED ANDROID_RTTI AND ANDROID_STL_FORCE_FEATURES )
- if( ANDROID_RTTI )
-  set( CMAKE_CXX_FLAGS "-frtti ${CMAKE_CXX_FLAGS}" )
- else()
-  set( CMAKE_CXX_FLAGS "-fno-rtti ${CMAKE_CXX_FLAGS}" )
- endif()
-endif()
-
-# configure exceptios
-if( DEFINED ANDROID_EXCEPTIONS AND ANDROID_STL_FORCE_FEATURES )
- if( ANDROID_EXCEPTIONS )
-  set( CMAKE_CXX_FLAGS "-fexceptions ${CMAKE_CXX_FLAGS}" )
-  set( CMAKE_C_FLAGS "-fexceptions ${CMAKE_C_FLAGS}" )
- else()
-  set( CMAKE_CXX_FLAGS "-fno-exceptions ${CMAKE_CXX_FLAGS}" )
-  set( CMAKE_C_FLAGS "-fno-exceptions ${CMAKE_C_FLAGS}" )
- endif()
-endif()
-
-# global includes and link directories
-include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} )
-link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
-
-# detect if need link crtbegin_so.o explicitly
-if( NOT DEFINED ANDROID_EXPLICIT_CRT_LINK )
- set( __cmd "${CMAKE_CXX_CREATE_SHARED_LIBRARY}" )
- string( REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}" __cmd "${__cmd}" )
- string( REPLACE "<CMAKE_C_COMPILER>"   "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}"   __cmd "${__cmd}" )
- string( REPLACE "<CMAKE_SHARED_LIBRARY_CXX_FLAGS>" "${CMAKE_CXX_FLAGS}" __cmd "${__cmd}" )
- string( REPLACE "<LANGUAGE_COMPILE_FLAGS>" "" __cmd "${__cmd}" )
- string( REPLACE "<LINK_FLAGS>" "${CMAKE_SHARED_LINKER_FLAGS}" __cmd "${__cmd}" )
- string( REPLACE "<CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS>" "-shared" __cmd "${__cmd}" )
- string( REPLACE "<CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG>" "" __cmd "${__cmd}" )
- string( REPLACE "<TARGET_SONAME>" "" __cmd "${__cmd}" )
- string( REPLACE "<TARGET>" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/toolchain_crtlink_test.so" __cmd "${__cmd}" )
- string( REPLACE "<OBJECTS>" "\"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" __cmd "${__cmd}" )
- string( REPLACE "<LINK_LIBRARIES>" "" __cmd "${__cmd}" )
- separate_arguments( __cmd )
- foreach( __var ANDROID_NDK ANDROID_NDK_TOOLCHAINS_PATH ANDROID_STANDALONE_TOOLCHAIN )
-  if( ${__var} )
-   set( __tmp "${${__var}}" )
-   separate_arguments( __tmp )
-   string( REPLACE "${__tmp}" "${${__var}}" __cmd "${__cmd}")
-  endif()
- endforeach()
- string( REPLACE "'" "" __cmd "${__cmd}" )
- string( REPLACE "\"" "" __cmd "${__cmd}" )
- execute_process( COMMAND ${__cmd} RESULT_VARIABLE __cmd_result OUTPUT_QUIET ERROR_QUIET )
- if( __cmd_result EQUAL 0 )
-  set( ANDROID_EXPLICIT_CRT_LINK ON )
- else()
-  set( ANDROID_EXPLICIT_CRT_LINK OFF )
- endif()
-endif()
-
-if( ANDROID_EXPLICIT_CRT_LINK )
- set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
- set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
-endif()
-
-# setup output directories
-set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" )
-set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" )
-
-if(NOT _CMAKE_IN_TRY_COMPILE)
- if( EXISTS "${CMAKE_SOURCE_DIR}/jni/CMakeLists.txt" )
-  set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin/${ANDROID_NDK_ABI_NAME}" CACHE PATH "Output directory for applications" )
- else()
-  set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin" CACHE PATH "Output directory for applications" )
- endif()
- set( LIBRARY_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}" CACHE PATH "path for android libs" )
-endif()
-
-# set these global flags for cmake client scripts to change behavior
-set( ANDROID True )
-set( BUILD_ANDROID True )
-
-# where is the target environment
-set( CMAKE_FIND_ROOT_PATH "${ANDROID_TOOLCHAIN_ROOT}/bin" "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" "${ANDROID_SYSROOT}" "${CMAKE_INSTALL_PREFIX}" "${CMAKE_INSTALL_PREFIX}/share" )
-
-# only search for libraries and includes in the ndk toolchain
-set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
-set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
-set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
-
-
-# macro to find packages on the host OS
-macro( find_host_package )
- set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
- set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
- set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER )
- if( CMAKE_HOST_WIN32 )
-  SET( WIN32 1 )
-  SET( UNIX )
- elseif( CMAKE_HOST_APPLE )
-  SET( APPLE 1 )
-  SET( UNIX )
- endif()
- find_package( ${ARGN} )
- SET( WIN32 )
- SET( APPLE )
- SET( UNIX 1 )
- set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
- set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
- set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
-endmacro()
-
-
-# macro to find programs on the host OS
-macro( find_host_program )
- set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
- set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
- set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER )
- if( CMAKE_HOST_WIN32 )
-  SET( WIN32 1 )
-  SET( UNIX )
- elseif( CMAKE_HOST_APPLE )
-  SET( APPLE 1 )
-  SET( UNIX )
- endif()
- find_program( ${ARGN} )
- SET( WIN32 )
- SET( APPLE )
- SET( UNIX 1 )
- set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
- set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
- set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
-endmacro()
-
-
-macro( ANDROID_GET_ABI_RAWNAME TOOLCHAIN_FLAG VAR )
- if( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI" )
-  set( ${VAR} "armeabi" )
- elseif( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI_V7A" )
-  set( ${VAR} "armeabi-v7a" )
- elseif( "${TOOLCHAIN_FLAG}" STREQUAL "X86" )
-  set( ${VAR} "x86" )
- elseif( "${TOOLCHAIN_FLAG}" STREQUAL "MIPS" )
-  set( ${VAR} "mips" )
- else()
-  set( ${VAR} "unknown" )
- endif()
-endmacro()
-
-
-# export toolchain settings for the try_compile() command
-if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" )
- set( __toolchain_config "")
- foreach( __var NDK_CCACHE  LIBRARY_OUTPUT_PATH_ROOT  ANDROID_FORBID_SYGWIN  ANDROID_SET_OBSOLETE_VARIABLES
-                ANDROID_NDK_HOST_X64
-                ANDROID_NDK
-                ANDROID_NDK_LAYOUT
-                ANDROID_STANDALONE_TOOLCHAIN
-                ANDROID_TOOLCHAIN_NAME
-                ANDROID_ABI
-                ANDROID_NATIVE_API_LEVEL
-                ANDROID_STL
-                ANDROID_STL_FORCE_FEATURES
-                ANDROID_FORCE_ARM_BUILD
-                ANDROID_NO_UNDEFINED
-                ANDROID_SO_UNDEFINED
-                ANDROID_FUNCTION_LEVEL_LINKING
-                ANDROID_GOLD_LINKER
-                ANDROID_NOEXECSTACK
-                ANDROID_RELRO
-                ANDROID_LIBM_PATH
-                ANDROID_EXPLICIT_CRT_LINK
-                )
-  if( DEFINED ${__var} )
-   if( "${__var}" MATCHES " ")
-    set( __toolchain_config "${__toolchain_config}set( ${__var} \"${${__var}}\" CACHE INTERNAL \"\" )\n" )
-   else()
-    set( __toolchain_config "${__toolchain_config}set( ${__var} ${${__var}} CACHE INTERNAL \"\" )\n" )
-   endif()
-  endif()
- endforeach()
- file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/android.toolchain.config.cmake" "${__toolchain_config}" )
- unset( __toolchain_config )
-endif()
-
-
-# set some obsolete variables for backward compatibility
-set( ANDROID_SET_OBSOLETE_VARIABLES ON CACHE BOOL "Define obsolete Andrid-specific cmake variables" )
-mark_as_advanced( ANDROID_SET_OBSOLETE_VARIABLES )
-if( ANDROID_SET_OBSOLETE_VARIABLES )
- set( ANDROID_API_LEVEL ${ANDROID_NATIVE_API_LEVEL} )
- set( ARM_TARGET "${ANDROID_ABI}" )
- set( ARMEABI_NDK_NAME "${ANDROID_NDK_ABI_NAME}" )
-endif()
-
-
-# Variables controlling behavior or set by cmake toolchain:
-#   ANDROID_ABI : "armeabi-v7a" (default), "armeabi", "armeabi-v7a with NEON", "armeabi-v7a with VFPV3", "armeabi-v6 with VFP", "x86", "mips"
-#   ANDROID_NATIVE_API_LEVEL : 3,4,5,8,9,14 (depends on NDK version)
-#   ANDROID_STL : gnustl_static/gnustl_shared/stlport_static/stlport_shared/gabi++_static/gabi++_shared/system_re/system/none
-#   ANDROID_FORBID_SYGWIN : ON/OFF
-#   ANDROID_NO_UNDEFINED : ON/OFF
-#   ANDROID_SO_UNDEFINED : OFF/ON  (default depends on NDK version)
-#   ANDROID_FUNCTION_LEVEL_LINKING : ON/OFF
-#   ANDROID_GOLD_LINKER : ON/OFF
-#   ANDROID_NOEXECSTACK : ON/OFF
-#   ANDROID_RELRO : ON/OFF
-#   ANDROID_FORCE_ARM_BUILD : ON/OFF
-#   ANDROID_STL_FORCE_FEATURES : ON/OFF
-#   ANDROID_SET_OBSOLETE_VARIABLES : ON/OFF
-# Can be set only at the first run:
-#   ANDROID_NDK
-#   ANDROID_STANDALONE_TOOLCHAIN
-#   ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain
-#   ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems)
-#   ANDROID_NDK_LAYOUT : the inner NDK structure (RELEASE, LINARO, ANDROID)
-#   LIBRARY_OUTPUT_PATH_ROOT : <any valid path>
-#   NDK_CCACHE : <path to your ccache executable>
-# Obsolete:
-#   ANDROID_API_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL
-#   ARM_TARGET : superseded by ANDROID_ABI
-#   ARM_TARGETS : superseded by ANDROID_ABI (can be set only)
-#   ANDROID_NDK_TOOLCHAIN_ROOT : superseded by ANDROID_STANDALONE_TOOLCHAIN (can be set only)
-#   ANDROID_USE_STLPORT : superseded by ANDROID_STL=stlport_static
-#   ANDROID_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL (completely removed)
-#
-# Primary read-only variables:
-#   ANDROID : always TRUE
-#   ARMEABI : TRUE for arm v6 and older devices
-#   ARMEABI_V6 : TRUE for arm v6
-#   ARMEABI_V7A : TRUE for arm v7a
-#   NEON : TRUE if NEON unit is enabled
-#   VFPV3 : TRUE if VFP version 3 is enabled
-#   X86 : TRUE if configured for x86
-#   MIPS : TRUE if configured for mips
-#   BUILD_ANDROID : always TRUE
-#   BUILD_WITH_ANDROID_NDK : TRUE if NDK is used
-#   BUILD_WITH_STANDALONE_TOOLCHAIN : TRUE if standalone toolchain is used
-#   ANDROID_NDK_HOST_SYSTEM_NAME : "windows", "linux-x86" or "darwin-x86" depending on host platform
-#   ANDROID_NDK_ABI_NAME : "armeabi", "armeabi-v7a", "x86" or "mips" depending on ANDROID_ABI
-#   ANDROID_NDK_RELEASE : one of r5, r5b, r5c, r6, r6b, r7, r7b, r7c, r8, r8b, r8c, r8d, r8e; set only for NDK
-#   ANDROID_ARCH_NAME : "arm" or "x86" or "mips" depending on ANDROID_ABI
-#   ANDROID_SYSROOT : path to the compiler sysroot
-#   TOOL_OS_SUFFIX : "" or ".exe" depending on host platform
-#   ANDROID_COMPILER_IS_CLANG : TRUE if clang compiler is used
-# Obsolete:
-#   ARMEABI_NDK_NAME : superseded by ANDROID_NDK_ABI_NAME
-#
-# Secondary (less stable) read-only variables:
-#   ANDROID_COMPILER_VERSION : GCC version used
-#   ANDROID_CXX_FLAGS : C/C++ compiler flags required by Android platform
-#   ANDROID_SUPPORTED_ABIS : list of currently allowed values for ANDROID_ABI
-#   ANDROID_TOOLCHAIN_MACHINE_NAME : "arm-linux-androideabi", "arm-eabi" or "i686-android-linux"
-#   ANDROID_TOOLCHAIN_ROOT : path to the top level of toolchain (standalone or placed inside NDK)
-#   ANDROID_CLANG_TOOLCHAIN_ROOT : path to clang tools
-#   ANDROID_SUPPORTED_NATIVE_API_LEVELS : list of native API levels found inside NDK
-#   ANDROID_STL_INCLUDE_DIRS : stl include paths
-#   ANDROID_RTTI : if rtti is enabled by the runtime
-#   ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime
-#   ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used
-#   ANDROID_CLANG_VERSION : version of clang compiler if clang is used
-#   ANDROID_LIBM_PATH : path to libm.so (set to something like $(TOP)/out/target/product/<product_name>/obj/lib/libm.so) to workaround unresolved `sincos`
-#
-# Defaults:
-#   ANDROID_DEFAULT_NDK_API_LEVEL
-#   ANDROID_DEFAULT_NDK_API_LEVEL_${ARCH}
-#   ANDROID_NDK_SEARCH_PATHS
-#   ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH
-#   ANDROID_SUPPORTED_ABIS_${ARCH}
-#   ANDROID_SUPPORTED_NDK_VERSIONS
diff --git a/android/readme.txt b/android/readme.txt
deleted file mode 100644
index 2d5f3962f..000000000
--- a/android/readme.txt
+++ /dev/null
@@ -1 +0,0 @@
-All Android specific sources are moved to platforms/android.
\ No newline at end of file
diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt
index ad0f0daad..f5ac42a01 100644
--- a/apps/CMakeLists.txt
+++ b/apps/CMakeLists.txt
@@ -3,4 +3,3 @@ add_definitions(-D__OPENCV_BUILD=1)
 add_subdirectory(haartraining)
 add_subdirectory(traincascade)
 add_subdirectory(sft)
-
diff --git a/apps/haartraining/CMakeLists.txt b/apps/haartraining/CMakeLists.txt
index f71cbeeaa..7a197db83 100644
--- a/apps/haartraining/CMakeLists.txt
+++ b/apps/haartraining/CMakeLists.txt
@@ -79,4 +79,3 @@ if(ENABLE_SOLUTION_FOLDERS)
   set_target_properties(opencv_haartraining PROPERTIES FOLDER "applications")
   set_target_properties(opencv_haartraining_engine PROPERTIES FOLDER "applications")
 endif()
-
diff --git a/apps/haartraining/_cvcommon.h b/apps/haartraining/_cvcommon.h
index 1c4bad52d..92fee8e84 100644
--- a/apps/haartraining/_cvcommon.h
+++ b/apps/haartraining/_cvcommon.h
@@ -90,4 +90,3 @@ int icvGetIdxAt( CvMat* idx, int pos )
 void icvSave( const CvArr* ptr, const char* filename, int line );
 
 #endif /* __CVCOMMON_H_ */
-
diff --git a/apps/haartraining/performance.cpp b/apps/haartraining/performance.cpp
index a95e4d4c4..cb8dda1c8 100644
--- a/apps/haartraining/performance.cpp
+++ b/apps/haartraining/performance.cpp
@@ -375,4 +375,3 @@ int main( int argc, char* argv[] )
 
     return 0;
 }
-
diff --git a/apps/sft/CMakeLists.txt b/apps/sft/CMakeLists.txt
index 8b950225c..05bd337c3 100644
--- a/apps/sft/CMakeLists.txt
+++ b/apps/sft/CMakeLists.txt
@@ -30,4 +30,4 @@ if(ENABLE_SOLUTION_FOLDERS)
   set_target_properties(${the_target} PROPERTIES FOLDER "applications")
 endif()
 
-install(TARGETS ${the_target} RUNTIME DESTINATION bin COMPONENT main)
\ No newline at end of file
+install(TARGETS ${the_target} RUNTIME DESTINATION bin COMPONENT main)
diff --git a/apps/sft/config.cpp b/apps/sft/config.cpp
index 3cc64c7fe..9157575a1 100644
--- a/apps/sft/config.cpp
+++ b/apps/sft/config.cpp
@@ -159,4 +159,4 @@ std::ostream& sft::operator<<(std::ostream& out, const Config& m)
         << std::setw(14) << std::left  << "featureType"  << m.featureType  << std::endl;
 
     return out;
-}
\ No newline at end of file
+}
diff --git a/apps/sft/include/sft/common.hpp b/apps/sft/include/sft/common.hpp
index 4a0a03f36..5c142a749 100644
--- a/apps/sft/include/sft/common.hpp
+++ b/apps/sft/include/sft/common.hpp
@@ -71,4 +71,4 @@ namespace sft
 # define dprintf(format, ...)
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/apps/sft/include/sft/config.hpp b/apps/sft/include/sft/config.hpp
index 3d39d3272..c6e85b264 100644
--- a/apps/sft/include/sft/config.hpp
+++ b/apps/sft/include/sft/config.hpp
@@ -135,4 +135,4 @@ std::ostream& operator<<(std::ostream& out, const Config& m);
 
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/apps/sft/include/sft/dataset.hpp b/apps/sft/include/sft/dataset.hpp
index 98d619256..7504f4033 100644
--- a/apps/sft/include/sft/dataset.hpp
+++ b/apps/sft/include/sft/dataset.hpp
@@ -64,4 +64,4 @@ private:
 };
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/apps/sft/sft.cpp b/apps/sft/sft.cpp
index 7b5138763..79d41032e 100644
--- a/apps/sft/sft.cpp
+++ b/apps/sft/sft.cpp
@@ -165,4 +165,4 @@ int main(int argc, char** argv)
     fso.release();
     std::cout << "Training complete..." << std::endl;
     return 0;
-}
\ No newline at end of file
+}
diff --git a/apps/traincascade/CMakeLists.txt b/apps/traincascade/CMakeLists.txt
index 72e51b9ea..eb0c83df5 100644
--- a/apps/traincascade/CMakeLists.txt
+++ b/apps/traincascade/CMakeLists.txt
@@ -34,4 +34,3 @@ if(ENABLE_SOLUTION_FOLDERS)
 endif()
 
 install(TARGETS ${the_target} RUNTIME DESTINATION bin COMPONENT main)
-
diff --git a/apps/traincascade/boost.cpp b/apps/traincascade/boost.cpp
index 29ac4bc9a..732704a4a 100644
--- a/apps/traincascade/boost.cpp
+++ b/apps/traincascade/boost.cpp
@@ -957,7 +957,7 @@ void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
     int subsetN = (maxCatCount + 31)/32;
     queue<CvDTreeNode*> internalNodesQueue;
     int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
-    Ptr<float> leafVals = new float[size];
+    std::vector<float> leafVals(size);
     int leafValIdx = 0;
     int internalNodeIdx = 1;
     CvDTreeNode* tempNode;
diff --git a/apps/traincascade/cascadeclassifier.cpp b/apps/traincascade/cascadeclassifier.cpp
index 3983a614f..5c96b45f7 100644
--- a/apps/traincascade/cascadeclassifier.cpp
+++ b/apps/traincascade/cascadeclassifier.cpp
@@ -159,10 +159,10 @@ bool CvCascadeClassifier::train( const string _cascadeDirName,
         cascadeParams = _cascadeParams;
         featureParams = CvFeatureParams::create(cascadeParams.featureType);
         featureParams->init(_featureParams);
-        stageParams = new CvCascadeBoostParams;
+        stageParams = makePtr<CvCascadeBoostParams>();
         *stageParams = _stageParams;
         featureEvaluator = CvFeatureEvaluator::create(cascadeParams.featureType);
-        featureEvaluator->init( (CvFeatureParams*)featureParams, numPos + numNeg, cascadeParams.winSize );
+        featureEvaluator->init( featureParams, numPos + numNeg, cascadeParams.winSize );
         stageClassifiers.reserve( numStages );
     }
     cout << "PARAMETERS:" << endl;
@@ -206,10 +206,10 @@ bool CvCascadeClassifier::train( const string _cascadeDirName,
             break;
         }
 
-        CvCascadeBoost* tempStage = new CvCascadeBoost;
-        bool isStageTrained = tempStage->train( (CvFeatureEvaluator*)featureEvaluator,
+        Ptr<CvCascadeBoost> tempStage = makePtr<CvCascadeBoost>();
+        bool isStageTrained = tempStage->train( featureEvaluator,
                                                 curNumSamples, _precalcValBufSize, _precalcIdxBufSize,
-                                                *((CvCascadeBoostParams*)stageParams) );
+                                                *stageParams );
         cout << "END>" << endl;
 
         if(!isStageTrained)
@@ -325,7 +325,7 @@ void CvCascadeClassifier::writeParams( FileStorage &fs ) const
 
 void CvCascadeClassifier::writeFeatures( FileStorage &fs, const Mat& featureMap ) const
 {
-    ((CvFeatureEvaluator*)((Ptr<CvFeatureEvaluator>)featureEvaluator))->writeFeatures( fs, featureMap );
+    featureEvaluator->writeFeatures( fs, featureMap );
 }
 
 void CvCascadeClassifier::writeStages( FileStorage &fs, const Mat& featureMap ) const
@@ -339,7 +339,7 @@ void CvCascadeClassifier::writeStages( FileStorage &fs, const Mat& featureMap )
         sprintf( cmnt, "stage %d", i );
         cvWriteComment( fs.fs, cmnt, 0 );
         fs << "{";
-        ((CvCascadeBoost*)((Ptr<CvCascadeBoost>)*it))->write( fs, featureMap );
+        (*it)->write( fs, featureMap );
         fs << "}";
     }
     fs << "]";
@@ -350,7 +350,7 @@ bool CvCascadeClassifier::readParams( const FileNode &node )
     if ( !node.isMap() || !cascadeParams.read( node ) )
         return false;
 
-    stageParams = new CvCascadeBoostParams;
+    stageParams = makePtr<CvCascadeBoostParams>();
     FileNode rnode = node[CC_STAGE_PARAMS];
     if ( !stageParams->read( rnode ) )
         return false;
@@ -371,12 +371,9 @@ bool CvCascadeClassifier::readStages( const FileNode &node)
     FileNodeIterator it = rnode.begin();
     for( int i = 0; i < min( (int)rnode.size(), numStages ); i++, it++ )
     {
-        CvCascadeBoost* tempStage = new CvCascadeBoost;
-        if ( !tempStage->read( *it, (CvFeatureEvaluator *)featureEvaluator, *((CvCascadeBoostParams*)stageParams) ) )
-        {
-            delete tempStage;
+        Ptr<CvCascadeBoost> tempStage = makePtr<CvCascadeBoost>();
+        if ( !tempStage->read( *it, featureEvaluator, *stageParams) )
             return false;
-        }
         stageClassifiers.push_back(tempStage);
     }
     return true;
@@ -453,7 +450,7 @@ void CvCascadeClassifier::save( const string filename, bool baseFormat )
 
                     fs << "{";
                     fs << ICV_HAAR_FEATURE_NAME << "{";
-                    ((CvHaarEvaluator*)((CvFeatureEvaluator*)featureEvaluator))->writeFeature( fs, tempNode->split->var_idx );
+                    ((CvHaarEvaluator*)featureEvaluator.get())->writeFeature( fs, tempNode->split->var_idx );
                     fs << "}";
 
                     fs << ICV_HAAR_THRESHOLD_NAME << tempNode->split->ord.c;
@@ -499,7 +496,7 @@ bool CvCascadeClassifier::load( const string cascadeDirName )
     if ( !readParams( node ) )
         return false;
     featureEvaluator = CvFeatureEvaluator::create(cascadeParams.featureType);
-    featureEvaluator->init( ((CvFeatureParams*)featureParams), numPos + numNeg, cascadeParams.winSize );
+    featureEvaluator->init( featureParams, numPos + numNeg, cascadeParams.winSize );
     fs.release();
 
     char buf[10];
@@ -510,11 +507,10 @@ bool CvCascadeClassifier::load( const string cascadeDirName )
         node = fs.getFirstTopLevelNode();
         if ( !fs.isOpened() )
             break;
-        CvCascadeBoost *tempStage = new CvCascadeBoost;
+        Ptr<CvCascadeBoost> tempStage = makePtr<CvCascadeBoost>();
 
-        if ( !tempStage->read( node, (CvFeatureEvaluator*)featureEvaluator, *((CvCascadeBoostParams*)stageParams )) )
+        if ( !tempStage->read( node, featureEvaluator, *stageParams ))
         {
-            delete tempStage;
             fs.release();
             break;
         }
@@ -531,7 +527,7 @@ void CvCascadeClassifier::getUsedFeaturesIdxMap( Mat& featureMap )
 
     for( vector< Ptr<CvCascadeBoost> >::const_iterator it = stageClassifiers.begin();
         it != stageClassifiers.end(); it++ )
-        ((CvCascadeBoost*)((Ptr<CvCascadeBoost>)(*it)))->markUsedFeaturesInMap( featureMap );
+        (*it)->markUsedFeaturesInMap( featureMap );
 
     for( int fi = 0, idx = 0; fi < varCount; fi++ )
         if ( featureMap.at<int>(0, fi) >= 0 )
diff --git a/apps/traincascade/traincascade.cpp b/apps/traincascade/traincascade.cpp
index 7b8fcddd8..a896c216c 100644
--- a/apps/traincascade/traincascade.cpp
+++ b/apps/traincascade/traincascade.cpp
@@ -18,9 +18,9 @@ int main( int argc, char* argv[] )
 
     CvCascadeParams cascadeParams;
     CvCascadeBoostParams stageParams;
-    Ptr<CvFeatureParams> featureParams[] = { Ptr<CvFeatureParams>(new CvHaarFeatureParams),
-                                             Ptr<CvFeatureParams>(new CvLBPFeatureParams),
-                                             Ptr<CvFeatureParams>(new CvHOGFeatureParams)
+    Ptr<CvFeatureParams> featureParams[] = { makePtr<CvHaarFeatureParams>(),
+                                             makePtr<CvLBPFeatureParams>(),
+                                             makePtr<CvHOGFeatureParams>()
                                            };
     int fc = sizeof(featureParams)/sizeof(featureParams[0]);
     if( argc == 1 )
diff --git a/cmake/CMakeParseArguments.cmake b/cmake/CMakeParseArguments.cmake
deleted file mode 100644
index 7ce4c49ae..000000000
--- a/cmake/CMakeParseArguments.cmake
+++ /dev/null
@@ -1,138 +0,0 @@
-# CMAKE_PARSE_ARGUMENTS(<prefix> <options> <one_value_keywords> <multi_value_keywords> args...)
-#
-# CMAKE_PARSE_ARGUMENTS() is intended to be used in macros or functions for
-# parsing the arguments given to that macro or function.
-# It processes the arguments and defines a set of variables which hold the
-# values of the respective options.
-#
-# The <options> argument contains all options for the respective macro,
-# i.e. keywords which can be used when calling the macro without any value
-# following, like e.g. the OPTIONAL keyword of the install() command.
-#
-# The <one_value_keywords> argument contains all keywords for this macro
-# which are followed by one value, like e.g. DESTINATION keyword of the
-# install() command.
-#
-# The <multi_value_keywords> argument contains all keywords for this macro
-# which can be followed by more than one value, like e.g. the TARGETS or
-# FILES keywords of the install() command.
-#
-# When done, CMAKE_PARSE_ARGUMENTS() will have defined for each of the
-# keywords listed in <options>, <one_value_keywords> and
-# <multi_value_keywords> a variable composed of the given <prefix>
-# followed by "_" and the name of the respective keyword.
-# These variables will then hold the respective value from the argument list.
-# For the <options> keywords this will be TRUE or FALSE.
-#
-# All remaining arguments are collected in a variable
-# <prefix>_UNPARSED_ARGUMENTS, this can be checked afterwards to see whether
-# your macro was called with unrecognized parameters.
-#
-# As an example here a my_install() macro, which takes similar arguments as the
-# real install() command:
-#
-#   function(MY_INSTALL)
-#     set(options OPTIONAL FAST)
-#     set(oneValueArgs DESTINATION RENAME)
-#     set(multiValueArgs TARGETS CONFIGURATIONS)
-#     cmake_parse_arguments(MY_INSTALL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
-#     ...
-#
-# Assume my_install() has been called like this:
-#   my_install(TARGETS foo bar DESTINATION bin OPTIONAL blub)
-#
-# After the cmake_parse_arguments() call the macro will have set the following
-# variables:
-#   MY_INSTALL_OPTIONAL = TRUE
-#   MY_INSTALL_FAST = FALSE (this option was not used when calling my_install()
-#   MY_INSTALL_DESTINATION = "bin"
-#   MY_INSTALL_RENAME = "" (was not used)
-#   MY_INSTALL_TARGETS = "foo;bar"
-#   MY_INSTALL_CONFIGURATIONS = "" (was not used)
-#   MY_INSTALL_UNPARSED_ARGUMENTS = "blub" (no value expected after "OPTIONAL"
-#
-# You can the continue and process these variables.
-#
-# Keywords terminate lists of values, e.g. if directly after a one_value_keyword
-# another recognized keyword follows, this is interpreted as the beginning of
-# the new option.
-# E.g. my_install(TARGETS foo DESTINATION OPTIONAL) would result in
-# MY_INSTALL_DESTINATION set to "OPTIONAL", but MY_INSTALL_DESTINATION would
-# be empty and MY_INSTALL_OPTIONAL would be set to TRUE therefor.
-
-#=============================================================================
-# Copyright 2010 Alexander Neundorf <neundorf@kde.org>
-#
-# Distributed under the OSI-approved BSD License (the "License");
-# see accompanying file Copyright.txt for details.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without even the
-# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the License for more information.
-#=============================================================================
-# (To distribute this file outside of CMake, substitute the full
-#  License text for the above reference.)
-
-
-if(__CMAKE_PARSE_ARGUMENTS_INCLUDED)
-  return()
-endif()
-set(__CMAKE_PARSE_ARGUMENTS_INCLUDED TRUE)
-
-
-function(CMAKE_PARSE_ARGUMENTS prefix _optionNames _singleArgNames _multiArgNames)
-  # first set all result variables to empty/FALSE
-  foreach(arg_name ${_singleArgNames} ${_multiArgNames})
-    set(${prefix}_${arg_name})
-  endforeach(arg_name)
-
-  foreach(option ${_optionNames})
-    set(${prefix}_${option} FALSE)
-  endforeach(option)
-
-  set(${prefix}_UNPARSED_ARGUMENTS)
-
-  set(insideValues FALSE)
-  set(currentArgName)
-
-  # now iterate over all arguments and fill the result variables
-  foreach(currentArg ${ARGN})
-    list(FIND _optionNames "${currentArg}" optionIndex)  # ... then this marks the end of the arguments belonging to this keyword
-    list(FIND _singleArgNames "${currentArg}" singleArgIndex)  # ... then this marks the end of the arguments belonging to this keyword
-    list(FIND _multiArgNames "${currentArg}" multiArgIndex)  # ... then this marks the end of the arguments belonging to this keyword
-
-    if(${optionIndex} EQUAL -1  AND  ${singleArgIndex} EQUAL -1  AND  ${multiArgIndex} EQUAL -1)
-      if(insideValues)
-        if("${insideValues}" STREQUAL "SINGLE")
-          set(${prefix}_${currentArgName} ${currentArg})
-          set(insideValues FALSE)
-        elseif("${insideValues}" STREQUAL "MULTI")
-          list(APPEND ${prefix}_${currentArgName} ${currentArg})
-        endif()
-      else(insideValues)
-        list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${currentArg})
-      endif(insideValues)
-    else()
-      if(NOT ${optionIndex} EQUAL -1)
-        set(${prefix}_${currentArg} TRUE)
-        set(insideValues FALSE)
-      elseif(NOT ${singleArgIndex} EQUAL -1)
-        set(currentArgName ${currentArg})
-        set(${prefix}_${currentArgName})
-        set(insideValues "SINGLE")
-      elseif(NOT ${multiArgIndex} EQUAL -1)
-        set(currentArgName ${currentArg})
-        set(${prefix}_${currentArgName})
-        set(insideValues "MULTI")
-      endif()
-    endif()
-
-  endforeach(currentArg)
-
-  # propagate the result variables to the caller:
-  foreach(arg_name ${_singleArgNames} ${_multiArgNames} ${_optionNames})
-    set(${prefix}_${arg_name}  ${${prefix}_${arg_name}} PARENT_SCOPE)
-  endforeach(arg_name)
-  set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE)
-
-endfunction(CMAKE_PARSE_ARGUMENTS _options _singleArgs _multiArgs)
diff --git a/cmake/OpenCVCRTLinkage.cmake b/cmake/OpenCVCRTLinkage.cmake
index 7514285d9..8a297c685 100644
--- a/cmake/OpenCVCRTLinkage.cmake
+++ b/cmake/OpenCVCRTLinkage.cmake
@@ -2,6 +2,45 @@ if(NOT MSVC)
   message(FATAL_ERROR "CRT options are available only for MSVC")
 endif()
 
+#INCLUDE (CheckIncludeFiles)
+
+set(HAVE_WINRT FALSE)
+
+# search Windows Platform SDK
+message(STATUS "Checking for Windows Platform SDK")
+GET_FILENAME_COMPONENT(WINDOWS_SDK_PATH  "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\v8.0;InstallationFolder]" ABSOLUTE CACHE)
+if (WINDOWS_SDK_PATH STREQUAL "")
+  set(HAVE_MSPDK FALSE)
+  message(STATUS "Windows Platform SDK 8.0 was not found")
+else()
+  set(HAVE_MSPDK TRUE)
+endif()
+
+#search for Visual Studio 11.0 install directory
+message(STATUS "Checking for Visual Studio 2012")
+GET_FILENAME_COMPONENT(VISUAL_STUDIO_PATH [HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\VisualStudio\\11.0\\Setup\\VS;ProductDir] REALPATH CACHE)
+if (VISUAL_STUDIO_PATH STREQUAL "")
+  set(HAVE_MSVC2012 FALSE)
+  message(STATUS "Visual Studio 2012 was not found")
+else()
+  set(HAVE_MSVC2012 TRUE)
+endif()
+
+try_compile(HAVE_WINRT_SDK
+  "${OpenCV_BINARY_DIR}"
+  "${OpenCV_SOURCE_DIR}/cmake/checks/winrttest.cpp")
+
+if (ENABLE_WINRT_MODE AND HAVE_WINRT_SDK AND HAVE_MSVC2012 AND HAVE_MSPDK)
+  set(HAVE_WINRT TRUE)
+endif()
+
+if (HAVE_WINRT)
+  add_definitions(/DWINVER=0x0602 /DNTDDI_VERSION=NTDDI_WIN8 /D_WIN32_WINNT=0x0602)
+  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /appcontainer")
+  set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /appcontainer")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /appcontainer")
+endif()
+
 if(NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT)
   foreach(flag_var
           CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
@@ -62,4 +101,3 @@ if(NOT BUILD_WITH_DEBUG_INFO AND NOT MSVC)
   string(REPLACE "/Zi" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
   string(REPLACE "/Zi" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
 endif()
-
diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index a71bcf06e..bfca6a0bc 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -233,6 +233,10 @@ if(MSVC)
       set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers
     endif()
   endif()
+
+  if(OPENCV_WARNINGS_ARE_ERRORS)
+    set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /WX")
+  endif()
 endif()
 
 # Extra link libs if the user selects building static libs:
@@ -294,4 +298,4 @@ if(MSVC)
   if(NOT ENABLE_NOISY_WARNINGS)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251") #class 'std::XXX' needs to have dll-interface to be used by clients of YYY
   endif()
-endif()
\ No newline at end of file
+endif()
diff --git a/cmake/OpenCVConfig.cmake b/cmake/OpenCVConfig.cmake
deleted file mode 100644
index 300fcd419..000000000
--- a/cmake/OpenCVConfig.cmake
+++ /dev/null
@@ -1,159 +0,0 @@
-# ===================================================================================
-#  The OpenCV CMake configuration file
-#
-#             ** File generated automatically, do not modify **
-#
-#  Usage from an external project:
-#    In your CMakeLists.txt, add these lines:
-#
-#    FIND_PACKAGE(OpenCV REQUIRED)
-#    TARGET_LINK_LIBRARIES(MY_TARGET_NAME ${OpenCV_LIBS})
-#
-#    Or you can search for specific OpenCV modules:
-#
-#    FIND_PACKAGE(OpenCV REQUIRED core highgui)
-#
-#    If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE.
-#
-#    This file will define the following variables:
-#      - OpenCV_LIBS                     : The list of libraries to links against.
-#      - OpenCV_LIB_DIR                  : The directory(es) where lib files are. Calling LINK_DIRECTORIES
-#                                          with this path is NOT needed.
-#      - OpenCV_INCLUDE_DIRS             : The OpenCV include directories.
-#      - OpenCV_COMPUTE_CAPABILITIES     : The version of compute capability
-#      - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API
-#      - OpenCV_VERSION                  : The version of this OpenCV build. Example: "2.4.0"
-#      - OpenCV_VERSION_MAJOR            : Major version part of OpenCV_VERSION. Example: "2"
-#      - OpenCV_VERSION_MINOR            : Minor version part of OpenCV_VERSION. Example: "4"
-#      - OpenCV_VERSION_PATCH            : Patch version part of OpenCV_VERSION. Example: "0"
-#
-#    Advanced variables:
-#      - OpenCV_SHARED
-#      - OpenCV_CONFIG_PATH
-#      - OpenCV_LIB_COMPONENTS
-#
-# ===================================================================================
-#
-#    Windows pack specific options:
-#      - OpenCV_STATIC
-#      - OpenCV_CUDA
-
-if(CMAKE_VERSION VERSION_GREATER 2.6)
-  get_property(OpenCV_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
-  if(NOT ";${OpenCV_LANGUAGES};" MATCHES ";CXX;")
-    enable_language(CXX)
-  endif()
-endif()
-
-if(NOT DEFINED OpenCV_STATIC)
-  # look for global setting
-  if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS)
-    set(OpenCV_STATIC OFF)
-  else()
-    set(OpenCV_STATIC ON)
-  endif()
-endif()
-
-if(NOT DEFINED OpenCV_CUDA)
-  # if user' app uses CUDA, then it probably wants CUDA-enabled OpenCV binaries
-  if(CUDA_FOUND)
-    set(OpenCV_CUDA ON)
-  endif()
-endif()
-
-if(MSVC)
-  if(CMAKE_CL_64)
-    set(OpenCV_ARCH x64)
-    set(OpenCV_TBB_ARCH intel64)
-  else()
-    set(OpenCV_ARCH x86)
-    set(OpenCV_TBB_ARCH ia32)
-  endif()
-  if(MSVC_VERSION EQUAL 1400)
-    set(OpenCV_RUNTIME vc8)
-  elseif(MSVC_VERSION EQUAL 1500)
-    set(OpenCV_RUNTIME vc9)
-  elseif(MSVC_VERSION EQUAL 1600)
-    set(OpenCV_RUNTIME vc10)
-  elseif(MSVC_VERSION EQUAL 1700)
-    set(OpenCV_RUNTIME vc11)
-  endif()
-elseif(MINGW)
-  set(OpenCV_RUNTIME mingw)
-
-  execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
-                  OUTPUT_VARIABLE OPENCV_GCC_TARGET_MACHINE
-                  OUTPUT_STRIP_TRAILING_WHITESPACE)
-  if(CMAKE_OPENCV_GCC_TARGET_MACHINE MATCHES "64")
-    set(MINGW64 1)
-    set(OpenCV_ARCH x64)
-  else()
-    set(OpenCV_ARCH x86)
-  endif()
-endif()
-
-if(CMAKE_VERSION VERSION_GREATER 2.6.2)
-  unset(OpenCV_CONFIG_PATH CACHE)
-endif()
-
-get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_FILE}" PATH CACHE)
-if(OpenCV_RUNTIME AND OpenCV_ARCH)
-  if(OpenCV_STATIC AND EXISTS "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake")
-    if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake")
-      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib")
-    else()
-      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib")
-    endif()
-  elseif(EXISTS "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake")
-    if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake")
-      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib")
-    else()
-      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib")
-    endif()
-  endif()
-endif()
-
-if(OpenCV_LIB_PATH AND EXISTS "${OpenCV_LIB_PATH}/OpenCVConfig.cmake")
-  set(OpenCV_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release OpenCV libraries are located" FORCE)
-  set(OpenCV_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug OpenCV libraries are located" FORCE)
-  set(OpenCV_3RDPARTY_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release 3rdpaty OpenCV dependencies are located" FORCE)
-  set(OpenCV_3RDPARTY_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug 3rdpaty OpenCV dependencies are located" FORCE)
-
-  include("${OpenCV_LIB_PATH}/OpenCVConfig.cmake")
-
-  if(OpenCV_CUDA)
-    set(_OpenCV_LIBS "")
-    foreach(_lib ${OpenCV_LIBS})
-      string(REPLACE "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}" "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}" _lib2 "${_lib}")
-      if(NOT EXISTS "${_lib}" AND EXISTS "${_lib2}")
-        list(APPEND _OpenCV_LIBS "${_lib2}")
-      else()
-        list(APPEND _OpenCV_LIBS "${_lib}")
-      endif()
-    endforeach()
-    set(OpenCV_LIBS ${_OpenCV_LIBS})
-  endif()
-  set(OpenCV_FOUND TRUE CACHE BOOL "" FORCE)
-  set(OPENCV_FOUND TRUE CACHE BOOL "" FORCE)
-
-  if(NOT OpenCV_FIND_QUIETLY)
-    message(STATUS "Found OpenCV ${OpenCV_VERSION} in ${OpenCV_LIB_PATH}")
-    if(NOT OpenCV_LIB_PATH MATCHES "/staticlib")
-      get_filename_component(_OpenCV_LIB_PATH "${OpenCV_LIB_PATH}/../bin" ABSOLUTE)
-      file(TO_NATIVE_PATH "${_OpenCV_LIB_PATH}" _OpenCV_LIB_PATH)
-      message(STATUS "You might need to add ${_OpenCV_LIB_PATH} to your PATH to be able to run your applications.")
-      if(OpenCV_LIB_PATH MATCHES "/gpu/")
-        string(REPLACE "\\gpu" "" _OpenCV_LIB_PATH2 "${_OpenCV_LIB_PATH}")
-        message(STATUS "GPU support is enabled so you might also need ${_OpenCV_LIB_PATH2} in your PATH (it must go after the ${_OpenCV_LIB_PATH}).")
-      endif()
-    endif()
-  endif()
-else()
-  if(NOT OpenCV_FIND_QUIETLY)
-    message(WARNING "Found OpenCV 2.4.3 Windows Super Pack but it has not binaries compatible with your configuration.
-    You should manually point CMake variable OpenCV_DIR to your build of OpenCV library.")
-  endif()
-  set(OpenCV_FOUND FALSE CACHE BOOL "" FORCE)
-  set(OPENCV_FOUND FALSE CACHE BOOL "" FORCE)
-endif()
-
diff --git a/cmake/OpenCVDetectAndroidSDK.cmake b/cmake/OpenCVDetectAndroidSDK.cmake
index ee4188897..0173223d4 100644
--- a/cmake/OpenCVDetectAndroidSDK.cmake
+++ b/cmake/OpenCVDetectAndroidSDK.cmake
@@ -124,7 +124,7 @@ if(ANDROID_EXECUTABLE)
   if(NOT ANDROID_SDK_TARGET)
     set(ANDROID_SDK_TARGET "" CACHE STRING "Android SDK target for the OpenCV Java API and samples")
   endif()
-  if(ANDROID_SDK_TARGETS AND CMAKE_VERSION VERSION_GREATER "2.8")
+  if(ANDROID_SDK_TARGETS)
     set_property( CACHE ANDROID_SDK_TARGET PROPERTY STRINGS ${ANDROID_SDK_TARGETS} )
   endif()
 endif(ANDROID_EXECUTABLE)
@@ -283,7 +283,7 @@ macro(add_android_project target path)
         ocv_include_modules_recurse(${android_proj_NATIVE_DEPS})
         ocv_include_directories("${path}/jni")
 
-        if (NATIVE_APP_GLUE AND 0)
+        if(NATIVE_APP_GLUE)
           include_directories(${ANDROID_NDK}/sources/android/native_app_glue)
           list(APPEND android_proj_jni_files ${ANDROID_NDK}/sources/android/native_app_glue/android_native_app_glue.c)
           ocv_warnings_disable(CMAKE_C_FLAGS -Wstrict-prototypes -Wunused-parameter -Wmissing-prototypes)
diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake
index c44c8129e..076da108f 100644
--- a/cmake/OpenCVDetectCUDA.cmake
+++ b/cmake/OpenCVDetectCUDA.cmake
@@ -1,8 +1,3 @@
-if(${CMAKE_VERSION} VERSION_LESS "2.8.3")
-  message(STATUS "WITH_CUDA flag requires CMake 2.8.3 or newer. CUDA support is disabled.")
-  return()
-endif()
-
 if(WIN32 AND NOT MSVC)
   message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler supported on your platform).")
   return()
@@ -29,10 +24,42 @@ if(CUDA_FOUND)
   if(${CUDA_VERSION} VERSION_LESS "5.5")
     find_cuda_helper_libs(npp)
   else()
-    find_cuda_helper_libs(nppc)
-    find_cuda_helper_libs(nppi)
-    find_cuda_helper_libs(npps)
-    set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY})
+    # hack for CUDA 5.5
+    if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm")
+      unset(CUDA_TOOLKIT_INCLUDE CACHE)
+      unset(CUDA_CUDART_LIBRARY CACHE)
+      unset(CUDA_cublas_LIBRARY CACHE)
+      unset(CUDA_cufft_LIBRARY CACHE)
+      unset(CUDA_npp_LIBRARY CACHE)
+
+      if(SOFTFP)
+        set(cuda_arm_path "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-gnueabi")
+      else()
+        set(cuda_arm_path "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-gnueabihf")
+      endif()
+
+      set(CUDA_TOOLKIT_INCLUDE "${cuda_arm_path}/include" CACHE PATH "include path")
+      set(CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
+
+      set(cuda_arm_library_path "${cuda_arm_path}/lib")
+
+      set(CUDA_CUDART_LIBRARY "${cuda_arm_library_path}/libcudart.so" CACHE FILEPATH "cudart library")
+      set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY})
+      set(CUDA_cublas_LIBRARY "${cuda_arm_library_path}/libcublas.so" CACHE FILEPATH "cublas library")
+      set(CUDA_cufft_LIBRARY "${cuda_arm_library_path}/libcufft.so" CACHE FILEPATH "cufft library")
+      set(CUDA_nppc_LIBRARY "${cuda_arm_library_path}/libnppc.so" CACHE FILEPATH "nppc library")
+      set(CUDA_nppi_LIBRARY "${cuda_arm_library_path}/libnppi.so" CACHE FILEPATH "nppi library")
+      set(CUDA_npps_LIBRARY "${cuda_arm_library_path}/libnpps.so" CACHE FILEPATH "npps library")
+      set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}" CACHE STRING "npp library")
+    else()
+      unset(CUDA_npp_LIBRARY CACHE)
+
+      find_cuda_helper_libs(nppc)
+      find_cuda_helper_libs(nppi)
+      find_cuda_helper_libs(npps)
+
+      set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}" CACHE STRING "npp library")
+    endif()
   endif()
 
   if(WITH_NVCUVID)
@@ -67,7 +94,11 @@ if(CUDA_FOUND)
   if(CUDA_GENERATION STREQUAL "Fermi")
     set(__cuda_arch_bin "2.0 2.1(2.0)")
   elseif(CUDA_GENERATION STREQUAL "Kepler")
-    set(__cuda_arch_bin "3.0")
+    if(${CUDA_VERSION} VERSION_LESS "5.0")
+      set(__cuda_arch_bin "3.0")
+    else()
+      set(__cuda_arch_bin "3.0 3.5")
+    endif()
   elseif(CUDA_GENERATION STREQUAL "Auto")
     execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
                      WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
@@ -81,8 +112,12 @@ if(CUDA_FOUND)
   endif()
 
   if(NOT DEFINED __cuda_arch_bin)
-    set(__cuda_arch_bin "1.1 1.2 1.3 2.0 2.1(2.0) 3.0")
-    set(__cuda_arch_ptx "2.0 3.0")
+    if(${CUDA_VERSION} VERSION_LESS "5.0")
+      set(__cuda_arch_bin "1.1 1.2 1.3 2.0 2.1(2.0) 3.0")
+    else()
+      set(__cuda_arch_bin "1.1 1.2 1.3 2.0 2.1(2.0) 3.0 3.5")
+    endif()
+    set(__cuda_arch_ptx "3.0")
   endif()
 
   set(CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake
index a1e8bbac7..c96df82e2 100644
--- a/cmake/OpenCVDetectOpenCL.cmake
+++ b/cmake/OpenCVDetectOpenCL.cmake
@@ -20,10 +20,24 @@ else(APPLE)
               DOC "OpenCL include directory"
               NO_DEFAULT_PATH)
 
-    if (X86_64)
-      set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64)
-    elseif (X86)
-      set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86)
+    if(WIN32)
+      if(X86_64)
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64)
+      elseif(X86)
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86)
+      else()
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
+      endif()
+    elseif(UNIX)
+      if(X86_64)
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib64 lib)
+      elseif(X86)
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib32 lib)
+      else()
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
+      endif()
+    else()
+      set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
     endif()
 
     find_library(OPENCL_LIBRARY
@@ -44,7 +58,7 @@ if(OPENCL_FOUND)
   set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
   set(OPENCL_LIBRARIES    ${OPENCL_LIBRARY})
 
-  if(WIN64)
+  if(WIN32 AND X86_64)
     set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import)
   elseif(WIN32)
     set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)
diff --git a/cmake/OpenCVDetectPython.cmake b/cmake/OpenCVDetectPython.cmake
index f27176d66..0ef0be9c9 100644
--- a/cmake/OpenCVDetectPython.cmake
+++ b/cmake/OpenCVDetectPython.cmake
@@ -2,7 +2,7 @@ if(WIN32 AND NOT PYTHON_EXECUTABLE)
   # search for executable with the same bitness as resulting binaries
   # standard FindPythonInterp always prefers executable from system path
   # this is really important because we are using the interpreter for numpy search and for choosing the install location
-  foreach(_CURRENT_VERSION ${Python_ADDITIONAL_VERSIONS} 2.7 2.6 2.5 2.4 2.3 2.2 2.1 2.0)
+  foreach(_CURRENT_VERSION ${Python_ADDITIONAL_VERSIONS} 2.7 "${MIN_VER_PYTHON}")
     find_host_program(PYTHON_EXECUTABLE
       NAMES python${_CURRENT_VERSION} python
       PATHS
@@ -12,39 +12,15 @@ if(WIN32 AND NOT PYTHON_EXECUTABLE)
     )
   endforeach()
 endif()
-find_host_package(PythonInterp 2.0)
+find_host_package(PythonInterp "${MIN_VER_PYTHON}")
 
 unset(HAVE_SPHINX CACHE)
-if(PYTHON_EXECUTABLE)
-  if(PYTHON_VERSION_STRING)
-    set(PYTHON_VERSION_MAJOR_MINOR "${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}")
-    set(PYTHON_VERSION_FULL "${PYTHON_VERSION_STRING}")
-  else()
-    execute_process(COMMAND ${PYTHON_EXECUTABLE} --version
-      ERROR_VARIABLE PYTHON_VERSION_FULL
-      ERROR_STRIP_TRAILING_WHITESPACE)
 
-    string(REGEX MATCH "[0-9]+.[0-9]+" PYTHON_VERSION_MAJOR_MINOR "${PYTHON_VERSION_FULL}")
-  endif()
-
-  if("${PYTHON_VERSION_FULL}" MATCHES "[0-9]+.[0-9]+.[0-9]+")
-    set(PYTHON_VERSION_FULL "${CMAKE_MATCH_0}")
-  elseif("${PYTHON_VERSION_FULL}" MATCHES "[0-9]+.[0-9]+")
-    set(PYTHON_VERSION_FULL "${CMAKE_MATCH_0}")
-  else()
-    unset(PYTHON_VERSION_FULL)
-  endif()
+if(PYTHONINTERP_FOUND)
+  set(PYTHON_VERSION_MAJOR_MINOR "${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}")
 
   if(NOT ANDROID AND NOT IOS)
-    if(CMAKE_VERSION VERSION_GREATER 2.8.8 AND PYTHON_VERSION_FULL)
-      find_host_package(PythonLibs ${PYTHON_VERSION_FULL} EXACT)
-    else()
-      find_host_package(PythonLibs ${PYTHON_VERSION_FULL})
-    endif()
-    # cmake 2.4 (at least on Ubuntu 8.04 (hardy)) don't define PYTHONLIBS_FOUND
-    if(NOT PYTHONLIBS_FOUND AND PYTHON_INCLUDE_PATH)
-      set(PYTHONLIBS_FOUND ON)
-    endif()
+    find_host_package(PythonLibs "${PYTHON_VERSION_STRING}" EXACT)
   endif()
 
   if(NOT ANDROID AND NOT IOS)
@@ -78,39 +54,40 @@ if(PYTHON_EXECUTABLE)
     endif()
     SET(PYTHON_PACKAGES_PATH "${_PYTHON_PACKAGES_PATH}" CACHE PATH "Where to install the python packages.")
 
-    if(NOT PYTHON_NUMPY_INCLUDE_DIR)
+    if(NOT PYTHON_NUMPY_INCLUDE_DIRS)
       # Attempt to discover the NumPy include directory. If this succeeds, then build python API with NumPy
-      execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['DISTUTILS_USE_SDK']='1'; import numpy.distutils; print(numpy.distutils.misc_util.get_numpy_include_dirs()[0])"
+      execute_process(COMMAND "${PYTHON_EXECUTABLE}" -c
+                        "import os; os.environ['DISTUTILS_USE_SDK']='1'; import numpy.distutils; print(os.pathsep.join(numpy.distutils.misc_util.get_numpy_include_dirs()))"
                       RESULT_VARIABLE PYTHON_NUMPY_PROCESS
-                      OUTPUT_VARIABLE PYTHON_NUMPY_INCLUDE_DIR
+                      OUTPUT_VARIABLE PYTHON_NUMPY_INCLUDE_DIRS
                       OUTPUT_STRIP_TRAILING_WHITESPACE)
 
       if(PYTHON_NUMPY_PROCESS EQUAL 0)
-        file(TO_CMAKE_PATH "${PYTHON_NUMPY_INCLUDE_DIR}" _PYTHON_NUMPY_INCLUDE_DIR)
-        set(PYTHON_NUMPY_INCLUDE_DIR ${_PYTHON_NUMPY_INCLUDE_DIR} CACHE PATH "Path to numpy headers")
+        file(TO_CMAKE_PATH "${PYTHON_NUMPY_INCLUDE_DIRS}" _PYTHON_NUMPY_INCLUDE_DIRS)
+        set(PYTHON_NUMPY_INCLUDE_DIRS "${_PYTHON_NUMPY_INCLUDE_DIRS}" CACHE PATH "Path to numpy headers")
       endif()
     endif()
 
-    if(PYTHON_NUMPY_INCLUDE_DIR)
-      execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import numpy; print(numpy.version.version)"
-                        RESULT_VARIABLE PYTHON_NUMPY_PROCESS
-                        OUTPUT_VARIABLE PYTHON_NUMPY_VERSION
-                        OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(PYTHON_NUMPY_INCLUDE_DIRS)
+      execute_process(COMMAND "${PYTHON_EXECUTABLE}" -c "import numpy; print(numpy.version.version)"
+                      OUTPUT_VARIABLE PYTHON_NUMPY_VERSION
+                      OUTPUT_STRIP_TRAILING_WHITESPACE)
     endif()
   endif(NOT ANDROID AND NOT IOS)
+endif()
 
-  if(BUILD_DOCS)
-    find_host_program(SPHINX_BUILD sphinx-build)
-    if(SPHINX_BUILD)
-        execute_process(COMMAND "${SPHINX_BUILD}"
-                        OUTPUT_QUIET
-                        ERROR_VARIABLE SPHINX_OUTPUT
-                        OUTPUT_STRIP_TRAILING_WHITESPACE)
-        if(SPHINX_OUTPUT MATCHES "^Sphinx v([0-9][^ \n]*)")
-          set(SPHINX_VERSION "${CMAKE_MATCH_1}")
-          set(HAVE_SPHINX 1)
-          message(STATUS "Found Sphinx ${SPHINX_VERSION}: ${SPHINX_BUILD}")
-        endif()
-    endif()
-  endif(BUILD_DOCS)
-endif(PYTHON_EXECUTABLE)
+if(BUILD_DOCS)
+  find_host_program(SPHINX_BUILD sphinx-build)
+  find_host_program(PLANTUML plantuml)
+  if(SPHINX_BUILD)
+      execute_process(COMMAND "${SPHINX_BUILD}"
+                      OUTPUT_QUIET
+                      ERROR_VARIABLE SPHINX_OUTPUT
+                      OUTPUT_STRIP_TRAILING_WHITESPACE)
+      if(SPHINX_OUTPUT MATCHES "Sphinx v([0-9][^ \n]*)")
+        set(SPHINX_VERSION "${CMAKE_MATCH_1}")
+        set(HAVE_SPHINX 1)
+        message(STATUS "Found Sphinx ${SPHINX_VERSION}: ${SPHINX_BUILD}")
+      endif()
+  endif()
+endif(BUILD_DOCS)
diff --git a/cmake/OpenCVFindIPP.cmake b/cmake/OpenCVFindIPP.cmake
index d1af60565..772cae886 100644
--- a/cmake/OpenCVFindIPP.cmake
+++ b/cmake/OpenCVFindIPP.cmake
@@ -136,12 +136,20 @@ endfunction()
 
 # ------------------------------------------------------------------------
 # This is auxiliary function called from set_ipp_variables()
-# to set IPP_LIBRARIES variable in IPP 7.x style
+# to set IPP_LIBRARIES variable in IPP 7.x and 8.x style
 # ------------------------------------------------------------------------
-function(set_ipp_new_libraries)
+function(set_ipp_new_libraries _LATEST_VERSION)
     set(IPP_PREFIX "ipp")
-    set(IPP_SUFFIX "_l")       # static not threaded libs suffix
-    set(IPP_THRD   "_t")       # static threaded libs suffix
+
+    if(${_LATEST_VERSION} VERSION_LESS "8.0")
+        set(IPP_SUFFIX "_l")        # static not threaded libs suffix IPP 7.x
+    else()
+        if(WIN32)
+            set(IPP_SUFFIX "mt")    # static not threaded libs suffix IPP 8.x for Windows
+        else()
+            set(IPP_SUFFIX "")      # static not threaded libs suffix IPP 8.x for Linux/OS X
+        endif()
+    endif()
     set(IPPCORE    "core")     # core functionality
     set(IPPSP      "s")        # signal processing
     set(IPPIP      "i")        # image processing
@@ -199,7 +207,9 @@ function(set_ipp_variables _LATEST_VERSION)
         # set INCLUDE and LIB folders
         set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include PARENT_SCOPE)
 
-        if (IPP_X64)
+        if (APPLE)
+            set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib PARENT_SCOPE)
+        elseif (IPP_X64)
             if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64)
                 message(SEND_ERROR "IPP EM64T libraries not found")
             endif()
@@ -211,8 +221,8 @@ function(set_ipp_variables _LATEST_VERSION)
             set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib/ia32 PARENT_SCOPE)
         endif()
 
-        # set IPP_LIBRARIES variable (7.x lib names)
-        set_ipp_new_libraries()
+        # set IPP_LIBRARIES variable (7.x or 8.x lib names)
+        set_ipp_new_libraries(${_LATEST_VERSION})
         set(IPP_LIBRARIES ${IPP_LIBRARIES} PARENT_SCOPE)
         message(STATUS "IPP libs: ${IPP_LIBRARIES}")
 
@@ -336,4 +346,4 @@ if(WIN32 AND MINGW AND NOT IPP_LATEST_VERSION_MAJOR LESS 7)
     #     See http://code.opencv.org/issues/1906 for additional details
     set(MSV_NTDLL    "ntdll")
     set(IPP_LIBRARIES ${IPP_LIBRARIES} ${MSV_NTDLL}${IPP_LIB_SUFFIX})
-endif()
\ No newline at end of file
+endif()
diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake
index 59ce1cd05..c80beca05 100644
--- a/cmake/OpenCVFindLibsGUI.cmake
+++ b/cmake/OpenCVFindLibsGUI.cmake
@@ -5,17 +5,16 @@
 #--- Win32 UI ---
 ocv_clear_vars(HAVE_WIN32UI)
 if(WITH_WIN32UI)
-  TRY_COMPILE(HAVE_WIN32UI
-    "${OPENCV_BINARY_DIR}/CMakeFiles/CMakeTmp"
+  try_compile(HAVE_WIN32UI
+    "${OpenCV_BINARY_DIR}"
     "${OpenCV_SOURCE_DIR}/cmake/checks/win32uitest.cpp"
-    CMAKE_FLAGS "\"user32.lib\" \"gdi32.lib\""
-    OUTPUT_VARIABLE OUTPUT)
-endif(WITH_WIN32UI)
+    CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=user32;gdi32")
+endif()
 
 # --- QT4 ---
 ocv_clear_vars(HAVE_QT HAVE_QT5)
 if(WITH_QT)
-  if(NOT CMAKE_VERSION VERSION_LESS 2.8.3 AND NOT WITH_QT EQUAL 4)
+  if(NOT WITH_QT EQUAL 4)
     find_package(Qt5Core)
     find_package(Qt5Gui)
     find_package(Qt5Widgets)
@@ -24,7 +23,6 @@ if(WITH_QT)
     if(Qt5Core_FOUND AND Qt5Gui_FOUND AND Qt5Widgets_FOUND AND Qt5Test_FOUND AND Qt5Concurrent_FOUND)
       set(HAVE_QT5 ON)
       set(HAVE_QT  ON)
-      add_definitions(-DHAVE_QT)
       find_package(Qt5OpenGL)
       if(Qt5OpenGL_FOUND)
         set(QT_QTOPENGL_FOUND ON)
@@ -36,7 +34,6 @@ if(WITH_QT)
     find_package(Qt4 REQUIRED QtCore QtGui QtTest)
     if(QT4_FOUND)
       set(HAVE_QT TRUE)
-      add_definitions(-DHAVE_QT) # We need to define the macro this way, using cvconfig.h does not work
     endif()
   endif()
 endif()
@@ -61,10 +58,18 @@ if(WITH_OPENGL)
       list(APPEND OPENCV_LINKER_LIBS ${OPENGL_LIBRARIES})
       if(QT_QTOPENGL_FOUND)
         set(HAVE_QT_OPENGL TRUE)
-        add_definitions(-DHAVE_QT_OPENGL)
       else()
         ocv_include_directories(${OPENGL_INCLUDE_DIR})
       endif()
     endif()
   endif()
 endif(WITH_OPENGL)
+
+# --- Carbon & Cocoa ---
+if(APPLE)
+  if(WITH_CARBON)
+    set(HAVE_CARBON YES)
+  elseif(NOT IOS)
+    set(HAVE_COCOA YES)
+  endif()
+endif()
diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake
index ed62c9362..d8ddcfeb7 100644
--- a/cmake/OpenCVFindLibsGrfmt.cmake
+++ b/cmake/OpenCVFindLibsGrfmt.cmake
@@ -6,22 +6,21 @@
 if(BUILD_ZLIB)
   ocv_clear_vars(ZLIB_FOUND)
 else()
-  include(FindZLIB)
+  find_package(ZLIB "${MIN_VER_ZLIB}")
   if(ZLIB_FOUND AND ANDROID)
-    if(ZLIB_LIBRARY STREQUAL "${ANDROID_SYSROOT}/usr/lib/libz.so")
-      set(ZLIB_LIBRARY z)
+    if(ZLIB_LIBRARIES STREQUAL "${ANDROID_SYSROOT}/usr/lib/libz.so")
       set(ZLIB_LIBRARIES z)
     endif()
   endif()
 endif()
 
 if(NOT ZLIB_FOUND)
-  ocv_clear_vars(ZLIB_LIBRARY ZLIB_LIBRARIES ZLIB_INCLUDE_DIR)
+  ocv_clear_vars(ZLIB_LIBRARY ZLIB_LIBRARIES ZLIB_INCLUDE_DIRS)
 
   set(ZLIB_LIBRARY zlib)
-  set(ZLIB_LIBRARIES ${ZLIB_LIBRARY})
   add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/zlib")
-  set(ZLIB_INCLUDE_DIR "${${ZLIB_LIBRARY}_SOURCE_DIR}" "${${ZLIB_LIBRARY}_BINARY_DIR}")
+  set(ZLIB_INCLUDE_DIRS "${${ZLIB_LIBRARY}_SOURCE_DIR}" "${${ZLIB_LIBRARY}_BINARY_DIR}")
+  set(ZLIB_LIBRARIES ${ZLIB_LIBRARY})
 
   ocv_parse_header2(ZLIB "${${ZLIB_LIBRARY}_SOURCE_DIR}/zlib.h" ZLIB_VERSION)
 endif()
@@ -36,56 +35,59 @@ if(WITH_TIFF)
       ocv_parse_header("${TIFF_INCLUDE_DIR}/tiff.h" TIFF_VERSION_LINES TIFF_VERSION_CLASSIC TIFF_VERSION_BIG TIFF_VERSION TIFF_BIGTIFF_VERSION)
     endif()
   endif()
-endif()
 
-if(WITH_TIFF AND NOT TIFF_FOUND)
-  ocv_clear_vars(TIFF_LIBRARY TIFF_LIBRARIES TIFF_INCLUDE_DIR)
+  if(NOT TIFF_FOUND)
+    ocv_clear_vars(TIFF_LIBRARY TIFF_LIBRARIES TIFF_INCLUDE_DIR)
 
-  set(TIFF_LIBRARY libtiff)
-  set(TIFF_LIBRARIES ${TIFF_LIBRARY})
-  add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libtiff")
-  set(TIFF_INCLUDE_DIR "${${TIFF_LIBRARY}_SOURCE_DIR}" "${${TIFF_LIBRARY}_BINARY_DIR}")
-  ocv_parse_header("${${TIFF_LIBRARY}_SOURCE_DIR}/tiff.h" TIFF_VERSION_LINES TIFF_VERSION_CLASSIC TIFF_VERSION_BIG TIFF_VERSION TIFF_BIGTIFF_VERSION)
-endif()
-
-if(TIFF_VERSION_CLASSIC AND NOT TIFF_VERSION)
-  set(TIFF_VERSION ${TIFF_VERSION_CLASSIC})
-endif()
-
-if(TIFF_BIGTIFF_VERSION AND NOT TIFF_VERSION_BIG)
-  set(TIFF_VERSION_BIG ${TIFF_BIGTIFF_VERSION})
-endif()
-
-if(NOT TIFF_VERSION_STRING AND TIFF_INCLUDE_DIR)
-  list(GET TIFF_INCLUDE_DIR 0 _TIFF_INCLUDE_DIR)
-  if(EXISTS "${_TIFF_INCLUDE_DIR}/tiffvers.h")
-    file(STRINGS "${_TIFF_INCLUDE_DIR}/tiffvers.h" tiff_version_str REGEX "^#define[\t ]+TIFFLIB_VERSION_STR[\t ]+\"LIBTIFF, Version .*")
-    string(REGEX REPLACE "^#define[\t ]+TIFFLIB_VERSION_STR[\t ]+\"LIBTIFF, Version +([^ \\n]*).*" "\\1" TIFF_VERSION_STRING "${tiff_version_str}")
-    unset(tiff_version_str)
+    set(TIFF_LIBRARY libtiff)
+    set(TIFF_LIBRARIES ${TIFF_LIBRARY})
+    add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libtiff")
+    set(TIFF_INCLUDE_DIR "${${TIFF_LIBRARY}_SOURCE_DIR}" "${${TIFF_LIBRARY}_BINARY_DIR}")
+    ocv_parse_header("${${TIFF_LIBRARY}_SOURCE_DIR}/tiff.h" TIFF_VERSION_LINES TIFF_VERSION_CLASSIC TIFF_VERSION_BIG TIFF_VERSION TIFF_BIGTIFF_VERSION)
   endif()
-  unset(_TIFF_INCLUDE_DIR)
+
+  if(TIFF_VERSION_CLASSIC AND NOT TIFF_VERSION)
+    set(TIFF_VERSION ${TIFF_VERSION_CLASSIC})
+  endif()
+
+  if(TIFF_BIGTIFF_VERSION AND NOT TIFF_VERSION_BIG)
+    set(TIFF_VERSION_BIG ${TIFF_BIGTIFF_VERSION})
+  endif()
+
+  if(NOT TIFF_VERSION_STRING AND TIFF_INCLUDE_DIR)
+    list(GET TIFF_INCLUDE_DIR 0 _TIFF_INCLUDE_DIR)
+    if(EXISTS "${_TIFF_INCLUDE_DIR}/tiffvers.h")
+      file(STRINGS "${_TIFF_INCLUDE_DIR}/tiffvers.h" tiff_version_str REGEX "^#define[\t ]+TIFFLIB_VERSION_STR[\t ]+\"LIBTIFF, Version .*")
+      string(REGEX REPLACE "^#define[\t ]+TIFFLIB_VERSION_STR[\t ]+\"LIBTIFF, Version +([^ \\n]*).*" "\\1" TIFF_VERSION_STRING "${tiff_version_str}")
+      unset(tiff_version_str)
+    endif()
+    unset(_TIFF_INCLUDE_DIR)
+  endif()
+
+  set(HAVE_TIFF YES)
 endif()
 
 # --- libjpeg (optional) ---
-if(WITH_JPEG AND NOT IOS)
+if(WITH_JPEG)
   if(BUILD_JPEG)
     ocv_clear_vars(JPEG_FOUND)
   else()
     include(FindJPEG)
   endif()
+
+  if(NOT JPEG_FOUND)
+    ocv_clear_vars(JPEG_LIBRARY JPEG_LIBRARIES JPEG_INCLUDE_DIR)
+
+    set(JPEG_LIBRARY libjpeg)
+    set(JPEG_LIBRARIES ${JPEG_LIBRARY})
+    add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libjpeg")
+    set(JPEG_INCLUDE_DIR "${${JPEG_LIBRARY}_SOURCE_DIR}")
+  endif()
+
+  ocv_parse_header("${JPEG_INCLUDE_DIR}/jpeglib.h" JPEG_VERSION_LINES JPEG_LIB_VERSION)
+  set(HAVE_JPEG YES)
 endif()
 
-if(WITH_JPEG AND NOT JPEG_FOUND)
-  ocv_clear_vars(JPEG_LIBRARY JPEG_LIBRARIES JPEG_INCLUDE_DIR)
-
-  set(JPEG_LIBRARY libjpeg)
-  set(JPEG_LIBRARIES ${JPEG_LIBRARY})
-  add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libjpeg")
-  set(JPEG_INCLUDE_DIR "${${JPEG_LIBRARY}_SOURCE_DIR}")
-endif()
-
-ocv_parse_header("${JPEG_INCLUDE_DIR}/jpeglib.h" JPEG_VERSION_LINES JPEG_LIB_VERSION)
-
 # --- libwebp (optional) ---
 
 if(WITH_WEBP)
@@ -129,53 +131,55 @@ if(WITH_JASPER)
   else()
     include(FindJasper)
   endif()
-endif()
 
-if(WITH_JASPER AND NOT JASPER_FOUND)
-  ocv_clear_vars(JASPER_LIBRARY JASPER_LIBRARIES JASPER_INCLUDE_DIR)
+  if(NOT JASPER_FOUND)
+    ocv_clear_vars(JASPER_LIBRARY JASPER_LIBRARIES JASPER_INCLUDE_DIR)
 
-  set(JASPER_LIBRARY libjasper)
-  set(JASPER_LIBRARIES ${JASPER_LIBRARY})
-  add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libjasper")
-  set(JASPER_INCLUDE_DIR "${${JASPER_LIBRARY}_SOURCE_DIR}")
-endif()
+    set(JASPER_LIBRARY libjasper)
+    set(JASPER_LIBRARIES ${JASPER_LIBRARY})
+    add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libjasper")
+    set(JASPER_INCLUDE_DIR "${${JASPER_LIBRARY}_SOURCE_DIR}")
+  endif()
 
-if(NOT JASPER_VERSION_STRING)
-  ocv_parse_header2(JASPER "${JASPER_INCLUDE_DIR}/jasper/jas_config.h" JAS_VERSION "")
+  set(HAVE_JASPER YES)
+
+  if(NOT JASPER_VERSION_STRING)
+    ocv_parse_header2(JASPER "${JASPER_INCLUDE_DIR}/jasper/jas_config.h" JAS_VERSION "")
+  endif()
 endif()
 
 # --- libpng (optional, should be searched after zlib) ---
-if(WITH_PNG AND NOT IOS)
+if(WITH_PNG)
   if(BUILD_PNG)
     ocv_clear_vars(PNG_FOUND)
   else()
     include(FindPNG)
     if(PNG_FOUND)
       include(CheckIncludeFile)
-      check_include_file("${PNG_PNG_INCLUDE_DIR}/png.h"        HAVE_PNG_H)
       check_include_file("${PNG_PNG_INCLUDE_DIR}/libpng/png.h" HAVE_LIBPNG_PNG_H)
-      if(HAVE_PNG_H)
-        ocv_parse_header("${PNG_PNG_INCLUDE_DIR}/png.h" PNG_VERSION_LINES PNG_LIBPNG_VER_MAJOR PNG_LIBPNG_VER_MINOR PNG_LIBPNG_VER_RELEASE)
-      elseif(HAVE_LIBPNG_PNG_H)
+      if(HAVE_LIBPNG_PNG_H)
         ocv_parse_header("${PNG_PNG_INCLUDE_DIR}/libpng/png.h" PNG_VERSION_LINES PNG_LIBPNG_VER_MAJOR PNG_LIBPNG_VER_MINOR PNG_LIBPNG_VER_RELEASE)
+      else()
+        ocv_parse_header("${PNG_PNG_INCLUDE_DIR}/png.h" PNG_VERSION_LINES PNG_LIBPNG_VER_MAJOR PNG_LIBPNG_VER_MINOR PNG_LIBPNG_VER_RELEASE)
       endif()
     endif()
   endif()
+
+  if(NOT PNG_FOUND)
+    ocv_clear_vars(PNG_LIBRARY PNG_LIBRARIES PNG_INCLUDE_DIR PNG_PNG_INCLUDE_DIR HAVE_LIBPNG_PNG_H PNG_DEFINITIONS)
+
+    set(PNG_LIBRARY libpng)
+    set(PNG_LIBRARIES ${PNG_LIBRARY})
+    add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libpng")
+    set(PNG_INCLUDE_DIR "${${PNG_LIBRARY}_SOURCE_DIR}")
+    set(PNG_DEFINITIONS "")
+    ocv_parse_header("${PNG_INCLUDE_DIR}/png.h" PNG_VERSION_LINES PNG_LIBPNG_VER_MAJOR PNG_LIBPNG_VER_MINOR PNG_LIBPNG_VER_RELEASE)
+  endif()
+
+  set(HAVE_PNG YES)
+  set(PNG_VERSION "${PNG_LIBPNG_VER_MAJOR}.${PNG_LIBPNG_VER_MINOR}.${PNG_LIBPNG_VER_RELEASE}")
 endif()
 
-if(WITH_PNG AND NOT PNG_FOUND)
-  ocv_clear_vars(PNG_LIBRARY PNG_LIBRARIES PNG_INCLUDE_DIR PNG_PNG_INCLUDE_DIR HAVE_PNG_H HAVE_LIBPNG_PNG_H PNG_DEFINITIONS)
-
-  set(PNG_LIBRARY libpng)
-  set(PNG_LIBRARIES ${PNG_LIBRARY})
-  add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libpng")
-  set(PNG_INCLUDE_DIR "${${PNG_LIBRARY}_SOURCE_DIR}")
-  set(PNG_DEFINITIONS "")
-  ocv_parse_header("${PNG_INCLUDE_DIR}/png.h" PNG_VERSION_LINES PNG_LIBPNG_VER_MAJOR PNG_LIBPNG_VER_MINOR PNG_LIBPNG_VER_RELEASE)
-endif()
-
-set(PNG_VERSION "${PNG_LIBPNG_VER_MAJOR}.${PNG_LIBPNG_VER_MINOR}.${PNG_LIBPNG_VER_RELEASE}")
-
 # --- OpenEXR (optional) ---
 if(WITH_OPENEXR)
   if(BUILD_OPENEXR)
@@ -183,17 +187,14 @@ if(WITH_OPENEXR)
   else()
     include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindOpenEXR.cmake")
   endif()
-endif()
 
-if(WITH_OPENEXR AND NOT OPENEXR_FOUND)
-  ocv_clear_vars(OPENEXR_INCLUDE_PATHS OPENEXR_LIBRARIES OPENEXR_ILMIMF_LIBRARY OPENEXR_VERSION)
+  if(NOT OPENEXR_FOUND)
+    ocv_clear_vars(OPENEXR_INCLUDE_PATHS OPENEXR_LIBRARIES OPENEXR_ILMIMF_LIBRARY OPENEXR_VERSION)
 
-  set(OPENEXR_LIBRARIES IlmImf)
-  set(OPENEXR_ILMIMF_LIBRARY IlmImf)
-  add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/openexr")
-endif()
+    set(OPENEXR_LIBRARIES IlmImf)
+    set(OPENEXR_ILMIMF_LIBRARY IlmImf)
+    add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/openexr")
+  endif()
 
-#cmake 2.8.2 bug - it fails to determine zlib version
-if(ZLIB_FOUND)
-  ocv_parse_header2(ZLIB "${ZLIB_INCLUDE_DIR}/zlib.h" ZLIB_VERSION)
+  set(HAVE_OPENEXR YES)
 endif()
diff --git a/cmake/OpenCVFindLibsPerf.cmake b/cmake/OpenCVFindLibsPerf.cmake
index db71b8aeb..b8945c257 100644
--- a/cmake/OpenCVFindLibsPerf.cmake
+++ b/cmake/OpenCVFindLibsPerf.cmake
@@ -27,7 +27,7 @@ endif(WITH_CUDA)
 # --- Eigen ---
 if(WITH_EIGEN)
   find_path(EIGEN_INCLUDE_PATH "Eigen/Core"
-            PATHS /usr/local /opt /usr $ENV{EIGEN_ROOT}/include ENV ProgramFiles ENV ProgramW6432 
+            PATHS /usr/local /opt /usr $ENV{EIGEN_ROOT}/include ENV ProgramFiles ENV ProgramW6432
             PATH_SUFFIXES include/eigen3 include/eigen2 Eigen/include/eigen3 Eigen/include/eigen2
             DOC "The path to Eigen3/Eigen2 headers"
             CMAKE_FIND_ROOT_PATH_BOTH)
diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake
index 96d480584..eba6fa988 100644
--- a/cmake/OpenCVFindLibsVideo.cmake
+++ b/cmake/OpenCVFindLibsVideo.cmake
@@ -3,13 +3,12 @@
 # ----------------------------------------------------------------------------
 
 ocv_clear_vars(HAVE_VFW)
-if (WITH_VFW)
-  TRY_COMPILE(HAVE_VFW
-    "${OPENCV_BINARY_DIR}/CMakeFiles/CMakeTmp"
+if(WITH_VFW)
+  try_compile(HAVE_VFW
+    "${OpenCV_BINARY_DIR}"
     "${OpenCV_SOURCE_DIR}/cmake/checks/vfwtest.cpp"
-    CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=vfw32"
-    OUTPUT_VARIABLE OUTPUT)
- endif(WITH_VFW)
+    CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=vfw32")
+endif(WITH_VFW)
 
 # --- GStreamer ---
 ocv_clear_vars(HAVE_GSTREAMER)
@@ -87,7 +86,14 @@ if(WITH_PVAPI)
       set(_PVAPI_LIBRARY "${_PVAPI_LIBRARY}/${CMAKE_OPENCV_GCC_VERSION_MAJOR}.${CMAKE_OPENCV_GCC_VERSION_MINOR}")
     endif()
 
-    set(PVAPI_LIBRARY "${_PVAPI_LIBRARY}/${CMAKE_STATIC_LIBRARY_PREFIX}PvAPI${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE PATH "The PvAPI library")
+    if(WIN32)
+      if(MINGW)
+        set(PVAPI_DEFINITIONS "-DPVDECL=__stdcall")
+      endif(MINGW)
+      set(PVAPI_LIBRARY "${_PVAPI_LIBRARY}/PvAPI.lib" CACHE PATH "The PvAPI library")
+    else(WIN32)
+      set(PVAPI_LIBRARY "${_PVAPI_LIBRARY}/${CMAKE_STATIC_LIBRARY_PREFIX}PvAPI${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE PATH "The PvAPI library")
+    endif(WIN32)
     if(EXISTS "${PVAPI_LIBRARY}")
       set(HAVE_PVAPI TRUE)
     endif()
@@ -257,3 +263,17 @@ if(WIN32)
     list(APPEND HIGHGUI_LIBRARIES winmm)
   endif()
 endif(WIN32)
+
+# --- Apple AV Foundation ---
+if(WITH_AVFOUNDATION)
+  set(HAVE_AVFOUNDATION YES)
+endif()
+
+# --- QuickTime ---
+if (NOT IOS)
+  if(WITH_QUICKTIME)
+    set(HAVE_QUICKTIME YES)
+  elseif(APPLE)
+    set(HAVE_QTKIT YES)
+  endif()
+endif()
diff --git a/cmake/OpenCVFindPkgConfig.cmake b/cmake/OpenCVFindPkgConfig.cmake
deleted file mode 100644
index 7e439fd8d..000000000
--- a/cmake/OpenCVFindPkgConfig.cmake
+++ /dev/null
@@ -1,365 +0,0 @@
-#
-# OpenCV note: the file has been extracted from CMake 2.6.2 distribution.
-# It is used to build OpenCV with CMake 2.4.x
-#
-
-# - a pkg-config module for CMake
-#
-# Usage:
-#   pkg_check_modules(<PREFIX> [REQUIRED] <MODULE> [<MODULE>]*)
-#     checks for all the given modules
-#
-#   pkg_search_module(<PREFIX> [REQUIRED] <MODULE> [<MODULE>]*)
-#     checks for given modules and uses the first working one
-#
-# When the 'REQUIRED' argument was set, macros will fail with an error
-# when module(s) could not be found
-#
-# It sets the following variables:
-#   PKG_CONFIG_FOUND         ... true if pkg-config works on the system
-#   PKG_CONFIG_EXECUTABLE    ... pathname of the pkg-config program
-#   <PREFIX>_FOUND           ... set to 1 if module(s) exist
-#
-# For the following variables two sets of values exist; first one is the
-# common one and has the given PREFIX. The second set contains flags
-# which are given out when pkgconfig was called with the '--static'
-# option.
-#   <XPREFIX>_LIBRARIES      ... only the libraries (w/o the '-l')
-#   <XPREFIX>_LIBRARY_DIRS   ... the paths of the libraries (w/o the '-L')
-#   <XPREFIX>_LDFLAGS        ... all required linker flags
-#   <XPREFIX>_LDFLAGS_OTHER  ... all other linker flags
-#   <XPREFIX>_INCLUDE_DIRS   ... the '-I' preprocessor flags (w/o the '-I')
-#   <XPREFIX>_CFLAGS         ... all required cflags
-#   <XPREFIX>_CFLAGS_OTHER   ... the other compiler flags
-#
-#   <XPREFIX> = <PREFIX>        for common case
-#   <XPREFIX> = <PREFIX>_STATIC for static linking
-#
-# There are some special variables whose prefix depends on the count
-# of given modules. When there is only one module, <PREFIX> stays
-# unchanged. When there are multiple modules, the prefix will be
-# changed to <PREFIX>_<MODNAME>:
-#   <XPREFIX>_VERSION    ... version of the module
-#   <XPREFIX>_PREFIX     ... prefix-directory of the module
-#   <XPREFIX>_INCLUDEDIR ... include-dir of the module
-#   <XPREFIX>_LIBDIR     ... lib-dir of the module
-#
-#   <XPREFIX> = <PREFIX>  when |MODULES| == 1, else
-#   <XPREFIX> = <PREFIX>_<MODNAME>
-#
-# A <MODULE> parameter can have the following formats:
-#   {MODNAME}            ... matches any version
-#   {MODNAME}>={VERSION} ... at least version <VERSION> is required
-#   {MODNAME}={VERSION}  ... exactly version <VERSION> is required
-#   {MODNAME}<={VERSION} ... modules must not be newer than <VERSION>
-#
-# Examples
-#   pkg_check_modules (GLIB2   glib-2.0)
-#
-#   pkg_check_modules (GLIB2   glib-2.0>=2.10)
-#     requires at least version 2.10 of glib2 and defines e.g.
-#       GLIB2_VERSION=2.10.3
-#
-#   pkg_check_modules (FOO     glib-2.0>=2.10 gtk+-2.0)
-#     requires both glib2 and gtk2, and defines e.g.
-#       FOO_glib-2.0_VERSION=2.10.3
-#       FOO_gtk+-2.0_VERSION=2.8.20
-#
-#   pkg_check_modules (XRENDER REQUIRED xrender)
-#     defines e.g.:
-#       XRENDER_LIBRARIES=Xrender;X11
-#       XRENDER_STATIC_LIBRARIES=Xrender;X11;pthread;Xau;Xdmcp
-#
-#   pkg_search_module (BAR     libxml-2.0 libxml2 libxml>=2)
-
-
-# Copyright (C) 2006 Enrico Scholz <enrico.scholz@informatik.tu-chemnitz.de>
-#
-# Redistribution and use, with or without modification, are permitted
-# provided that the following conditions are met:
-#
-#    1. Redistributions must retain the above copyright notice, this
-#       list of conditions and the following disclaimer.
-#    2. The name of the author may not be used to endorse or promote
-#       products derived from this software without specific prior
-#       written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
-# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
-# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
-# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-# IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-### Common stuff ####
-set(PKG_CONFIG_VERSION 1)
-set(PKG_CONFIG_FOUND   0)
-
-find_program(PKG_CONFIG_EXECUTABLE NAMES pkg-config DOC "pkg-config executable")
-mark_as_advanced(PKG_CONFIG_EXECUTABLE)
-
-if(PKG_CONFIG_EXECUTABLE)
-  set(PKG_CONFIG_FOUND 1)
-endif(PKG_CONFIG_EXECUTABLE)
-
-
-# Unsets the given variables
-macro(_pkgconfig_unset var)
-  set(${var} "" CACHE INTERNAL "")
-endmacro(_pkgconfig_unset)
-
-macro(_pkgconfig_set var value)
-  set(${var} ${value} CACHE INTERNAL "")
-endmacro(_pkgconfig_set)
-
-# Invokes pkgconfig, cleans up the result and sets variables
-macro(_pkgconfig_invoke _pkglist _prefix _varname _regexp)
-  set(_pkgconfig_invoke_result)
-
-  execute_process(
-    COMMAND ${PKG_CONFIG_EXECUTABLE} ${ARGN} ${_pkglist}
-    OUTPUT_VARIABLE _pkgconfig_invoke_result
-    RESULT_VARIABLE _pkgconfig_failed)
-
-  if (_pkgconfig_failed)
-    set(_pkgconfig_${_varname} "")
-    _pkgconfig_unset(${_prefix}_${_varname})
-  else(_pkgconfig_failed)
-    string(REGEX REPLACE "[\r\n]"                  " " _pkgconfig_invoke_result "${_pkgconfig_invoke_result}")
-    string(REGEX REPLACE " +$"                     ""  _pkgconfig_invoke_result "${_pkgconfig_invoke_result}")
-
-    if (NOT ${_regexp} STREQUAL "")
-      string(REGEX REPLACE "${_regexp}" " " _pkgconfig_invoke_result "${_pkgconfig_invoke_result}")
-    endif(NOT ${_regexp} STREQUAL "")
-
-    separate_arguments(_pkgconfig_invoke_result)
-
-    #message(STATUS "  ${_varname} ... ${_pkgconfig_invoke_result}")
-    set(_pkgconfig_${_varname} ${_pkgconfig_invoke_result})
-    _pkgconfig_set(${_prefix}_${_varname} "${_pkgconfig_invoke_result}")
-  endif(_pkgconfig_failed)
-endmacro(_pkgconfig_invoke)
-
-# Invokes pkgconfig two times; once without '--static' and once with
-# '--static'
-macro(_pkgconfig_invoke_dyn _pkglist _prefix _varname cleanup_regexp)
-  _pkgconfig_invoke("${_pkglist}" ${_prefix}        ${_varname} "${cleanup_regexp}" ${ARGN})
-  _pkgconfig_invoke("${_pkglist}" ${_prefix} STATIC_${_varname} "${cleanup_regexp}" --static  ${ARGN})
-endmacro(_pkgconfig_invoke_dyn)
-
-# Splits given arguments into options and a package list
-macro(_pkgconfig_parse_options _result _is_req)
-  set(${_is_req} 0)
-
-  foreach(_pkg ${ARGN})
-    if (_pkg STREQUAL "REQUIRED")
-      set(${_is_req} 1)
-    endif (_pkg STREQUAL "REQUIRED")
-  endforeach(_pkg ${ARGN})
-
-  set(${_result} ${ARGN})
-  list(REMOVE_ITEM ${_result} "REQUIRED")
-endmacro(_pkgconfig_parse_options)
-
-###
-macro(_pkg_check_modules_internal _is_required _is_silent _prefix)
-  _pkgconfig_unset(${_prefix}_FOUND)
-  _pkgconfig_unset(${_prefix}_VERSION)
-  _pkgconfig_unset(${_prefix}_PREFIX)
-  _pkgconfig_unset(${_prefix}_INCLUDEDIR)
-  _pkgconfig_unset(${_prefix}_LIBDIR)
-  _pkgconfig_unset(${_prefix}_LIBS)
-  _pkgconfig_unset(${_prefix}_LIBS_L)
-  _pkgconfig_unset(${_prefix}_LIBS_PATHS)
-  _pkgconfig_unset(${_prefix}_LIBS_OTHER)
-  _pkgconfig_unset(${_prefix}_CFLAGS)
-  _pkgconfig_unset(${_prefix}_CFLAGS_I)
-  _pkgconfig_unset(${_prefix}_CFLAGS_OTHER)
-  _pkgconfig_unset(${_prefix}_STATIC_LIBDIR)
-  _pkgconfig_unset(${_prefix}_STATIC_LIBS)
-  _pkgconfig_unset(${_prefix}_STATIC_LIBS_L)
-  _pkgconfig_unset(${_prefix}_STATIC_LIBS_PATHS)
-  _pkgconfig_unset(${_prefix}_STATIC_LIBS_OTHER)
-  _pkgconfig_unset(${_prefix}_STATIC_CFLAGS)
-  _pkgconfig_unset(${_prefix}_STATIC_CFLAGS_I)
-  _pkgconfig_unset(${_prefix}_STATIC_CFLAGS_OTHER)
-
-  # create a better addressable variable of the modules and calculate its size
-  set(_pkg_check_modules_list ${ARGN})
-  list(LENGTH _pkg_check_modules_list _pkg_check_modules_cnt)
-
-  if(PKG_CONFIG_EXECUTABLE)
-    # give out status message telling checked module
-    if (NOT ${_is_silent})
-      if (_pkg_check_modules_cnt EQUAL 1)
-        message(STATUS "checking for module '${_pkg_check_modules_list}'")
-      else(_pkg_check_modules_cnt EQUAL 1)
-        message(STATUS "checking for modules '${_pkg_check_modules_list}'")
-      endif(_pkg_check_modules_cnt EQUAL 1)
-    endif(NOT ${_is_silent})
-
-    set(_pkg_check_modules_packages)
-    set(_pkg_check_modules_failed)
-
-    # iterate through module list and check whether they exist and match the required version
-    foreach (_pkg_check_modules_pkg ${_pkg_check_modules_list})
-      set(_pkg_check_modules_exist_query)
-
-      # check whether version is given
-      if (_pkg_check_modules_pkg MATCHES ".*(>=|=|<=).*")
-        string(REGEX REPLACE "(.*[^><])(>=|=|<=)(.*)" "\\1" _pkg_check_modules_pkg_name "${_pkg_check_modules_pkg}")
-        string(REGEX REPLACE "(.*[^><])(>=|=|<=)(.*)" "\\2" _pkg_check_modules_pkg_op   "${_pkg_check_modules_pkg}")
-        string(REGEX REPLACE "(.*[^><])(>=|=|<=)(.*)" "\\3" _pkg_check_modules_pkg_ver  "${_pkg_check_modules_pkg}")
-      else(_pkg_check_modules_pkg MATCHES ".*(>=|=|<=).*")
-        set(_pkg_check_modules_pkg_name "${_pkg_check_modules_pkg}")
-        set(_pkg_check_modules_pkg_op)
-        set(_pkg_check_modules_pkg_ver)
-      endif(_pkg_check_modules_pkg MATCHES ".*(>=|=|<=).*")
-
-      # handle the operands
-      if (_pkg_check_modules_pkg_op STREQUAL ">=")
-        list(APPEND _pkg_check_modules_exist_query --atleast-version)
-      endif(_pkg_check_modules_pkg_op STREQUAL ">=")
-
-      if (_pkg_check_modules_pkg_op STREQUAL "=")
-        list(APPEND _pkg_check_modules_exist_query --exact-version)
-      endif(_pkg_check_modules_pkg_op STREQUAL "=")
-
-      if (_pkg_check_modules_pkg_op STREQUAL "<=")
-        list(APPEND _pkg_check_modules_exist_query --max-version)
-      endif(_pkg_check_modules_pkg_op STREQUAL "<=")
-
-      # create the final query which is of the format:
-      # * --atleast-version <version> <pkg-name>
-      # * --exact-version <version> <pkg-name>
-      # * --max-version <version> <pkg-name>
-      # * --exists <pkg-name>
-      if (_pkg_check_modules_pkg_op)
-        list(APPEND _pkg_check_modules_exist_query "${_pkg_check_modules_pkg_ver}")
-      else(_pkg_check_modules_pkg_op)
-        list(APPEND _pkg_check_modules_exist_query --exists)
-      endif(_pkg_check_modules_pkg_op)
-
-      _pkgconfig_unset(${_prefix}_${_pkg_check_modules_pkg_name}_VERSION)
-      _pkgconfig_unset(${_prefix}_${_pkg_check_modules_pkg_name}_PREFIX)
-      _pkgconfig_unset(${_prefix}_${_pkg_check_modules_pkg_name}_INCLUDEDIR)
-      _pkgconfig_unset(${_prefix}_${_pkg_check_modules_pkg_name}_LIBDIR)
-
-      list(APPEND _pkg_check_modules_exist_query "${_pkg_check_modules_pkg_name}")
-      list(APPEND _pkg_check_modules_packages    "${_pkg_check_modules_pkg_name}")
-
-      # execute the query
-      execute_process(
-        COMMAND ${PKG_CONFIG_EXECUTABLE} ${_pkg_check_modules_exist_query}
-        RESULT_VARIABLE _pkgconfig_retval)
-
-      # evaluate result and tell failures
-      if (_pkgconfig_retval)
-        if(NOT ${_is_silent})
-          message(STATUS "  package '${_pkg_check_modules_pkg}' not found")
-        endif(NOT ${_is_silent})
-
-        set(_pkg_check_modules_failed 1)
-      endif(_pkgconfig_retval)
-    endforeach(_pkg_check_modules_pkg)
-
-    if(_pkg_check_modules_failed)
-      # fail when requested
-      if (${_is_required})
-        message(SEND_ERROR "A required package was not found")
-      endif (${_is_required})
-    else(_pkg_check_modules_failed)
-      # when we are here, we checked whether requested modules
-      # exist. Now, go through them and set variables
-
-      _pkgconfig_set(${_prefix}_FOUND 1)
-      list(LENGTH _pkg_check_modules_packages pkg_count)
-
-      # iterate through all modules again and set individual variables
-      foreach (_pkg_check_modules_pkg ${_pkg_check_modules_packages})
-        # handle case when there is only one package required
-        if (pkg_count EQUAL 1)
-          set(_pkg_check_prefix "${_prefix}")
-        else(pkg_count EQUAL 1)
-          set(_pkg_check_prefix "${_prefix}_${_pkg_check_modules_pkg}")
-        endif(pkg_count EQUAL 1)
-
-        _pkgconfig_invoke(${_pkg_check_modules_pkg} "${_pkg_check_prefix}" VERSION    ""   --modversion )
-        _pkgconfig_invoke(${_pkg_check_modules_pkg} "${_pkg_check_prefix}" PREFIX     ""   --variable=prefix )
-        _pkgconfig_invoke(${_pkg_check_modules_pkg} "${_pkg_check_prefix}" INCLUDEDIR ""   --variable=includedir )
-        _pkgconfig_invoke(${_pkg_check_modules_pkg} "${_pkg_check_prefix}" LIBDIR     ""   --variable=libdir )
-
-        message(STATUS "  found ${_pkg_check_modules_pkg}, version ${_pkgconfig_VERSION}")
-      endforeach(_pkg_check_modules_pkg)
-
-      # set variables which are combined for multiple modules
-      _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" LIBRARIES           "(^| )-l" --libs-only-l )
-      _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" LIBRARY_DIRS        "(^| )-L" --libs-only-L )
-      _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" LDFLAGS             ""        --libs )
-      _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" LDFLAGS_OTHER       ""        --libs-only-other )
-
-      _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" INCLUDE_DIRS        "(^| )-I" --cflags-only-I )
-      _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" CFLAGS              ""        --cflags )
-      _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" CFLAGS_OTHER        ""        --cflags-only-other )
-    endif(_pkg_check_modules_failed)
-  else(PKG_CONFIG_EXECUTABLE)
-    if (${_is_required})
-      message(SEND_ERROR "pkg-config tool not found")
-    endif (${_is_required})
-  endif(PKG_CONFIG_EXECUTABLE)
-endmacro(_pkg_check_modules_internal)
-
-###
-### User visible macros start here
-###
-
-###
-macro(pkg_check_modules _prefix _module0)
-  # check cached value
-  if (NOT DEFINED __pkg_config_checked_${_prefix} OR __pkg_config_checked_${_prefix} LESS ${PKG_CONFIG_VERSION} OR NOT ${_prefix}_FOUND)
-    _pkgconfig_parse_options   (_pkg_modules _pkg_is_required "${_module0}" ${ARGN})
-    _pkg_check_modules_internal("${_pkg_is_required}" 0 "${_prefix}" ${_pkg_modules})
-
-    _pkgconfig_set(__pkg_config_checked_${_prefix} ${PKG_CONFIG_VERSION})
-  endif(NOT DEFINED __pkg_config_checked_${_prefix} OR __pkg_config_checked_${_prefix} LESS ${PKG_CONFIG_VERSION} OR NOT ${_prefix}_FOUND)
-endmacro(pkg_check_modules)
-
-###
-macro(pkg_search_module _prefix _module0)
-  # check cached value
-  if (NOT DEFINED __pkg_config_checked_${_prefix} OR __pkg_config_checked_${_prefix} LESS ${PKG_CONFIG_VERSION} OR NOT ${_prefix}_FOUND)
-    set(_pkg_modules_found 0)
-    _pkgconfig_parse_options(_pkg_modules_alt _pkg_is_required "${_module0}" ${ARGN})
-
-    message(STATUS "checking for one of the modules '${_pkg_modules_alt}'")
-
-    # iterate through all modules and stop at the first working one.
-    foreach(_pkg_alt ${_pkg_modules_alt})
-      if(NOT _pkg_modules_found)
-        _pkg_check_modules_internal(0 1 "${_prefix}" "${_pkg_alt}")
-      endif(NOT _pkg_modules_found)
-
-      if (${_prefix}_FOUND)
-        set(_pkg_modules_found 1)
-      endif(${_prefix}_FOUND)
-    endforeach(_pkg_alt)
-
-    if (NOT ${_prefix}_FOUND)
-      if(${_pkg_is_required})
-        message(SEND_ERROR "None of the required '${_pkg_modules_alt}' found")
-      endif(${_pkg_is_required})
-    endif(NOT ${_prefix}_FOUND)
-
-    _pkgconfig_set(__pkg_config_checked_${_prefix} ${PKG_CONFIG_VERSION})
-  endif(NOT DEFINED __pkg_config_checked_${_prefix} OR __pkg_config_checked_${_prefix} LESS ${PKG_CONFIG_VERSION} OR NOT ${_prefix}_FOUND)
-endmacro(pkg_search_module)
-
-### Local Variables:
-### mode: cmake
-### End:
diff --git a/cmake/OpenCVFindXimea.cmake b/cmake/OpenCVFindXimea.cmake
index 27e2a78ad..6b86b609e 100644
--- a/cmake/OpenCVFindXimea.cmake
+++ b/cmake/OpenCVFindXimea.cmake
@@ -19,7 +19,7 @@ set(XIMEA_LIBRARY_DIR)
 if(WIN32)
   # Try to find the XIMEA API path in registry.
   GET_FILENAME_COMPONENT(XIMEA_PATH "[HKEY_CURRENT_USER\\Software\\XIMEA\\CamSupport\\API;Path]" ABSOLUTE)
- 
+
   if(EXISTS ${XIMEA_PATH})
     set(XIMEA_FOUND 1)
     # set LIB folders
@@ -43,4 +43,4 @@ endif()
 
 mark_as_advanced(FORCE XIMEA_FOUND)
 mark_as_advanced(FORCE XIMEA_PATH)
-mark_as_advanced(FORCE XIMEA_LIBRARY_DIR)
\ No newline at end of file
+mark_as_advanced(FORCE XIMEA_LIBRARY_DIR)
diff --git a/cmake/OpenCVGenConfig.cmake b/cmake/OpenCVGenConfig.cmake
index c99cae788..7ccf3ef42 100644
--- a/cmake/OpenCVGenConfig.cmake
+++ b/cmake/OpenCVGenConfig.cmake
@@ -84,7 +84,7 @@ macro(ocv_generate_dependencies_map_configcmake suffix configuration)
 
     set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_LIBNAME_${suffix} \"${__libname}\")\n")
     set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_DEPS_${suffix} ${__mod_deps})\n")
-    set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_EXTRA_DEPS_${suffix} ${__ext_deps})\n")
+    set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_EXTRA_DEPS_${suffix} \"${__ext_deps}\")\n")
 
     list(APPEND OPENCV_PROCESSED_LIBS ${__ocv_lib})
     list(APPEND OPENCV_LIBS_TO_PROCESS ${${__ocv_lib}_MODULE_DEPS_${suffix}})
diff --git a/cmake/OpenCVGenHeaders.cmake b/cmake/OpenCVGenHeaders.cmake
index d2d38c90a..c7129fefa 100644
--- a/cmake/OpenCVGenHeaders.cmake
+++ b/cmake/OpenCVGenHeaders.cmake
@@ -1,13 +1,3 @@
-# ----------------------------------------------------------------------------
-#  Variables for cvconfig.h.cmake
-# ----------------------------------------------------------------------------
-set(PACKAGE "opencv")
-set(PACKAGE_BUGREPORT "opencvlibrary-devel@lists.sourceforge.net")
-set(PACKAGE_NAME "opencv")
-set(PACKAGE_STRING "${PACKAGE} ${OPENCV_VERSION}")
-set(PACKAGE_TARNAME "${PACKAGE}")
-set(PACKAGE_VERSION "${OPENCV_VERSION}")
-
 # platform-specific config file
 configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.cmake" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h")
 
diff --git a/cmake/OpenCVGenInfoPlist.cmake b/cmake/OpenCVGenInfoPlist.cmake
new file mode 100644
index 000000000..97c674ceb
--- /dev/null
+++ b/cmake/OpenCVGenInfoPlist.cmake
@@ -0,0 +1,4 @@
+if(IOS)
+  configure_file("${OpenCV_SOURCE_DIR}/platforms/ios/Info.plist.in"
+                 "${CMAKE_BINARY_DIR}/ios/Info.plist")
+endif()
diff --git a/cmake/OpenCVGenPkgconfig.cmake b/cmake/OpenCVGenPkgconfig.cmake
index 49d670734..7bfc7bc5a 100644
--- a/cmake/OpenCVGenPkgconfig.cmake
+++ b/cmake/OpenCVGenPkgconfig.cmake
@@ -12,7 +12,6 @@ set(prefix      "${CMAKE_INSTALL_PREFIX}")
 set(exec_prefix "\${prefix}")
 set(libdir      "") #TODO: need link paths for OpenCV_EXTRA_COMPONENTS
 set(includedir  "\${prefix}/${OPENCV_INCLUDE_INSTALL_PATH}")
-set(VERSION     ${OPENCV_VERSION})
 
 if(CMAKE_BUILD_TYPE MATCHES "Release")
   set(ocv_optkind OPT)
diff --git a/cmake/OpenCVLegacyOptions.cmake b/cmake/OpenCVLegacyOptions.cmake
deleted file mode 100644
index e05ad4c48..000000000
--- a/cmake/OpenCVLegacyOptions.cmake
+++ /dev/null
@@ -1,25 +0,0 @@
-macro(ocv_legacy_option old superseded_by)
-  if(DEFINED ${old})
-    if(ARGV2)
-      set(${superseded_by} ${${old}} CACHE ${ARGV2} "Set via depricated ${old}" FORCE)
-    else()
-      set(${superseded_by} ${${old}} CACHE BOOL     "Set via depricated ${old}" FORCE)
-    endif()
-    unset(${old} CACHE)
-  endif()
-endmacro()
-
-ocv_legacy_option(BUILD_NEW_PYTHON_SUPPORT BUILD_opencv_python)
-ocv_legacy_option(BUILD_JAVA_SUPPORT       BUILD_opencv_java)
-ocv_legacy_option(WITH_ANDROID_CAMERA      BUILD_opencv_androidcamera)
-ocv_legacy_option(WITH_VIDEOINPUT          WITH_DSHOW)
-
-if(DEFINED OPENCV_BUILD_3RDPARTY_LIBS)
-  set(BUILD_ZLIB   ${OPENCV_BUILD_3RDPARTY_LIBS} CACHE BOOL "Set via depricated OPENCV_BUILD_3RDPARTY_LIBS" FORCE)
-  set(BUILD_TIFF   ${OPENCV_BUILD_3RDPARTY_LIBS} CACHE BOOL "Set via depricated OPENCV_BUILD_3RDPARTY_LIBS" FORCE)
-  set(BUILD_JASPER ${OPENCV_BUILD_3RDPARTY_LIBS} CACHE BOOL "Set via depricated OPENCV_BUILD_3RDPARTY_LIBS" FORCE)
-  set(BUILD_JPEG   ${OPENCV_BUILD_3RDPARTY_LIBS} CACHE BOOL "Set via depricated OPENCV_BUILD_3RDPARTY_LIBS" FORCE)
-  set(BUILD_PNG    ${OPENCV_BUILD_3RDPARTY_LIBS} CACHE BOOL "Set via depricated OPENCV_BUILD_3RDPARTY_LIBS" FORCE)
-  unset(OPENCV_BUILD_3RDPARTY_LIBS CACHE)
-endif()
-
diff --git a/cmake/OpenCVMinDepVersions.cmake b/cmake/OpenCVMinDepVersions.cmake
new file mode 100644
index 000000000..b659a8379
--- /dev/null
+++ b/cmake/OpenCVMinDepVersions.cmake
@@ -0,0 +1,3 @@
+set(MIN_VER_CMAKE 2.8.7)
+set(MIN_VER_PYTHON 2.6)
+set(MIN_VER_ZLIB 1.2.3)
diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
index 00eb3cfa4..4956edf91 100644
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -33,6 +33,7 @@
 #   <add extra installation rules>
 #   ocv_add_accuracy_tests(<extra dependencies>)
 #   ocv_add_perf_tests(<extra dependencies>)
+#   ocv_add_samples(<extra dependencies>)
 #
 #
 # If module have no "extra" then you can define it in one line:
@@ -470,8 +471,16 @@ endmacro()
 #   ocv_create_module(<extra link dependencies>)
 #   ocv_create_module(SKIP_LINK)
 macro(ocv_create_module)
+  # The condition we ought to be testing here is whether ocv_add_precompiled_headers will
+  # be called at some point in the future. We can't look into the future, though,
+  # so this will have to do.
+  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp")
+    get_native_precompiled_header(${the_module} precomp.hpp)
+  endif()
+
   add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES}
-    "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/opencv_modules.hpp")
+    "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/opencv_modules.hpp"
+    ${${the_module}_pch})
   if(NOT the_module STREQUAL opencv_ts)
     set_target_properties(${the_module} PROPERTIES COMPILE_DEFINITIONS OPENCV_NOSTL)
   endif()
@@ -511,7 +520,8 @@ macro(ocv_create_module)
     )
   endif()
 
-  if(BUILD_SHARED_LIBS)
+  if((NOT DEFINED OPENCV_MODULE_TYPE AND BUILD_SHARED_LIBS)
+      OR (DEFINED OPENCV_MODULE_TYPE AND OPENCV_MODULE_TYPE STREQUAL SHARED))
     if(MSVC)
       set_target_properties(${the_module} PROPERTIES DEFINE_SYMBOL CVAPI_EXPORTS)
     else()
@@ -572,6 +582,7 @@ macro(ocv_define_module module_name)
 
   ocv_add_accuracy_tests()
   ocv_add_perf_tests()
+  ocv_add_samples()
 endmacro()
 
 # ensures that all passed modules are available
@@ -639,7 +650,9 @@ function(ocv_add_perf_tests)
         set(OPENCV_PERF_${the_module}_SOURCES ${perf_srcs} ${perf_hdrs})
       endif()
 
-      add_executable(${the_target} ${OPENCV_PERF_${the_module}_SOURCES})
+      get_native_precompiled_header(${the_target} perf_precomp.hpp)
+
+      add_executable(${the_target} ${OPENCV_PERF_${the_module}_SOURCES} ${${the_target}_pch})
       target_link_libraries(${the_target} ${OPENCV_MODULE_${the_module}_DEPS} ${perf_deps} ${OPENCV_LINKER_LIBS})
       add_dependencies(opencv_perf_tests ${the_target})
 
@@ -687,7 +700,9 @@ function(ocv_add_accuracy_tests)
         set(OPENCV_TEST_${the_module}_SOURCES ${test_srcs} ${test_hdrs})
       endif()
 
-      add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES})
+      get_native_precompiled_header(${the_target} test_precomp.hpp)
+
+      add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch})
       target_link_libraries(${the_target} ${OPENCV_MODULE_${the_module}_DEPS} ${test_deps} ${OPENCV_LINKER_LIBS})
       add_dependencies(opencv_tests ${the_target})
 
@@ -712,6 +727,48 @@ function(ocv_add_accuracy_tests)
   endif()
 endfunction()
 
+function(ocv_add_samples)
+  set(samples_path "${CMAKE_CURRENT_SOURCE_DIR}/samples")
+  string(REGEX REPLACE "^opencv_" "" module_id ${the_module})
+
+  if(BUILD_EXAMPLES AND EXISTS "${samples_path}")
+    set(samples_deps ${the_module} ${OPENCV_MODULE_${the_module}_DEPS} opencv_highgui ${ARGN})
+    ocv_check_dependencies(${samples_deps})
+
+    if(OCV_DEPENDENCIES_FOUND)
+      file(GLOB sample_sources "${samples_path}/*.cpp")
+      ocv_include_modules(${OPENCV_MODULE_${the_module}_DEPS})
+
+      foreach(source ${sample_sources})
+        get_filename_component(name "${source}" NAME_WE)
+        set(the_target "example_${module_id}_${name}")
+
+        add_executable(${the_target} "${source}")
+        target_link_libraries(${the_target} ${samples_deps})
+
+        set_target_properties(${the_target} PROPERTIES PROJECT_LABEL "(sample) ${name}")
+
+        if(ENABLE_SOLUTION_FOLDERS)
+          set_target_properties(${the_target} PROPERTIES
+            OUTPUT_NAME "${module_id}-example-${name}"
+            FOLDER "samples/${module_id}")
+        endif()
+
+        if(WIN32)
+          install(TARGETS ${the_target} RUNTIME DESTINATION "samples/${module_id}" COMPONENT main)
+        endif()
+      endforeach()
+    endif()
+  endif()
+
+  if(INSTALL_C_EXAMPLES AND NOT WIN32 AND EXISTS "${samples_path}")
+    file(GLOB sample_files "${samples_path}/*")
+    install(FILES ${sample_files}
+            DESTINATION share/OpenCV/samples/${module_id}
+            PERMISSIONS OWNER_READ GROUP_READ WORLD_READ)
+  endif()
+endfunction()
+
 # internal macro; finds all link dependencies of the module
 # should be used at the end of CMake processing
 macro(__ocv_track_module_link_dependencies the_module optkind)
diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake
index 9b3efd9d1..060965346 100644
--- a/cmake/OpenCVPCHSupport.cmake
+++ b/cmake/OpenCVPCHSupport.cmake
@@ -279,12 +279,9 @@ ENDMACRO(ADD_PRECOMPILED_HEADER)
 MACRO(GET_NATIVE_PRECOMPILED_HEADER _targetName _input)
 
     if(CMAKE_GENERATOR MATCHES "^Visual.*$")
-        SET(_dummy_str "#include \"${_input}\"\n"
-"// This is required to suppress LNK4221.  Very annoying.\n"
-"void *g_${_targetName}Dummy = 0\;\n")
+        set(_dummy_str "#include \"${_input}\"\n")
 
-        # Use of cxx extension for generated files (as Qt does)
-        SET(${_targetName}_pch ${CMAKE_CURRENT_BINARY_DIR}/${_targetName}_pch.cxx)
+        set(${_targetName}_pch ${CMAKE_CURRENT_BINARY_DIR}/${_targetName}_pch.cpp)
         if(EXISTS ${${_targetName}_pch})
             # Check if contents is the same, if not rewrite
             # todo
@@ -344,11 +341,7 @@ ENDMACRO(ADD_NATIVE_PRECOMPILED_HEADER)
 
 macro(ocv_add_precompiled_header_to_target the_target pch_header)
   if(PCHSupport_FOUND AND ENABLE_PRECOMPILED_HEADERS AND EXISTS "${pch_header}")
-    if(CMAKE_GENERATOR MATCHES Visual)
-      string(REGEX REPLACE "hpp$" "cpp" ${the_target}_pch "${pch_header}")
-      add_native_precompiled_header(${the_target} ${pch_header})
-      unset(${the_target}_pch)
-    elseif(CMAKE_GENERATOR MATCHES Xcode)
+    if(CMAKE_GENERATOR MATCHES "^Visual" OR CMAKE_GENERATOR MATCHES Xcode)
       add_native_precompiled_header(${the_target} ${pch_header})
     elseif(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_GENERATOR MATCHES "Makefiles|Ninja")
       add_precompiled_header(${the_target} ${pch_header})
diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake
index 59366eb03..ddf029067 100644
--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@@ -77,7 +77,7 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
     if(_fname)
       MESSAGE(STATUS "Performing Test ${RESULT}")
       TRY_COMPILE(${RESULT}
-        ${CMAKE_BINARY_DIR}
+        "${CMAKE_BINARY_DIR}"
         "${_fname}"
         COMPILE_DEFINITIONS "${FLAG}"
         OUTPUT_VARIABLE OUTPUT)
@@ -515,4 +515,4 @@ function(ocv_source_group group)
   cmake_parse_arguments(OCV_SOURCE_GROUP "" "" "GLOB" ${ARGN})
   file(GLOB srcs ${OCV_SOURCE_GROUP_GLOB})
   source_group(${group} FILES ${srcs})
-endfunction()
\ No newline at end of file
+endfunction()
diff --git a/cmake/OpenCVVersion.cmake b/cmake/OpenCVVersion.cmake
index 03de98068..c8ce40308 100644
--- a/cmake/OpenCVVersion.cmake
+++ b/cmake/OpenCVVersion.cmake
@@ -1,16 +1,19 @@
 SET(OPENCV_VERSION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/modules/core/include/opencv2/core/version.hpp")
-FILE(STRINGS "${OPENCV_VERSION_FILE}" OPENCV_VERSION_PARTS REGEX "#define CV_VERSION_[A-Z]+[ ]+[0-9]+" )
+file(STRINGS "${OPENCV_VERSION_FILE}" OPENCV_VERSION_PARTS REGEX "#define CV_VERSION_[A-Z]+[ ]+" )
 
 string(REGEX REPLACE ".+CV_VERSION_EPOCH[ ]+([0-9]+).*" "\\1" OPENCV_VERSION_MAJOR "${OPENCV_VERSION_PARTS}")
 string(REGEX REPLACE ".+CV_VERSION_MAJOR[ ]+([0-9]+).*" "\\1" OPENCV_VERSION_MINOR "${OPENCV_VERSION_PARTS}")
 string(REGEX REPLACE ".+CV_VERSION_MINOR[ ]+([0-9]+).*" "\\1" OPENCV_VERSION_PATCH "${OPENCV_VERSION_PARTS}")
 string(REGEX REPLACE ".+CV_VERSION_REVISION[ ]+([0-9]+).*" "\\1" OPENCV_VERSION_TWEAK "${OPENCV_VERSION_PARTS}")
+string(REGEX REPLACE ".+CV_VERSION_STATUS[ ]+\"([^\"]*)\".*" "\\1" OPENCV_VERSION_STATUS "${OPENCV_VERSION_PARTS}")
 
-set(OPENCV_VERSION "${OPENCV_VERSION_MAJOR}.${OPENCV_VERSION_MINOR}.${OPENCV_VERSION_PATCH}")
+set(OPENCV_VERSION_PLAIN "${OPENCV_VERSION_MAJOR}.${OPENCV_VERSION_MINOR}.${OPENCV_VERSION_PATCH}")
 if(OPENCV_VERSION_TWEAK GREATER 0)
-  set(OPENCV_VERSION "${OPENCV_VERSION}.${OPENCV_VERSION_TWEAK}")
+  set(OPENCV_VERSION_PLAIN "${OPENCV_VERSION_PLAIN}.${OPENCV_VERSION_TWEAK}")
 endif()
 
+set(OPENCV_VERSION "${OPENCV_VERSION_PLAIN}${OPENCV_VERSION_STATUS}")
+
 set(OPENCV_SOVERSION "${OPENCV_VERSION_MAJOR}.${OPENCV_VERSION_MINOR}")
 set(OPENCV_LIBVERSION "${OPENCV_VERSION_MAJOR}.${OPENCV_VERSION_MINOR}.${OPENCV_VERSION_PATCH}")
 
diff --git a/cmake/checks/OpenCVDetectCudaArch.cu b/cmake/checks/OpenCVDetectCudaArch.cu
index 008f8ba8d..9d7086cf2 100644
--- a/cmake/checks/OpenCVDetectCudaArch.cu
+++ b/cmake/checks/OpenCVDetectCudaArch.cu
@@ -11,4 +11,4 @@ int main()
         printf("%d.%d ", prop.major, prop.minor);
     }
     return 0;
-}
\ No newline at end of file
+}
diff --git a/cmake/checks/vfwtest.cpp b/cmake/checks/vfwtest.cpp
index 63d545788..8d8ecb271 100644
--- a/cmake/checks/vfwtest.cpp
+++ b/cmake/checks/vfwtest.cpp
@@ -7,4 +7,4 @@ int main()
   AVIFileInit();
   AVIFileExit();
   return 0;
-}
\ No newline at end of file
+}
diff --git a/cmake/checks/winrttest.cpp b/cmake/checks/winrttest.cpp
new file mode 100644
index 000000000..9ec0c9ac1
--- /dev/null
+++ b/cmake/checks/winrttest.cpp
@@ -0,0 +1,6 @@
+#include <wrl/client.h>
+
+int main(int, char**)
+{
+    return 0;
+}
diff --git a/cmake/cl2cpp.cmake b/cmake/cl2cpp.cmake
index 4f18e9e64..1e932eabd 100644
--- a/cmake/cl2cpp.cmake
+++ b/cmake/cl2cpp.cmake
@@ -32,4 +32,4 @@ foreach(cl ${cl_list})
   file(APPEND ${OUTPUT} "const char* ${cl_filename}=\"${lines};\n")
 endforeach()
 
-file(APPEND ${OUTPUT} "}\n}\n")
\ No newline at end of file
+file(APPEND ${OUTPUT} "}\n}\n")
diff --git a/cmake/templates/OpenCVConfig-version.cmake.in b/cmake/templates/OpenCVConfig-version.cmake.in
index 0b432a074..b5ac5f8e2 100644
--- a/cmake/templates/OpenCVConfig-version.cmake.in
+++ b/cmake/templates/OpenCVConfig-version.cmake.in
@@ -1,4 +1,4 @@
-set(OpenCV_VERSION @OPENCV_VERSION@)
+set(OpenCV_VERSION @OPENCV_VERSION_PLAIN@)
 set(PACKAGE_VERSION ${OpenCV_VERSION})
 
 set(PACKAGE_VERSION_EXACT False)
diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in
index 1dcd84030..90749488a 100644
--- a/cmake/templates/OpenCVConfig.cmake.in
+++ b/cmake/templates/OpenCVConfig.cmake.in
@@ -22,11 +22,12 @@
 #      - OpenCV_INCLUDE_DIRS             : The OpenCV include directories.
 #      - OpenCV_COMPUTE_CAPABILITIES     : The version of compute capability
 #      - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API
-#      - OpenCV_VERSION                  : The version of this OpenCV build: "@OPENCV_VERSION@"
+#      - OpenCV_VERSION                  : The version of this OpenCV build: "@OPENCV_VERSION_PLAIN@"
 #      - OpenCV_VERSION_MAJOR            : Major version part of OpenCV_VERSION: "@OPENCV_VERSION_MAJOR@"
 #      - OpenCV_VERSION_MINOR            : Minor version part of OpenCV_VERSION: "@OPENCV_VERSION_MINOR@"
 #      - OpenCV_VERSION_PATCH            : Patch version part of OpenCV_VERSION: "@OPENCV_VERSION_PATCH@"
 #      - OpenCV_VERSION_TWEAK            : Tweak version part of OpenCV_VERSION: "@OPENCV_VERSION_TWEAK@"
+#      - OpenCV_VERSION_STATUS           : Development status of this build: "@OPENCV_VERSION_STATUS@"
 #
 #    Advanced variables:
 #      - OpenCV_SHARED
@@ -96,11 +97,12 @@ mark_as_advanced(FORCE OpenCV_LIB_DIR_OPT OpenCV_LIB_DIR_DBG OpenCV_3RDPARTY_LIB
 # ======================================================
 #  Version variables:
 # ======================================================
-SET(OpenCV_VERSION @OPENCV_VERSION@)
+SET(OpenCV_VERSION @OPENCV_VERSION_PLAIN@)
 SET(OpenCV_VERSION_MAJOR  @OPENCV_VERSION_MAJOR@)
 SET(OpenCV_VERSION_MINOR  @OPENCV_VERSION_MINOR@)
 SET(OpenCV_VERSION_PATCH  @OPENCV_VERSION_PATCH@)
 SET(OpenCV_VERSION_TWEAK  @OPENCV_VERSION_TWEAK@)
+SET(OpenCV_VERSION_STATUS "@OPENCV_VERSION_STATUS@")
 
 # ====================================================================
 # Link libraries: e.g.   libopencv_core.so, opencv_imgproc220d.lib, etc...
diff --git a/cmake/templates/cmake_uninstall.cmake.in b/cmake/templates/cmake_uninstall.cmake.in
index 14e601019..0e63d705c 100644
--- a/cmake/templates/cmake_uninstall.cmake.in
+++ b/cmake/templates/cmake_uninstall.cmake.in
@@ -23,5 +23,3 @@ FOREACH(file ${files})
     MESSAGE(STATUS "File \"$ENV{DESTDIR}${file}\" does not exist.")
   ENDIF(EXISTS "$ENV{DESTDIR}${file}")
 ENDFOREACH(file)
-
-
diff --git a/cmake/templates/cvconfig.h.cmake b/cmake/templates/cvconfig.h.cmake
index a419b0c3f..56c5d5aad 100644
--- a/cmake/templates/cvconfig.h.cmake
+++ b/cmake/templates/cvconfig.h.cmake
@@ -1,20 +1,20 @@
-/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
-   systems. This function is required for `alloca.c' support on those systems.
-   */
-#cmakedefine  CRAY_STACKSEG_END
+/* OpenCV compiled as static or dynamic libs */
+#cmakedefine BUILD_SHARED_LIBS
 
-/* Define to 1 if using `alloca.c'. */
-#cmakedefine C_ALLOCA
+/* Compile for 'real' NVIDIA GPU architectures */
+#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
 
-/* Define to 1 if you have `alloca', as a function or macro. */
-#cmakedefine HAVE_ALLOCA 1
+/* Create PTX or BIN for 1.0 compute capability */
+#cmakedefine CUDA_ARCH_BIN_OR_PTX_10
 
-/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
-   */
-#cmakedefine HAVE_ALLOCA_H 1
+/* NVIDIA GPU features are used */
+#define CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES}"
 
-/* Video for Windows support */
-#cmakedefine HAVE_VFW
+/* Compile for 'virtual' NVIDIA PTX architectures */
+#define CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX}"
+
+/* AVFoundation video libraries */
+#cmakedefine HAVE_AVFOUNDATION
 
 /* V4L capturing support */
 #cmakedefine HAVE_CAMV4L
@@ -22,15 +22,33 @@
 /* V4L2 capturing support */
 #cmakedefine HAVE_CAMV4L2
 
-/* V4L2 capturing support in videoio.h */
-#cmakedefine HAVE_VIDEOIO
-
-/* V4L/V4L2 capturing support via libv4l */
-#cmakedefine HAVE_LIBV4L
-
 /* Carbon windowing environment */
 #cmakedefine HAVE_CARBON
 
+/* AMD's Basic Linear Algebra Subprograms Library*/
+#cmakedefine HAVE_CLAMDBLAS
+
+/* AMD's OpenCL Fast Fourier Transform Library*/
+#cmakedefine HAVE_CLAMDFFT
+
+/* Clp support */
+#cmakedefine HAVE_CLP
+
+/* Cocoa API */
+#cmakedefine HAVE_COCOA
+
+/* C= */
+#cmakedefine HAVE_CSTRIPES
+
+/* NVidia Cuda Basic Linear Algebra Subprograms (BLAS) API*/
+#cmakedefine HAVE_CUBLAS
+
+/* NVidia Cuda Runtime API*/
+#cmakedefine HAVE_CUDA
+
+/* NVidia Cuda Fast Fourier Transform (FFT) API*/
+#cmakedefine HAVE_CUFFT
+
 /* IEEE1394 capturing support */
 #cmakedefine HAVE_DC1394
 
@@ -40,188 +58,108 @@
 /* IEEE1394 capturing support - libdc1394 v2.x */
 #cmakedefine HAVE_DC1394_2
 
+/* DirectShow Video Capture library */
+#cmakedefine HAVE_DSHOW
+
+/* Eigen Matrix & Linear Algebra Library */
+#cmakedefine HAVE_EIGEN
+
+/* FFMpeg video library */
+#cmakedefine HAVE_FFMPEG
+
+/* ffmpeg's libswscale */
+#cmakedefine HAVE_FFMPEG_SWSCALE
+
 /* ffmpeg in Gentoo */
 #cmakedefine HAVE_GENTOO_FFMPEG
 
-/* FFMpeg video library */
-#cmakedefine  HAVE_FFMPEG
-
-/* FFMpeg version flag */
-#cmakedefine  NEW_FFMPEG
-
-/* ffmpeg's libswscale */
-#cmakedefine  HAVE_FFMPEG_SWSCALE
-
 /* GStreamer multimedia framework */
-#cmakedefine  HAVE_GSTREAMER
+#cmakedefine HAVE_GSTREAMER
 
 /* GTK+ 2.0 Thread support */
-#cmakedefine  HAVE_GTHREAD
-
-/* Win32 UI */
-#cmakedefine HAVE_WIN32UI
+#cmakedefine HAVE_GTHREAD
 
 /* GTK+ 2.x toolkit */
-#cmakedefine  HAVE_GTK
-
-/* OpenEXR codec */
-#cmakedefine  HAVE_ILMIMF
+#cmakedefine HAVE_GTK
 
 /* Define to 1 if you have the <inttypes.h> header file. */
-#cmakedefine  HAVE_INTTYPES_H 1
-
-/* JPEG-2000 codec */
-#cmakedefine  HAVE_JASPER
-
-/* IJG JPEG codec */
-#cmakedefine  HAVE_JPEG
-
-/* Define to 1 if you have the `dl' library (-ldl). */
-#cmakedefine  HAVE_LIBDL 1
-
-/* Define to 1 if you have the `gomp' library (-lgomp). */
-#cmakedefine  HAVE_LIBGOMP 1
-
-/* Define to 1 if you have the `m' library (-lm). */
-#cmakedefine  HAVE_LIBM 1
-
-/* libpng/png.h needs to be included */
-#cmakedefine  HAVE_LIBPNG_PNG_H
-
-/* Define to 1 if you have the `pthread' library (-lpthread). */
-#cmakedefine  HAVE_LIBPTHREAD 1
-
-/* Define to 1 if you have the `lrint' function. */
-#cmakedefine  HAVE_LRINT 1
-
-/* PNG codec */
-#cmakedefine  HAVE_PNG
-
-/* Define to 1 if you have the `png_get_valid' function. */
-#cmakedefine  HAVE_PNG_GET_VALID 1
-
-/* png.h needs to be included */
-#cmakedefine  HAVE_PNG_H
-
-/* Define to 1 if you have the `png_set_tRNS_to_alpha' function. */
-#cmakedefine  HAVE_PNG_SET_TRNS_TO_ALPHA 1
-
-/* QuickTime video libraries */
-#cmakedefine  HAVE_QUICKTIME
-
-/* AVFoundation video libraries */
-#cmakedefine  HAVE_AVFOUNDATION
-
-/* TIFF codec */
-#cmakedefine  HAVE_TIFF
-
-/* Unicap video capture library */
-#cmakedefine  HAVE_UNICAP
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#cmakedefine  HAVE_UNISTD_H 1
-
-/* Xine video library */
-#cmakedefine  HAVE_XINE
-
-/* OpenNI library */
-#cmakedefine  HAVE_OPENNI
-
-/* LZ77 compression/decompression library (used for PNG) */
-#cmakedefine  HAVE_ZLIB
+#cmakedefine HAVE_INTTYPES_H 1
 
 /* Intel Integrated Performance Primitives */
-#cmakedefine  HAVE_IPP
+#cmakedefine HAVE_IPP
 
-/* OpenCV compiled as static or dynamic libs */
-#cmakedefine  BUILD_SHARED_LIBS
+/* JPEG-2000 codec */
+#cmakedefine HAVE_JASPER
 
-/* Name of package */
-#define  PACKAGE "${PACKAGE}"
+/* IJG JPEG codec */
+#cmakedefine HAVE_JPEG
 
-/* Define to the address where bug reports for this package should be sent. */
-#define  PACKAGE_BUGREPORT "${PACKAGE_BUGREPORT}"
+/* libpng/png.h needs to be included */
+#cmakedefine HAVE_LIBPNG_PNG_H
 
-/* Define to the full name of this package. */
-#define  PACKAGE_NAME "${PACKAGE_NAME}"
-
-/* Define to the full name and version of this package. */
-#define  PACKAGE_STRING "${PACKAGE_STRING}"
-
-/* Define to the one symbol short name of this package. */
-#define  PACKAGE_TARNAME "${PACKAGE_TARNAME}"
-
-/* Define to the version of this package. */
-#define  PACKAGE_VERSION "${PACKAGE_VERSION}"
-
-/* If using the C implementation of alloca, define if you know the
-   direction of stack growth for your system; otherwise it will be
-   automatically deduced at runtime.
-    STACK_DIRECTION > 0 => grows toward higher addresses
-    STACK_DIRECTION < 0 => grows toward lower addresses
-    STACK_DIRECTION = 0 => direction of growth unknown */
-#cmakedefine  STACK_DIRECTION
-
-/* Version number of package */
-#define  VERSION "${PACKAGE_VERSION}"
-
-/* Define to 1 if your processor stores words with the most significant byte
-   first (like Motorola and SPARC, unlike Intel and VAX). */
-#cmakedefine  WORDS_BIGENDIAN
-
-/* Intel Threading Building Blocks */
-#cmakedefine  HAVE_TBB
-
-/* C= */
-#cmakedefine  HAVE_CSTRIPES
-
-/* Eigen Matrix & Linear Algebra Library */
-#cmakedefine  HAVE_EIGEN
-
-/* NVidia Cuda Runtime API*/
-#cmakedefine HAVE_CUDA
-
-/* NVidia Cuda Fast Fourier Transform (FFT) API*/
-#cmakedefine HAVE_CUFFT
-
-/* NVidia Cuda Basic Linear Algebra Subprograms (BLAS) API*/
-#cmakedefine HAVE_CUBLAS
-
-/* NVidia Video Decoding API*/
-#cmakedefine HAVE_NVCUVID
-
-/* Compile for 'real' NVIDIA GPU architectures */
-#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
-
-/* Compile for 'virtual' NVIDIA PTX architectures */
-#define CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX}"
-
-/* NVIDIA GPU features are used */
-#define CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES}"
-
-/* Create PTX or BIN for 1.0 compute capability */
-#cmakedefine CUDA_ARCH_BIN_OR_PTX_10
-
-/* OpenCL Support */
-#cmakedefine HAVE_OPENCL
-
-/* AMD's OpenCL Fast Fourier Transform Library*/
-#cmakedefine HAVE_CLAMDFFT
-
-/* AMD's Basic Linear Algebra Subprograms Library*/
-#cmakedefine HAVE_CLAMDBLAS
-
-/* DirectShow Video Capture library */
-#cmakedefine HAVE_DSHOW
+/* V4L/V4L2 capturing support via libv4l */
+#cmakedefine HAVE_LIBV4L
 
 /* Microsoft Media Foundation Capture library */
 #cmakedefine HAVE_MSMF
 
-/* XIMEA camera support */
-#cmakedefine HAVE_XIMEA
+/* NVidia Video Decoding API*/
+#cmakedefine HAVE_NVCUVID
+
+/* OpenCL Support */
+#cmakedefine HAVE_OPENCL
+
+/* OpenEXR codec */
+#cmakedefine HAVE_OPENEXR
 
 /* OpenGL support*/
 #cmakedefine HAVE_OPENGL
 
-/* Clp support */
-#cmakedefine HAVE_CLP
+/* OpenNI library */
+#cmakedefine HAVE_OPENNI
+
+/* PNG codec */
+#cmakedefine HAVE_PNG
+
+/* Qt support */
+#cmakedefine HAVE_QT
+
+/* Qt OpenGL support */
+#cmakedefine HAVE_QT_OPENGL
+
+/* QuickTime video libraries */
+#cmakedefine HAVE_QUICKTIME
+
+/* QTKit video libraries */
+#cmakedefine HAVE_QTKIT
+
+/* Intel Threading Building Blocks */
+#cmakedefine HAVE_TBB
+
+/* TIFF codec */
+#cmakedefine HAVE_TIFF
+
+/* Unicap video capture library */
+#cmakedefine HAVE_UNICAP
+
+/* Video for Windows support */
+#cmakedefine HAVE_VFW
+
+/* V4L2 capturing support in videoio.h */
+#cmakedefine HAVE_VIDEOIO
+
+/* Win32 UI */
+#cmakedefine HAVE_WIN32UI
+
+/* Windows Runtime support */
+#cmakedefine HAVE_WINRT
+
+/* XIMEA camera support */
+#cmakedefine HAVE_XIMEA
+
+/* Xine video library */
+#cmakedefine HAVE_XINE
+
+/* Define to 1 if your processor stores words with the most significant byte
+   first (like Motorola and SPARC, unlike Intel and VAX). */
+#cmakedefine WORDS_BIGENDIAN
diff --git a/cmake/templates/opencv-XXX.pc.cmake.in b/cmake/templates/opencv-XXX.pc.cmake.in
index 4f6415cab..5b94e4d6c 100644
--- a/cmake/templates/opencv-XXX.pc.cmake.in
+++ b/cmake/templates/opencv-XXX.pc.cmake.in
@@ -8,6 +8,6 @@ includedir_new=@includedir@
 
 Name: OpenCV
 Description: Open Source Computer Vision Library
-Version: @VERSION@
+Version: @OPENCV_VERSION_PLAIN@
 Libs: @OpenCV_LIB_COMPONENTS@
 Cflags: -I${includedir_old} -I${includedir_new}
diff --git a/cmake/templates/opencv_modules.hpp.in b/cmake/templates/opencv_modules.hpp.in
index 5d58e40e4..149871502 100644
--- a/cmake/templates/opencv_modules.hpp.in
+++ b/cmake/templates/opencv_modules.hpp.in
@@ -6,4 +6,4 @@
  *
 */
 
-@OPENCV_MODULE_DEFINITIONS_CONFIGMAKE@
\ No newline at end of file
+@OPENCV_MODULE_DEFINITIONS_CONFIGMAKE@
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index 70f4809d2..888740bd4 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -17,7 +17,7 @@ if(BUILD_DOCS AND HAVE_SPHINX)
   set(OPTIONAL_DOC_LIST "")
 
 
-  set(OPENCV2_BASE_MODULES core imgproc highgui video calib3d features2d objdetect ml flann gpu photo stitching nonfree contrib legacy)
+  set(OPENCV2_BASE_MODULES core imgproc highgui video calib3d features2d objdetect ml flann gpu photo stitching nonfree contrib legacy bioinspired)
 
   # build lists of modules to be documented
   set(OPENCV2_MODULES "")
@@ -49,7 +49,7 @@ if(BUILD_DOCS AND HAVE_SPHINX)
     set(toc_file "${OPENCV_MODULE_opencv_${mod}_LOCATION}/doc/${mod}.rst")
     if(EXISTS "${toc_file}")
       file(RELATIVE_PATH toc_file "${OpenCV_SOURCE_DIR}/modules" "${toc_file}")
-      set(OPENCV_REFMAN_TOC "${OPENCV_REFMAN_TOC}   ${toc_file}\r\n")
+      set(OPENCV_REFMAN_TOC "${OPENCV_REFMAN_TOC}   ${toc_file}\n")
     endif()
   endforeach()
 
@@ -67,9 +67,14 @@ if(BUILD_DOCS AND HAVE_SPHINX)
   set(OPENCV_DOC_DEPS conf.py ${OPENCV_FILES_REF} ${OPENCV_FILES_REF_PICT}
            ${OPENCV_FILES_UG} ${OPENCV_FILES_TUT} ${OPENCV_FILES_TUT_PICT})
 
+  set(BUILD_PLANTUML "")
+  if(PLANTUML)
+    set(BUILD_PLANTUML "-tplantuml")
+  endif()
+
   if(PDFLATEX_COMPILER)
     add_custom_target(docs
-      COMMAND ${SPHINX_BUILD} -b latex -c ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. .
+      COMMAND ${SPHINX_BUILD} ${BUILD_PLANTUML} -b latex -c ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. .
       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/pics ${CMAKE_CURRENT_BINARY_DIR}/doc/opencv1/pics
       COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/mymath.sty ${CMAKE_CURRENT_BINARY_DIR}
       COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/patch_refman_latex.py" opencv2refman.tex
@@ -103,7 +108,7 @@ if(BUILD_DOCS AND HAVE_SPHINX)
   endif()
 
   add_custom_target(html_docs
-    COMMAND ${SPHINX_BUILD} -b html -c ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ./_html
+    COMMAND ${SPHINX_BUILD} ${BUILD_PLANTUML} -b html -c ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ./_html
     COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/mymath.sty ${CMAKE_CURRENT_BINARY_DIR}
     DEPENDS ${OPENCV_DOC_DEPS}
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
@@ -122,4 +127,4 @@ if(BUILD_DOCS AND HAVE_SPHINX)
     install(FILES "${f}" DESTINATION "${OPENCV_DOC_INSTALL_PATH}" OPTIONAL)
   endforeach()
 
-endif()
\ No newline at end of file
+endif()
diff --git a/doc/_static/insertIframe.js b/doc/_static/insertIframe.js
index 904ff361d..0a3099a8c 100644
--- a/doc/_static/insertIframe.js
+++ b/doc/_static/insertIframe.js
@@ -1,4 +1,4 @@
-function insertIframe (elementId, iframeSrc) 
+function insertIframe (elementId, iframeSrc)
 {
   var iframe;
   if (document.createElement && (iframe = document.createElement('iframe')))
@@ -10,4 +10,3 @@ function insertIframe (elementId, iframeSrc)
     element.parentNode.replaceChild(iframe, element);
   }
 }
-
diff --git a/doc/_themes/blue/layout.html b/doc/_themes/blue/layout.html
index a376c9759..d0d43031c 100644
--- a/doc/_themes/blue/layout.html
+++ b/doc/_themes/blue/layout.html
@@ -183,7 +183,7 @@
                   {% if theme_lang == 'c' %}
                   {% endif %}
                   {% if theme_lang == 'cpp' %}
-                    <li>Try the <a href="http://docs.opencv.org/trunk/opencv_cheatsheet.pdf">Cheatsheet</a>.</li>
+                    <li>Try the <a href="http://docs.opencv.org/opencv_cheatsheet.pdf">Cheatsheet</a>.</li>
                   {% endif %}
                   {% if theme_lang == 'py' %}
                     <li>Try the <a href="cookbook.html">Cookbook</a>.</li>
diff --git a/doc/_themes/blue/static/default.css_t b/doc/_themes/blue/static/default.css_t
index 49a57e9ec..4d410be8a 100644
--- a/doc/_themes/blue/static/default.css_t
+++ b/doc/_themes/blue/static/default.css_t
@@ -387,4 +387,4 @@ div.sphinxsidebar #searchbox input[type="text"] {
 
 div.sphinxsidebar #searchbox input[type="submit"] {
     width:auto;
-}
\ No newline at end of file
+}
diff --git a/doc/_themes/blue/theme.conf b/doc/_themes/blue/theme.conf
index 206193f6f..bc0b99b2a 100644
--- a/doc/_themes/blue/theme.conf
+++ b/doc/_themes/blue/theme.conf
@@ -28,4 +28,4 @@ feedbacklinkcolor = #ffffff
 bodyfont = sans-serif
 headfont = 'Trebuchet MS', sans-serif
 guifont = "Lucida Sans","Lucida Sans Unicode","Lucida Grande",Verdana,Arial,Helvetica,sans-serif
-lang = none
\ No newline at end of file
+lang = none
diff --git a/doc/check_docs.py b/doc/check_docs.py
index c18bf0726..0290fc70f 100755
--- a/doc/check_docs.py
+++ b/doc/check_docs.py
@@ -184,5 +184,3 @@ p = RSTParser()
 for m in opencv_module_list:
     print "\n\n*************************** " + m + " *************************\n"
     p.check_module_docs(m)
-
-
diff --git a/doc/conf.py b/doc/conf.py
index f3f7aec58..2cbcdf886 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -28,6 +28,16 @@ sys.path.insert(0, os.path.abspath('.'))
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = ['sphinx.ext.pngmath', 'sphinx.ext.ifconfig', 'sphinx.ext.todo', 'sphinx.ext.extlinks', 'ocv', 'sphinx.ext.doctest']
+
+have_plantuml_ext = False
+if tags.has('plantuml'):
+  try:
+    import sphinxcontrib.plantuml
+    extensions.append("sphinxcontrib.plantuml")
+    have_plantuml_ext = True
+  except ImportError:
+    print "No module sphinxcontrib.plantuml found, sphinx will not render UML diagrams"
+
 doctest_test_doctest_blocks = 'block'
 
 # Add any paths that contain templates here, relative to this directory.
@@ -55,6 +65,7 @@ version_epoch = re.search("^W*#\W*define\W+CV_VERSION_EPOCH\W+(\d+)\W*$", versio
 version_major = re.search("^W*#\W*define\W+CV_VERSION_MAJOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
 version_minor = re.search("^W*#\W*define\W+CV_VERSION_MINOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
 version_patch = re.search("^W*#\W*define\W+CV_VERSION_REVISION\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
+version_status = re.search("^W*#\W*define\W+CV_VERSION_STATUS\W+\"(.*?)\"\W*$", version_file, re.MULTILINE).group(1)
 
 # The short X.Y version.
 version = version_epoch + '.' + version_major
@@ -62,6 +73,7 @@ version = version_epoch + '.' + version_major
 release = version_epoch + '.' + version_major + '.' + version_minor
 if version_patch:
     release = release + '.' + version_patch
+release += version_status
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -77,6 +89,9 @@ if version_patch:
 # directories to ignore when looking for source files.
 exclude_patterns = ['doc/tutorials/definitions']
 
+if not have_plantuml_ext:
+  exclude_patterns.append('**/uml/*')
+
 # The reST default role (used for this markup: `text`) to use for all documents.
 #default_role = None
 
@@ -284,120 +299,122 @@ latex_domain_indices = True
 # (source start file, name, description, authors, manual section).
 man_pages = [
     ('index', 'opencv', u'The OpenCV Reference Manual',
-     [u'opencv-dev@itseez.com'], 1)
+     [u'admin@opencv.org'], 1)
 ]
 
 # ---- External links for tutorials -----------------
 extlinks = {
-            'basicstructures' : ('http://opencv.itseez.com/modules/core/doc/basic_structures.html#%s', None),
-            'oldbasicstructures' : ('http://opencv.itseez.com/modules/core/doc/old_basic_structures.html#%s', None),
-            'readwriteimagevideo' : ('http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None),
-            'operationsonarrays' : ('http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html#%s', None),
-            'utilitysystemfunctions':('http://opencv.itseez.com/modules/core/doc/utility_and_system_functions_and_macros.html#%s', None),
-            'imgprocfilter':('http://opencv.itseez.com/modules/imgproc/doc/filtering.html#%s', None),
-            'svms':('http://opencv.itseez.com/modules/ml/doc/support_vector_machines.html#%s', None),
-            'drawingfunc':('http://opencv.itseez.com/modules/core/doc/drawing_functions.html#%s', None),
-            'xmlymlpers':('http://opencv.itseez.com/modules/core/doc/xml_yaml_persistence.html#%s', None),
-            'hgvideo' : ('http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None),
-            'gpuinit' : ('http://opencv.itseez.com/modules/gpu/doc/initalization_and_information.html#%s', None),
-            'gpudatastructure' : ('http://opencv.itseez.com/modules/gpu/doc/data_structures.html#%s', None),
-            'gpuopmatrices' : ('http://opencv.itseez.com/modules/gpu/doc/operations_on_matrices.html#%s', None),
-            'gpuperelement' : ('http://opencv.itseez.com/modules/gpu/doc/per_element_operations.html#%s', None),
-            'gpuimgproc' : ('http://opencv.itseez.com/modules/gpu/doc/image_processing.html#%s', None),
-            'gpumatrixreduct' : ('http://opencv.itseez.com/modules/gpu/doc/matrix_reductions.html#%s', None),
-            'filtering':('http://opencv.itseez.com/modules/imgproc/doc/filtering.html#%s', None),
-            'flann' : ('http://opencv.itseez.com/modules/flann/doc/flann_fast_approximate_nearest_neighbor_search.html#%s', None ),
-            'calib3d' : ('http://opencv.itseez.com/trunk/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#%s', None ),
-            'feature2d' : ('http://opencv.itseez.com/trunk/modules/imgproc/doc/feature_detection.html#%s', None ),
-            'imgproc_geometric' : ('http://opencv.itseez.com/trunk/modules/imgproc/doc/geometric_transformations.html#%s', None ),
+            'basicstructures' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html#%s', None),
+            'oldbasicstructures' : ('http://docs.opencv.org/modules/core/doc/old_basic_structures.html#%s', None),
+            'readwriteimagevideo' : ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None),
+            'operationsonarrays' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html#%s', None),
+            'utilitysystemfunctions':('http://docs.opencv.org/modules/core/doc/utility_and_system_functions_and_macros.html#%s', None),
+            'imgprocfilter':('http://docs.opencv.org/modules/imgproc/doc/filtering.html#%s', None),
+            'svms':('http://docs.opencv.org/modules/ml/doc/support_vector_machines.html#%s', None),
+            'drawingfunc':('http://docs.opencv.org/modules/core/doc/drawing_functions.html#%s', None),
+            'xmlymlpers':('http://docs.opencv.org/modules/core/doc/xml_yaml_persistence.html#%s', None),
+            'hgvideo' : ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None),
+            'gpuinit' : ('http://docs.opencv.org/modules/gpu/doc/initalization_and_information.html#%s', None),
+            'gpudatastructure' : ('http://docs.opencv.org/modules/gpu/doc/data_structures.html#%s', None),
+            'gpuopmatrices' : ('http://docs.opencv.org/modules/gpu/doc/operations_on_matrices.html#%s', None),
+            'gpuperelement' : ('http://docs.opencv.org/modules/gpu/doc/per_element_operations.html#%s', None),
+            'gpuimgproc' : ('http://docs.opencv.org/modules/gpu/doc/image_processing.html#%s', None),
+            'gpumatrixreduct' : ('http://docs.opencv.org/modules/gpu/doc/matrix_reductions.html#%s', None),
+            'filtering':('http://docs.opencv.org/modules/imgproc/doc/filtering.html#%s', None),
+            'flann' : ('http://docs.opencv.org/modules/flann/doc/flann_fast_approximate_nearest_neighbor_search.html#%s', None ),
+            'calib3d' : ('http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#%s', None ),
+            'feature2d' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#%s', None ),
+            'imgproc_geometric' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html#%s', None ),
 
-            'opencv_group' : ('http://tech.groups.yahoo.com/group/OpenCV/%s', None),
+            # 'opencv_group' : ('http://answers.opencv.org/%s', None),
+            'opencv_qa' : ('http://answers.opencv.org/%s', None),
+            'how_to_contribute' : ('http://code.opencv.org/projects/opencv/wiki/How_to_contribute/%s', None),
 
-            'cvt_color': ('http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html?highlight=cvtcolor#cvtcolor%s', None),
-            'imread':    ('http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imread#imread%s', None),
-            'imwrite':   ('http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imwrite#imwrite%s', None),
-            'imshow':    ('http://opencv.itseez.com/modules/highgui/doc/user_interface.html?highlight=imshow#imshow%s', None),
-            'named_window': ('http://opencv.itseez.com/modules/highgui/doc/user_interface.html?highlight=namedwindow#namedwindow%s', None),
-            'wait_key': ('http://opencv.itseez.com/modules/highgui/doc/user_interface.html?highlight=waitkey#waitkey%s', None),
-            'add_weighted': ('http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html?highlight=addweighted#addweighted%s', None),
-            'saturate_cast': ('http://opencv.itseez.com/modules/core/doc/utility_and_system_functions_and_macros.html?highlight=saturate_cast#saturate-cast%s', None),
-            'mat_zeros': ('http://opencv.itseez.com/modules/core/doc/basic_structures.html?highlight=zeros#mat-zeros%s', None),
-            'convert_to': ('http://opencv.itseez.com/modules/core/doc/basic_structures.html#mat-convertto%s', None),
-            'create_trackbar': ('http://opencv.itseez.com/modules/highgui/doc/user_interface.html?highlight=createtrackbar#createtrackbar%s', None),
-            'point': ('http://opencv.itseez.com/modules/core/doc/basic_structures.html#point%s', None),
-            'scalar': ('http://opencv.itseez.com/modules/core/doc/basic_structures.html#scalar%s', None),
-            'line': ('http://opencv.itseez.com/modules/core/doc/drawing_functions.html#line%s', None),
-            'ellipse': ('http://opencv.itseez.com/modules/core/doc/drawing_functions.html#ellipse%s', None),
-            'rectangle': ('http://opencv.itseez.com/modules/core/doc/drawing_functions.html#rectangle%s', None),
-            'circle': ('http://opencv.itseez.com/modules/core/doc/drawing_functions.html#circle%s', None),
-            'fill_poly': ('http://opencv.itseez.com/modules/core/doc/drawing_functions.html#fillpoly%s', None),
-            'rng': ('http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html?highlight=rng#rng%s', None),
-            'put_text': ('http://opencv.itseez.com/modules/core/doc/drawing_functions.html#puttext%s', None),
-            'gaussian_blur': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=gaussianblur#gaussianblur%s', None),
-            'blur': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=blur#blur%s', None),
-            'median_blur': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=medianblur#medianblur%s', None),
-            'bilateral_filter': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=bilateralfilter#bilateralfilter%s', None),
-            'erode': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=erode#erode%s', None),
-            'dilate': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=dilate#dilate%s', None),
-            'get_structuring_element': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=getstructuringelement#getstructuringelement%s', None),
-            'flood_fill': ( 'http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html?highlight=floodfill#floodfill%s', None),
-            'morphology_ex': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=morphologyex#morphologyex%s', None),
-            'pyr_down': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=pyrdown#pyrdown%s', None),
-            'pyr_up': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=pyrup#pyrup%s', None),
-            'resize': ('http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html?highlight=resize#resize%s', None),
-            'threshold': ('http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html?highlight=threshold#threshold%s', None),
-            'filter2d': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=filter2d#filter2d%s', None),
-            'copy_make_border': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=copymakeborder#copymakeborder%s', None),
-            'sobel': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=sobel#sobel%s', None),
-            'scharr': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=scharr#scharr%s', None),
-            'laplacian': ('http://opencv.itseez.com/modules/imgproc/doc/filtering.html?highlight=laplacian#laplacian%s', None),
-            'canny': ('http://opencv.itseez.com/modules/imgproc/doc/feature_detection.html?highlight=canny#canny%s', None),
-            'copy_to': ('http://opencv.itseez.com/modules/core/doc/basic_structures.html?highlight=copyto#mat-copyto%s', None),
-            'hough_lines' : ('http://opencv.itseez.com/modules/imgproc/doc/feature_detection.html?highlight=houghlines#houghlines%s', None),
-            'hough_lines_p' : ('http://opencv.itseez.com/modules/imgproc/doc/feature_detection.html?highlight=houghlinesp#houghlinesp%s', None),
-            'hough_circles' : ('http://opencv.itseez.com/modules/imgproc/doc/feature_detection.html?highlight=houghcircles#houghcircles%s', None),
-            'remap' : ('http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html?highlight=remap#remap%s', None),
-            'warp_affine' : ('http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html?highlight=warpaffine#warpaffine%s' , None),
-            'get_rotation_matrix_2d' : ('http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html?highlight=getrotationmatrix2d#getrotationmatrix2d%s', None),
-            'get_affine_transform' : ('http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html?highlight=getaffinetransform#getaffinetransform%s', None),
-            'equalize_hist' : ('http://opencv.itseez.com/modules/imgproc/doc/histograms.html?highlight=equalizehist#equalizehist%s', None),
-            'split' : ('http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html?highlight=split#split%s', None),
-            'calc_hist' : ('http://opencv.itseez.com/modules/imgproc/doc/histograms.html?highlight=calchist#calchist%s', None),
-            'normalize' : ('http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html?highlight=normalize#normalize%s', None),
-            'match_template' : ('http://opencv.itseez.com/modules/imgproc/doc/object_detection.html?highlight=matchtemplate#matchtemplate%s', None),
-            'min_max_loc' : ('http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html?highlight=minmaxloc#minmaxloc%s', None),
-            'mix_channels' : ( 'http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html?highlight=mixchannels#mixchannels%s', None),
-            'calc_back_project' : ('http://opencv.itseez.com/modules/imgproc/doc/histograms.html?highlight=calcbackproject#calcbackproject%s', None),
-            'compare_hist' : ('http://opencv.itseez.com/modules/imgproc/doc/histograms.html?highlight=comparehist#comparehist%s', None),
-            'corner_harris' : ('http://opencv.itseez.com/modules/imgproc/doc/feature_detection.html?highlight=cornerharris#cornerharris%s', None),
-            'good_features_to_track' : ('http://opencv.itseez.com/modules/imgproc/doc/feature_detection.html?highlight=goodfeaturestotrack#goodfeaturestotrack%s', None),
-            'corner_min_eigenval' : ('http://opencv.itseez.com/modules/imgproc/doc/feature_detection.html?highlight=cornermineigenval#cornermineigenval%s', None),
-            'corner_eigenvals_and_vecs' : ('http://opencv.itseez.com/modules/imgproc/doc/feature_detection.html?highlight=cornereigenvalsandvecs#cornereigenvalsandvecs%s', None),
-            'corner_sub_pix' : ('http://opencv.itseez.com/modules/imgproc/doc/feature_detection.html?highlight=cornersubpix#cornersubpix%s', None),
-            'find_contours' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=findcontours#findcontours%s', None),
-            'convex_hull' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=convexhull#convexhull%s', None),
-            'draw_contours' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=drawcontours#drawcontours%s', None),
-            'bounding_rect' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=boundingrect#boundingrect%s', None),
-            'min_enclosing_circle' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=minenclosingcircle#minenclosingcircle%s', None),
-            'min_area_rect' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=minarearect#minarearect%s', None),
-            'fit_ellipse' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=fitellipse#fitellipse%s', None),
-            'moments' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=moments#moments%s', None),
-            'contour_area' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=contourarea#contourarea%s', None),
-            'arc_length' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=arclength#arclength%s', None),
-            'point_polygon_test' : ('http://opencv.itseez.com/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=pointpolygontest#pointpolygontest%s', None),
-            'feature_detector' : ( 'http://opencv.itseez.com/modules/features2d/doc/common_interfaces_of_feature_detectors.html?highlight=featuredetector#FeatureDetector%s', None),
-            'feature_detector_detect' : ('http://opencv.itseez.com/modules/features2d/doc/common_interfaces_of_feature_detectors.html?highlight=detect#featuredetector-detect%s', None ),
-            'surf_feature_detector' : ('http://opencv.itseez.com/modules/features2d/doc/common_interfaces_of_feature_detectors.html?highlight=surffeaturedetector#surffeaturedetector%s', None ),
-            'draw_keypoints' : ('http://opencv.itseez.com/modules/features2d/doc/drawing_function_of_keypoints_and_matches.html?highlight=drawkeypoints#drawkeypoints%s', None ),
-            'descriptor_extractor': ( 'http://opencv.itseez.com/modules/features2d/doc/common_interfaces_of_descriptor_extractors.html?highlight=descriptorextractor#descriptorextractor%s', None ),
-            'descriptor_extractor_compute' : ( 'http://opencv.itseez.com/modules/features2d/doc/common_interfaces_of_descriptor_extractors.html?highlight=compute#descriptorextractor-compute%s', None ),
-            'surf_descriptor_extractor' : ( 'http://opencv.itseez.com/modules/features2d/doc/common_interfaces_of_descriptor_extractors.html?highlight=surfdescriptorextractor#surfdescriptorextractor%s', None ),
-            'draw_matches' : ( 'http://opencv.itseez.com/modules/features2d/doc/drawing_function_of_keypoints_and_matches.html?highlight=drawmatches#drawmatches%s', None ),
-            'find_homography' : ('http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html?highlight=findhomography#findhomography%s', None),
-            'perspective_transform' : ('http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html?highlight=perspectivetransform#perspectivetransform%s', None ),
-            'flann_based_matcher' : ('http://opencv.itseez.com/modules/features2d/doc/common_interfaces_of_descriptor_matchers.html?highlight=flannbasedmatcher#flannbasedmatcher%s', None),
-            'brute_force_matcher' : ('http://opencv.itseez.com/modules/features2d/doc/common_interfaces_of_descriptor_matchers.html?highlight=bruteforcematcher#bruteforcematcher%s', None ),
-            'cascade_classifier' : ('http://opencv.itseez.com/modules/objdetect/doc/cascade_classification.html?highlight=cascadeclassifier#cascadeclassifier%s', None ),
-            'cascade_classifier_load' : ('http://opencv.itseez.com/modules/objdetect/doc/cascade_classification.html?highlight=load#cascadeclassifier-load%s', None ),
-            'cascade_classifier_detect_multiscale' : ('http://opencv.itseez.com/modules/objdetect/doc/cascade_classification.html?highlight=detectmultiscale#cascadeclassifier-detectmultiscale%s', None )
+            'cvt_color': ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=cvtcolor#cvtcolor%s', None),
+            'imread':    ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imread#imread%s', None),
+            'imwrite':   ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imwrite#imwrite%s', None),
+            'imshow':    ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=imshow#imshow%s', None),
+            'named_window': ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=namedwindow#namedwindow%s', None),
+            'wait_key': ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=waitkey#waitkey%s', None),
+            'add_weighted': ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=addweighted#addweighted%s', None),
+            'saturate_cast': ('http://docs.opencv.org/modules/core/doc/utility_and_system_functions_and_macros.html?highlight=saturate_cast#saturate-cast%s', None),
+            'mat_zeros': ('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=zeros#mat-zeros%s', None),
+            'convert_to': ('http://docs.opencv.org/modules/core/doc/basic_structures.html#mat-convertto%s', None),
+            'create_trackbar': ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=createtrackbar#createtrackbar%s', None),
+            'point': ('http://docs.opencv.org/modules/core/doc/basic_structures.html#point%s', None),
+            'scalar': ('http://docs.opencv.org/modules/core/doc/basic_structures.html#scalar%s', None),
+            'line': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#line%s', None),
+            'ellipse': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#ellipse%s', None),
+            'rectangle': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#rectangle%s', None),
+            'circle': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#circle%s', None),
+            'fill_poly': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#fillpoly%s', None),
+            'rng': ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=rng#rng%s', None),
+            'put_text': ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#puttext%s', None),
+            'gaussian_blur': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=gaussianblur#gaussianblur%s', None),
+            'blur': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=blur#blur%s', None),
+            'median_blur': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=medianblur#medianblur%s', None),
+            'bilateral_filter': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=bilateralfilter#bilateralfilter%s', None),
+            'erode': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=erode#erode%s', None),
+            'dilate': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=dilate#dilate%s', None),
+            'get_structuring_element': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=getstructuringelement#getstructuringelement%s', None),
+            'flood_fill': ( 'http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=floodfill#floodfill%s', None),
+            'morphology_ex': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=morphologyex#morphologyex%s', None),
+            'pyr_down': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=pyrdown#pyrdown%s', None),
+            'pyr_up': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=pyrup#pyrup%s', None),
+            'resize': ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=resize#resize%s', None),
+            'threshold': ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=threshold#threshold%s', None),
+            'filter2d': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=filter2d#filter2d%s', None),
+            'copy_make_border': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=copymakeborder#copymakeborder%s', None),
+            'sobel': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=sobel#sobel%s', None),
+            'scharr': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=scharr#scharr%s', None),
+            'laplacian': ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=laplacian#laplacian%s', None),
+            'canny': ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=canny#canny%s', None),
+            'copy_to': ('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=copyto#mat-copyto%s', None),
+            'hough_lines' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghlines#houghlines%s', None),
+            'hough_lines_p' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghlinesp#houghlinesp%s', None),
+            'hough_circles' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghcircles#houghcircles%s', None),
+            'remap' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=remap#remap%s', None),
+            'warp_affine' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=warpaffine#warpaffine%s' , None),
+            'get_rotation_matrix_2d' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=getrotationmatrix2d#getrotationmatrix2d%s', None),
+            'get_affine_transform' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=getaffinetransform#getaffinetransform%s', None),
+            'equalize_hist' : ('http://docs.opencv.org/modules/imgproc/doc/histograms.html?highlight=equalizehist#equalizehist%s', None),
+            'split' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=split#split%s', None),
+            'calc_hist' : ('http://docs.opencv.org/modules/imgproc/doc/histograms.html?highlight=calchist#calchist%s', None),
+            'normalize' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=normalize#normalize%s', None),
+            'match_template' : ('http://docs.opencv.org/modules/imgproc/doc/object_detection.html?highlight=matchtemplate#matchtemplate%s', None),
+            'min_max_loc' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=minmaxloc#minmaxloc%s', None),
+            'mix_channels' : ( 'http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=mixchannels#mixchannels%s', None),
+            'calc_back_project' : ('http://docs.opencv.org/modules/imgproc/doc/histograms.html?highlight=calcbackproject#calcbackproject%s', None),
+            'compare_hist' : ('http://docs.opencv.org/modules/imgproc/doc/histograms.html?highlight=comparehist#comparehist%s', None),
+            'corner_harris' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=cornerharris#cornerharris%s', None),
+            'good_features_to_track' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=goodfeaturestotrack#goodfeaturestotrack%s', None),
+            'corner_min_eigenval' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=cornermineigenval#cornermineigenval%s', None),
+            'corner_eigenvals_and_vecs' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=cornereigenvalsandvecs#cornereigenvalsandvecs%s', None),
+            'corner_sub_pix' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=cornersubpix#cornersubpix%s', None),
+            'find_contours' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=findcontours#findcontours%s', None),
+            'convex_hull' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=convexhull#convexhull%s', None),
+            'draw_contours' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=drawcontours#drawcontours%s', None),
+            'bounding_rect' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=boundingrect#boundingrect%s', None),
+            'min_enclosing_circle' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=minenclosingcircle#minenclosingcircle%s', None),
+            'min_area_rect' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=minarearect#minarearect%s', None),
+            'fit_ellipse' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=fitellipse#fitellipse%s', None),
+            'moments' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=moments#moments%s', None),
+            'contour_area' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=contourarea#contourarea%s', None),
+            'arc_length' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=arclength#arclength%s', None),
+            'point_polygon_test' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=pointpolygontest#pointpolygontest%s', None),
+            'feature_detector' : ( 'http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_feature_detectors.html?highlight=featuredetector#FeatureDetector%s', None),
+            'feature_detector_detect' : ('http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_feature_detectors.html?highlight=detect#featuredetector-detect%s', None ),
+            'surf_feature_detector' : ('http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_feature_detectors.html?highlight=surffeaturedetector#surffeaturedetector%s', None ),
+            'draw_keypoints' : ('http://docs.opencv.org/modules/features2d/doc/drawing_function_of_keypoints_and_matches.html?highlight=drawkeypoints#drawkeypoints%s', None ),
+            'descriptor_extractor': ( 'http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_extractors.html?highlight=descriptorextractor#descriptorextractor%s', None ),
+            'descriptor_extractor_compute' : ( 'http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_extractors.html?highlight=compute#descriptorextractor-compute%s', None ),
+            'surf_descriptor_extractor' : ( 'http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_extractors.html?highlight=surfdescriptorextractor#surfdescriptorextractor%s', None ),
+            'draw_matches' : ( 'http://docs.opencv.org/modules/features2d/doc/drawing_function_of_keypoints_and_matches.html?highlight=drawmatches#drawmatches%s', None ),
+            'find_homography' : ('http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html?highlight=findhomography#findhomography%s', None),
+            'perspective_transform' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=perspectivetransform#perspectivetransform%s', None ),
+            'flann_based_matcher' : ('http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_matchers.html?highlight=flannbasedmatcher#flannbasedmatcher%s', None),
+            'brute_force_matcher' : ('http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_matchers.html?highlight=bruteforcematcher#bruteforcematcher%s', None ),
+            'cascade_classifier' : ('http://docs.opencv.org/modules/objdetect/doc/cascade_classification.html?highlight=cascadeclassifier#cascadeclassifier%s', None ),
+            'cascade_classifier_load' : ('http://docs.opencv.org/modules/objdetect/doc/cascade_classification.html?highlight=load#cascadeclassifier-load%s', None ),
+            'cascade_classifier_detect_multiscale' : ('http://docs.opencv.org/modules/objdetect/doc/cascade_classification.html?highlight=detectmultiscale#cascadeclassifier-detectmultiscale%s', None )
            }
diff --git a/doc/mymath.sty b/doc/mymath.sty
index e69de29bb..491c160cc 100644
--- a/doc/mymath.sty
+++ b/doc/mymath.sty
@@ -0,0 +1,42 @@
+\ProvidesPackage{mymath}
+
+\newcommand{\matTT}[9]{
+\[
+\left|\begin{array}{ccc}
+ #1 & #2 & #3\\
+ #4 & #5 & #6\\
+ #7 & #8 & #9
+\end{array}\right|
+\]
+}
+
+\newcommand{\fork}[4]{
+  \left\{
+  \begin{array}{l l}
+  #1 & \mbox{#2}\\
+  #3 & \mbox{#4}\\
+  \end{array} \right.}
+\newcommand{\forkthree}[6]{
+  \left\{
+  \begin{array}{l l}
+  #1 & \mbox{#2}\\
+  #3 & \mbox{#4}\\
+  #5 & \mbox{#6}\\
+  \end{array} \right.}
+
+\newcommand{\vecthree}[3]{
+\begin{bmatrix}
+ #1\\
+ #2\\
+ #3
+\end{bmatrix}
+}
+
+\newcommand{\vecthreethree}[9]{
+\begin{bmatrix}
+ #1 & #2 & #3\\
+ #4 & #5 & #6\\
+ #7 & #8 & #9
+\end{bmatrix}
+}
+
diff --git a/doc/opencv-logo2.png b/doc/opencv-logo2.png
index 615fd2add..bc71a2ae5 100644
Binary files a/doc/opencv-logo2.png and b/doc/opencv-logo2.png differ
diff --git a/doc/opencv_cheatsheet.tex b/doc/opencv_cheatsheet.tex
index e76bd016e..fc0c0fa41 100644
--- a/doc/opencv_cheatsheet.tex
+++ b/doc/opencv_cheatsheet.tex
@@ -75,11 +75,11 @@
 % if using A4 paper. (This probably isn't strictly necessary.)
 % If using another size paper, use default 1cm margins.
 \ifthenelse{\lengthtest { \paperwidth = 11in}}
-	{ \geometry{top=.5in,left=.5in,right=.5in,bottom=.5in} }
-	{\ifthenelse{ \lengthtest{ \paperwidth = 297mm}}
-		{\geometry{top=1cm,left=1cm,right=1cm,bottom=1cm} }
-		{\geometry{top=1cm,left=1cm,right=1cm,bottom=1cm} }
-	}
+    { \geometry{top=.5in,left=.5in,right=.5in,bottom=.5in} }
+    {\ifthenelse{ \lengthtest{ \paperwidth = 297mm}}
+        {\geometry{top=1cm,left=1cm,right=1cm,bottom=1cm} }
+        {\geometry{top=1cm,left=1cm,right=1cm,bottom=1cm} }
+    }
 
 % Turn off header and footer
 % \pagestyle{empty}
diff --git a/doc/pattern_tools/svgfig.py b/doc/pattern_tools/svgfig.py
index 86afa5913..c690c9ff0 100755
--- a/doc/pattern_tools/svgfig.py
+++ b/doc/pattern_tools/svgfig.py
@@ -3667,4 +3667,3 @@ class YErrorBars:
             output.append(LineAxis(x, start, x, end, start, end, bars, False, False, **self.attr).SVG(trans))
 
         return output
-
diff --git a/doc/tutorials/contrib/retina_model/images/retina_TreeHdr_retina.jpg b/doc/tutorials/bioinspired/retina_model/images/retina_TreeHdr_retina.jpg
similarity index 100%
rename from doc/tutorials/contrib/retina_model/images/retina_TreeHdr_retina.jpg
rename to doc/tutorials/bioinspired/retina_model/images/retina_TreeHdr_retina.jpg
diff --git a/doc/tutorials/contrib/retina_model/images/retina_TreeHdr_small.jpg b/doc/tutorials/bioinspired/retina_model/images/retina_TreeHdr_small.jpg
similarity index 100%
rename from doc/tutorials/contrib/retina_model/images/retina_TreeHdr_small.jpg
rename to doc/tutorials/bioinspired/retina_model/images/retina_TreeHdr_small.jpg
diff --git a/doc/tutorials/contrib/retina_model/images/studentsSample_input.jpg b/doc/tutorials/bioinspired/retina_model/images/studentsSample_input.jpg
similarity index 100%
rename from doc/tutorials/contrib/retina_model/images/studentsSample_input.jpg
rename to doc/tutorials/bioinspired/retina_model/images/studentsSample_input.jpg
diff --git a/doc/tutorials/contrib/retina_model/images/studentsSample_magno.jpg b/doc/tutorials/bioinspired/retina_model/images/studentsSample_magno.jpg
similarity index 100%
rename from doc/tutorials/contrib/retina_model/images/studentsSample_magno.jpg
rename to doc/tutorials/bioinspired/retina_model/images/studentsSample_magno.jpg
diff --git a/doc/tutorials/contrib/retina_model/images/studentsSample_parvo.jpg b/doc/tutorials/bioinspired/retina_model/images/studentsSample_parvo.jpg
similarity index 100%
rename from doc/tutorials/contrib/retina_model/images/studentsSample_parvo.jpg
rename to doc/tutorials/bioinspired/retina_model/images/studentsSample_parvo.jpg
diff --git a/doc/tutorials/contrib/retina_model/retina_model.rst b/doc/tutorials/bioinspired/retina_model/retina_model.rst
similarity index 96%
rename from doc/tutorials/contrib/retina_model/retina_model.rst
rename to doc/tutorials/bioinspired/retina_model/retina_model.rst
index 86e13e732..e8527ee8b 100644
--- a/doc/tutorials/contrib/retina_model/retina_model.rst
+++ b/doc/tutorials/bioinspired/retina_model/retina_model.rst
@@ -107,14 +107,16 @@ This retina filter code includes the research contributions of phd/research coll
 Code tutorial
 =============
 
-Please refer to the original tutorial source code in file *opencv_folder/samples/cpp/tutorial_code/contrib/retina_tutorial.cpp*.
+Please refer to the original tutorial source code in file *opencv_folder/samples/cpp/tutorial_code/bioinspired/retina_tutorial.cpp*.
 
-To compile it, assuming OpenCV is correctly installed, use the following command. It requires the opencv_core *(cv::Mat and friends objects management)*, opencv_highgui *(display and image/video read)* and opencv_contrib *(Retina description)* libraries to compile.
+**Note :** do not forget that the retina model is included in the following namespace : *cv::bioinspired*.
+
+To compile it, assuming OpenCV is correctly installed, use the following command. It requires the opencv_core *(cv::Mat and friends objects management)*, opencv_highgui *(display and image/video read)* and opencv_bioinspired *(Retina description)* libraries to compile.
 
 .. code-block:: cpp
 
    // compile
-   gcc retina_tutorial.cpp -o Retina_tuto -lopencv_core -lopencv_highgui -lopencv_contrib
+   gcc retina_tutorial.cpp -o Retina_tuto -lopencv_core -lopencv_highgui -lopencv_bioinspired
 
    // Run commands : add 'log' as a last parameter to apply a spatial log sampling (simulates retina sampling)
    // run on webcam
@@ -128,7 +130,7 @@ To compile it, assuming OpenCV is correctly installed, use the following command
 
 Here is a code explanation :
 
-Retina definition is present in the contrib package and a simple include allows to use it
+Retina definition is present in the bioinspired package and a simple include allows to use it. You can rather use the specific header : *opencv2/bioinspired.hpp* if you prefer but then include the other required openv modules : *opencv2/core.hpp* and *opencv2/highgui.hpp*
 
 .. code-block:: cpp
 
@@ -229,20 +231,20 @@ Once all input parameters are processed, a first image should have been loaded,
         return -1;
     }
 
-Now, everything is ready to run the retina model. I propose here to allocate a retina instance and to manage the eventual log sampling option. The Retina constructor expects at least a cv::Size object that shows the input data size that will have to be managed. One can activate other options such as color and its related color multiplexing strategy (here Bayer multiplexing is chosen using enum cv::RETINA_COLOR_BAYER). If using log sampling, the image reduction factor (smaller output images) and log sampling strengh can be adjusted.
+Now, everything is ready to run the retina model. I propose here to allocate a retina instance and to manage the eventual log sampling option. The Retina constructor expects at least a cv::Size object that shows the input data size that will have to be managed. One can activate other options such as color and its related color multiplexing strategy (here Bayer multiplexing is chosen using *enum cv::bioinspired::RETINA_COLOR_BAYER*). If using log sampling, the image reduction factor (smaller output images) and log sampling strengh can be adjusted.
 
 .. code-block:: cpp
 
     // pointer to a retina object
-    cv::Ptr<cv::Retina> myRetina;
+    cv::Ptr<cv::bioinspired::Retina> myRetina;
 
     // if the last parameter is 'log', then activate log sampling (favour foveal vision and subsamples peripheral vision)
     if (useLogSampling)
     {
-        myRetina = cv::createRetina(inputFrame.size(), true, cv::RETINA_COLOR_BAYER, true, 2.0, 10.0);
+        myRetina = cv::bioinspired::createRetina(inputFrame.size(), true, cv::bioinspired::RETINA_COLOR_BAYER, true, 2.0, 10.0);
     }
     else// -> else allocate "classical" retina :
-        myRetina = cv::createRetina(inputFrame.size());
+        myRetina = cv::bioinspired::createRetina(inputFrame.size());
 
 Once done, the proposed code writes a default xml file that contains the default parameters of the retina. This is useful to make your own config using this template. Here generated template xml file is called *RetinaDefaultParameters.xml*.
 
diff --git a/doc/tutorials/bioinspired/retina_model/retina_model.rst~ b/doc/tutorials/bioinspired/retina_model/retina_model.rst~
new file mode 100644
index 000000000..50bdcbb62
--- /dev/null
+++ b/doc/tutorials/bioinspired/retina_model/retina_model.rst~
@@ -0,0 +1,418 @@
+.. _Retina_Model:
+
+Discovering the human retina and its use for image processing
+*************************************************************
+
+Goal
+=====
+
+I present here a model of human retina that shows some interesting properties for image preprocessing and enhancement.
+In this tutorial you will learn how to:
+
+.. container:: enumeratevisibleitemswithsquare
+
+   + discover the main two channels outing from your retina
+
+   + see the basics to use the retina model
+
+   + discover some parameters tweaks
+
+
+General overview
+================
+
+The proposed model originates from Jeanny Herault's research [herault2010]_ at `Gipsa <http://www.gipsa-lab.inpg.fr>`_. It is involved in image processing applications with `Listic <http://www.listic.univ-savoie.fr>`_ (code maintainer and user) lab. This is not a complete model but it already present interesting properties that can be involved for enhanced image processing experience. The model allows the following human retina properties to be used :
+
+* spectral whitening that has 3 important effects: high spatio-temporal frequency signals canceling (noise), mid-frequencies details enhancement and low frequencies luminance energy reduction. This *all in one* property directly allows visual signals cleaning of classical undesired distortions introduced by image sensors and input luminance range.
+
+* local logarithmic luminance compression allows details to be enhanced even in low light conditions.
+
+* decorrelation of the details information (Parvocellular output channel) and transient information (events, motion made available at the Magnocellular output channel).
+
+The first two points are illustrated below :
+
+In the figure below, the OpenEXR image sample *CrissyField.exr*, a High Dynamic Range image is shown. In order to make it visible on this web-page, the original input image is linearly rescaled to the classical image luminance range [0-255] and is converted to 8bit/channel format. Such strong conversion hides many details because of too strong local contrasts. Furthermore, noise energy is also strong and pollutes visual information.
+
+.. image:: images/retina_TreeHdr_small.jpg
+   :alt: A High dynamic range image linearly rescaled within range [0-255].
+   :align: center
+
+In the following image, applying the ideas proposed in [benoit2010]_, as your retina does, local luminance adaptation, spatial noise removal and spectral whitening work together and transmit accurate information on lower range 8bit data channels. On this picture, noise in significantly removed, local details hidden by strong luminance contrasts are enhanced. Output image keeps its naturalness and visual content is enhanced. Color processing is based on the color multiplexing/demultiplexing method proposed in [chaix2007]_.
+
+.. image:: images/retina_TreeHdr_retina.jpg
+   :alt: A High dynamic range image compressed within range [0-255] using the retina.
+   :align: center
+
+
+*Note :* image sample can be downloaded from the `OpenEXR website <http://www.openexr.com>`_. Regarding this demonstration, before retina processing, input image has been linearly rescaled within 0-255 keeping its channels float format. 5% of its histogram ends has been cut (mostly removes wrong HDR pixels). Check out the sample *opencv/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp* for similar processing. The following demonstration will only consider classical 8bit/channel images.
+
+The retina model output channels
+================================
+
+The retina model presents two outputs that benefit from the above cited behaviors.
+
+* The first one is called the Parvocellular channel. It is mainly active in the foveal retina area (high resolution central vision with color sensitive photo-receptors), its aim is to provide accurate color vision for visual details remaining static on the retina. On the other hand objects moving on the retina projection are blurred.
+
+* The second well known channel is the Magnocellular channel. It is mainly active in the retina peripheral vision and send signals related to change events (motion, transient events, etc.). These outing signals also help visual system to focus/center retina on 'transient'/moving areas for more detailed analysis thus improving visual scene context and object classification.
+
+**NOTE :** regarding the proposed model, contrary to the real retina, we apply these two channels on the entire input images using the same resolution. This allows enhanced visual details and motion information to be extracted on all the considered images... but remember, that these two channels are complementary. For example, if Magnocellular channel gives strong energy in an area, then, the Parvocellular channel is certainly blurred there since there is a transient event.
+
+As an illustration, we apply in the following the retina model on a webcam video stream of a dark visual scene. In this visual scene, captured in an amphitheater of the university, some students are moving while talking to the teacher.
+
+In this video sequence, because of the dark ambiance, signal to noise ratio is low and color artifacts are present on visual features edges because of the low quality image capture tool-chain.
+
+.. image:: images/studentsSample_input.jpg
+   :alt: an input video stream extract sample
+   :align: center
+
+Below is shown the retina foveal vision applied on the entire image. In the used retina configuration, global luminance is preserved and local contrasts are enhanced. Also, signal to noise ratio is improved : since high frequency spatio-temporal noise is reduced, enhanced details are not corrupted by any enhanced noise.
+
+.. image:: images/studentsSample_parvo.jpg
+   :alt: the retina Parvocellular output. Enhanced details, luminance adaptation and noise removal. A processing tool for image analysis.
+   :align: center
+
+Below is the output of the Magnocellular output of the retina model. Its signals are strong where transient events occur. Here, a student is moving at the bottom of the image thus generating high energy. The remaining of the image is static however, it is corrupted by a strong noise. Here, the retina filters out most of the noise thus generating low false motion area 'alarms'. This channel can be used as a transient/moving areas detector : it would provide relevant information for a low cost segmentation tool that would highlight areas in which an event is occurring.
+
+.. image:: images/studentsSample_magno.jpg
+   :alt: the retina Magnocellular output. Enhanced transient signals (motion, etc.). A preprocessing tool for event detection.
+   :align: center
+
+Retina use case
+===============
+
+This model can be used basically for spatio-temporal video effects but also in the aim of :
+
+* performing texture analysis with enhanced signal to noise ratio and enhanced details robust against input images luminance ranges (check out the Parvocellular retina channel output)
+
+* performing motion analysis also taking benefit of the previously cited properties.
+
+Literature
+==========
+For more information, refer to the following papers :
+
+.. [benoit2010] Benoit A., Caplier A., Durette B., Herault, J., "Using Human Visual System Modeling For Bio-Inspired Low Level Image Processing", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773. DOI <http://dx.doi.org/10.1016/j.cviu.2010.01.011>
+
+* Please have a look at the reference work of Jeanny Herault that you can read in his book :
+
+.. [herault2010] Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+
+This retina filter code includes the research contributions of phd/research collegues from which code has been redrawn by the author :
+
+* take a look at the *retinacolor.hpp* module to discover Brice Chaix de Lavarene phD color mosaicing/demosaicing and his reference paper:
+
+.. [chaix2007] B. Chaix de Lavarene, D. Alleysson, B. Durette, J. Herault (2007). "Efficient demosaicing through recursive filtering", IEEE International Conference on Image Processing ICIP 2007
+
+* take a look at *imagelogpolprojection.hpp* to discover retina spatial log sampling which originates from Barthelemy Durette phd with Jeanny Herault. A Retina / V1 cortex projection is also proposed and originates from Jeanny's discussions. More informations in the above cited Jeanny Heraults's book.
+
+Code tutorial
+=============
+
+Please refer to the original tutorial source code in file *opencv_folder/samples/cpp/tutorial_code/bioinspired/retina_tutorial.cpp*.
+
+**Note :** do not forget that the retina model is included in the following namespace : *cv::bioinspired*.
+
+To compile it, assuming OpenCV is correctly installed, use the following command. It requires the opencv_core *(cv::Mat and friends objects management)*, opencv_highgui *(display and image/video read)* and opencv_bioinspired *(Retina description)* libraries to compile.
+
+.. code-block:: cpp
+
+   // compile
+   gcc retina_tutorial.cpp -o Retina_tuto -lopencv_core -lopencv_highgui -lopencv_bioinspired
+
+   // Run commands : add 'log' as a last parameter to apply a spatial log sampling (simulates retina sampling)
+   // run on webcam
+   ./Retina_tuto -video
+   // run on video file
+   ./Retina_tuto -video myVideo.avi
+   // run on an image
+   ./Retina_tuto -image myPicture.jpg
+   // run on an image with log sampling
+   ./Retina_tuto -image myPicture.jpg log
+
+Here is a code explanation :
+
+Retina definition is present in the bioinspired package and a simple include allows to use it. You can rather use the specific header : *opencv2/bioinspired.hpp* if you prefer but then include the other required openv modules : *opencv2/core.hpp* and *opencv2/highgui.hpp* 
+
+.. code-block:: cpp
+
+   #include "opencv2/opencv.hpp"
+
+Provide user some hints to run the program with a help function
+
+.. code-block:: cpp
+
+   // the help procedure
+   static void help(std::string errorMessage)
+   {
+    std::cout<<"Program init error : "<<errorMessage<<std::endl;
+    std::cout<<"\nProgram call procedure : retinaDemo [processing mode] [Optional : media target] [Optional LAST parameter: \"log\" to activate retina log sampling]"<<std::endl;
+    std::cout<<"\t[processing mode] :"<<std::endl;
+    std::cout<<"\t -image : for still image processing"<<std::endl;
+    std::cout<<"\t -video : for video stream processing"<<std::endl;
+    std::cout<<"\t[Optional : media target] :"<<std::endl;
+    std::cout<<"\t if processing an image or video file, then, specify the path and filename of the target to process"<<std::endl;
+    std::cout<<"\t leave empty if processing video stream coming from a connected video device"<<std::endl;
+    std::cout<<"\t[Optional : activate retina log sampling] : an optional last parameter can be specified for retina spatial log sampling"<<std::endl;
+    std::cout<<"\t set \"log\" without quotes to activate this sampling, output frame size will be divided by 4"<<std::endl;
+    std::cout<<"\nExamples:"<<std::endl;
+    std::cout<<"\t-Image processing : ./retinaDemo -image lena.jpg"<<std::endl;
+    std::cout<<"\t-Image processing with log sampling : ./retinaDemo -image lena.jpg log"<<std::endl;
+    std::cout<<"\t-Video processing : ./retinaDemo -video myMovie.mp4"<<std::endl;
+    std::cout<<"\t-Live video processing : ./retinaDemo -video"<<std::endl;
+    std::cout<<"\nPlease start again with new parameters"<<std::endl;
+    std::cout<<"****************************************************"<<std::endl;
+    std::cout<<" NOTE : this program generates the default retina parameters file 'RetinaDefaultParameters.xml'"<<std::endl;
+    std::cout<<" => you can use this to fine tune parameters and load them if you save to file 'RetinaSpecificParameters.xml'"<<std::endl;
+   }
+
+Then, start the main program and first declare a *cv::Mat* matrix in which input images will be loaded. Also allocate a *cv::VideoCapture* object ready to load video streams (if necessary)
+
+.. code-block:: cpp
+
+  int main(int argc, char* argv[]) {
+    // declare the retina input buffer... that will be fed differently in regard of the input media
+    cv::Mat inputFrame;
+    cv::VideoCapture videoCapture; // in case a video media is used, its manager is declared here
+
+
+In the main program, before processing, first check input command parameters. Here it loads a first input image coming from a single loaded image (if user chose command *-image*) or from a video stream (if user chose command *-video*). Also, if the user added *log* command at the end of its program call, the spatial logarithmic image sampling performed by the retina is taken into account by the Boolean flag *useLogSampling*.
+
+.. code-block:: cpp
+
+  // welcome message
+    std::cout<<"****************************************************"<<std::endl;
+    std::cout<<"* Retina demonstration : demonstrates the use of is a wrapper class of the Gipsa/Listic Labs retina model."<<std::endl;
+    std::cout<<"* This demo will try to load the file 'RetinaSpecificParameters.xml' (if exists).\nTo create it, copy the autogenerated template 'RetinaDefaultParameters.xml'.\nThen twaek it with your own retina parameters."<<std::endl;
+    // basic input arguments checking
+    if (argc<2)
+    {
+        help("bad number of parameter");
+        return -1;
+    }
+
+    bool useLogSampling = !strcmp(argv[argc-1], "log"); // check if user wants retina log sampling processing
+
+    std::string inputMediaType=argv[1];
+
+    //////////////////////////////////////////////////////////////////////////////
+    // checking input media type (still image, video file, live video acquisition)
+    if (!strcmp(inputMediaType.c_str(), "-image") && argc >= 3)
+    {
+        std::cout<<"RetinaDemo: processing image "<<argv[2]<<std::endl;
+        // image processing case
+        inputFrame = cv::imread(std::string(argv[2]), 1); // load image in RGB mode
+    }else
+        if (!strcmp(inputMediaType.c_str(), "-video"))
+        {
+            if (argc == 2 || (argc == 3 && useLogSampling)) // attempt to grab images from a video capture device
+            {
+                videoCapture.open(0);
+            }else// attempt to grab images from a video filestream
+            {
+                std::cout<<"RetinaDemo: processing video stream "<<argv[2]<<std::endl;
+                videoCapture.open(argv[2]);
+            }
+
+            // grab a first frame to check if everything is ok
+            videoCapture>>inputFrame;
+        }else
+        {
+            // bad command parameter
+            help("bad command parameter");
+            return -1;
+        }
+
+Once all input parameters are processed, a first image should have been loaded, if not, display error and stop program :
+
+.. code-block:: cpp
+
+    if (inputFrame.empty())
+    {
+        help("Input media could not be loaded, aborting");
+        return -1;
+    }
+
+Now, everything is ready to run the retina model. I propose here to allocate a retina instance and to manage the eventual log sampling option. The Retina constructor expects at least a cv::Size object that shows the input data size that will have to be managed. One can activate other options such as color and its related color multiplexing strategy (here Bayer multiplexing is chosen using *enum cv::bioinspired::RETINA_COLOR_BAYER*). If using log sampling, the image reduction factor (smaller output images) and log sampling strengh can be adjusted.
+
+.. code-block:: cpp
+
+    // pointer to a retina object
+    cv::Ptr<Retina> myRetina;
+
+    // if the last parameter is 'log', then activate log sampling (favour foveal vision and subsamples peripheral vision)
+    if (useLogSampling)
+    {
+        myRetina = cv::bioinspired::createRetina(inputFrame.size(), true, cv::bioinspired::RETINA_COLOR_BAYER, true, 2.0, 10.0);
+    }
+    else// -> else allocate "classical" retina :
+        myRetina = cv::bioinspired::createRetina(inputFrame.size());
+
+Once done, the proposed code writes a default xml file that contains the default parameters of the retina. This is useful to make your own config using this template. Here generated template xml file is called *RetinaDefaultParameters.xml*.
+
+.. code-block:: cpp
+
+    // save default retina parameters file in order to let you see this and maybe modify it and reload using method "setup"
+    myRetina->write("RetinaDefaultParameters.xml");
+
+In the following line, the retina attempts to load another xml file called *RetinaSpecificParameters.xml*. If you created it and introduced your own setup, it will be loaded, in the other case, default retina parameters are used.
+
+.. code-block:: cpp
+
+    // load parameters if file exists
+    myRetina->setup("RetinaSpecificParameters.xml");
+
+It is not required here but just to show it is possible, you can reset the retina buffers to zero to force it to forget past events.
+
+.. code-block:: cpp
+
+    // reset all retina buffers (imagine you close your eyes for a long time)
+    myRetina->clearBuffers();
+
+Now, it is time to run the retina ! First create some output buffers ready to receive the two retina channels outputs
+
+.. code-block:: cpp
+
+    // declare retina output buffers
+    cv::Mat retinaOutput_parvo;
+    cv::Mat retinaOutput_magno;
+
+Then, run retina in a loop, load new frames from video sequence if necessary and get retina outputs back to dedicated buffers.
+
+.. code-block:: cpp
+
+    // processing loop with no stop condition
+    while(true)
+    {
+        // if using video stream, then, grabbing a new frame, else, input remains the same
+        if (videoCapture.isOpened())
+            videoCapture>>inputFrame;
+
+        // run retina filter on the loaded input frame
+        myRetina->run(inputFrame);
+        // Retrieve and display retina output
+        myRetina->getParvo(retinaOutput_parvo);
+        myRetina->getMagno(retinaOutput_magno);
+        cv::imshow("retina input", inputFrame);
+        cv::imshow("Retina Parvo", retinaOutput_parvo);
+        cv::imshow("Retina Magno", retinaOutput_magno);
+        cv::waitKey(10);
+    }
+
+That's done ! But if you want to secure the system, take care and manage Exceptions. The retina can throw some when it sees irrelevant data (no input frame, wrong setup, etc.).
+Then, i recommend to surround all the retina code by a try/catch system like this :
+
+.. code-block:: cpp
+
+    try{
+         // pointer to a retina object
+         cv::Ptr<cv::Retina> myRetina;
+         [---]
+         // processing loop with no stop condition
+         while(true)
+         {
+             [---]
+         }
+
+    }catch(cv::Exception e)
+    {
+        std::cerr<<"Error using Retina : "<<e.what()<<std::endl;
+    }
+
+Retina parameters, what to do ?
+===============================
+
+First, it is recommended to read the reference paper :
+
+* Benoit A., Caplier A., Durette B., Herault, J., *"Using Human Visual System Modeling For Bio-Inspired Low Level Image Processing"*, Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773. DOI <http://dx.doi.org/10.1016/j.cviu.2010.01.011>
+
+Once done open the configuration file *RetinaDefaultParameters.xml* generated by the demo and let's have a look at it.
+
+.. code-block:: cpp
+
+    <?xml version="1.0"?>
+    <opencv_storage>
+    <OPLandIPLparvo>
+        <colorMode>1</colorMode>
+        <normaliseOutput>1</normaliseOutput>
+        <photoreceptorsLocalAdaptationSensitivity>7.5e-01</photoreceptorsLocalAdaptationSensitivity>
+        <photoreceptorsTemporalConstant>9.0e-01</photoreceptorsTemporalConstant>
+        <photoreceptorsSpatialConstant>5.7e-01</photoreceptorsSpatialConstant>
+        <horizontalCellsGain>0.01</horizontalCellsGain>
+        <hcellsTemporalConstant>0.5</hcellsTemporalConstant>
+        <hcellsSpatialConstant>7.</hcellsSpatialConstant>
+        <ganglionCellsSensitivity>7.5e-01</ganglionCellsSensitivity></OPLandIPLparvo>
+    <IPLmagno>
+        <normaliseOutput>1</normaliseOutput>
+        <parasolCells_beta>0.</parasolCells_beta>
+        <parasolCells_tau>0.</parasolCells_tau>
+        <parasolCells_k>7.</parasolCells_k>
+        <amacrinCellsTemporalCutFrequency>2.0e+00</amacrinCellsTemporalCutFrequency>
+        <V0CompressionParameter>9.5e-01</V0CompressionParameter>
+        <localAdaptintegration_tau>0.</localAdaptintegration_tau>
+        <localAdaptintegration_k>7.</localAdaptintegration_k></IPLmagno>
+    </opencv_storage>
+
+Here are some hints but actually, the best parameter setup depends more on what you want to do with the retina rather than the images input that you give to retina. Apart from the more specific case of High Dynamic Range images (HDR) that require more specific setup for specific luminance compression objective, the retina behaviors should be rather stable from content to content. Note that OpenCV is able to manage such HDR format thanks to the OpenEXR images compatibility.
+
+Then, if the application target requires details enhancement prior to specific image processing, you need to know if mean luminance information is required or not. If not, the the retina can cancel or significantly reduce its energy thus giving more visibility to higher spatial frequency details.
+
+
+Basic parameters
+----------------
+
+The most simple parameters are the following :
+
+* **colorMode** : let the retina process color information (if 1) or gray scale images (if 0). In this last case, only the first channel of the input will be processed.
+
+* **normaliseOutput** : each channel has this parameter, if value is 1, then the considered channel output is rescaled between 0 and 255. Take care in this case at the Magnocellular output level (motion/transient channel detection). Residual noise will also be rescaled !
+
+**Note :** using color requires color channels multiplexing/demultipexing which requires more processing. You can expect much faster processing using gray levels : it would require around 30 product per pixel for all the retina processes and it has recently been parallelized for multicore architectures.
+
+Photo-receptors parameters
+--------------------------
+
+The following parameters act on the entry point of the retina - photo-receptors - and impact all the following processes. These sensors are low pass spatio-temporal filters that smooth temporal and spatial data and also adjust there sensitivity to local luminance thus improving details extraction and high frequency noise canceling.
+
+* **photoreceptorsLocalAdaptationSensitivity** between 0 and 1. Values close to 1 allow high luminance log compression effect at the photo-receptors level. Values closer to 0 give a more linear sensitivity. Increased alone, it can burn the *Parvo (details channel)* output image. If adjusted in collaboration with **ganglionCellsSensitivity** images can be very contrasted whatever the local luminance there is... at the price of a naturalness decrease.
+
+* **photoreceptorsTemporalConstant** this setups the temporal constant of the low pass filter effect at the entry of the retina. High value lead to strong temporal smoothing effect : moving objects are blurred and can disappear while static object are favored. But when starting the retina processing, stable state is reached lately.
+
+* **photoreceptorsSpatialConstant** specifies the spatial constant related to photo-receptors low pass filter effect. This parameters specify the minimum allowed spatial signal period allowed in the following. Typically, this filter should cut high frequency noise. Then a 0 value doesn't cut anything noise while higher values start to cut high spatial frequencies and more and more lower frequencies... Then, do not go to high if you wanna see some details of the input images ! A good compromise for color images is 0.53 since this won't affect too much the color spectrum. Higher values would lead to gray and blurred output images.
+
+Horizontal cells parameters
+---------------------------
+
+This parameter set tunes the neural network connected to the photo-receptors, the horizontal cells. It modulates photo-receptors sensitivity and completes the processing for final spectral whitening (part of the spatial band pass effect thus favoring visual details enhancement).
+
+* **horizontalCellsGain** here is a critical parameter ! If you are not interested by the mean luminance and focus on details enhancement, then, set to zero. But if you want to keep some environment luminance data, let some low spatial frequencies pass into the system and set a higher value (<1).
+
+* **hcellsTemporalConstant** similar to photo-receptors, this acts on the temporal constant of a low pass temporal filter that smooths input data. Here, a high value generates a high retina after effect while a lower value makes the retina more reactive. This value should be lower than **photoreceptorsTemporalConstant** to limit strong retina after effects.
+
+* **hcellsSpatialConstant** is the spatial constant of the low pass filter of these cells filter. It specifies the lowest spatial frequency allowed in the following. Visually, a high value leads to very low spatial frequencies processing and leads to salient halo effects. Lower values reduce this effect but the limit is : do not go lower than the value of **photoreceptorsSpatialConstant**. Those 2 parameters actually specify the spatial band-pass of the retina.
+
+**NOTE** after the processing managed by the previous parameters, input data is cleaned from noise and luminance in already partly enhanced. The following parameters act on the last processing stages of the two outing retina signals.
+
+Parvo (details channel) dedicated parameter
+-------------------------------------------
+
+* **ganglionCellsSensitivity** specifies the strength of the final local adaptation occurring at the output of this details dedicated channel. Parameter values remain between 0 and 1. Low value tend to give a linear response while higher values enforces the remaining low contrasted areas.
+
+**Note :** this parameter can correct eventual burned images by favoring low energetic details of the visual scene, even in bright areas.
+
+IPL Magno (motion/transient channel) parameters
+-----------------------------------------------
+
+Once image information is cleaned, this channel acts as a high pass temporal filter that only selects signals related to transient signals (events, motion, etc.). A low pass spatial filter smooths extracted transient data and a final logarithmic compression enhances low transient events thus enhancing event sensitivity.
+
+* **parasolCells_beta** generally set to zero, can be considered as an amplifier gain at the entry point of this processing stage. Generally set to 0.
+
+* **parasolCells_tau** the temporal smoothing effect that can be added
+
+* **parasolCells_k** the spatial constant of the spatial filtering effect, set it at a high value to favor low spatial frequency signals that are lower subject to residual noise.
+
+* **amacrinCellsTemporalCutFrequency** specifies the temporal constant of the high pass filter. High values let slow transient events to be selected.
+
+* **V0CompressionParameter** specifies the strength of the log compression. Similar behaviors to previous description but here it enforces sensitivity of transient events.
+
+* **localAdaptintegration_tau** generally set to 0, no real use here actually
+
+* **localAdaptintegration_k** specifies the size of the area on which local adaptation is performed. Low values lead to short range local adaptation (higher sensitivity to noise), high values secure log compression.
diff --git a/doc/tutorials/contrib/table_of_content_contrib/images/retina_TreeHdr_small.jpg b/doc/tutorials/bioinspired/table_of_content_bioinspired/images/retina_TreeHdr_small.jpg
similarity index 100%
rename from doc/tutorials/contrib/table_of_content_contrib/images/retina_TreeHdr_small.jpg
rename to doc/tutorials/bioinspired/table_of_content_bioinspired/images/retina_TreeHdr_small.jpg
diff --git a/doc/tutorials/bioinspired/table_of_content_bioinspired/table_of_content_bioinspired.rst b/doc/tutorials/bioinspired/table_of_content_bioinspired/table_of_content_bioinspired.rst
new file mode 100644
index 000000000..88869e98f
--- /dev/null
+++ b/doc/tutorials/bioinspired/table_of_content_bioinspired/table_of_content_bioinspired.rst
@@ -0,0 +1,36 @@
+.. _Table-Of-Content-Bioinspired:
+
+*bioinspired* module. Algorithms inspired from biological models
+----------------------------------------------------------------
+
+Here you will learn how to use additional modules of OpenCV defined in the "bioinspired" module.
+
+ .. include:: ../../definitions/tocDefinitions.rst
+
++
+  .. tabularcolumns:: m{100pt} m{300pt}
+  .. cssclass:: toctableopencv
+
+  =============== ======================================================
+  |RetinaDemoImg| **Title:** :ref:`Retina_Model`
+
+                  *Compatibility:* > OpenCV 2.4
+
+                  *Author:* |Author_AlexB|
+
+                  You will learn how to process images and video streams with a model of retina filter for details enhancement, spatio-temporal noise removal, luminance correction and spatio-temporal events detection.
+
+  =============== ======================================================
+
+  .. |RetinaDemoImg| image:: images/retina_TreeHdr_small.jpg
+                   :height: 90pt
+                   :width:  90pt
+
+ .. raw:: latex
+
+    \pagebreak
+
+.. toctree::
+   :hidden:
+
+   ../retina_model/retina_model
diff --git a/doc/tutorials/contrib/table_of_content_contrib/table_of_content_contrib.rst b/doc/tutorials/bioinspired/table_of_content_bioinspired/table_of_content_bioinspired.rst~
similarity index 100%
rename from doc/tutorials/contrib/table_of_content_contrib/table_of_content_contrib.rst
rename to doc/tutorials/bioinspired/table_of_content_bioinspired/table_of_content_bioinspired.rst~
diff --git a/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst b/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst
index 77723b2b8..2cf00f42a 100644
--- a/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst
+++ b/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst
@@ -3,42 +3,42 @@
 Camera calibration With OpenCV
 ******************************
 
-Cameras have been around for a long-long time. However, with the introduction of the cheap *pinhole* cameras in the late 20th century, they became a common occurrence in our everyday life. Unfortunately, this cheapness comes with its price: significant distortion. Luckily, these are constants and with a calibration and some remapping we can correct this. Furthermore, with calibration you may also determinate the relation between the camera's natural units (pixels) and the real world units (for example millimeters).
+Cameras have been around for a long-long time. However, with the introduction of the cheap *pinhole* cameras in the late 20th century, they became a common occurrence in our everyday life. Unfortunately, this cheapness comes with its price: significant distortion. Luckily, these are constants and with a calibration and some remapping we can correct this. Furthermore, with calibration you may also determine the relation between the camera's natural units (pixels) and the real world units (for example millimeters).
 
 Theory
 ======
 
-For the distortion OpenCV takes into account the radial and tangential factors. For the radial one uses the following formula:
+For the distortion OpenCV takes into account the radial and tangential factors. For the radial factor one uses the following formula:
 
 .. math::
 
    x_{corrected} = x( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) \\
    y_{corrected} = y( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6)
 
-So for an old pixel point at :math:`(x,y)` coordinate in the input image, for a corrected output image its position will be :math:`(x_{corrected} y_{corrected})` . The presence of the radial distortion manifests in form of the "barrel" or "fish-eye" effect.
+So for an old pixel point at :math:`(x,y)` coordinates in the input image, its position on the corrected output image will be :math:`(x_{corrected} y_{corrected})`. The presence of the radial distortion manifests in form of the "barrel" or "fish-eye" effect.
 
-Tangential distortion occurs because the image taking lenses are not perfectly parallel to the imaging plane. Correcting this is made via the formulas:
+Tangential distortion occurs because the image taking lenses are not perfectly parallel to the imaging plane. It can be corrected via the formulas:
 
 .. math::
 
    x_{corrected} = x + [ 2p_1xy + p_2(r^2+2x^2)] \\
    y_{corrected} = y + [ p_1(r^2+ 2y^2)+ 2p_2xy]
 
-So we have five distortion parameters, which in OpenCV are organized in a 5 column one row matrix:
+So we have five distortion parameters which in OpenCV are presented as one row matrix with 5 columns:
 
 .. math::
 
   Distortion_{coefficients}=(k_1 \hspace{10pt} k_2 \hspace{10pt} p_1 \hspace{10pt} p_2 \hspace{10pt} k_3)
 
-Now for the unit conversion, we use the following formula:
+Now for the unit conversion we use the following formula:
 
 .. math::
 
    \left [  \begin{matrix}   x \\   y \\  w \end{matrix} \right ] = \left [ \begin{matrix}   f_x & 0 & c_x \\  0 & f_y & c_y \\   0 & 0 & 1 \end{matrix} \right ] \left [ \begin{matrix}  X \\  Y \\   Z \end{matrix} \right ]
 
-Here the presence of the :math:`w` is cause we use a homography coordinate system (and :math:`w=Z`). The unknown parameters are :math:`f_x` and :math:`f_y` (camera focal lengths) and :math:`(c_x, c_y)` what are the optical centers expressed in pixels coordinates. If for both axes a common focal length is used with a given :math:`a` aspect ratio (usually 1), then :math:`f_y=f_x*a` and in the upper formula we will have a single :math:`f` focal length. The matrix containing these four parameters is referred to as the *camera matrix*. While the distortion coefficients are the same regardless of the camera resolutions used, these should be scaled along with the current resolution from the calibrated resolution.
+Here the presence of :math:`w` is explained by the use of homography coordinate system (and :math:`w=Z`). The unknown parameters are :math:`f_x` and :math:`f_y` (camera focal lengths) and :math:`(c_x, c_y)` which are the optical centers expressed in pixels coordinates. If for both axes a common focal length is used with a given :math:`a` aspect ratio (usually 1), then :math:`f_y=f_x*a` and in the upper formula we will have a single focal length :math:`f`. The matrix containing these four parameters is referred to as the *camera matrix*. While the distortion coefficients are the same regardless of the camera resolutions used, these should be scaled along with the current resolution from the calibrated resolution.
 
-The process of determining these two matrices is the calibration. Calculating these parameters is done by some basic geometrical equations. The equations used depend on the calibrating objects used. Currently OpenCV supports three types of object for calibration:
+The process of determining these two matrices is the calibration. Calculation of these parameters is done through basic geometrical equations. The equations used depend on the chosen calibrating objects. Currently OpenCV supports three types of objects for calibration:
 
 .. container:: enumeratevisibleitemswithsquare
 
@@ -46,7 +46,7 @@ The process of determining these two matrices is the calibration. Calculating th
    + Symmetrical circle pattern
    + Asymmetrical circle pattern
 
-Basically, you need to take snapshots of these patterns with your camera and let OpenCV find them. Each found pattern equals in a new equation. To solve the equation you need at least a predetermined number of pattern snapshots to form a well-posed equation system. This number is higher for the chessboard pattern and less for the circle ones. For example, in theory the chessboard one requires at least two. However, in practice we have a good amount of noise present in our input images, so for good results you will probably want at least 10 good snapshots of the input pattern in different position.
+Basically, you need to take snapshots of these patterns with your camera and let OpenCV find them. Each found pattern results in a new equation. To solve the equation you need at least a predetermined number of pattern snapshots to form a well-posed equation system. This number is higher for the chessboard pattern and less for the circle ones. For example, in theory the chessboard pattern requires at least two snapshots. However, in practice we have a good amount of noise present in our input images, so for good results you will probably need at least 10 good snapshots of the input pattern in different positions.
 
 Goal
 ====
@@ -55,19 +55,19 @@ The sample application will:
 
 .. container:: enumeratevisibleitemswithsquare
 
-   + Determinate the distortion matrix
-   + Determinate the camera matrix
-   + Input from Camera, Video and Image file list
-   + Configuration from XML/YAML file
+   + Determine the distortion matrix
+   + Determine the camera matrix
+   + Take input from Camera, Video and Image file list
+   + Read configuration from XML/YAML file
    + Save the results into XML/YAML file
    + Calculate re-projection error
 
 Source code
 ===========
 
-You may also find the source code in the :file:`samples/cpp/tutorial_code/calib3d/camera_calibration/` folder of the OpenCV source library or :download:`download it from here <../../../../samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp>`. The program has a single argument. The name of its configuration file. If none given it will try to open the one named "default.xml". :download:`Here's a sample configuration file <../../../../samples/cpp/tutorial_code/calib3d/camera_calibration/in_VID5.xml>` in XML format. In the configuration file you may choose to use as input a camera, a video file or an image list. If you opt for the later one, you need to create a configuration file where you enumerate the images to use. Here's :download:`an example of this <../../../../samples/cpp/tutorial_code/calib3d/camera_calibration/VID5.xml>`. The important part to remember is that the images needs to be specified using the absolute path or the relative one from your applications working directory. You may find all this in the beforehand mentioned directory.
+You may also find the source code in the :file:`samples/cpp/tutorial_code/calib3d/camera_calibration/` folder of the OpenCV source library or :download:`download it from here <../../../../samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp>`. The program has a single argument: the name of its configuration file. If none is given then it will try to open the one named "default.xml". :download:`Here's a sample configuration file <../../../../samples/cpp/tutorial_code/calib3d/camera_calibration/in_VID5.xml>` in XML format. In the configuration file you may choose to use camera as an input, a video file or an image list. If you opt for the last one, you will need to create a configuration file where you enumerate the images to use. Here's :download:`an example of this <../../../../samples/cpp/tutorial_code/calib3d/camera_calibration/VID5.xml>`. The important part to remember is that the images need to be specified using the absolute path or the relative one from your application's working directory. You may find all this in the samples directory mentioned above.
 
-The application starts up with reading the settings from the configuration file. Although, this is an important part of it, it has nothing to do with the subject of this tutorial: *camera calibration*. Therefore, I've chosen to do not post here the code part for that. The technical background on how to do this you can find in the :ref:`fileInputOutputXMLYAML` tutorial.
+The application starts up with reading the settings from the configuration file. Although, this is an important part of it, it has nothing to do with the subject of this tutorial: *camera calibration*. Therefore, I've chosen not to post the code for that part here. Technical background on how to do this you can find in the :ref:`fileInputOutputXMLYAML` tutorial.
 
 Explanation
 ===========
@@ -93,9 +93,9 @@ Explanation
             return -1;
       }
 
-   For this I've used simple OpenCV class input operation. After reading the file I've an additional post-process function that checks for the validity of the input. Only if all of them are good will be the *goodInput* variable true.
+   For this I've used simple OpenCV class input operation. After reading the file I've an additional post-processing function that checks validity of the input. Only if all inputs are good then *goodInput* variable will be true.
 
-#. **Get next input, if it fails or we have enough of them calibrate**. After this we have a big loop where we do the following operations: get the next image from the image list, camera or video file. If this fails or we have enough images we run the calibration process. In case of image we step out of the loop and otherwise the remaining frames will be undistorted (if the option is set) via changing from *DETECTION* mode to *CALIBRATED* one.
+#. **Get next input, if it fails or we have enough of them - calibrate**. After this we have a big loop where we do the following operations: get the next image from the image list, camera or video file. If this fails or we have enough images then we run the calibration process. In case of image we step out of the loop and otherwise the remaining frames will be undistorted (if the option is set) via changing from *DETECTION* mode to the *CALIBRATED* one.
 
    .. code-block:: cpp
 
@@ -125,7 +125,7 @@ Explanation
 
    For some cameras we may need to flip the input image. Here we do this too.
 
-#. **Find the pattern in the current input**. The formation of the equations I mentioned above consists of finding the major patterns in the input: in case of the chessboard this is their corners of the squares and for the circles, well, the circles itself. The position of these will form the result and is collected into the *pointBuf* vector.
+#. **Find the pattern in the current input**. The formation of the equations I mentioned above aims to finding major patterns in the input: in case of the chessboard this are corners of the squares and for the circles, well, the circles themselves. The position of these will form the result which will be written into the *pointBuf* vector.
 
    .. code-block:: cpp
 
@@ -146,9 +146,9 @@ Explanation
         break;
       }
 
-   Depending on the type of the input pattern you use either the :calib3d:`findChessboardCorners <findchessboardcorners>` or the :calib3d:`findCirclesGrid <findcirclesgrid>` function. For both of them you pass on the current image, the size of the board and you'll get back the positions of the patterns. Furthermore, they return a boolean variable that states if in the input we could find or not the pattern (we only need to take into account images where this is true!).
+   Depending on the type of the input pattern you use either the :calib3d:`findChessboardCorners <findchessboardcorners>` or the :calib3d:`findCirclesGrid <findcirclesgrid>` function. For both of them you pass the current image and the size of the board and you'll get the positions of the patterns. Furthermore, they return a boolean variable which states if the pattern was found in the input (we only need to take into account those images where this is true!).
 
-   Then again in case of cameras we only take camera images after an input delay time passed. This is in order to allow for the user to move the chessboard around and as getting different images. Same images mean same equations, and same equations at the calibration will form an ill-posed problem, so the calibration will fail. For square images the position of the corners are only approximate. We may improve this by calling the :feature2d:`cornerSubPix <cornersubpix>` function. This way will get a better calibration result. After this we add a valid inputs result to the *imagePoints* vector to collect all of the equations into a single container. Finally, for visualization feedback purposes we will draw the found points on the input image with the :calib3d:`findChessboardCorners <drawchessboardcorners>` function.
+   Then again in case of cameras we only take camera images when an input delay time is passed. This is done in order to allow user moving the chessboard around and getting different images. Similar images result in similar equations, and similar equations at the calibration step will form an ill-posed problem, so the calibration will fail. For square images the positions of the corners are only approximate. We may improve this by calling the :feature2d:`cornerSubPix <cornersubpix>` function. It will produce better calibration result. After this we add a valid inputs result to the *imagePoints* vector to collect all of the equations into a single container. Finally, for visualization feedback purposes we will draw the found points on the input image using :calib3d:`findChessboardCorners <drawchessboardcorners>` function.
 
    .. code-block:: cpp
 
@@ -175,7 +175,7 @@ Explanation
               drawChessboardCorners( view, s.boardSize, Mat(pointBuf), found );
         }
 
-#. **Show state and result for the user, plus command line control of the application**. The showing part consists of a text output on the live feed, and for video or camera input to show the "capturing" frame we simply bitwise negate the input image.
+#. **Show state and result to the user, plus command line control of the application**. This  part shows text output on the image.
 
    .. code-block:: cpp
 
@@ -199,7 +199,7 @@ Explanation
       if( blinkOutput )
          bitwise_not(view, view);
 
-   If we only ran the calibration and got the camera matrix plus the distortion coefficients we may just as correct the image with the :imgproc_geometric:`undistort <undistort>` function:
+   If we ran calibration and got camera's matrix with the distortion coefficients we may want to correct the image using :imgproc_geometric:`undistort <undistort>` function:
 
    .. code-block:: cpp
 
@@ -212,7 +212,7 @@ Explanation
       //------------------------------ Show image and check for input commands -------------------
       imshow("Image View", view);
 
-   Then we wait for an input key and if this is *u* we toggle the distortion removal, if it is *g* we start all over the detection process (or simply start it), and finally for the *ESC* key quit the application:
+   Then we wait for an input key and if this is *u* we toggle the distortion removal, if it is *g* we start again the detection process, and finally for the *ESC* key we quit the application:
 
    .. code-block:: cpp
 
@@ -229,7 +229,7 @@ Explanation
         imagePoints.clear();
       }
 
-#. **Show the distortion removal for the images too**. When you work with an image list it is not possible to remove the distortion inside the loop. Therefore, you must append this after the loop. Taking advantage of this now I'll expand the :imgproc_geometric:`undistort <undistort>` function, which is in fact first a call of the :imgproc_geometric:`initUndistortRectifyMap <initundistortrectifymap>` to find out the transformation matrices and then doing the transformation with the :imgproc_geometric:`remap <remap>` function. Because, after a successful calibration the map calculation needs to be done only once, by using this expanded form you may speed up your application:
+#. **Show the distortion removal for the images too**. When you work with an image list it is not possible to remove the distortion inside the loop. Therefore, you must do this after the loop. Taking advantage of this now I'll expand the :imgproc_geometric:`undistort <undistort>` function, which is in fact first calls :imgproc_geometric:`initUndistortRectifyMap <initundistortrectifymap>` to find transformation matrices and then performs transformation using :imgproc_geometric:`remap <remap>` function. Because, after successful calibration map calculation needs to be done only once, by using this expanded form you may speed up your application:
 
    .. code-block:: cpp
 
@@ -256,7 +256,7 @@ Explanation
 The calibration and save
 ========================
 
-Because the calibration needs to be only once per camera it makes sense to save them after a successful calibration. This way later on you can just load these values into your program. Due to this we first make the calibration, and if it succeeds we save the result into an OpenCV style XML or YAML file, depending on the extension you give in the configuration file.
+Because the calibration needs to be done only once per camera, it makes sense to save it after a successful calibration. This way later on you can just load these values into your program. Due to this we first make the calibration, and if it succeeds we save the result into an OpenCV style XML or YAML file, depending on the extension you give in the configuration file.
 
 Therefore in the first function we just split up these two processes. Because we want to save many of the calibration variables we'll create these variables here and pass on both of them to the calibration and saving function. Again, I'll not show the saving part as that has little in common with the calibration. Explore the source file in order to find out how and what:
 
@@ -280,7 +280,7 @@ Therefore in the first function we just split up these two processes. Because we
     return ok;
    }
 
-We do the calibration with the help of the :calib3d:`calibrateCamera <calibratecamera>` function. This has the following parameters:
+We do the calibration with the help of the :calib3d:`calibrateCamera <calibratecamera>` function. It has the following parameters:
 
 .. container:: enumeratevisibleitemswithsquare
 
@@ -318,11 +318,11 @@ We do the calibration with the help of the :calib3d:`calibrateCamera <calibratec
         calcBoardCornerPositions(s.boardSize, s.squareSize, objectPoints[0], s.calibrationPattern);
         objectPoints.resize(imagePoints.size(),objectPoints[0]);
 
-   + The image points. This is a vector of *Point2f* vector that for each input image contains where the important points (corners for chessboard, and center of circles for the circle patterns) were found. We already collected this from what the :calib3d:`findChessboardCorners <findchessboardcorners>` or the :calib3d:`findCirclesGrid <findcirclesgrid>` function returned. We just need to pass it on.
+   + The image points. This is a vector of *Point2f* vector which for each input image contains coordinates of the important points (corners for chessboard and centers of the circles for the circle pattern). We have already collected this from :calib3d:`findChessboardCorners <findchessboardcorners>` or :calib3d:`findCirclesGrid <findcirclesgrid>` function. We just need to pass it on.
 
    + The size of the image acquired from the camera, video file or the images.
 
-   + The camera matrix. If we used the fix aspect ratio option we need to set the :math:`f_x` to zero:
+   + The camera matrix. If we used the fixed aspect ratio option we need to set the :math:`f_x` to zero:
 
      .. code-block:: cpp
 
@@ -336,16 +336,16 @@ We do the calibration with the help of the :calib3d:`calibrateCamera <calibratec
 
         distCoeffs = Mat::zeros(8, 1, CV_64F);
 
-   + The function will calculate for all the views the rotation and translation vector that transform the object points (given in the model coordinate space) to the image points (given in the world coordinate space). The 7th and 8th parameters are an output vector of matrices containing in the ith position the rotation and translation vector for the ith object point to the ith image point.
+   + For all the views the function will calculate rotation and translation vectors which transform the object points (given in the model coordinate space) to the image points (given in the world coordinate space). The 7-th and 8-th parameters are the output vector of matrices containing in the i-th position the rotation and translation vector for the i-th object point to the i-th image point.
 
-   + The final argument is a flag. You need to specify here options like fix the aspect ratio for the focal length, assume zero tangential distortion or to fix the principal point.
+   + The final argument is the flag. You need to specify here options like fix the aspect ratio for the focal length, assume zero tangential distortion or to fix the principal point.
 
    .. code-block:: cpp
 
      double rms = calibrateCamera(objectPoints, imagePoints, imageSize, cameraMatrix,
                                  distCoeffs, rvecs, tvecs, s.flag|CV_CALIB_FIX_K4|CV_CALIB_FIX_K5);
 
-   + The function returns the average re-projection error. This number gives a good estimation of just how exact is the found parameters. This should be as close to zero as possible. Given the intrinsic, distortion, rotation and translation matrices we may calculate the error for one view by using the :calib3d:`projectPoints <projectpoints>` to first transform the object point to image point. Then we calculate the absolute norm between what we got with our transformation and the corner/circle finding algorithm. To find the average error we calculate the arithmetical mean of the errors calculate for all the calibration images.
+   + The function returns the average re-projection error. This number gives a good estimation of precision of the found parameters. This should be as close to zero as possible. Given the intrinsic, distortion, rotation and translation matrices we may calculate the error for one view by using the :calib3d:`projectPoints <projectpoints>` to first transform the object point to image point. Then we calculate the absolute norm between what we got with our transformation and the corner/circle finding algorithm. To find the average error we calculate the arithmetical mean of the errors calculated for all the calibration images.
 
      .. code-block:: cpp
 
@@ -378,25 +378,25 @@ We do the calibration with the help of the :calib3d:`calibrateCamera <calibratec
 Results
 =======
 
-Let there be :download:`this input chessboard pattern <../../../pattern.png>` that has a size of 9 X 6. I've used an AXIS IP camera to create a couple of snapshots of the board and saved it into a VID5 directory. I've put this inside the :file:`images/CameraCalibraation` folder of my working directory and created the following :file:`VID5.XML` file that describes which images to use:
+Let there be :download:`this input chessboard pattern <../../../pattern.png>` which has a size of 9 X 6. I've used an AXIS IP camera to create a couple of snapshots of the board and saved it into VID5 directory. I've put this inside the :file:`images/CameraCalibration` folder of my working directory and created the following :file:`VID5.XML` file that describes which images to use:
 
 .. code-block:: xml
 
    <?xml version="1.0"?>
    <opencv_storage>
    <images>
-   images/CameraCalibraation/VID5/xx1.jpg
-   images/CameraCalibraation/VID5/xx2.jpg
-   images/CameraCalibraation/VID5/xx3.jpg
-   images/CameraCalibraation/VID5/xx4.jpg
-   images/CameraCalibraation/VID5/xx5.jpg
-   images/CameraCalibraation/VID5/xx6.jpg
-   images/CameraCalibraation/VID5/xx7.jpg
-   images/CameraCalibraation/VID5/xx8.jpg
+   images/CameraCalibration/VID5/xx1.jpg
+   images/CameraCalibration/VID5/xx2.jpg
+   images/CameraCalibration/VID5/xx3.jpg
+   images/CameraCalibration/VID5/xx4.jpg
+   images/CameraCalibration/VID5/xx5.jpg
+   images/CameraCalibration/VID5/xx6.jpg
+   images/CameraCalibration/VID5/xx7.jpg
+   images/CameraCalibration/VID5/xx8.jpg
    </images>
    </opencv_storage>
 
-Then specified the :file:`images/CameraCalibraation/VID5/VID5.XML` as input in the configuration file. Here's a chessboard pattern found during the runtime of the application:
+Then passed :file:`images/CameraCalibration/VID5/VID5.XML` as an input in the configuration file. Here's a chessboard pattern found during the runtime of the application:
 
 .. image:: images/fileListImage.jpg
    :alt: A found chessboard
@@ -433,7 +433,7 @@ In both cases in the specified output XML/YAML file you'll find the camera and d
     -4.1802327176423804e-001 5.0715244063187526e-001 0. 0.
     -5.7843597214487474e-001</data></Distortion_Coefficients>
 
-Add these values as constants to your program, call the :imgproc_geometric:`initUndistortRectifyMap <initundistortrectifymap>` and the :imgproc_geometric:`remap <remap>` function to remove distortion and enjoy distortion free inputs with cheap and low quality cameras.
+Add these values as constants to your program, call the :imgproc_geometric:`initUndistortRectifyMap <initundistortrectifymap>` and the :imgproc_geometric:`remap <remap>` function to remove distortion and enjoy distortion free inputs for cheap and low quality cameras.
 
 You may observe a runtime instance of this on the `YouTube here <https://www.youtube.com/watch?v=ViPN810E0SU>`_.
 
diff --git a/doc/tutorials/calib3d/camera_calibration_square_chess/camera_calibration_square_chess.rst b/doc/tutorials/calib3d/camera_calibration_square_chess/camera_calibration_square_chess.rst
index 4eff2640a..e08550750 100644
--- a/doc/tutorials/calib3d/camera_calibration_square_chess/camera_calibration_square_chess.rst
+++ b/doc/tutorials/calib3d/camera_calibration_square_chess/camera_calibration_square_chess.rst
@@ -59,4 +59,4 @@ Now, let us write a code that detects a chessboard in a new image and finds its
 #.
     Calculate reprojection error like it is done in ``calibration`` sample (see ``opencv/samples/cpp/calibration.cpp``, function ``computeReprojectionErrors``).
 
-Question: how to calculate the distance from the camera origin to any of the corners?
\ No newline at end of file
+Question: how to calculate the distance from the camera origin to any of the corners?
diff --git a/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.rst b/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.rst
index 87166b7cc..42f6a6091 100644
--- a/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.rst
+++ b/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.rst
@@ -277,4 +277,3 @@ You may observe a runtime instance of this on the `YouTube here <https://www.you
   <div align="center">
   <iframe title="File Input and Output using XML and YAML files in OpenCV" width="560" height="349" src="http://www.youtube.com/embed/A4yqVnByMMM?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
   </div>
-
diff --git a/doc/tutorials/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.rst b/doc/tutorials/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.rst
index 99d669274..9285509b0 100644
--- a/doc/tutorials/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.rst
+++ b/doc/tutorials/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.rst
@@ -127,6 +127,3 @@ You may observe a runtime instance of this on the `YouTube here <https://www.you
   <div align="center">
   <iframe title="Interoperability with OpenCV 1" width="560" height="349" src="http://www.youtube.com/embed/qckm-zvo31w?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
   </div>
-
-
-
diff --git a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst
index 03d82bbd4..76c5a4541 100644
--- a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst
+++ b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst
@@ -143,7 +143,7 @@ Although *Mat* works really well as an image container, it is also a general mat
 
     You cannot initialize the matrix values with this construction. It will only reallocate its matrix data memory if the new size will not fit into the old one.
 
-   + MATLAB style initializer: :basicstructures:`zeros() <mat-zeros>`, :basicstructures:`ones() <mat-ones>`, ::basicstructures:`eyes() <mat-eye>`. Specify size and data type to use:
+   + MATLAB style initializer: :basicstructures:`zeros() <mat-zeros>`, :basicstructures:`ones() <mat-ones>`, :basicstructures:`eye() <mat-eye>`. Specify size and data type to use:
 
      .. literalinclude:: ../../../../samples/cpp/tutorial_code/core/mat_the_basic_image_container/mat_the_basic_image_container.cpp
         :language: cpp
diff --git a/doc/tutorials/core/table_of_content_core/table_of_content_core.rst b/doc/tutorials/core/table_of_content_core/table_of_content_core.rst
index b50d97635..4150265d1 100644
--- a/doc/tutorials/core/table_of_content_core/table_of_content_core.rst
+++ b/doc/tutorials/core/table_of_content_core/table_of_content_core.rst
@@ -218,4 +218,4 @@ Here you will learn the about the basic building blocks of the library. A must r
    ../random_generator_and_text/random_generator_and_text
    ../discrete_fourier_transform/discrete_fourier_transform
    ../file_input_output_with_xml_yml/file_input_output_with_xml_yml
-   ../interoperability_with_OpenCV_1/interoperability_with_OpenCV_1
\ No newline at end of file
+   ../interoperability_with_OpenCV_1/interoperability_with_OpenCV_1
diff --git a/doc/tutorials/definitions/README.txt b/doc/tutorials/definitions/README.txt
index 829e27cac..a598a95dd 100644
--- a/doc/tutorials/definitions/README.txt
+++ b/doc/tutorials/definitions/README.txt
@@ -1 +1 @@
-Include in this directory only defintion files. None of the reST files entered here will be parsed by the Sphinx Builder.
\ No newline at end of file
+Include in this directory only defintion files. None of the reST files entered here will be parsed by the Sphinx Builder.
diff --git a/doc/tutorials/definitions/noContent.rst b/doc/tutorials/definitions/noContent.rst
index ded9e78fa..c2780c266 100644
--- a/doc/tutorials/definitions/noContent.rst
+++ b/doc/tutorials/definitions/noContent.rst
@@ -1,3 +1,3 @@
 
 .. note::
-   Unfortunetly we have no tutorials into this section. Nevertheless, our tutorial writting team is working on it. If you have a tutorial suggestion or you have writen yourself a tutorial (or coded a sample code) that you would like to see here please contact us via our :opencv_group:`user group <>`.
\ No newline at end of file
+   Unfortunetly we have no tutorials into this section. And you can help us with that, since OpenCV is a community effort. If you have a tutorial suggestion or you have written a tutorial yourself (or coded a sample code) that you would like to see here, please contact follow these instructions: :ref:`howToWriteTutorial` and :how_to_contribute:`How to contribute <>`.
diff --git a/doc/tutorials/features2d/feature_description/feature_description.rst b/doc/tutorials/features2d/feature_description/feature_description.rst
index 2d97f83be..b6e2dfa2b 100644
--- a/doc/tutorials/features2d/feature_description/feature_description.rst
+++ b/doc/tutorials/features2d/feature_description/feature_description.rst
@@ -100,6 +100,3 @@ Result
    .. image:: images/Feature_Description_BruteForce_Result.jpg
       :align: center
       :height: 200pt
-
-
-
diff --git a/doc/tutorials/features2d/feature_detection/feature_detection.rst b/doc/tutorials/features2d/feature_detection/feature_detection.rst
index 1c9ca7cf8..02da6d080 100644
--- a/doc/tutorials/features2d/feature_detection/feature_detection.rst
+++ b/doc/tutorials/features2d/feature_detection/feature_detection.rst
@@ -31,6 +31,7 @@ This tutorial code's is shown lines below. You can also download it from `here <
    #include "opencv2/core.hpp"
    #include "opencv2/features2d.hpp"
    #include "opencv2/highgui.hpp"
+   #include "opencv2/nonfree.hpp"
 
    using namespace cv;
 
@@ -94,4 +95,3 @@ Result
    .. image:: images/Feature_Detection_Result_b.jpg
       :align: center
       :height: 200pt
-
diff --git a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst
index 4b3ffbcae..9839c8984 100644
--- a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst
+++ b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst
@@ -28,6 +28,7 @@ This tutorial code's is shown lines below. You can also download it from `here <
    #include "opencv2/core.hpp"
    #include "opencv2/features2d.hpp"
    #include "opencv2/highgui.hpp"
+   #include "opencv2/nonfree.hpp"
 
    using namespace cv;
 
diff --git a/doc/tutorials/features2d/feature_homography/feature_homography.rst b/doc/tutorials/features2d/feature_homography/feature_homography.rst
index 0d7822959..3040ed7d7 100644
--- a/doc/tutorials/features2d/feature_homography/feature_homography.rst
+++ b/doc/tutorials/features2d/feature_homography/feature_homography.rst
@@ -30,6 +30,7 @@ This tutorial code's is shown lines below. You can also download it from `here <
    #include "opencv2/features2d.hpp"
    #include "opencv2/highgui.hpp"
    #include "opencv2/calib3d.hpp"
+   #include "opencv2/nonfree.hpp"
 
    using namespace cv;
 
@@ -145,4 +146,3 @@ Result
    .. image:: images/Feature_Homography_Result.jpg
       :align: center
       :height: 200pt
-
diff --git a/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.rst b/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.rst
index cc90082b8..f4107804b 100644
--- a/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.rst
+++ b/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.rst
@@ -201,4 +201,3 @@ Learn about how to use the feature points  detectors, descriptors and matching f
    ../feature_flann_matcher/feature_flann_matcher
    ../feature_homography/feature_homography
    ../detection_of_planar_objects/detection_of_planar_objects
-
diff --git a/doc/tutorials/features2d/trackingmotion/corner_subpixeles/corner_subpixeles.rst b/doc/tutorials/features2d/trackingmotion/corner_subpixeles/corner_subpixeles.rst
index a267b1380..4e9015cb6 100644
--- a/doc/tutorials/features2d/trackingmotion/corner_subpixeles/corner_subpixeles.rst
+++ b/doc/tutorials/features2d/trackingmotion/corner_subpixeles/corner_subpixeles.rst
@@ -135,4 +135,3 @@ Here is the result:
 
 .. image:: images/Corner_Subpixeles_Result.jpg
               :align: center
-
diff --git a/doc/tutorials/features2d/trackingmotion/generic_corner_detector/generic_corner_detector.rst b/doc/tutorials/features2d/trackingmotion/generic_corner_detector/generic_corner_detector.rst
index 465ff216c..d33bf3df6 100644
--- a/doc/tutorials/features2d/trackingmotion/generic_corner_detector/generic_corner_detector.rst
+++ b/doc/tutorials/features2d/trackingmotion/generic_corner_detector/generic_corner_detector.rst
@@ -37,4 +37,3 @@ Result
 
 .. image:: images/My_Shi_Tomasi_corner_detector_Result.jpg
               :align: center
-
diff --git a/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.rst b/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.rst
index 9f71e987f..1495befc7 100644
--- a/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.rst
+++ b/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.rst
@@ -118,5 +118,3 @@ Result
 
 .. image:: images/Feature_Detection_Result_a.jpg
               :align: center
-
-
diff --git a/doc/tutorials/features2d/trackingmotion/harris_detector/harris_detector.rst b/doc/tutorials/features2d/trackingmotion/harris_detector/harris_detector.rst
index 0f6bb33fa..e0e4ea47e 100644
--- a/doc/tutorials/features2d/trackingmotion/harris_detector/harris_detector.rst
+++ b/doc/tutorials/features2d/trackingmotion/harris_detector/harris_detector.rst
@@ -243,5 +243,3 @@ The detected corners are surrounded by a small black circle
 
 .. image:: images/Harris_Detector_Result.jpg
               :align: center
-
-
diff --git a/doc/tutorials/general/table_of_content_general/table_of_content_general.rst b/doc/tutorials/general/table_of_content_general/table_of_content_general.rst
index 2a6d57ffb..6e127a98f 100644
--- a/doc/tutorials/general/table_of_content_general/table_of_content_general.rst
+++ b/doc/tutorials/general/table_of_content_general/table_of_content_general.rst
@@ -10,4 +10,3 @@ These tutorials are the bottom of the iceberg as they link together multiple of
 .. raw:: latex
 
    \pagebreak
-
diff --git a/doc/tutorials/gpu/gpu-basics-similarity/gpu-basics-similarity.rst b/doc/tutorials/gpu/gpu-basics-similarity/gpu-basics-similarity.rst
index 86400bfae..2e8fe9559 100644
--- a/doc/tutorials/gpu/gpu-basics-similarity/gpu-basics-similarity.rst
+++ b/doc/tutorials/gpu/gpu-basics-similarity/gpu-basics-similarity.rst
@@ -1 +1,234 @@
-﻿.. _gpuBasicsSimilarity:Similarity check (PNSR and SSIM) on the GPU*******************************************Goal====In the :ref:`videoInputPSNRMSSIM` tutorial I already presented the PSNR and SSIM methods forchecking the similarity between the two images. And as you could see there performing these takesquite some time, especially in the case of the SSIM. However, if the performance numbers of anOpenCV implementation for the CPU do not satisfy you and you happen to have an NVidia CUDA GPUdevice in your system all is not lost. You may try to port or write your algorithm for the videocard.This tutorial will give a good grasp on how to approach coding by using the GPU module of OpenCV. Asa prerequisite you should already know how to handle the core, highgui and imgproc modules. So, ourgoals are:.. container:: enumeratevisibleitemswithsquare   + What's different compared to the CPU?   + Create the GPU code for the PSNR and SSIM   + Optimize the code for maximal performanceThe source code===============You may also find the source code and these video file in the:file:`samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity` folder of theOpenCV source library or :download:`download it from here<../../../../samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp>`. Thefull source code is quite long (due to the controlling of the application via the command linearguments and performance measurement). Therefore, to avoid cluttering up these sections with thoseyou'll find here only the functions itself.The PSNR returns a float number, that if the two inputs are similar between 30 and 50 (higher isbetter)... literalinclude:: ../../../../samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp   :language: cpp   :linenos:   :tab-width: 4   :lines: 165-210, 18-23, 210-235The SSIM returns the MSSIM of the images. This is too a float number between zero and one (higher isbetter), however we have one for each channel. Therefore, we return a *Scalar* OpenCV datastructure:.. literalinclude:: ../../../../samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp   :language: cpp   :linenos:   :tab-width: 4   :lines: 235-355, 26-42, 357-How to do it? - The GPU=======================Now as you can see we have three types of functions for each operation. One for the CPU and two forthe GPU. The reason I made two for the GPU is too illustrate that often simple porting your CPU toGPU will actually make it slower. If you want some performance gain you will need to remember a fewrules, whose I'm going to detail later on.The development of the GPU module was made so that it resembles as much as possible its CPUcounterpart. This is to make porting easy. The first thing you need to do before writing any code isto link the GPU module to your project, and include the header file for the module. All thefunctions and data structures of the GPU are in a *gpu* sub namespace of the *cv* namespace. You mayadd this to the default one via the *use namespace* keyword, or mark it everywhere explicitly viathe cv:: to avoid confusion. I'll do the later... code-block:: cpp   #include <opencv2/gpu.hpp>        // GPU structures and methodsGPU stands for **g**\ raphics **p**\ rocessing **u**\ nit. It was originally build to rendergraphical scenes. These scenes somehow build on a lot of data. Nevertheless, these aren't alldependent one from another in a sequential way and as it is possible a parallel processing of them.Due to this a GPU will contain multiple smaller processing units. These aren't the state of the artprocessors and on a one on one test with a CPU it will fall behind. However, its strength lies inits numbers. In the last years there has been an increasing trend to harvest these massive parallelpowers of the GPU in non-graphical scene rendering too. This gave birth to the general-purposecomputation on graphics processing units (GPGPU).The GPU has its own memory. When you read data from the hard drive with OpenCV into a *Mat* objectthat takes place in your systems memory. The CPU works somehow directly on this (via its cache),however the GPU cannot. He has too transferred the information he will use for calculations from thesystem memory to its own. This is done via an upload process and takes time. In the end the resultwill have to be downloaded back to your system memory for your CPU to see it and use it. Portingsmall functions to GPU is not recommended as the upload/download time will be larger than the amountyou gain by a parallel execution.Mat objects are stored only in the system memory (or the CPU cache). For getting an OpenCV matrixto the GPU you'll need to use its GPU counterpart :gpudatastructure:`GpuMat <gpu-gpumat>`. It workssimilar to the Mat with a 2D only limitation and no reference returning for its functions (cannotmix GPU references with CPU ones). To upload a Mat object to the GPU you need to call the uploadfunction after creating an instance of the class. To download you may use simple assignment to aMat object or use the download function... code-block:: cpp   Mat I1;         // Main memory item - read image into with imread for example   gpu::GpuMat gI; // GPU matrix - for now empty   gI1.upload(I1); // Upload a data from the system memory to the GPU memory   I1 = gI1;       // Download, gI1.download(I1) will work tooOnce you have your data up in the GPU memory you may call GPU enabled functions of OpenCV. Most ofthe functions keep the same name just as on the CPU, with the difference that they only accept*GpuMat* inputs. A full list of these you will find in the documentation: `online here<http://opencv.itseez.com/modules/gpu/doc/gpu.html>`_ or the OpenCV reference manual that comes withthe source code.Another thing to keep in mind is that not for all channel numbers you can make efficient algorithmson the GPU. Generally, I found that the input images for the GPU images need to be either one orfour channel ones and one of the char or float type for the item sizes. No double support on theGPU, sorry. Passing other types of objects for some functions will result in an exception thrown,and an error message on the error output. The documentation details in most of the places the typesaccepted for the inputs. If you have three channel images as an input you can do two things: eitheradds a new channel (and use char elements) or split up the image and call the function for eachimage. The first one isn't really recommended as you waste memory.For some functions, where the position of the elements (neighbor items) doesn't matter quicksolution is to just reshape it into a single channel image. This is the case for the PSNRimplementation where for the *absdiff* method the value of the neighbors is not important. However,for the *GaussianBlur* this isn't an option and such need to use the split method for the SSIM. Withthis knowledge you can already make a GPU viable code (like mine GPU one) and run it. You'll besurprised to see that it might turn out slower than your CPU implementation.Optimization============The reason for this is that you're throwing out on the window the price for memory allocation anddata transfer. And on the GPU this is damn high. Another possibility for optimization is tointroduce asynchronous OpenCV GPU calls too with the help of the:gpudatastructure:`gpu::Stream<gpu-stream>`.1. Memory allocation on the GPU is considerable. Therefore, if it’s possible allocate new memory as   few times as possible. If you create a function what you intend to call multiple times it is a   good idea to allocate any local parameters for the function only once, during the first call.   To do this you create a data structure containing all the local variables you will use. For   instance in case of the PSNR these are:   .. code-block:: cpp      struct BufferPSNR                                     // Optimized GPU versions        {   // Data allocations are very expensive on GPU. Use a buffer to solve: allocate once reuse later.        gpu::GpuMat gI1, gI2, gs, t1,t2;        gpu::GpuMat buf;      };   Then create an instance of this in the main program:   .. code-block:: cpp      BufferPSNR bufferPSNR;   And finally pass this to the function each time you call it:   .. code-block:: cpp      double getPSNR_GPU_optimized(const Mat& I1, const Mat& I2, BufferPSNR& b)   Now you access these local parameters as: *b.gI1*, *b.buf* and so on. The GpuMat will only   reallocate itself on a new call if the new matrix size is different from the previous one.#. Avoid unnecessary function data transfers. Any small data transfer will be significant one once   you go to the GPU. Therefore, if possible make all calculations in-place (in other words do not   create new memory objects - for reasons explained at the previous point). For example, although   expressing arithmetical operations may be easier to express in one line formulas, it will be   slower. In case of the SSIM at one point I need to calculate:   .. code-block:: cpp      b.t1 = 2 * b.mu1_mu2 + C1;   Although the upper call will succeed observe that there is a hidden data transfer present. Before   it makes the addition it needs to store somewhere the multiplication. Therefore, it will create a   local matrix in the background, add to that the *C1* value and finally assign that to *t1*. To   avoid this we use the gpu functions, instead of the arithmetic operators:   .. code-block:: cpp      gpu::multiply(b.mu1_mu2, 2, b.t1); //b.t1 = 2 * b.mu1_mu2 + C1;      gpu::add(b.t1, C1, b.t1);#. Use asynchronous calls (the :gpudatastructure:`gpu::Stream <gpu-stream>`). By default whenever   you call a gpu function it will wait for the call to finish and return with the result   afterwards. However, it is possible to make asynchronous calls, meaning it will call for the   operation execution, make the costly data allocations for the algorithm and return back right   away. Now you can call another function if you wish to do so. For the MSSIM this is a small   optimization point. In our default implementation we split up the image into channels and call   then for each channel the gpu functions. A small degree of parallelization is possible with the   stream. By using a stream we can make the data allocation, upload operations while the GPU is   already executing a given method. For example we need to upload two images. We queue these one   after another and call already the function that processes it. The functions will wait for the   upload to finish, however while that happens makes the output buffer allocations for the function   to be executed next.   .. code-block:: cpp      gpu::Stream stream;      stream.enqueueConvert(b.gI1, b.t1, CV_32F);    // Upload      gpu::split(b.t1, b.vI1, stream);              // Methods (pass the stream as final parameter).      gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, stream);        // I1^2Result and conclusion=====================On an Intel P8700 laptop CPU paired with a low end NVidia GT220M here are the performance numbers:.. code-block:: cpp   Time of PSNR CPU (averaged for 10 runs): 41.4122 milliseconds. With result of: 19.2506   Time of PSNR GPU (averaged for 10 runs): 158.977 milliseconds. With result of: 19.2506   Initial call GPU optimized:              31.3418 milliseconds. With result of: 19.2506   Time of PSNR GPU OPTIMIZED ( / 10 runs): 24.8171 milliseconds. With result of: 19.2506   Time of MSSIM CPU (averaged for 10 runs): 484.343 milliseconds. With result of B0.890964 G0.903845 R0.936934   Time of MSSIM GPU (averaged for 10 runs): 745.105 milliseconds. With result of B0.89922 G0.909051 R0.968223   Time of MSSIM GPU Initial Call            357.746 milliseconds. With result of B0.890964 G0.903845 R0.936934   Time of MSSIM GPU OPTIMIZED ( / 10 runs): 203.091 milliseconds. With result of B0.890964 G0.903845 R0.936934In both cases we managed a performance increase of almost 100% compared to the CPU implementation.It may be just the improvement needed for your application to work. You may observe a runtimeinstance of this on the `YouTube here <https://www.youtube.com/watch?v=3_ESXmFlnvY>`_... raw:: html  <div align="center">  <iframe title="Similarity check (PNSR and SSIM) on the GPU" width="560" height="349" src="http://www.youtube.com/embed/3_ESXmFlnvY?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>  </div>
\ No newline at end of file
+.. _gpuBasicsSimilarity:
+
+Similarity check (PNSR and SSIM) on the GPU
+*******************************************
+
+Goal
+====
+
+In the :ref:`videoInputPSNRMSSIM` tutorial I already presented the PSNR and SSIM methods for
+checking the similarity between the two images. And as you could see there performing these takes
+quite some time, especially in the case of the SSIM. However, if the performance numbers of an
+OpenCV implementation for the CPU do not satisfy you and you happen to have an NVidia CUDA GPU
+device in your system all is not lost. You may try to port or write your algorithm for the video
+card.
+
+This tutorial will give a good grasp on how to approach coding by using the GPU module of OpenCV. As
+a prerequisite you should already know how to handle the core, highgui and imgproc modules. So, our
+goals are:
+
+.. container:: enumeratevisibleitemswithsquare
+
+   + What's different compared to the CPU?
+   + Create the GPU code for the PSNR and SSIM
+   + Optimize the code for maximal performance
+
+The source code
+===============
+
+You may also find the source code and these video file in the
+:file:`samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity` folder of the
+OpenCV source library or :download:`download it from here
+<../../../../samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp>`. The
+full source code is quite long (due to the controlling of the application via the command line
+arguments and performance measurement). Therefore, to avoid cluttering up these sections with those
+you'll find here only the functions itself.
+
+The PSNR returns a float number, that if the two inputs are similar between 30 and 50 (higher is
+better).
+
+.. literalinclude:: ../../../../samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
+   :language: cpp
+   :linenos:
+   :tab-width: 4
+   :lines: 165-210, 18-23, 210-235
+
+The SSIM returns the MSSIM of the images. This is too a float number between zero and one (higher is
+better), however we have one for each channel. Therefore, we return a *Scalar* OpenCV data
+structure:
+
+.. literalinclude:: ../../../../samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
+   :language: cpp
+   :linenos:
+   :tab-width: 4
+   :lines: 235-355, 26-42, 357-
+
+How to do it? - The GPU
+=======================
+
+Now as you can see we have three types of functions for each operation. One for the CPU and two for
+the GPU. The reason I made two for the GPU is too illustrate that often simple porting your CPU to
+GPU will actually make it slower. If you want some performance gain you will need to remember a few
+rules, whose I'm going to detail later on.
+
+The development of the GPU module was made so that it resembles as much as possible its CPU
+counterpart. This is to make porting easy. The first thing you need to do before writing any code is
+to link the GPU module to your project, and include the header file for the module. All the
+functions and data structures of the GPU are in a *gpu* sub namespace of the *cv* namespace. You may
+add this to the default one via the *use namespace* keyword, or mark it everywhere explicitly via
+the cv:: to avoid confusion. I'll do the later.
+
+.. code-block:: cpp
+
+   #include <opencv2/gpu.hpp>        // GPU structures and methods
+
+GPU stands for **g**\ raphics **p**\ rocessing **u**\ nit. It was originally build to render
+graphical scenes. These scenes somehow build on a lot of data. Nevertheless, these aren't all
+dependent one from another in a sequential way and as it is possible a parallel processing of them.
+Due to this a GPU will contain multiple smaller processing units. These aren't the state of the art
+processors and on a one on one test with a CPU it will fall behind. However, its strength lies in
+its numbers. In the last years there has been an increasing trend to harvest these massive parallel
+powers of the GPU in non-graphical scene rendering too. This gave birth to the general-purpose
+computation on graphics processing units (GPGPU).
+
+The GPU has its own memory. When you read data from the hard drive with OpenCV into a *Mat* object
+that takes place in your systems memory. The CPU works somehow directly on this (via its cache),
+however the GPU cannot. He has too transferred the information he will use for calculations from the
+system memory to its own. This is done via an upload process and takes time. In the end the result
+will have to be downloaded back to your system memory for your CPU to see it and use it. Porting
+small functions to GPU is not recommended as the upload/download time will be larger than the amount
+you gain by a parallel execution.
+
+Mat objects are stored only in the system memory (or the CPU cache). For getting an OpenCV matrix
+to the GPU you'll need to use its GPU counterpart :gpudatastructure:`GpuMat <gpu-gpumat>`. It works
+similar to the Mat with a 2D only limitation and no reference returning for its functions (cannot
+mix GPU references with CPU ones). To upload a Mat object to the GPU you need to call the upload
+function after creating an instance of the class. To download you may use simple assignment to a
+Mat object or use the download function.
+
+.. code-block:: cpp
+
+   Mat I1;         // Main memory item - read image into with imread for example
+   gpu::GpuMat gI; // GPU matrix - for now empty
+   gI1.upload(I1); // Upload a data from the system memory to the GPU memory
+
+   I1 = gI1;       // Download, gI1.download(I1) will work too
+
+Once you have your data up in the GPU memory you may call GPU enabled functions of OpenCV. Most of
+the functions keep the same name just as on the CPU, with the difference that they only accept
+*GpuMat* inputs. A full list of these you will find in the documentation: `online here
+<http://docs.opencv.org/modules/gpu/doc/gpu.html>`_ or the OpenCV reference manual that comes with
+the source code.
+
+Another thing to keep in mind is that not for all channel numbers you can make efficient algorithms
+on the GPU. Generally, I found that the input images for the GPU images need to be either one or
+four channel ones and one of the char or float type for the item sizes. No double support on the
+GPU, sorry. Passing other types of objects for some functions will result in an exception thrown,
+and an error message on the error output. The documentation details in most of the places the types
+accepted for the inputs. If you have three channel images as an input you can do two things: either
+adds a new channel (and use char elements) or split up the image and call the function for each
+image. The first one isn't really recommended as you waste memory.
+
+For some functions, where the position of the elements (neighbor items) doesn't matter quick
+solution is to just reshape it into a single channel image. This is the case for the PSNR
+implementation where for the *absdiff* method the value of the neighbors is not important. However,
+for the *GaussianBlur* this isn't an option and such need to use the split method for the SSIM. With
+this knowledge you can already make a GPU viable code (like mine GPU one) and run it. You'll be
+surprised to see that it might turn out slower than your CPU implementation.
+
+Optimization
+============
+
+The reason for this is that you're throwing out on the window the price for memory allocation and
+data transfer. And on the GPU this is damn high. Another possibility for optimization is to
+introduce asynchronous OpenCV GPU calls too with the help of the
+:gpudatastructure:`gpu::Stream<gpu-stream>`.
+
+1. Memory allocation on the GPU is considerable. Therefore, if it’s possible allocate new memory as
+   few times as possible. If you create a function what you intend to call multiple times it is a
+   good idea to allocate any local parameters for the function only once, during the first call.
+   To do this you create a data structure containing all the local variables you will use. For
+   instance in case of the PSNR these are:
+
+   .. code-block:: cpp
+
+      struct BufferPSNR                                     // Optimized GPU versions
+        {   // Data allocations are very expensive on GPU. Use a buffer to solve: allocate once reuse later.
+        gpu::GpuMat gI1, gI2, gs, t1,t2;
+
+        gpu::GpuMat buf;
+      };
+
+   Then create an instance of this in the main program:
+
+   .. code-block:: cpp
+
+      BufferPSNR bufferPSNR;
+
+   And finally pass this to the function each time you call it:
+
+   .. code-block:: cpp
+
+      double getPSNR_GPU_optimized(const Mat& I1, const Mat& I2, BufferPSNR& b)
+
+   Now you access these local parameters as: *b.gI1*, *b.buf* and so on. The GpuMat will only
+   reallocate itself on a new call if the new matrix size is different from the previous one.
+
+#. Avoid unnecessary function data transfers. Any small data transfer will be significant one once
+   you go to the GPU. Therefore, if possible make all calculations in-place (in other words do not
+   create new memory objects - for reasons explained at the previous point). For example, although
+   expressing arithmetical operations may be easier to express in one line formulas, it will be
+   slower. In case of the SSIM at one point I need to calculate:
+
+   .. code-block:: cpp
+
+      b.t1 = 2 * b.mu1_mu2 + C1;
+
+   Although the upper call will succeed observe that there is a hidden data transfer present. Before
+   it makes the addition it needs to store somewhere the multiplication. Therefore, it will create a
+   local matrix in the background, add to that the *C1* value and finally assign that to *t1*. To
+   avoid this we use the gpu functions, instead of the arithmetic operators:
+
+   .. code-block:: cpp
+
+      gpu::multiply(b.mu1_mu2, 2, b.t1); //b.t1 = 2 * b.mu1_mu2 + C1;
+      gpu::add(b.t1, C1, b.t1);
+
+#. Use asynchronous calls (the :gpudatastructure:`gpu::Stream <gpu-stream>`). By default whenever
+   you call a gpu function it will wait for the call to finish and return with the result
+   afterwards. However, it is possible to make asynchronous calls, meaning it will call for the
+   operation execution, make the costly data allocations for the algorithm and return back right
+   away. Now you can call another function if you wish to do so. For the MSSIM this is a small
+   optimization point. In our default implementation we split up the image into channels and call
+   then for each channel the gpu functions. A small degree of parallelization is possible with the
+   stream. By using a stream we can make the data allocation, upload operations while the GPU is
+   already executing a given method. For example we need to upload two images. We queue these one
+   after another and call already the function that processes it. The functions will wait for the
+   upload to finish, however while that happens makes the output buffer allocations for the function
+   to be executed next.
+
+   .. code-block:: cpp
+
+      gpu::Stream stream;
+
+      stream.enqueueConvert(b.gI1, b.t1, CV_32F);    // Upload
+
+      gpu::split(b.t1, b.vI1, stream);              // Methods (pass the stream as final parameter).
+      gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, stream);        // I1^2
+
+Result and conclusion
+=====================
+
+On an Intel P8700 laptop CPU paired with a low end NVidia GT220M here are the performance numbers:
+
+.. code-block:: cpp
+
+   Time of PSNR CPU (averaged for 10 runs): 41.4122 milliseconds. With result of: 19.2506
+   Time of PSNR GPU (averaged for 10 runs): 158.977 milliseconds. With result of: 19.2506
+   Initial call GPU optimized:              31.3418 milliseconds. With result of: 19.2506
+   Time of PSNR GPU OPTIMIZED ( / 10 runs): 24.8171 milliseconds. With result of: 19.2506
+
+   Time of MSSIM CPU (averaged for 10 runs): 484.343 milliseconds. With result of B0.890964 G0.903845 R0.936934
+   Time of MSSIM GPU (averaged for 10 runs): 745.105 milliseconds. With result of B0.89922 G0.909051 R0.968223
+   Time of MSSIM GPU Initial Call            357.746 milliseconds. With result of B0.890964 G0.903845 R0.936934
+   Time of MSSIM GPU OPTIMIZED ( / 10 runs): 203.091 milliseconds. With result of B0.890964 G0.903845 R0.936934
+
+In both cases we managed a performance increase of almost 100% compared to the CPU implementation.
+It may be just the improvement needed for your application to work. You may observe a runtime
+instance of this on the `YouTube here <https://www.youtube.com/watch?v=3_ESXmFlnvY>`_.
+
+.. raw:: html
+
+  <div align="center">
+  <iframe title="Similarity check (PNSR and SSIM) on the GPU" width="560" height="349" src="http://www.youtube.com/embed/3_ESXmFlnvY?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
+  </div>
diff --git a/doc/tutorials/highgui/table_of_content_highgui/table_of_content_highgui.rst b/doc/tutorials/highgui/table_of_content_highgui/table_of_content_highgui.rst
index 0ba7c323c..ef6eacce2 100644
--- a/doc/tutorials/highgui/table_of_content_highgui/table_of_content_highgui.rst
+++ b/doc/tutorials/highgui/table_of_content_highgui/table_of_content_highgui.rst
@@ -74,4 +74,4 @@ This section contains valuable tutorials about how to read/save your image/video
 
    ../trackbar/trackbar
    ../video-input-psnr-ssim/video-input-psnr-ssim
-   ../video-write/video-write
\ No newline at end of file
+   ../video-write/video-write
diff --git a/doc/tutorials/highgui/trackbar/trackbar.rst b/doc/tutorials/highgui/trackbar/trackbar.rst
index dabfa5e8c..5749123c1 100644
--- a/doc/tutorials/highgui/trackbar/trackbar.rst
+++ b/doc/tutorials/highgui/trackbar/trackbar.rst
@@ -152,8 +152,3 @@ Result
   .. image:: images/Adding_Trackbars_Tutorial_Result_1.jpg
      :alt: Adding Trackbars - Lena
      :align: center
-
-
-
-
-
diff --git a/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.rst b/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.rst
index 133a613ad..9277a101d 100644
--- a/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.rst
+++ b/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.rst
@@ -329,4 +329,3 @@ Result
 
      .. image:: images/Histogram_Calculation_Result.jpg
               :align: center
-
diff --git a/doc/tutorials/imgproc/histograms/template_matching/template_matching.rst b/doc/tutorials/imgproc/histograms/template_matching/template_matching.rst
index d8a91560e..afe15ee0d 100644
--- a/doc/tutorials/imgproc/histograms/template_matching/template_matching.rst
+++ b/doc/tutorials/imgproc/histograms/template_matching/template_matching.rst
@@ -369,4 +369,3 @@ Results
 
    .. image:: images/Template_Matching_Image_Result.jpg
             :align: center
-
diff --git a/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.rst b/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.rst
index 01c9050f2..ae2d0c8ac 100644
--- a/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.rst
+++ b/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.rst
@@ -282,6 +282,3 @@ Result
            :align: center
 
 * Notice how the image is superposed to the black background on the edge regions.
-
-
-
diff --git a/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst b/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst
index ecd4ba219..96257683e 100644
--- a/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst
+++ b/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst
@@ -40,7 +40,7 @@ Code
    * Display the detected circle in a window.
 
    .. |TutorialHoughCirclesSimpleDownload| replace:: here
-   .. _TutorialHoughCirclesSimpleDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/houghlines.cpp
+   .. _TutorialHoughCirclesSimpleDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/houghcircles.cpp
    .. |TutorialHoughCirclesFancyDownload| replace:: here
    .. _TutorialHoughCirclesFancyDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp
 
diff --git a/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.rst b/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.rst
index 6b872bff5..d716c0832 100644
--- a/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.rst
+++ b/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.rst
@@ -290,4 +290,3 @@ We get the following result by using the Probabilistic Hough Line Transform:
    :align: center
 
 You may observe that the number of lines detected vary while you change the *threshold*. The explanation is sort of evident: If you establish a higher threshold, fewer lines will be detected (since you will need more points to declare a line detected).
-
diff --git a/doc/tutorials/imgproc/imgtrans/remap/remap.rst b/doc/tutorials/imgproc/imgtrans/remap/remap.rst
index a8b9fdf74..a060aa779 100644
--- a/doc/tutorials/imgproc/imgtrans/remap/remap.rst
+++ b/doc/tutorials/imgproc/imgtrans/remap/remap.rst
@@ -311,4 +311,3 @@ Result
          :alt: Result 0 for remapping
          :width: 250pt
          :align: center
-
diff --git a/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.rst b/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.rst
index b60192978..d61b49337 100644
--- a/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.rst
+++ b/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.rst
@@ -306,4 +306,3 @@ Result
             :alt: Original image
             :width: 250pt
             :align: center
-
diff --git a/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.rst b/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.rst
index 6baa3a3d7..ca9d44546 100644
--- a/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.rst
+++ b/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.rst
@@ -279,4 +279,3 @@ Results
   .. image:: images/Morphology_2_Tutorial_Cover.jpg
      :alt: Morphology 2: Result sample
      :align: center
-
diff --git a/doc/tutorials/imgproc/pyramids/pyramids.rst b/doc/tutorials/imgproc/pyramids/pyramids.rst
index 00baae2ac..dacc5b93a 100644
--- a/doc/tutorials/imgproc/pyramids/pyramids.rst
+++ b/doc/tutorials/imgproc/pyramids/pyramids.rst
@@ -259,5 +259,3 @@ Results
   .. image:: images/Pyramids_Tutorial_PyrUp_Result.jpg
      :alt: Pyramids: PyrUp Result
      :align: center
-
-
diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.rst b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.rst
index f91730ec8..24c276dc3 100644
--- a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.rst
+++ b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.rst
@@ -121,4 +121,3 @@ Result
 
    .. |BRC_1|  image:: images/Bounding_Rects_Circles_Result.jpg
                     :align: middle
-
diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.rst b/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.rst
index 0986e1edc..9c7fbb85f 100644
--- a/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.rst
+++ b/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.rst
@@ -123,4 +123,3 @@ Result
 
    .. |BRE_1|  image:: images/Bounding_Rotated_Ellipses_Result.jpg
                     :align: middle
-
diff --git a/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.rst b/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.rst
index 1fca7df93..68cf80dc7 100644
--- a/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.rst
+++ b/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.rst
@@ -104,4 +104,3 @@ Result
 
    .. |contour_1|  image:: images/Find_Contours_Result.jpg
                      :align: middle
-
diff --git a/doc/tutorials/imgproc/shapedescriptors/hull/hull.rst b/doc/tutorials/imgproc/shapedescriptors/hull/hull.rst
index c1ed79cea..eb5d19e3b 100644
--- a/doc/tutorials/imgproc/shapedescriptors/hull/hull.rst
+++ b/doc/tutorials/imgproc/shapedescriptors/hull/hull.rst
@@ -113,4 +113,3 @@ Result
 
    .. |Hull_1|  image:: images/Hull_Result.jpg
                      :align: middle
-
diff --git a/doc/tutorials/imgproc/shapedescriptors/moments/moments.rst b/doc/tutorials/imgproc/shapedescriptors/moments/moments.rst
index 15ac2f51f..350ca38d4 100644
--- a/doc/tutorials/imgproc/shapedescriptors/moments/moments.rst
+++ b/doc/tutorials/imgproc/shapedescriptors/moments/moments.rst
@@ -133,4 +133,3 @@ Result
    .. |MU_2|  image:: images/Moments_Result2.jpg
                     :width: 250pt
                     :align: middle
-
diff --git a/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.rst b/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.rst
index b7f72c815..2d68cc247 100644
--- a/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.rst
+++ b/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.rst
@@ -114,4 +114,3 @@ Result
 
    .. |PPT_1|  image:: images/Point_Polygon_Test_Result.jpg
                     :align: middle
-
diff --git a/doc/tutorials/imgproc/table_of_content_imgproc/table_of_content_imgproc.rst b/doc/tutorials/imgproc/table_of_content_imgproc/table_of_content_imgproc.rst
index 1be239cfb..a371c02c8 100644
--- a/doc/tutorials/imgproc/table_of_content_imgproc/table_of_content_imgproc.rst
+++ b/doc/tutorials/imgproc/table_of_content_imgproc/table_of_content_imgproc.rst
@@ -539,6 +539,3 @@ In this section you will learn about the image processing (manipulation) functio
    ../shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses
    ../shapedescriptors/moments/moments
    ../shapedescriptors/point_polygon_test/point_polygon_test
-
-
-
diff --git a/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst b/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
index b6c859dc3..78566e7d2 100644
--- a/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
+++ b/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
@@ -48,10 +48,10 @@ The structure of package contents looks as follows:
 
 ::
 
-    OpenCV-2.4.5-android-sdk
+    OpenCV-2.4.6-android-sdk
     |_ apk
-    |   |_ OpenCV_2.4.5_binary_pack_armv7a.apk
-    |   |_ OpenCV_2.4.5_Manager_2.7_XXX.apk
+    |   |_ OpenCV_2.4.6_binary_pack_armv7a.apk
+    |   |_ OpenCV_2.4.6_Manager_2.9_XXX.apk
     |
     |_ doc
     |_ samples
@@ -98,7 +98,7 @@ The structure of package contents looks as follows:
 * :file:`doc` folder contains various OpenCV documentation in PDF format.
   It's also available online at http://docs.opencv.org.
 
-  .. note:: The most recent docs (nightly build) are at http://docs.opencv.org/trunk/.
+  .. note:: The most recent docs (nightly build) are at http://docs.opencv.org/2.4.
             Generally, it's more up-to-date, but can refer to not-yet-released functionality.
 
 .. TODO: I'm not sure that this is the best place to talk about OpenCV Manager
@@ -157,10 +157,10 @@ Get the OpenCV4Android SDK
 
    .. code-block:: bash
 
-      unzip ~/Downloads/OpenCV-2.4.5-android-sdk.zip
+      unzip ~/Downloads/OpenCV-2.4.6-android-sdk.zip
 
-.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.5-android-sdk.zip`
-.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.5/OpenCV-2.4.5-android-sdk.zip/download
+.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.6-android-sdk.zip`
+.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.6/OpenCV-2.4.6-android-sdk.zip/download
 .. |opencv_android_bin_pack_url| replace:: |opencv_android_bin_pack|
 .. |seven_zip| replace:: 7-Zip
 .. _seven_zip: http://www.7-zip.org/
@@ -295,7 +295,7 @@ Well, running samples from Eclipse is very simple:
   .. code-block:: sh
     :linenos:
 
-    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.5_Manager_2.7_armv7a-neon.apk
+    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.6_Manager_2.9_armv7a-neon.apk
 
   .. note:: ``armeabi``, ``armv7a-neon``, ``arm7a-neon-android8``, ``mips`` and ``x86`` stand for
             platform targets:
diff --git a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst
index 231fe5afa..243dc35dd 100644
--- a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst
+++ b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst
@@ -55,14 +55,14 @@ Manager to access OpenCV libraries externally installed in the target system.
    :guilabel:`File -> Import -> Existing project in your workspace`.
 
    Press :guilabel:`Browse`  button and locate OpenCV4Android SDK
-   (:file:`OpenCV-2.4.5-android-sdk/sdk`).
+   (:file:`OpenCV-2.4.6-android-sdk/sdk`).
 
    .. image:: images/eclipse_opencv_dependency0.png
         :alt: Add dependency from OpenCV library
         :align: center
 
 #. In application project add a reference to the OpenCV Java SDK in
-   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.5``.
+   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.6``.
 
    .. image:: images/eclipse_opencv_dependency1.png
         :alt: Add dependency from OpenCV library
@@ -101,7 +101,7 @@ See the "15-puzzle" OpenCV sample for details.
         public void onResume()
         {
             super.onResume();
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_5, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_6, this, mLoaderCallback);
         }
 
         ...
@@ -128,27 +128,27 @@ described above.
 #. Add the OpenCV library project to your workspace the same way as for the async initialization
    above. Use menu :guilabel:`File -> Import -> Existing project in your workspace`,
    press :guilabel:`Browse` button and select OpenCV SDK path
-   (:file:`OpenCV-2.4.5-android-sdk/sdk`).
+   (:file:`OpenCV-2.4.6-android-sdk/sdk`).
 
    .. image:: images/eclipse_opencv_dependency0.png
         :alt: Add dependency from OpenCV library
         :align: center
 
 #. In the application project add a reference to the OpenCV4Android SDK in
-   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.5``;
+   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.6``;
 
    .. image:: images/eclipse_opencv_dependency1.png
        :alt: Add dependency from OpenCV library
        :align: center
 
 #. If your application project **doesn't have a JNI part**, just copy the corresponding OpenCV
-   native libs from :file:`<OpenCV-2.4.5-android-sdk>/sdk/native/libs/<target_arch>` to your
+   native libs from :file:`<OpenCV-2.4.6-android-sdk>/sdk/native/libs/<target_arch>` to your
    project directory to folder :file:`libs/<target_arch>`.
 
    In case of the application project **with a JNI part**, instead of manual libraries copying you
    need to modify your ``Android.mk`` file:
    add the following two code lines after the ``"include $(CLEAR_VARS)"`` and before
-   ``"include path_to_OpenCV-2.4.5-android-sdk/sdk/native/jni/OpenCV.mk"``
+   ``"include path_to_OpenCV-2.4.6-android-sdk/sdk/native/jni/OpenCV.mk"``
 
    .. code-block:: make
       :linenos:
@@ -221,7 +221,7 @@ taken:
 
    .. code-block:: make
 
-      include C:\Work\OpenCV4Android\OpenCV-2.4.5-android-sdk\sdk\native\jni\OpenCV.mk
+      include C:\Work\OpenCV4Android\OpenCV-2.4.6-android-sdk\sdk\native\jni\OpenCV.mk
 
    Should be inserted into the :file:`jni/Android.mk` file **after** this line:
 
@@ -379,7 +379,7 @@ result.
        public void onResume()
        {
            super.onResume();
-           OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
+           OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_6, this, mLoaderCallback);
        }
 
 #. Defines that your activity implements ``CvViewFrameListener2`` interface and fix activity related
diff --git a/doc/tutorials/introduction/how_to_write_a_tutorial/how_to_write_a_tutorial.rst b/doc/tutorials/introduction/how_to_write_a_tutorial/how_to_write_a_tutorial.rst
index 26ec076f5..5ae506249 100644
--- a/doc/tutorials/introduction/how_to_write_a_tutorial/how_to_write_a_tutorial.rst
+++ b/doc/tutorials/introduction/how_to_write_a_tutorial/how_to_write_a_tutorial.rst
@@ -1 +1,440 @@
-.. _howToWriteTutorial:How to write a tutorial for OpenCV?***********************************Okay, so assume you have just finished a project of yours implementing something based on OpenCV and you want to present/share it with the community. Luckily, OpenCV is an *open source project*. This means that in theory anyone has access to the full source code and may extend it. While making a robust and practical library (like OpenCV) is great, the success of a library also depends on how user friendly it is. To improve on this aspect, the OpenCV team has already been listening to user feedback from its :opencv_group:`Yahoo user group <>` and by making samples you can find in the source directories sample folder. The addition of the tutorials (in both online and PDF format) is an extension of these efforts.Goal====.. _reST: http://docutils.sourceforge.net/rst.html.. |reST| replace:: reStructuredText.. |Sphinx| replace:: Sphinx.. _Sphinx: http://sphinx.pocoo.org/The tutorials are just as an important part of the library as  the implementation of those crafty data structures and algorithms you can find in OpenCV. Therefore, the source codes for the tutorials are part of the library. And yes, I meant source codes. The reason for this formulation is that the tutorials are written by using the |Sphinx|_ documentation generation system. This is based on the popular python documentation system called |reST|_ (reST). ReStructuredText is a really neat language that by using a few simple conventions (indentation, directives) and emulating old school e-mail writing techniques (text only) tries to offer a simple way to create and edit documents. Sphinx extends this with some new features and creates the resulting document in both HTML (for web) and PDF (for offline usage) format.Usually, an OpenCV tutorial has the following parts:1. A source code demonstration of an OpenCV feature:   a. One or more CPP, Python, Java or other type of files depending for what OpenCV offers support and for what language you make the tutorial.   #. Occasionaly, input resource files required for running your tutorials application.#. A table of content entry (so people may easily find the tutorial):   a. Adding your stuff to the tutorials table of content (**reST** file).   #. Add an image file near the TOC entry.#. The content of the tutorial itself:   a. The **reST** text of the tutorial   #. Images following the idea that "*A picture is worth a thousand words*".   #. For more complex demonstrations you may create a video.As you can see you will need at least some basic knowledge of the *reST* system in order to complete the task at hand with success. However, don't worry *reST* (and *Sphinx*) was made with simplicity in mind. It is easy to grasp its basics. I found that the `OpenAlea documentations introduction on this subject <http://openalea.gforge.inria.fr/doc/openalea/doc/_build/html/source/tutorial/rest_syntax.html>`_ (or the `Thomas Cokelaer one <http://thomas-cokelaer.info/tutorials/sphinx/rest_syntax.html>`_ ) should enough for this. If for some directive or feature you need a more in-depth description look it up in the official |reST|_ help files or at the |Sphinx|_ documentation.In our world achieving some tasks is possible in multiple ways. However, some of the roads to take may have obvious or hidden advantages over others. Then again, in some other cases it may come down to just simple user preference. Here, I'll present how I decided to write the tutorials, based on my personal experience. If for some of them you know a better solution and you can back it up feel free to use that. I've nothing against it, as long as it gets the job done in an elegant fashion.Now the best would be if you could make the integration yourself. For this you need first to have the source code. I recommend following the guides for your operating system on acquiring OpenCV sources. For Linux users look :ref:`here <Linux-Installation>` and for :ref:`Windows here <Windows_Installation>`. You must also install python and sphinx with its dependencies in order to be able to build the documentation.Once you have downloaded the repository to your hard drive you can take a look in the OpenCV directory to make sure you have both the samples and doc folder present. Anyone may download the trunk source files from  :file:`git://code.opencv.org/opencv.git` . Nevertheless, not everyone has upload (commit/submit) rights. This is to protect the integrity of the library. If you plan doing more than one tutorial, and would like to have an account with commit user rights you should first register an account at http://code.opencv.org/ and then contact dr. Gary Bradski at -delete-bradski@-delete-willowgarage.com. Otherwise, you can just send the resulting files to us via the :opencv_group:`Yahoo user group <>` or to me at -delete-bernat@-delete-primeranks.net and I'll add it. If you have questions, suggestions or constructive critics I will gladly listen to them. If you send it to the OpenCV group please tag its subject with a **[Tutorial]** entry.Format the Source Code======================Before I start this let it be clear: the main goal is to have a working sample code. However, for your tutorial to be of a top notch quality you should follow a few guide lines I am going to present here.In case you have an application by using the older interface (with *IplImage*, *CVMat*, *cvLoadImage* and such) consider migrating it to the new C++ interface. The tutorials are intended to be an up to date help for our users. And as of OpenCV 2 the OpenCV emphasis on using the less error prone and clearer C++ interface. Therefore, if possible please convert your code to the C++ interface. For this it may help to read the :ref:`InteroperabilityWithOpenCV1` tutorial. However, once you have an OpenCV 2 working code, then you should make your source code snippet as easy to read as possible. Here're a couple of advices for this:.. container:: enumeratevisibleitemswithsquare   + Add a standard output with the description of what your program does. Keep it short and yet, descriptive. This output is at the start of the program. In my example files this usually takes the form of a *help* function containing the output. This way both the source file viewer and application runner can see what all is about in your sample. Here's an instance of this:     .. code-block:: cpp        void help()        {        cout        << "--------------------------------------------------------------------------"   << endl        << "This program shows how to write video files. You can extract the R or G or B color channel "        << " of the input video. You can choose to use the source codec (Y) or select a custom one. (N)"<< endl        << "Usage:"                                                                       << endl        << "./video-write inputvideoName [ R | G | B] [Y | N]"                            << endl        << "--------------------------------------------------------------------------"   << endl        << endl;        }        // ...        int main(int argc, char *argv[], char *window_name)        {        help();        // here comes the actual source code        }     Additionally, finalize the description with a short usage guide. This way the user will know how to call your programs, what leads us to the next point.   + Prefer command line argument controlling instead of hard coded one. If your program has some variables that may be changed use command line arguments for this. The tutorials, can be a simple try-out ground for the user. If you offer command line controlling for the input image (for example), then you offer the possibility for the user to try it out with his/her own images, without the need to mess in the source code. In the upper example you can see that the input image, channel and codec selection may all be changed from the command line. Just compile the program and run it with your own input arguments.   + Be as verbose as possible. There is no shame in filling the source code with comments. This way the more advanced user may figure out what's happening right from the sample code. This advice goes for the output console too. Specify to the user what's happening. Never leave the user hanging there and thinking on: "Is this program now crashing or just doing some computationally intensive task?." So, if you do a training task that may take some time, make sure you print out a message about this before starting and after finishing it.   + Throw out unnecessary stuff from your source code. This is a warning to not take the previous point too seriously. Balance is the key. If it's something that can be done in a fewer lines or simpler than that's the way you should do it. Nevertheless, if for some reason you have such sections notify the user why you have chosen to do so. Keep the amount of information as low as possible, while still getting the job done in an elegant way.   + Put your sample file into the :file:`opencv/samples/cpp/tutorial_code/sectionName` folder. If you write a tutorial for other languages than cpp, then change that part of the path. Before completing this you need to decide that to what section (module) does your tutorial goes. Think about on what module relies most heavily your code and that is the one to use. If the answer to this question is more than one modules then the *general* section is the one to use. For finding the *opencv* directory open up your file system and navigate where you downloaded our repository.   + If the input resources are hard to acquire for the end user consider adding a few of them to the :file:`opencv/samples/cpp/tutorial_code/images`. Make sure that who reads your code can try it out!Add the TOC entry=================For this you will need to know some |reST|_. There is no going around this. |reST|_ files have **rst** extensions. However, these are simple text files. Use any text editor you like. Finding a text editor that offers syntax highlighting for |reST|_ was quite a challenge at the time of writing this tutorial. In my experience, `Intype <http://intype.info/>`_ is a solid option on Windows, although there is still place for improvement.Adding your source code to a table of content is important for multiple reasons. First and foremost this will allow for the user base to find your tutorial from our websites tutorial table of content. Secondly, if you omit this *Sphinx* will throw a warning that your tutorial file isn't part of any TOC tree entry. And there is nothing more than the developer team hates than an ever increasing warning/error list for their builds. *Sphinx* also uses this to build up the previous-back-up buttons on the website. Finally, omitting this step will lead to that your tutorial will **not** be added to the PDF version of the tutorials.Navigate to the :file:`opencv/doc/tutorials/section/table_of_content_section` folder (where the section is the module to which you're adding the tutorial). Open the *table_of_content_section* file. Now this may have two forms. If no prior tutorials are present in this section that there is a template message about this and has the following form:.. code-block:: rst  .. _Table-Of-Content-Section:   Section title   -----------------------------------------------------------   Description about the section.   .. include:: ../../definitions/noContent.rst   .. raw:: latex      \pagebreakThe first line is a reference to the section title in the reST system. The section title will be a link and you may refer to it via the ``:ref:`` directive. The *include* directive imports the template text from the definitions directories *noContent.rst* file. *Sphinx* does not creates the PDF from scratch. It does this by first creating a latex file. Then creates the PDF from the latex file. With the *raw* directive you can directly add to this output commands. Its unique argument is for what kind of output to add the content of the directive. For the PDFs it may happen that multiple sections will overlap on a single page. To avoid this at the end of the TOC we add a *pagebreak* latex command, that hints to the LATEX system that the next line should be on a new page.If you have one of this, try to transform it to the following form:.. include:: ../../definitions/tocDefinitions.rst.. code-block:: rst   .. _Table-Of-Content-Section:   Section title   -----------------------------------------------------------   .. include:: ../../definitions/tocDefinitions.rst   +     .. tabularcolumns:: m{100pt} m{300pt}     .. cssclass:: toctableopencv     =============== ======================================================      |MatBasicIma|  **Title:** :ref:`matTheBasicImageContainer`                     *Compatibility:* > OpenCV 2.0                     *Author:* |Author_BernatG|                     You will learn how to store images in the memory and how to print out their content to the console.     =============== =====================================================     .. |MatBasicIma| image:: images/matTheBasicImageStructure.jpg                      :height: 90pt                      :width:  90pt   .. raw:: latex      \pagebreak   .. toctree::      :hidden:      ../mat - the basic image container/mat - the basic image containerIf this is already present just add a new section of the content between the include and the raw directives (excluding those lines). Here you'll see a new include directive. This should be present only once in a TOC tree and the reST file contains the definitions of all the authors contributing to the OpenCV tutorials. We are a multicultural community and some of our name may contain some funky characters. However, reST **only supports** ANSI characters. Luckily we can specify Unicode characters with the *unicode* directive. Doing this for all of your tutorials is a troublesome procedure. Therefore, the tocDefinitions file contains the definition of your author name. Add it here once and afterwards just use the replace construction. For example here's the definition for my name:.. code-block:: rst   .. |Author_BernatG| unicode:: Bern U+00E1 t U+0020 G U+00E1 borThe ``|Author_BernatG|`` is the text definitions alias. I can use later this to add the definition, like I've done in the TOCs *Author* part. After the ``::`` and a space you start the definition. If you want to add an UNICODE character (non-ASCI) leave an empty space and specify it in the format U+(UNICODE code). To find the UNICODE code of a character I recommend using the `FileFormat <http://www.fileformat.info>`_ websites service. Spaces are trimmed from the definition, therefore we add a space by its UNICODE character (U+0020).Until the *raw* directive what you can see is a TOC tree entry. Here's how a TOC entry will look like:+  .. tabularcolumns:: m{100pt} m{300pt}  .. cssclass:: toctableopencv  =============== ======================================================   |MatBasicIma|  **Title:** :ref:`matTheBasicImageContainer`                  *Compatibility:* > OpenCV 2.0                  *Author:* |Author_BernatG|                  You will learn how to store images in the memory and how to print out their content to the console.  =============== ======================================================  .. |MatBasicIma| image:: images/matTheBasicImageStructure.jpg                   :height: 90pt                   :width:  90ptAs you can see we have an image to the left and a description box to the right. To create two boxes we use a table with two columns and a single row. In the left column is the image and in the right one the description. However, the image directive is way too long to fit in a column. Therefore, we need to use the substitution definition system. We add this definition after the TOC tree. All images for the TOC tree are to be put in the images folder near its |reST|_ file. We use the point measurement system because we are also creating PDFs. PDFs are printable documents, where there is no such thing that pixels (px), just points (pt). And while generally space is no problem for web pages (we have monitors with **huge** resolutions) the size of the paper (A4 or letter) is constant and will be for a long time in the future. Therefore, size constrains come in play more like for the PDF, than the generated HTML code.Now your images should be as small as possible, while still offering the intended information for the user. Remember that the tutorial will become part of the OpenCV source code. If you add large images (that manifest in form of large image size) it will just increase the size of the repository pointlessly. If someone wants to download it later, its download time will be that much longer. Not to mention the larger PDF size for the tutorials and the longer load time for the web pages. In terms of pixels a TOC image should not be larger than 120 X 120 pixels. Resize your images if they are larger!.. note::   If you add a larger image and specify a smaller image size, *Sphinx* will not resize that. At build time will add the full size image and the resize will be done by your browser after the image is loaded. A 120 X 120 image is somewhere below 10KB. If you add a 110KB image, you have just pointlessly added a 100KB extra data to transfer over the internet for every user!Generally speaking you shouldn't need to specify your images size (excluding the TOC entries). If no such is found *Sphinx* will use the size of the image itself (so no resize occurs). Then again if for some reason you decide to specify a size that should be the **width** of the image rather than its height. The reason for this again goes back to the PDFs. On a PDF page the height is larger than the width. In the PDF the images will not be resized. If you specify a size that does not fit in the page, then what does not fits in **will be cut off**. When creating your images for your tutorial you should try to keep the image widths below 500 pixels, and calculate with around 400 point page width when specifying image widths.The image format depends on the content of the image. If you have some complex scene (many random like colors) then use *jpg*. Otherwise, prefer using *png*. They are even some tools out there that optimize the size of *PNG* images, such as `PNGGauntlet <http://pnggauntlet.com/>`_. Use them to make your images as small as possible in size.Now on the right side column of the table we add the information about the tutorial:.. container:: enumeratevisibleitemswithsquare   + In the first line it is the title of the tutorial. However, there is no need to specify it explicitly. We use the reference system. We'll start up our tutorial with a reference specification, just like in case of this TOC entry with its  `` .. _Table-Of-Content-Section:`` . If after this you have a title (pointed out by the following line of -), then Sphinx will replace the ``:ref:`Table-Of-Content-Section``` directive with the tile of the section in reference form (creates a link in web page). Here's how the definition looks in my case:     .. code-block:: rst        .. _matTheBasicImageContainer:           Mat - The Basic Image Container           *******************************     Note, that according to the |reST|_ rules the * should be as long as your title.   + Compatibility. What version of OpenCV is required to run your sample code.   + Author. Use the substitution markup of |reST|_.   + A short sentence describing the essence of your tutorial.Now before each TOC entry you need to add the three lines of:.. code-block:: cpp   +     .. tabularcolumns:: m{100pt} m{300pt}     .. cssclass:: toctableopencvThe plus sign (+) is to enumerate tutorials by using bullet points. So for every TOC entry we have a corresponding bullet point represented by the +. Sphinx is highly indenting sensitive. Indentation is used to express from which point until to which point does a construction last. Un-indentation means end of that construction. So to keep all the bullet points to the same group the following TOC entries (until the next +) should be indented by two spaces.Here, I should also mention that **always** prefer using spaces instead of tabs. Working with only spaces makes possible that if we both use monotype fonts we will see the same thing. Tab size is text editor dependent and as should be avoided. *Sphinx* translates all tabs into 8 spaces before interpreting it.It turns out that the automatic formatting of both the HTML and PDF(LATEX) system messes up our tables. Therefore, we need to help them out a little. For the PDF generation we add the ``.. tabularcolumns:: m{100pt} m{300pt}`` directive. This means that the first column should be 100 points wide and middle aligned. For the HTML look we simply name the following table of a *toctableopencv* class type. Then, we can modify the look of the table by modifying the CSS of our web page. The CSS definitions go into the :file:`opencv/doc/_themes/blue/static/default.css_t` file... code-block:: css   .toctableopencv   {    width: 100% ;    table-layout: fixed;   }   .toctableopencv colgroup col:first-child   {    width: 100pt !important;    max-width: 100pt !important;    min-width: 100pt !important;   }   .toctableopencv colgroup col:nth-child(2)   {    width: 100% !important;   }However, you should not need to modify this. Just add these three lines (plus keep the two space indentation) for all TOC entries you add. At the end of the TOC file you'll find:.. code-block:: rst   .. raw:: latex      \pagebreak   .. toctree::      :hidden:      ../mat - the basic image container/mat - the basic image containerThe page break entry comes for separating sections and should be only one in a TOC tree |reST|_ file. Finally, at the end of the TOC tree we need to add our tutorial to the *Sphinx* TOC tree system. *Sphinx* will generate from this the previous-next-up information for the HTML file and add items to the PDF according to the order here. By default this TOC tree directive generates a simple table of contents. However, we already created a fancy looking one so we no longer need this basic one. Therefore, we add the *hidden* option to do not show it.The path is of a relative type. We step back in the file system and then go into the :file:`mat - the basic image container` directory for the :file:`mat - the basic image container.rst` file. Putting out the *rst* extension for the file is optional.Write the tutorial==================Create a folder with the name of your tutorial. Preferably, use small letters only. Then create a text file in this folder with *rst* extension and the same name. If you have images for the tutorial create an :file:`images` folder and add your images there. When creating your images follow the guidelines described in the previous part!Now here's our recommendation for the structure of the tutorial (although, remember that this is not carved in the stone; if you have a better idea, use it!):.. container:: enumeratevisibleitemswithsquare   + Create the reference point and the title.     .. code-block:: rst        .. _matTheBasicImageContainer:        Mat - The Basic Image Container        *******************************     You start the tutorial by specifying a reference point by the ``.. _matTheBasicImageContainer:`` and then its title. The name of the reference point should be a unique one over the whole documentation. Therefore, do not use general names like *tutorial1*. Use the * character to underline the title for its full width. The subtitles of the tutorial should be underlined with = charachter.   + Goals. You start your tutorial by specifying what you will present. You can also enumerate the sub jobs to be done. For this you can use a bullet point construction. There is a single configuration file for both the reference manual and the tutorial documentation. In the reference manuals at the argument enumeration we do not want any kind of bullet point style enumeration. Therefore, by default all the bullet points at this level are set to do not show the dot before the entries in the HTML. You can override this by putting the bullet point in a container. I've defined a square type bullet point view under the name *enumeratevisibleitemswithsquare*. The CSS style definition for this is again in the  :file:`opencv\doc\_themes\blue\static\default.css_t` file. Here's a quick example of using it:     .. code-block:: rst        .. container:: enumeratevisibleitemswithsquare           + Create the reference point and the title.           + Second entry           + Third entry     Note that you need the keep the indentation of the container directive. Directive indentations are always three (3) spaces. Here you may even give usage tips for your sample code.   + Source code. Present your samples code to the user. It's a good idea to offer a quick download link for the HTML page by using the *download* directive and pointing out where the user may find your source code in the file system by using the *file* directive:     .. code-block:: rst        Text :file:`samples/cpp/tutorial_code/highgui/video-write/` folder of the OpenCV source library        or :download:`text to appear in the webpage        <../../../../samples/cpp/tutorial_code/HighGUI/video-write/video-write.cpp>`.     For the download link the path is a relative one, hence the multiple back stepping operations (..). Then you can add the source code either by using the *code block* directive or the *literal include* one. In case of the code block you will need to actually add all the source code text into your |reST|_ text and also apply the required indentation:     .. code-block:: rst        .. code-block:: cpp           int i = 0;           l = ++j;     The only argument of the directive is the language used (here CPP). Then you add the source code into its content (meaning one empty line after the directive) by keeping the indentation of the directive (3 spaces). With the *literal include* directive you do not need to add the source code of the sample. You just specify the sample and *Sphinx* will load it for you, during build time. Here's an example usage:     .. code-block:: rst        .. literalinclude:: ../../../../samples/cpp/tutorial_code/HighGUI/video-write/video-write.cpp           :language: cpp           :linenos:           :tab-width: 4           :lines: 1-8, 21-22, 24-     After the directive you specify a relative path to the file from what to import. It has four options: the language to use, if you add the ``:linenos:`` the line numbers will be shown, you can specify the tab size with the ``:tab-width:`` and you do not need to load the whole file, you can show just the important lines. Use the *lines* option to do not show redundant information (such as the *help* function). Here basically you specify ranges, if the second range line number is missing than that means that until the end of the file. The ranges specified here do no need to be in an ascending order, you may even reorganize the structure of how you want to show your sample inside the tutorial.   + The tutorial. Well here goes the explanation for why and what have you used. Try to be short, clear, concise and yet a thorough one. There's no magic formula. Look into a few already made tutorials and start out from there. Try to mix sample OpenCV code with your explanations. If with words is hard to describe something do not hesitate to add in a reasonable size image, to overcome this issue.     When you present OpenCV functionality it's a good idea to give a link to the used OpenCV data structure or function. Because the OpenCV tutorials and reference manual are in separate PDF files it is not possible to make this link work for the PDF format. Therefore, we use here only web page links to the **opencv.itseez.com** website. The OpenCV functions and data structures may be used for multiple tasks. Nevertheless, we want to avoid that every users creates its own reference to a commonly used function. So for this we use the global link collection of *Sphinx*. This is defined in the file:`opencv/doc/conf.py` configuration file. Open it and go all the way down to the last entry:     .. code-block:: py       # ---- External links for tutorials -----------------       extlinks = {           'hgvideo' : ('http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None)           }     In short here we defined a new **hgvideo** directive that refers to an external webpage link. Its usage is:     .. code-block:: rst       A sample function of the highgui modules image write and read page is the :hgvideo:`imread() function <imread>`.     Which turns to: A sample function of the highgui modules image write and read page is the :hgvideo:`imread() function <imread>`. The argument you give between the <> will be put in place of the ``%s`` in the upper definition, and as the link will anchor to the correct function. To find out the anchor of a given function just open up a web page, search for the function and click on it. In the address bar it should appear like: ``http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#imread`` .  Look here for the name of the directives for each page of the OpenCV reference manual. If none present for one of them feel free to add one for it.     For formulas you can add LATEX code that will translate in the web pages into images. You do this by using the *math* directive. A usage tip:     .. code-block:: latex        .. math::           MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}     That after build turns into:     .. math::        MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}     You can even use it inline as ``:math:` MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}``` that turns into :math:`MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}`.     If you use some crazy LATEX library extension you need to add those to the ones to use at build time. Look into the file:`opencv/doc/conf.py` configuration file for more information on this.   + Results. Well, here depending on your program show one of more of the following:     - Console outputs by using the code block directive.     - Output images.     - Runtime videos, visualization. For this use your favorite screens capture software. `Camtasia Studio <http://www.techsmith.com/camtasia/>`_ certainly is one of the better choices, however their prices are out of this world. `CamStudio <http://camstudio.org/>`_ is a free alternative, but less powerful. If you do a video you can upload it to YouTube and then use the raw directive with HTML option to embed it into the generated web page:       .. code-block:: rst          You may observe a runtime instance of this on the `YouTube here <https://www.youtube.com/watch?v=jpBwHxsl1_0>`_.          .. raw:: html             <div align="center">             <iframe title="Creating a video with OpenCV" width="560" height="349" src="http://www.youtube.com/embed/jpBwHxsl1_0?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>             </div>       This results in the text and video: You may observe a runtime instance of this on the `YouTube here <https://www.youtube.com/watch?v=jpBwHxsl1_0>`_.       .. raw:: html          <div align="center">          <iframe title="Creating a video with OpenCV" width="560" height="349" src="http://www.youtube.com/embed/jpBwHxsl1_0?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>          </div>     When these aren't self-explanatory make sure to throw in a few guiding lines about what and why we can see.   + Build the documentation and check for errors or warnings. In the CMake make sure you check or pass the option for building documentation. Then simply build the **docs** project for the PDF file and the **docs_html** project for the web page. Read the output of the build and check for errors/warnings for what you have added. This is also the time to observe and correct any kind of *not so good looking* parts. Remember to keep clean our build logs.   + Read again your tutorial and check for both programming and spelling errors. If found any, please correct them.Take home the pride and joy of a job well done!===============================================Once you are done contact me or dr. Gary Bradski with the tutorial. We may submit the tutorial ourselves to the trunk branch of our repository or ask you to do so.Now, to see your work **live** you may need to wait some time. The PDFs are updated usually at the launch of a new OpenCV version. The web pages are a little more diverse. They are automatically rebuilt in each evening. However, the **opencv.itseez.com** website contains only the most recent **stable branch** of OpenCV. Currently this is 2.3. When we add something new (like a tutorial) that first goes to the **trunk branch** of our repository. A build of this you may find on the **opencv.itseez.com/trunk** website. Although, we try to make a build every night occasionally we might freeze any of the branches to fix upcoming issues. During this it may take a little longer to see your work *live*, however if you submited it, be sure that eventually it will show up.If you have any questions or advices relating to this tutorial you can contact me at -delete-bernat@-delete-primeranks.net. Of course, delete the -delete- parts of that e-mail address.
\ No newline at end of file
+.. _howToWriteTutorial:
+
+How to write a tutorial for OpenCV
+**********************************
+
+Okay, so assume you have just finished a project of yours implementing something
+based on OpenCV and you want to present/share it with the community. Luckily, OpenCV
+is an *open source project*. This means that anyone has access to the full source
+code and may propose extensions. And a good tutorial is a valuable addition to the
+library! Please read instructions on contribution process here:
+http://opencv.org/contribute.html. You may also find this page helpful:
+:how_to_contribute:`How to contribute <>`.
+
+While making a robust and practical library (like OpenCV) is great, the success of a
+library also depends on how user friendly it is. To improve on this aspect, the
+OpenCV team has already been listening to user feedback at :opencv_qa:`OpenCV Q&A
+forum <>` and by making samples you can find in the source directories
+:file:`samples` folder. The addition of the tutorials (in both online and PDF format)
+is an extension of these efforts.
+
+Goal
+====
+
+.. _reST: http://docutils.sourceforge.net/rst.html
+.. |reST| replace:: reStructuredText
+.. |Sphinx| replace:: Sphinx
+.. _Sphinx: http://sphinx.pocoo.org/
+
+The tutorials are just as an important part of the library as the implementation of
+those crafty data structures and algorithms you can find in OpenCV. Therefore, the
+source codes for the tutorials are part of the library. And yes, I meant source
+codes. The reason for this formulation is that the tutorials are written by using the
+|Sphinx|_ documentation generation system. This is based on the popular Python
+documentation system called |reST|_ (reST). ReStructuredText is a really neat
+language that by using a few simple conventions (indentation, directives) and
+emulating old school email writing techniques (text only) tries to offer a simple
+way to create and edit documents. Sphinx extends this with some new features and
+creates the resulting document in both HTML (for web) and PDF (for offline usage)
+format.
+
+
+Usually, an OpenCV tutorial has the following parts:
+
+1. A source code demonstration of an OpenCV feature:
+
+   a. One or more CPP, Python, Java or other type of files depending for what OpenCV offers support and for what language you make the tutorial.
+   #. Occasionaly, input resource files required for running your tutorials application.
+
+
+#. A table of content entry (so people may easily find the tutorial):
+
+   a. Adding your stuff to the tutorials table of content (**reST** file).
+   #. Add an image file near the TOC entry.
+
+
+#. The content of the tutorial itself:
+
+   a. The **reST** text of the tutorial
+   #. Images following the idea that "*A picture is worth a thousand words*".
+   #. For more complex demonstrations you may create a video.
+
+As you can see you will need at least some basic knowledge of the *reST* system in order to complete the task at hand with success. However, don't worry *reST* (and *Sphinx*) was made with simplicity in mind. It is easy to grasp its basics. I found that the `OpenAlea documentations introduction on this subject <http://openalea.gforge.inria.fr/doc/openalea/doc/_build/html/source/tutorial/rest_syntax.html>`_ (or the `Thomas Cokelaer one <http://thomas-cokelaer.info/tutorials/sphinx/rest_syntax.html>`_ ) should enough for this. If for some directive or feature you need a more in-depth description look it up in the official |reST|_ help files or at the |Sphinx|_ documentation.
+
+In our world achieving some tasks is possible in multiple ways. However, some of the roads to take may have obvious or hidden advantages over others. Then again, in some other cases it may come down to just simple user preference. Here, I'll present how I decided to write the tutorials, based on my personal experience. If for some of them you know a better solution and you can back it up feel free to use that. I've nothing against it, as long as it gets the job done in an elegant fashion.
+
+Now the best would be if you could make the integration yourself. For this you need first to have the source code. I recommend following the guides for your operating system on acquiring OpenCV sources. For Linux users look :ref:`here <Linux-Installation>` and for :ref:`Windows here <Windows_Installation>`. You must also install python and sphinx with its dependencies in order to be able to build the documentation.
+
+Once you have downloaded the repository to your hard drive you can take a look in the OpenCV directory to make sure you have both the samples and doc folder present. Anyone may download the latest source files from :file:`git://github.com/Itseez/opencv.git` . Nevertheless, not everyone has upload (commit/submit) rights. This is to protect the integrity of the library. If you plan doing more than one tutorial, and would like to have an account with commit user rights you should first register an account at http://code.opencv.org/ and then contact OpenCV administrator -delete-admin@-delete-opencv.org. Otherwise, you can just send the resulting files to us at -delete-admin@-delete-opencv.org and we'll add it.
+
+
+Format the Source Code
+======================
+
+Before I start this let it be clear: the main goal is to have a working sample code. However, for your tutorial to be of a top notch quality you should follow a few guide lines I am going to present here. In case you have an application by using the older interface (with *IplImage*, *cvMat*, *cvLoadImage* and such) consider migrating it to the new C++ interface. The tutorials are intended to be an up to date help for our users. And as of OpenCV 2 the OpenCV emphasis on using the less error prone and clearer C++ interface. Therefore, if possible please convert your code to the C++ interface. For this it may help to read the :ref:`InteroperabilityWithOpenCV1` tutorial. However, once you have an OpenCV 2 working code, then you should make your source code snippet as easy to read as possible. Here're a couple of advices for this:
+
+
+.. container:: enumeratevisibleitemswithsquare
+
+   + Add a standard output with the description of what your program does. Keep it short and yet, descriptive. This output is at the start of the program. In my example files this usually takes the form of a *help* function containing the output. This way both the source file viewer and application runner can see what all is about in your sample. Here's an instance of this:
+
+     .. code-block:: cpp
+
+        void help()
+        {
+        cout
+        << "--------------------------------------------------------------------------"   << endl
+        << "This program shows how to write video files. You can extract the R or G or B color channel "
+        << " of the input video. You can choose to use the source codec (Y) or select a custom one. (N)"<< endl
+        << "Usage:"                                                                       << endl
+        << "./video-write inputvideoName [ R | G | B] [Y | N]"                            << endl
+        << "--------------------------------------------------------------------------"   << endl
+        << endl;
+        }
+        // ...
+        int main(int argc, char *argv[], char *window_name)
+        {
+        help();
+        // here comes the actual source code
+        }
+
+     Additionally, finalize the description with a short usage guide. This way the user will know how to call your programs, what leads us to the next point.
+
+   + Prefer command line argument controlling instead of hard coded one. If your program has some variables that may be changed use command line arguments for this. The tutorials, can be a simple try-out ground for the user. If you offer command line controlling for the input image (for example), then you offer the possibility for the user to try it out with his/her own images, without the need to mess in the source code. In the upper example you can see that the input image, channel and codec selection may all be changed from the command line. Just compile the program and run it with your own input arguments.
+
+   + Be as verbose as possible. There is no shame in filling the source code with comments. This way the more advanced user may figure out what's happening right from the sample code. This advice goes for the output console too. Specify to the user what's happening. Never leave the user hanging there and thinking on: "Is this program now crashing or just doing some computationally intensive task?." So, if you do a training task that may take some time, make sure you print out a message about this before starting and after finishing it.
+
+   + Throw out unnecessary stuff from your source code. This is a warning to not take the previous point too seriously. Balance is the key. If it's something that can be done in a fewer lines or simpler than that's the way you should do it. Nevertheless, if for some reason you have such sections notify the user why you have chosen to do so. Keep the amount of information as low as possible, while still getting the job done in an elegant way.
+
+   + Put your sample file into the :file:`opencv/samples/cpp/tutorial_code/sectionName` folder. If you write a tutorial for other languages than cpp, then change that part of the path. Before completing this you need to decide that to what section (module) does your tutorial goes. Think about on what module relies most heavily your code and that is the one to use. If the answer to this question is more than one modules then the *general* section is the one to use. For finding the *opencv* directory open up your file system and navigate where you downloaded our repository.
+
+   + If the input resources are hard to acquire for the end user consider adding a few of them to the :file:`opencv/samples/cpp/tutorial_code/images`. Make sure that who reads your code can try it out!
+
+Add the TOC entry
+=================
+
+For this you will need to know some |reST|_. There is no going around this. |reST|_ files have **rst** extensions. However, these are simple text files. Use any text editor you like. Finding a text editor that offers syntax highlighting for |reST|_ was quite a challenge at the time of writing this tutorial. In my experience, `Intype <http://intype.info/>`_ is a solid option on Windows, although there is still place for improvement.
+
+Adding your source code to a table of content is important for multiple reasons. First and foremost this will allow for the user base to find your tutorial from our websites tutorial table of content. Secondly, if you omit this *Sphinx* will throw a warning that your tutorial file isn't part of any TOC tree entry. And there is nothing more than the developer team hates than an ever increasing warning/error list for their builds. *Sphinx* also uses this to build up the previous-back-up buttons on the website. Finally, omitting this step will lead to that your tutorial will **not** be added to the PDF version of the tutorials.
+
+Navigate to the :file:`opencv/doc/tutorials/section/table_of_content_section` folder (where the section is the module to which you're adding the tutorial). Open the *table_of_content_section* file. Now this may have two forms. If no prior tutorials are present in this section that there is a template message about this and has the following form:
+
+.. code-block:: rst
+
+  .. _Table-Of-Content-Section:
+
+   Section title
+   -----------------------------------------------------------
+
+   Description about the section.
+
+   .. include:: ../../definitions/noContent.rst
+
+   .. raw:: latex
+
+      \pagebreak
+
+The first line is a reference to the section title in the reST system. The section title will be a link and you may refer to it via the ``:ref:`` directive. The *include* directive imports the template text from the definitions directories *noContent.rst* file. *Sphinx* does not creates the PDF from scratch. It does this by first creating a latex file. Then creates the PDF from the latex file. With the *raw* directive you can directly add to this output commands. Its unique argument is for what kind of output to add the content of the directive. For the PDFs it may happen that multiple sections will overlap on a single page. To avoid this at the end of the TOC we add a *pagebreak* latex command, that hints to the LATEX system that the next line should be on a new page.
+
+If you have one of this, try to transform it to the following form:
+
+.. include:: ../../definitions/tocDefinitions.rst
+
+.. code-block:: rst
+
+   .. _Table-Of-Content-Section:
+
+   Section title
+   -----------------------------------------------------------
+
+   .. include:: ../../definitions/tocDefinitions.rst
+
+   +
+     .. tabularcolumns:: m{100pt} m{300pt}
+     .. cssclass:: toctableopencv
+
+     =============== ======================================================
+      |MatBasicIma|  **Title:** :ref:`matTheBasicImageContainer`
+
+                     *Compatibility:* > OpenCV 2.0
+
+                     *Author:* |Author_BernatG|
+
+                     You will learn how to store images in the memory and how to print out their content to the console.
+
+     =============== =====================================================
+
+     .. |MatBasicIma| image:: images/matTheBasicImageStructure.jpg
+                      :height: 90pt
+                      :width:  90pt
+
+   .. raw:: latex
+
+      \pagebreak
+
+   .. toctree::
+      :hidden:
+
+      ../mat - the basic image container/mat - the basic image container
+
+If this is already present just add a new section of the content between the include and the raw directives (excluding those lines). Here you'll see a new include directive. This should be present only once in a TOC tree and the reST file contains the definitions of all the authors contributing to the OpenCV tutorials. We are a multicultural community and some of our name may contain some funky characters. However, reST **only supports** ANSI characters. Luckily we can specify Unicode characters with the *unicode* directive. Doing this for all of your tutorials is a troublesome procedure. Therefore, the tocDefinitions file contains the definition of your author name. Add it here once and afterwards just use the replace construction. For example here's the definition for my name:
+
+.. code-block:: rst
+
+   .. |Author_BernatG| unicode:: Bern U+00E1 t U+0020 G U+00E1 bor
+
+The ``|Author_BernatG|`` is the text definitions alias. I can use later this to add the definition, like I've done in the TOCs *Author* part. After the ``::`` and a space you start the definition. If you want to add an UNICODE character (non-ASCI) leave an empty space and specify it in the format U+(UNICODE code). To find the UNICODE code of a character I recommend using the `FileFormat <http://www.fileformat.info>`_ websites service. Spaces are trimmed from the definition, therefore we add a space by its UNICODE character (U+0020).
+
+Until the *raw* directive what you can see is a TOC tree entry. Here's how a TOC entry will look like:
+
++
+  .. tabularcolumns:: m{100pt} m{300pt}
+  .. cssclass:: toctableopencv
+
+  =============== ======================================================
+   |MatBasicIma|  **Title:** :ref:`matTheBasicImageContainer`
+
+                  *Compatibility:* > OpenCV 2.0
+
+                  *Author:* |Author_BernatG|
+
+                  You will learn how to store images in the memory and how to print out their content to the console.
+
+  =============== ======================================================
+
+  .. |MatBasicIma| image:: images/matTheBasicImageStructure.jpg
+                   :height: 90pt
+                   :width:  90pt
+
+As you can see we have an image to the left and a description box to the right. To create two boxes we use a table with two columns and a single row. In the left column is the image and in the right one the description. However, the image directive is way too long to fit in a column. Therefore, we need to use the substitution definition system. We add this definition after the TOC tree. All images for the TOC tree are to be put in the images folder near its |reST|_ file. We use the point measurement system because we are also creating PDFs. PDFs are printable documents, where there is no such thing that pixels (px), just points (pt). And while generally space is no problem for web pages (we have monitors with **huge** resolutions) the size of the paper (A4 or letter) is constant and will be for a long time in the future. Therefore, size constrains come in play more like for the PDF, than the generated HTML code.
+
+Now your images should be as small as possible, while still offering the intended information for the user. Remember that the tutorial will become part of the OpenCV source code. If you add large images (that manifest in form of large image size) it will just increase the size of the repository pointlessly. If someone wants to download it later, its download time will be that much longer. Not to mention the larger PDF size for the tutorials and the longer load time for the web pages. In terms of pixels a TOC image should not be larger than 120 X 120 pixels. Resize your images if they are larger!
+
+.. note::
+
+   If you add a larger image and specify a smaller image size, *Sphinx* will not resize that. At build time will add the full size image and the resize will be done by your browser after the image is loaded. A 120 X 120 image is somewhere below 10KB. If you add a 110KB image, you have just pointlessly added a 100KB extra data to transfer over the internet for every user!
+
+Generally speaking you shouldn't need to specify your images size (excluding the TOC entries). If no such is found *Sphinx* will use the size of the image itself (so no resize occurs). Then again if for some reason you decide to specify a size that should be the **width** of the image rather than its height. The reason for this again goes back to the PDFs. On a PDF page the height is larger than the width. In the PDF the images will not be resized. If you specify a size that does not fit in the page, then what does not fits in **will be cut off**. When creating your images for your tutorial you should try to keep the image widths below 500 pixels, and calculate with around 400 point page width when specifying image widths.
+
+The image format depends on the content of the image. If you have some complex scene (many random like colors) then use *jpg*. Otherwise, prefer using *png*. They are even some tools out there that optimize the size of *PNG* images, such as `PNGGauntlet <http://pnggauntlet.com/>`_. Use them to make your images as small as possible in size.
+
+Now on the right side column of the table we add the information about the tutorial:
+
+.. container:: enumeratevisibleitemswithsquare
+
+   + In the first line it is the title of the tutorial. However, there is no need to specify it explicitly. We use the reference system. We'll start up our tutorial with a reference specification, just like in case of this TOC entry with its  `` .. _Table-Of-Content-Section:`` . If after this you have a title (pointed out by the following line of -), then Sphinx will replace the ``:ref:`Table-Of-Content-Section``` directive with the tile of the section in reference form (creates a link in web page). Here's how the definition looks in my case:
+
+     .. code-block:: rst
+
+        .. _matTheBasicImageContainer:
+
+           Mat - The Basic Image Container
+           *******************************
+
+     Note, that according to the |reST|_ rules the * should be as long as your title.
+
+   + Compatibility. What version of OpenCV is required to run your sample code.
+
+   + Author. Use the substitution markup of |reST|_.
+
+   + A short sentence describing the essence of your tutorial.
+
+Now before each TOC entry you need to add the three lines of:
+
+.. code-block:: cpp
+
+   +
+     .. tabularcolumns:: m{100pt} m{300pt}
+     .. cssclass:: toctableopencv
+
+The plus sign (+) is to enumerate tutorials by using bullet points. So for every TOC entry we have a corresponding bullet point represented by the +. Sphinx is highly indenting sensitive. Indentation is used to express from which point until to which point does a construction last. Un-indentation means end of that construction. So to keep all the bullet points to the same group the following TOC entries (until the next +) should be indented by two spaces.
+
+Here, I should also mention that **always** prefer using spaces instead of tabs. Working with only spaces makes possible that if we both use monotype fonts we will see the same thing. Tab size is text editor dependent and as should be avoided. *Sphinx* translates all tabs into 8 spaces before interpreting it.
+
+It turns out that the automatic formatting of both the HTML and PDF(LATEX) system messes up our tables. Therefore, we need to help them out a little. For the PDF generation we add the ``.. tabularcolumns:: m{100pt} m{300pt}`` directive. This means that the first column should be 100 points wide and middle aligned. For the HTML look we simply name the following table of a *toctableopencv* class type. Then, we can modify the look of the table by modifying the CSS of our web page. The CSS definitions go into the :file:`opencv/doc/_themes/blue/static/default.css_t` file.
+
+.. code-block:: css
+
+   .toctableopencv
+   {
+    width: 100% ;
+    table-layout: fixed;
+   }
+
+
+   .toctableopencv colgroup col:first-child
+   {
+    width: 100pt !important;
+    max-width: 100pt !important;
+    min-width: 100pt !important;
+   }
+
+   .toctableopencv colgroup col:nth-child(2)
+   {
+    width: 100% !important;
+   }
+
+However, you should not need to modify this. Just add these three lines (plus keep the two space indentation) for all TOC entries you add. At the end of the TOC file you'll find:
+
+.. code-block:: rst
+
+   .. raw:: latex
+
+      \pagebreak
+
+   .. toctree::
+      :hidden:
+
+      ../mat - the basic image container/mat - the basic image container
+
+The page break entry comes for separating sections and should be only one in a TOC tree |reST|_ file. Finally, at the end of the TOC tree we need to add our tutorial to the *Sphinx* TOC tree system. *Sphinx* will generate from this the previous-next-up information for the HTML file and add items to the PDF according to the order here. By default this TOC tree directive generates a simple table of contents. However, we already created a fancy looking one so we no longer need this basic one. Therefore, we add the *hidden* option to do not show it.
+
+The path is of a relative type. We step back in the file system and then go into the :file:`mat - the basic image container` directory for the :file:`mat - the basic image container.rst` file. Putting out the *rst* extension for the file is optional.
+
+Write the tutorial
+==================
+
+Create a folder with the name of your tutorial. Preferably, use small letters only. Then create a text file in this folder with *rst* extension and the same name. If you have images for the tutorial create an :file:`images` folder and add your images there. When creating your images follow the guidelines described in the previous part!
+
+Now here's our recommendation for the structure of the tutorial (although, remember that this is not carved in the stone; if you have a better idea, use it!):
+
+
+.. container:: enumeratevisibleitemswithsquare
+
+   + Create the reference point and the title.
+
+     .. code-block:: rst
+
+        .. _matTheBasicImageContainer:
+
+        Mat - The Basic Image Container
+        *******************************
+
+     You start the tutorial by specifying a reference point by the ``.. _matTheBasicImageContainer:`` and then its title. The name of the reference point should be a unique one over the whole documentation. Therefore, do not use general names like *tutorial1*. Use the * character to underline the title for its full width. The subtitles of the tutorial should be underlined with = charachter.
+
+   + Goals. You start your tutorial by specifying what you will present. You can also enumerate the sub jobs to be done. For this you can use a bullet point construction. There is a single configuration file for both the reference manual and the tutorial documentation. In the reference manuals at the argument enumeration we do not want any kind of bullet point style enumeration. Therefore, by default all the bullet points at this level are set to do not show the dot before the entries in the HTML. You can override this by putting the bullet point in a container. I've defined a square type bullet point view under the name *enumeratevisibleitemswithsquare*. The CSS style definition for this is again in the  :file:`opencv\doc\_themes\blue\static\default.css_t` file. Here's a quick example of using it:
+
+     .. code-block:: rst
+
+        .. container:: enumeratevisibleitemswithsquare
+
+           + Create the reference point and the title.
+           + Second entry
+           + Third entry
+
+     Note that you need the keep the indentation of the container directive. Directive indentations are always three (3) spaces. Here you may even give usage tips for your sample code.
+
+   + Source code. Present your samples code to the user. It's a good idea to offer a quick download link for the HTML page by using the *download* directive and pointing out where the user may find your source code in the file system by using the *file* directive:
+
+     .. code-block:: rst
+
+        Text :file:`samples/cpp/tutorial_code/highgui/video-write/` folder of the OpenCV source library
+        or :download:`text to appear in the webpage
+        <../../../../samples/cpp/tutorial_code/HighGUI/video-write/video-write.cpp>`.
+
+     For the download link the path is a relative one, hence the multiple back stepping operations (..). Then you can add the source code either by using the *code block* directive or the *literal include* one. In case of the code block you will need to actually add all the source code text into your |reST|_ text and also apply the required indentation:
+
+     .. code-block:: rst
+
+        .. code-block:: cpp
+
+           int i = 0;
+           l = ++j;
+
+     The only argument of the directive is the language used (here CPP). Then you add the source code into its content (meaning one empty line after the directive) by keeping the indentation of the directive (3 spaces). With the *literal include* directive you do not need to add the source code of the sample. You just specify the sample and *Sphinx* will load it for you, during build time. Here's an example usage:
+
+     .. code-block:: rst
+
+        .. literalinclude:: ../../../../samples/cpp/tutorial_code/HighGUI/video-write/video-write.cpp
+           :language: cpp
+           :linenos:
+           :tab-width: 4
+           :lines: 1-8, 21-22, 24-
+
+     After the directive you specify a relative path to the file from what to import. It has four options: the language to use, if you add the ``:linenos:`` the line numbers will be shown, you can specify the tab size with the ``:tab-width:`` and you do not need to load the whole file, you can show just the important lines. Use the *lines* option to do not show redundant information (such as the *help* function). Here basically you specify ranges, if the second range line number is missing than that means that until the end of the file. The ranges specified here do no need to be in an ascending order, you may even reorganize the structure of how you want to show your sample inside the tutorial.
+
+   + The tutorial. Well here goes the explanation for why and what have you used. Try to be short, clear, concise and yet a thorough one. There's no magic formula. Look into a few already made tutorials and start out from there. Try to mix sample OpenCV code with your explanations. If with words is hard to describe something do not hesitate to add in a reasonable size image, to overcome this issue.
+
+     When you present OpenCV functionality it's a good idea to give a link to the used OpenCV data structure or function. Because the OpenCV tutorials and reference manual are in separate PDF files it is not possible to make this link work for the PDF format. Therefore, we use here only web page links to the http://docs.opencv.org website. The OpenCV functions and data structures may be used for multiple tasks. Nevertheless, we want to avoid that every users creates its own reference to a commonly used function. So for this we use the global link collection of *Sphinx*. This is defined in the file:`opencv/doc/conf.py` configuration file. Open it and go all the way down to the last entry:
+
+     .. code-block:: py
+
+       # ---- External links for tutorials -----------------
+       extlinks = {
+           'hgvideo' : ('http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None)
+           }
+
+     In short here we defined a new **hgvideo** directive that refers to an external webpage link. Its usage is:
+
+     .. code-block:: rst
+
+       A sample function of the highgui modules image write and read page is the :hgvideo:`imread() function <imread>`.
+
+     Which turns to: A sample function of the highgui modules image write and read page is the :hgvideo:`imread() function <imread>`. The argument you give between the <> will be put in place of the ``%s`` in the upper definition, and as the link will anchor to the correct function. To find out the anchor of a given function just open up a web page, search for the function and click on it. In the address bar it should appear like: ``http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#imread`` .  Look here for the name of the directives for each page of the OpenCV reference manual. If none present for one of them feel free to add one for it.
+
+     For formulas you can add LATEX code that will translate in the web pages into images. You do this by using the *math* directive. A usage tip:
+
+     .. code-block:: latex
+
+        .. math::
+
+           MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}
+
+     That after build turns into:
+
+     .. math::
+
+        MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}
+
+     You can even use it inline as ``:math:` MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}``` that turns into :math:`MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}`.
+
+     If you use some crazy LATEX library extension you need to add those to the ones to use at build time. Look into the file:`opencv/doc/conf.py` configuration file for more information on this.
+
+   + Results. Well, here depending on your program show one of more of the following:
+
+     - Console outputs by using the code block directive.
+     - Output images.
+     - Runtime videos, visualization. For this use your favorite screens capture software. `Camtasia Studio <http://www.techsmith.com/camtasia/>`_ certainly is one of the better choices, however their prices are out of this world. `CamStudio <http://camstudio.org/>`_ is a free alternative, but less powerful. If you do a video you can upload it to YouTube and then use the raw directive with HTML option to embed it into the generated web page:
+
+       .. code-block:: rst
+
+          You may observe a runtime instance of this on the `YouTube here <https://www.youtube.com/watch?v=jpBwHxsl1_0>`_.
+
+          .. raw:: html
+
+             <div align="center">
+             <iframe title="Creating a video with OpenCV" width="560" height="349" src="http://www.youtube.com/embed/jpBwHxsl1_0?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
+             </div>
+
+       This results in the text and video: You may observe a runtime instance of this on the `YouTube here <https://www.youtube.com/watch?v=jpBwHxsl1_0>`_.
+
+       .. raw:: html
+
+          <div align="center">
+          <iframe title="Creating a video with OpenCV" width="560" height="349" src="http://www.youtube.com/embed/jpBwHxsl1_0?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
+          </div>
+
+     When these aren't self-explanatory make sure to throw in a few guiding lines about what and why we can see.
+
+   + Build the documentation and check for errors or warnings. In the CMake make sure you check or pass the option for building documentation. Then simply build the **docs** project for the PDF file and the **docs_html** project for the web page. Read the output of the build and check for errors/warnings for what you have added. This is also the time to observe and correct any kind of *not so good looking* parts. Remember to keep clean our build logs.
+
+   + Read again your tutorial and check for both programming and spelling errors. If found any, please correct them.
+
+
+Take home the pride and joy of a job well done!
+===============================================
+
+Once you are done please make a GitHub pull request with the tutorial. Now, to see
+your work **live** you may need to wait some time. The PDFs are updated usually at
+the launch of a new OpenCV version. The web pages are a little more diverse. They are
+automatically rebuilt nightly. Currently we use ``2.4`` and ``master`` branches for
+daily builds. So, if your pull request was merged to any of these branches, your
+material will be published at `docs.opencv.org/2.4 <http:/docs.opencv.org/2.4>`_ or
+`docs.opencv.org/master <http:/docs.opencv.org/master>`_ correspondingly. Everything
+that was added to ``2.4`` is merged to ``master`` branch every week. Although, we try
+to make a build every night, occasionally we might freeze any of the branches to fix
+upcoming issues. During this it may take a little longer to see your work online,
+however if you submitted it, be sure that eventually it will show up.
+
+If you have any questions or advices relating to this tutorial you can contact us at
+-delete-admin@-delete-opencv.org (delete the -delete- parts of that email address).
diff --git a/doc/tutorials/introduction/ios_install/ios_install.rst b/doc/tutorials/introduction/ios_install/ios_install.rst
index cb9e0650b..2973b7ec2 100644
--- a/doc/tutorials/introduction/ios_install/ios_install.rst
+++ b/doc/tutorials/introduction/ios_install/ios_install.rst
@@ -37,7 +37,7 @@ Building OpenCV from Source, using CMake and Command Line
     .. code-block:: bash
 
        cd ~/<my_working_directory>
-       python opencv/ios/build_framework.py ios
+       python opencv/platforms/ios/build_framework.py ios
 
 If everything's fine, a few minutes later you will get ~/<my_working_directory>/ios/opencv2.framework. You can add this framework to your Xcode projects.
 
diff --git a/doc/tutorials/introduction/linux_eclipse/linux_eclipse.rst b/doc/tutorials/introduction/linux_eclipse/linux_eclipse.rst
index 451a875c8..74e0ff9e0 100644
--- a/doc/tutorials/introduction/linux_eclipse/linux_eclipse.rst
+++ b/doc/tutorials/introduction/linux_eclipse/linux_eclipse.rst
@@ -245,6 +245,3 @@ Say you have or create a new file, *helloworld.cpp* in a directory called *foo*:
  a. You can also optionally modify the ``Build command:`` from ``make`` to something like ``make VERBOSE=1 -j4`` which tells the compiler to produce detailed symbol files for debugging and also to compile in 4 parallel threads.
 
 #. Done!
-
-
-
diff --git a/doc/tutorials/introduction/linux_install/linux_install.rst b/doc/tutorials/introduction/linux_install/linux_install.rst
index e8b96dab7..1e02b64c9 100644
--- a/doc/tutorials/introduction/linux_install/linux_install.rst
+++ b/doc/tutorials/introduction/linux_install/linux_install.rst
@@ -80,4 +80,3 @@ Building OpenCV from Source Using CMake, Using the Command Line
 .. note::
 
    If the size of the created library is a critical issue (like in case of an Android build) you can use the ``install/strip`` command to get the smallest size as possible. The *stripped* version appears to be twice as small. However, we do not recommend using this unless those extra megabytes do really matter.
-
diff --git a/doc/tutorials/introduction/windows_install/windows_install.rst b/doc/tutorials/introduction/windows_install/windows_install.rst
index eebda7b06..c29c13aed 100644
--- a/doc/tutorials/introduction/windows_install/windows_install.rst
+++ b/doc/tutorials/introduction/windows_install/windows_install.rst
@@ -292,7 +292,7 @@ Building the library
 
    This will create an *Install* directory inside the *Build* one collecting all the built binaries into a single place. Use this only after you built both the *Release* and *Debug* versions.
 
-   To test your build just go into the :file:`Build/bin/Debug` or :file:`Build/bin/Release` directory and start a couple of applications like the *contours.exe*. If they run, you are done. Otherwise, something definitely went awfully wrong. In this case you should contact us via our :opencv_group:`user group <>`.
+   To test your build just go into the :file:`Build/bin/Debug` or :file:`Build/bin/Release` directory and start a couple of applications like the *contours.exe*. If they run, you are done. Otherwise, something definitely went awfully wrong. In this case you should contact us at our :opencv_qa:`Q&A forum <>`.
    If everything is okay the *contours.exe* output should resemble the following image (if built with Qt support):
 
    .. image:: images/WindowsQtContoursOutput.png
@@ -312,9 +312,13 @@ First we set an enviroment variable to make easier our work. This will hold the
 
 ::
 
-   setx -m OPENCV_DIR D:\OpenCV\Build\x86\vc10
+   setx -m OPENCV_DIR D:\OpenCV\Build\x86\vc10     (suggested for Visual Studio 2010 - 32 bit Windows)
+   setx -m OPENCV_DIR D:\OpenCV\Build\x64\vc10     (suggested for Visual Studio 2010 - 64 bit Windows)
 
-Here the directory is where you have your OpenCV binaries (*extracted* or *built*). You can have different platform (e.g. x64 instead of x86) or compiler type, so substitute appropriate value. Inside this you should have folders like *bin* and *include*. The -m should be added if you wish to make the settings computer wise, instead of user wise.
+   setx -m OPENCV_DIR D:\OpenCV\Build\x86\vc11     (suggested for Visual Studio 2012 - 32 bit Windows)
+   setx -m OPENCV_DIR D:\OpenCV\Build\x64\vc11     (suggested for Visual Studio 2012 - 64 bit Windows)
+
+Here the directory is where you have your OpenCV binaries (*extracted* or *built*). You can have different platform (e.g. x64 instead of x86) or compiler type, so substitute appropriate value. Inside this you should have two folders called *lib* and *bin*. The -m should be added if you wish to make the settings computer wise, instead of user wise.
 
 If you built static libraries then you are done. Otherwise, you need to add the *bin* folders path to the systems path. This is cause you will use the OpenCV library in form of *\"Dynamic-link libraries\"* (also known as **DLL**). Inside these are stored all the algorithms and information the OpenCV library contains. The operating system will load them only on demand, during runtime. However, to do this he needs to know where they are. The systems **PATH** contains a list of folders where DLLs can be found. Add the OpenCV library path to this and the OS will know where to look if he ever needs the OpenCV binaries. Otherwise, you will need to copy the used DLLs right beside the applications executable file (*exe*) for the OS to find it, which is highly unpleasent if you work on many projects. To do this start up again the |PathEditor|_ and add the following new entry (right click in the application to bring up the menu):
 
diff --git a/doc/tutorials/introduction/windows_visual_studio_Opencv/images/PropertySheetOpenCVInclude.jpg b/doc/tutorials/introduction/windows_visual_studio_Opencv/images/PropertySheetOpenCVInclude.jpg
index 5826f4dd6..bc61d3386 100644
Binary files a/doc/tutorials/introduction/windows_visual_studio_Opencv/images/PropertySheetOpenCVInclude.jpg and b/doc/tutorials/introduction/windows_visual_studio_Opencv/images/PropertySheetOpenCVInclude.jpg differ
diff --git a/doc/tutorials/introduction/windows_visual_studio_Opencv/windows_visual_studio_Opencv.rst b/doc/tutorials/introduction/windows_visual_studio_Opencv/windows_visual_studio_Opencv.rst
index ec227e724..f3058a74d 100644
--- a/doc/tutorials/introduction/windows_visual_studio_Opencv/windows_visual_studio_Opencv.rst
+++ b/doc/tutorials/introduction/windows_visual_studio_Opencv/windows_visual_studio_Opencv.rst
@@ -52,7 +52,7 @@ Use for example the *OpenCV_Debug* name. Then by selecting the sheet :menuselect
 
 .. code-block:: bash
 
-   $(OPENCV_DIR)\include
+   $(OPENCV_DIR)\..\..\include
 
 .. image:: images/PropertySheetOpenCVInclude.jpg
    :alt: Add the include dir like this.
@@ -64,7 +64,7 @@ Next go to the :menuselection:`Linker --> General` and under the *"Additional Li
 
 .. code-block:: bash
 
-   $(OPENCV_DIR)\libs
+   $(OPENCV_DIR)\lib
 
 .. image:: images/PropertySheetOpenCVLib.jpg
    :alt: Add the library folder like this.
@@ -86,7 +86,7 @@ The names of the libraries are as follow:
 
    opencv_(The Name of the module)(The version Number of the library you use)d.lib
 
-A full list, for the currently latest trunk version would contain:
+A full list, for the latest version would contain:
 
 .. code-block:: bash
 
diff --git a/doc/tutorials/ios/hello/hello.rst b/doc/tutorials/ios/hello/hello.rst
index b2a37aa70..8435be54d 100644
--- a/doc/tutorials/ios/hello/hello.rst
+++ b/doc/tutorials/ios/hello/hello.rst
@@ -73,4 +73,3 @@ Now we will learn how to write a simple Hello World Application in Xcode using O
 .. image:: images/output.png
      :alt: output
      :align: center
-
diff --git a/doc/tutorials/ios/image_manipulation/image_manipulation.rst b/doc/tutorials/ios/image_manipulation/image_manipulation.rst
index fd2d9c6e3..c4cde1990 100644
--- a/doc/tutorials/ios/image_manipulation/image_manipulation.rst
+++ b/doc/tutorials/ios/image_manipulation/image_manipulation.rst
@@ -127,4 +127,4 @@ Check out an instance of running code with more Image Effects on `YouTube <http:
 
   <div align="center">
  <iframe width="560" height="350" src="http://www.youtube.com/embed/Ko3K_xdhJ1I" frameborder="0" allowfullscreen></iframe>
-  </div>
\ No newline at end of file
+  </div>
diff --git a/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.rst b/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.rst
index 051765def..50f734803 100644
--- a/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.rst
+++ b/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.rst
@@ -129,7 +129,7 @@ Explanation
 
 3. **Train the SVM**
 
-   We call the method `CvSVM::train <http://opencv.itseez.com/modules/ml/doc/support_vector_machines.html#cvsvm-train>`_ to build the SVM model.
+   We call the method `CvSVM::train <http://docs.opencv.org/modules/ml/doc/support_vector_machines.html#cvsvm-train>`_ to build the SVM model.
 
    .. code-block:: cpp
 
@@ -185,4 +185,3 @@ Results
 .. image:: images/result.png
   :alt: The seperated planes
   :align: center
-
diff --git a/doc/tutorials/ml/non_linear_svms/non_linear_svms.rst b/doc/tutorials/ml/non_linear_svms/non_linear_svms.rst
index 8fbcc563a..57e0b1b6e 100644
--- a/doc/tutorials/ml/non_linear_svms/non_linear_svms.rst
+++ b/doc/tutorials/ml/non_linear_svms/non_linear_svms.rst
@@ -1 +1,232 @@
-.. _nonLinearSvmS: Support Vector Machines for Non-Linearly Separable Data*******************************************************Goal====In this tutorial you will learn how to:.. container:: enumeratevisibleitemswithsquare  + Define the optimization problem for SVMs when it is not possible to separate linearly the training data.  + How to configure the parameters in :svms:`CvSVMParams <cvsvmparams>` to adapt your SVM for this class of problems.Motivation==========Why is it interesting to extend the SVM optimation problem in order to handle non-linearly separable training data? Most of the applications in which SVMs are used in computer vision require a more powerful tool than a simple linear classifier. This stems from the fact that in these tasks **the training data can be rarely separated using an hyperplane**.Consider one of these tasks, for example, face detection. The training data in this case is composed by a set of images that are faces and another set of images that are non-faces (*every other thing in the world except from faces*). This training data is too complex so as to find a representation of each sample (*feature vector*) that could make the whole set of faces linearly separable from the whole set of non-faces.Extension of the Optimization Problem=====================================Remember that using SVMs we obtain a separating hyperplane. Therefore, since the training data is now non-linearly separable, we must admit that the hyperplane found will misclassify some of the samples. This *misclassification* is a new variable in the optimization that must be taken into account. The new model has to include both the old requirement of finding the hyperplane that gives the biggest margin and the new one of generalizing the training data correctly by not allowing too many classification errors.  We start here from the formulation of the optimization problem of finding the hyperplane which maximizes the **margin** (this is explained in the :ref:`previous tutorial <introductiontosvms>`):.. math::  \min_{\beta, \beta_{0}} L(\beta) = \frac{1}{2}||\beta||^{2} \text{ subject to } y_{i}(\beta^{T} x_{i} + \beta_{0}) \geq 1 \text{ } \forall iThere are multiple ways in which this model can be modified so it takes into account the misclassification errors. For example, one could think of minimizing the same quantity plus a constant times the number of misclassification errors in the training data, i.e.:.. math::  \min ||\beta||^{2} + C \text{(\# misclassication errors)}However, this one is not a very good solution since, among some other reasons, we do not distinguish between samples that are misclassified with a small distance to their appropriate decision region or samples that are not. Therefore, a better solution will take into account the *distance of the misclassified samples to their correct decision regions*, i.e.:.. math::  \min ||\beta||^{2} + C \text{(distance of misclassified samples to their correct regions)}For each sample of the training data a new parameter :math:`\xi_{i}` is defined. Each one of these parameters contains the distance from its corresponding training sample to their correct decision region. The following picture shows non-linearly separable training data from two classes, a separating hyperplane and the distances to their correct regions of the samples that are misclassified... image:: images/sample-errors-dist.png   :alt: Samples misclassified and their distances to their correct regions   :align: center .. note:: Only the distances of the samples that are misclassified are shown in the picture. The distances of the rest of the samples are zero since they lay already in their correct decision region.The red and blue lines that appear on the picture are the margins to each one of the decision regions. It is very **important** to realize that each of the :math:`\xi_{i}` goes from a misclassified training sample to the margin of its appropriate region.Finally, the new formulation for the optimization problem is:.. math::  \min_{\beta, \beta_{0}} L(\beta) = ||\beta||^{2} + C \sum_{i} {\xi_{i}} \text{ subject to } y_{i}(\beta^{T} x_{i} + \beta_{0}) \geq 1 - \xi_{i} \text{ and } \xi_{i} \geq 0 \text{ } \forall i How should the parameter C be chosen? It is obvious that the answer to this question depends on how the training data is distributed. Although there is no general answer, it is useful to take into account these rules:.. container:: enumeratevisibleitemswithsquare   * Large values of C give solutions with *less misclassification errors* but a *smaller margin*. Consider that in this case it is expensive to make misclassification errors. Since the aim of the optimization is to minimize the argument, few misclassifications errors are allowed.   * Small values of C give solutions with *bigger margin* and *more classification errors*. In this case the minimization does not consider that much the term of the sum so it focuses more on finding a hyperplane with big margin.Source Code===========You may also find the source code and these video file in the :file:`samples/cpp/tutorial_code/gpu/non_linear_svms/non_linear_svms` folder of the OpenCV source library or :download:`download it from here <../../../../samples/cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp>`... literalinclude:: ../../../../samples/cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp   :language: cpp   :linenos:   :tab-width: 4   :lines: 1-11, 22-23, 26-Explanation===========1. **Set up the training data**  The training data of this exercise is formed by a set of labeled 2D-points that belong to one of two different classes. To make the exercise more appealing, the training data is generated randomly using a uniform probability density functions (PDFs).  We have divided the generation of the training data into two main parts.  In the first part we generate data for both classes that is linearly separable.  .. code-block:: cpp     // Generate random points for the class 1     Mat trainClass = trainData.rowRange(0, nLinearSamples);     // The x coordinate of the points is in [0, 0.4)     Mat c = trainClass.colRange(0, 1);     rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(0.4 * WIDTH));     // The y coordinate of the points is in [0, 1)     c = trainClass.colRange(1,2);     rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));     // Generate random points for the class 2     trainClass = trainData.rowRange(2*NTRAINING_SAMPLES-nLinearSamples, 2*NTRAINING_SAMPLES);     // The x coordinate of the points is in [0.6, 1]     c = trainClass.colRange(0 , 1);      rng.fill(c, RNG::UNIFORM, Scalar(0.6*WIDTH), Scalar(WIDTH));     // The y coordinate of the points is in [0, 1)     c = trainClass.colRange(1,2);     rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));  In the second part we create data for both classes that is non-linearly separable, data that overlaps.  .. code-block:: cpp     // Generate random points for the classes 1 and 2     trainClass = trainData.rowRange(  nLinearSamples, 2*NTRAINING_SAMPLES-nLinearSamples);     // The x coordinate of the points is in [0.4, 0.6)     c = trainClass.colRange(0,1);     rng.fill(c, RNG::UNIFORM, Scalar(0.4*WIDTH), Scalar(0.6*WIDTH));      // The y coordinate of the points is in [0, 1)     c = trainClass.colRange(1,2);     rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT)); 2. **Set up SVM's parameters**  .. seealso::      In the previous tutorial :ref:`introductiontosvms` there is an explanation of the atributes of the class :svms:`CvSVMParams <cvsvmparams>` that we configure here before training the SVM.  .. code-block:: cpp     CvSVMParams params;     params.svm_type    = SVM::C_SVC;     params.C              = 0.1;     params.kernel_type = SVM::LINEAR;     params.term_crit   = TermCriteria(CV_TERMCRIT_ITER, (int)1e7, 1e-6);  There are just two differences between the configuration we do here and the one that was done in the :ref:`previous tutorial <introductiontosvms>` that we use as reference.  * *CvSVM::C_SVC*. We chose here a small value of this parameter in order not to punish too much the misclassification errors in the optimization. The idea of doing this stems from the will of obtaining a solution close to the one intuitively expected. However, we recommend to get a better insight of the problem by making adjustments to this parameter.      .. note:: Here there are just very few points in the overlapping region between classes, giving a smaller value to **FRAC_LINEAR_SEP** the density of points can be incremented and the impact of the parameter **CvSVM::C_SVC** explored deeply.  * *Termination Criteria of the algorithm*. The maximum number of iterations has to be increased considerably in order to solve correctly a problem with non-linearly separable training data. In particular, we have increased in five orders of magnitude this value.   3. **Train the SVM**  We call the method :svms:`CvSVM::train <cvsvm-train>` to build the SVM model. Watch out that the training process may take a quite long time. Have patiance when your run the program.  .. code-block:: cpp     CvSVM svm;     svm.train(trainData, labels, Mat(), Mat(), params);4. **Show the Decision Regions**  The method :svms:`CvSVM::predict <cvsvm-predict>` is used to classify an input sample using a trained SVM. In this example we have used this method in order to color the space depending on the prediction done by the SVM. In other words, an image is traversed interpreting its pixels as points of the Cartesian plane. Each of the points is colored depending on the class predicted by the SVM; in dark green if it is the class with label 1 and in dark blue if it is the class with label 2.  .. code-block:: cpp     Vec3b green(0,100,0), blue (100,0,0);     for (int i = 0; i < I.rows; ++i)          for (int j = 0; j < I.cols; ++j)          {               Mat sampleMat = (Mat_<float>(1,2) << i, j);               float response = svm.predict(sampleMat);               if      (response == 1)    I.at<Vec3b>(j, i)  = green;               else if (response == 2)    I.at<Vec3b>(j, i)  = blue;          }5. **Show the training data**  The method :drawingFunc:`circle <circle>` is used to show the samples that compose the training data. The samples of the class labeled with 1 are shown in light green and in light blue the samples of the class labeled with 2.  .. code-block:: cpp     int thick = -1;     int lineType = 8;     float px, py;     // Class 1     for (int i = 0; i < NTRAINING_SAMPLES; ++i)     {          px = trainData.at<float>(i,0);          py = trainData.at<float>(i,1);          circle(I, Point( (int) px,  (int) py ), 3, Scalar(0, 255, 0), thick, lineType);     }     // Class 2     for (int i = NTRAINING_SAMPLES; i <2*NTRAINING_SAMPLES; ++i)     {          px = trainData.at<float>(i,0);          py = trainData.at<float>(i,1);          circle(I, Point( (int) px, (int) py ), 3, Scalar(255, 0, 0), thick, lineType);     }6. **Support vectors**  We use here a couple of methods to obtain information about the support vectors. The method :svms:`CvSVM::get_support_vector_count <cvsvm-get-support-vector>` outputs the total number of support vectors used in the problem and with the method :svms:`CvSVM::get_support_vector <cvsvm-get-support-vector>` we obtain each of the support vectors using an index. We have used this methods here to find the training examples that are support vectors and highlight them.  .. code-block:: cpp     thick = 2;     lineType  = 8;     int x     = svm.get_support_vector_count();     for (int i = 0; i < x; ++i)     {          const float* v = svm.get_support_vector(i);          circle(     I,  Point( (int) v[0], (int) v[1]), 6, Scalar(128, 128, 128), thick, lineType);     }Results=======.. container:: enumeratevisibleitemswithsquare   * The code opens an image and shows the training examples of both classes. The points of one class are represented with light green and light blue ones are used for the other class.   * The SVM is trained and used to classify all the pixels of the image. This results in a division of the image in a blue region and a green region. The boundary between both regions is the separating hyperplane. Since the training data is non-linearly separable, it can be seen that some of the examples of both classes are misclassified; some green points lay on the blue region and some blue points lay on the green one.   * Finally the support vectors are shown using gray rings around the training examples... image:: images/result.png  :alt: Training data and decision regions given by the SVM  :width: 300pt  :align: center You may observe a runtime instance of this on the `YouTube here <https://www.youtube.com/watch?v=vFv2yPcSo-Q>`_. .. raw:: html  <div align="center">  <iframe title="Support Vector Machines for Non-Linearly Separable Data" width="560" height="349" src="http://www.youtube.com/embed/vFv2yPcSo-Q?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>  </div>
\ No newline at end of file
+.. _nonLinearSvmS:
+
+Support Vector Machines for Non-Linearly Separable Data
+*******************************************************
+
+Goal
+====
+
+In this tutorial you will learn how to:
+
+.. container:: enumeratevisibleitemswithsquare
+
+  + Define the optimization problem for SVMs when it is not possible to separate linearly the training data.
+
+  + How to configure the parameters in :svms:`CvSVMParams <cvsvmparams>` to adapt your SVM for this class of problems.
+
+Motivation
+==========
+
+Why is it interesting to extend the SVM optimation problem in order to handle non-linearly separable training data? Most of the applications in which SVMs are used in computer vision require a more powerful tool than a simple linear classifier. This stems from the fact that in these tasks **the training data can be rarely separated using an hyperplane**.
+
+Consider one of these tasks, for example, face detection. The training data in this case is composed by a set of images that are faces and another set of images that are non-faces (*every other thing in the world except from faces*). This training data is too complex so as to find a representation of each sample (*feature vector*) that could make the whole set of faces linearly separable from the whole set of non-faces.
+
+Extension of the Optimization Problem
+=====================================
+
+Remember that using SVMs we obtain a separating hyperplane. Therefore, since the training data is now non-linearly separable, we must admit that the hyperplane found will misclassify some of the samples. This *misclassification* is a new variable in the optimization that must be taken into account. The new model has to include both the old requirement of finding the hyperplane that gives the biggest margin and the new one of generalizing the training data correctly by not allowing too many classification errors.
+
+We start here from the formulation of the optimization problem of finding the hyperplane which maximizes the **margin** (this is explained in the :ref:`previous tutorial <introductiontosvms>`):
+
+.. math::
+  \min_{\beta, \beta_{0}} L(\beta) = \frac{1}{2}||\beta||^{2} \text{ subject to } y_{i}(\beta^{T} x_{i} + \beta_{0}) \geq 1 \text{ } \forall i
+
+There are multiple ways in which this model can be modified so it takes into account the misclassification errors. For example, one could think of minimizing the same quantity plus a constant times the number of misclassification errors in the training data, i.e.:
+
+.. math::
+  \min ||\beta||^{2} + C \text{(\# misclassication errors)}
+
+However, this one is not a very good solution since, among some other reasons, we do not distinguish between samples that are misclassified with a small distance to their appropriate decision region or samples that are not. Therefore, a better solution will take into account the *distance of the misclassified samples to their correct decision regions*, i.e.:
+
+.. math::
+  \min ||\beta||^{2} + C \text{(distance of misclassified samples to their correct regions)}
+
+For each sample of the training data a new parameter :math:`\xi_{i}` is defined. Each one of these parameters contains the distance from its corresponding training sample to their correct decision region. The following picture shows non-linearly separable training data from two classes, a separating hyperplane and the distances to their correct regions of the samples that are misclassified.
+
+.. image:: images/sample-errors-dist.png
+   :alt: Samples misclassified and their distances to their correct regions
+   :align: center
+
+.. note:: Only the distances of the samples that are misclassified are shown in the picture. The distances of the rest of the samples are zero since they lay already in their correct decision region.
+
+The red and blue lines that appear on the picture are the margins to each one of the decision regions. It is very **important** to realize that each of the :math:`\xi_{i}` goes from a misclassified training sample to the margin of its appropriate region.
+
+Finally, the new formulation for the optimization problem is:
+
+.. math::
+  \min_{\beta, \beta_{0}} L(\beta) = ||\beta||^{2} + C \sum_{i} {\xi_{i}} \text{ subject to } y_{i}(\beta^{T} x_{i} + \beta_{0}) \geq 1 - \xi_{i} \text{ and } \xi_{i} \geq 0 \text{ } \forall i
+
+How should the parameter C be chosen? It is obvious that the answer to this question depends on how the training data is distributed. Although there is no general answer, it is useful to take into account these rules:
+
+.. container:: enumeratevisibleitemswithsquare
+
+   * Large values of C give solutions with *less misclassification errors* but a *smaller margin*. Consider that in this case it is expensive to make misclassification errors. Since the aim of the optimization is to minimize the argument, few misclassifications errors are allowed.
+
+   * Small values of C give solutions with *bigger margin* and *more classification errors*. In this case the minimization does not consider that much the term of the sum so it focuses more on finding a hyperplane with big margin.
+
+Source Code
+===========
+
+You may also find the source code and these video file in the :file:`samples/cpp/tutorial_code/gpu/non_linear_svms/non_linear_svms` folder of the OpenCV source library or :download:`download it from here <../../../../samples/cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp>`.
+
+.. literalinclude:: ../../../../samples/cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp
+   :language: cpp
+   :linenos:
+   :tab-width: 4
+   :lines: 1-11, 22-23, 26-
+
+Explanation
+===========
+
+1. **Set up the training data**
+
+  The training data of this exercise is formed by a set of labeled 2D-points that belong to one of two different classes. To make the exercise more appealing, the training data is generated randomly using a uniform probability density functions (PDFs).
+
+  We have divided the generation of the training data into two main parts.
+
+  In the first part we generate data for both classes that is linearly separable.
+
+  .. code-block:: cpp
+
+     // Generate random points for the class 1
+     Mat trainClass = trainData.rowRange(0, nLinearSamples);
+     // The x coordinate of the points is in [0, 0.4)
+     Mat c = trainClass.colRange(0, 1);
+     rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(0.4 * WIDTH));
+     // The y coordinate of the points is in [0, 1)
+     c = trainClass.colRange(1,2);
+     rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
+
+     // Generate random points for the class 2
+     trainClass = trainData.rowRange(2*NTRAINING_SAMPLES-nLinearSamples, 2*NTRAINING_SAMPLES);
+     // The x coordinate of the points is in [0.6, 1]
+     c = trainClass.colRange(0 , 1);
+     rng.fill(c, RNG::UNIFORM, Scalar(0.6*WIDTH), Scalar(WIDTH));
+     // The y coordinate of the points is in [0, 1)
+     c = trainClass.colRange(1,2);
+     rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
+
+  In the second part we create data for both classes that is non-linearly separable, data that overlaps.
+
+  .. code-block:: cpp
+
+     // Generate random points for the classes 1 and 2
+     trainClass = trainData.rowRange(  nLinearSamples, 2*NTRAINING_SAMPLES-nLinearSamples);
+     // The x coordinate of the points is in [0.4, 0.6)
+     c = trainClass.colRange(0,1);
+     rng.fill(c, RNG::UNIFORM, Scalar(0.4*WIDTH), Scalar(0.6*WIDTH));
+     // The y coordinate of the points is in [0, 1)
+     c = trainClass.colRange(1,2);
+     rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
+
+2. **Set up SVM's parameters**
+
+  .. seealso::
+
+      In the previous tutorial :ref:`introductiontosvms` there is an explanation of the atributes of the class :svms:`CvSVMParams <cvsvmparams>` that we configure here before training the SVM.
+
+  .. code-block:: cpp
+
+     CvSVMParams params;
+     params.svm_type    = SVM::C_SVC;
+     params.C              = 0.1;
+     params.kernel_type = SVM::LINEAR;
+     params.term_crit   = TermCriteria(CV_TERMCRIT_ITER, (int)1e7, 1e-6);
+
+  There are just two differences between the configuration we do here and the one that was done in the :ref:`previous tutorial <introductiontosvms>` that we use as reference.
+
+  * *CvSVM::C_SVC*. We chose here a small value of this parameter in order not to punish too much the misclassification errors in the optimization. The idea of doing this stems from the will of obtaining a solution close to the one intuitively expected. However, we recommend to get a better insight of the problem by making adjustments to this parameter.
+
+      .. note:: Here there are just very few points in the overlapping region between classes, giving a smaller value to **FRAC_LINEAR_SEP** the density of points can be incremented and the impact of the parameter **CvSVM::C_SVC** explored deeply.
+
+  * *Termination Criteria of the algorithm*. The maximum number of iterations has to be increased considerably in order to solve correctly a problem with non-linearly separable training data. In particular, we have increased in five orders of magnitude this value.
+
+3. **Train the SVM**
+
+  We call the method :svms:`CvSVM::train <cvsvm-train>` to build the SVM model. Watch out that the training process may take a quite long time. Have patiance when your run the program.
+
+  .. code-block:: cpp
+
+     CvSVM svm;
+     svm.train(trainData, labels, Mat(), Mat(), params);
+
+4. **Show the Decision Regions**
+
+  The method :svms:`CvSVM::predict <cvsvm-predict>` is used to classify an input sample using a trained SVM. In this example we have used this method in order to color the space depending on the prediction done by the SVM. In other words, an image is traversed interpreting its pixels as points of the Cartesian plane. Each of the points is colored depending on the class predicted by the SVM; in dark green if it is the class with label 1 and in dark blue if it is the class with label 2.
+
+  .. code-block:: cpp
+
+     Vec3b green(0,100,0), blue (100,0,0);
+     for (int i = 0; i < I.rows; ++i)
+          for (int j = 0; j < I.cols; ++j)
+          {
+               Mat sampleMat = (Mat_<float>(1,2) << i, j);
+               float response = svm.predict(sampleMat);
+
+               if      (response == 1)    I.at<Vec3b>(j, i)  = green;
+               else if (response == 2)    I.at<Vec3b>(j, i)  = blue;
+          }
+
+5. **Show the training data**
+
+  The method :drawingFunc:`circle <circle>` is used to show the samples that compose the training data. The samples of the class labeled with 1 are shown in light green and in light blue the samples of the class labeled with 2.
+
+  .. code-block:: cpp
+
+     int thick = -1;
+     int lineType = 8;
+     float px, py;
+     // Class 1
+     for (int i = 0; i < NTRAINING_SAMPLES; ++i)
+     {
+          px = trainData.at<float>(i,0);
+          py = trainData.at<float>(i,1);
+          circle(I, Point( (int) px,  (int) py ), 3, Scalar(0, 255, 0), thick, lineType);
+     }
+     // Class 2
+     for (int i = NTRAINING_SAMPLES; i <2*NTRAINING_SAMPLES; ++i)
+     {
+          px = trainData.at<float>(i,0);
+          py = trainData.at<float>(i,1);
+          circle(I, Point( (int) px, (int) py ), 3, Scalar(255, 0, 0), thick, lineType);
+     }
+
+6. **Support vectors**
+
+  We use here a couple of methods to obtain information about the support vectors. The method :svms:`CvSVM::get_support_vector_count <cvsvm-get-support-vector>` outputs the total number of support vectors used in the problem and with the method :svms:`CvSVM::get_support_vector <cvsvm-get-support-vector>` we obtain each of the support vectors using an index. We have used this methods here to find the training examples that are support vectors and highlight them.
+
+  .. code-block:: cpp
+
+     thick = 2;
+     lineType  = 8;
+     int x     = svm.get_support_vector_count();
+
+     for (int i = 0; i < x; ++i)
+     {
+          const float* v = svm.get_support_vector(i);
+          circle(     I,  Point( (int) v[0], (int) v[1]), 6, Scalar(128, 128, 128), thick, lineType);
+     }
+
+Results
+========
+
+.. container:: enumeratevisibleitemswithsquare
+
+   * The code opens an image and shows the training examples of both classes. The points of one class are represented with light green and light blue ones are used for the other class.
+
+   * The SVM is trained and used to classify all the pixels of the image. This results in a division of the image in a blue region and a green region. The boundary between both regions is the separating hyperplane. Since the training data is non-linearly separable, it can be seen that some of the examples of both classes are misclassified; some green points lay on the blue region and some blue points lay on the green one.
+
+   * Finally the support vectors are shown using gray rings around the training examples.
+
+.. image:: images/result.png
+  :alt: Training data and decision regions given by the SVM
+  :width: 300pt
+  :align: center
+
+You may observe a runtime instance of this on the `YouTube here <https://www.youtube.com/watch?v=vFv2yPcSo-Q>`_.
+
+.. raw:: html
+
+  <div align="center">
+  <iframe title="Support Vector Machines for Non-Linearly Separable Data" width="560" height="349" src="http://www.youtube.com/embed/vFv2yPcSo-Q?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
+  </div>
diff --git a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst
index 146a0ec0b..ba9c2740e 100644
--- a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst
+++ b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst
@@ -26,91 +26,90 @@ This tutorial code's is shown lines below. You can also download it from `here <
 
 .. code-block:: cpp
 
-   #include "opencv2/objdetect.hpp"
-   #include "opencv2/highgui.hpp"
-   #include "opencv2/imgproc.hpp"
+    #include "opencv2/objdetect.hpp"
+    #include "opencv2/highgui.hpp"
+    #include "opencv2/imgproc.hpp"
 
-   #include <iostream>
-   #include <stdio.h>
+    #include <iostream>
+    #include <stdio.h>
 
-   using namespace std;
-   using namespace cv;
+    using namespace std;
+    using namespace cv;
 
-   /** Function Headers */
-   void detectAndDisplay( Mat frame );
+    /** Function Headers */
+    void detectAndDisplay( Mat frame );
 
-   /** Global variables */
-   String face_cascade_name = "haarcascade_frontalface_alt.xml";
-   String eyes_cascade_name = "haarcascade_eye_tree_eyeglasses.xml";
-   CascadeClassifier face_cascade;
-   CascadeClassifier eyes_cascade;
-   string window_name = "Capture - Face detection";
-   RNG rng(12345);
+    /** Global variables */
+    String face_cascade_name = "haarcascade_frontalface_alt.xml";
+    String eyes_cascade_name = "haarcascade_eye_tree_eyeglasses.xml";
+    CascadeClassifier face_cascade;
+    CascadeClassifier eyes_cascade;
+    String window_name = "Capture - Face detection";
 
-   /** @function main */
-   int main( int argc, const char** argv )
-   {
-     CvCapture* capture;
-     Mat frame;
-
-     //-- 1. Load the cascades
-     if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
-     if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
-
-     //-- 2. Read the video stream
-     capture = cvCaptureFromCAM( -1 );
-     if( capture )
-     {
-       while( true )
-       {
-     frame = cvQueryFrame( capture );
-
-     //-- 3. Apply the classifier to the frame
-         if( !frame.empty() )
-         { detectAndDisplay( frame ); }
-         else
-         { printf(" --(!) No captured frame -- Break!"); break; }
-
-         int c = waitKey(10);
-         if( (char)c == 'c' ) { break; }
-        }
-     }
-     return 0;
-   }
-
-  /** @function detectAndDisplay */
-  void detectAndDisplay( Mat frame )
-  {
-    std::vector<Rect> faces;
-    Mat frame_gray;
-
-    cvtColor( frame, frame_gray, CV_BGR2GRAY );
-    equalizeHist( frame_gray, frame_gray );
-
-    //-- Detect faces
-    face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30) );
-
-    for( int i = 0; i < faces.size(); i++ )
+    /** @function main */
+    int main( void )
     {
-      Point center( faces[i].x + faces[i].width*0.5, faces[i].y + faces[i].height*0.5 );
-      ellipse( frame, center, Size( faces[i].width*0.5, faces[i].height*0.5), 0, 0, 360, Scalar( 255, 0, 255 ), 4, 8, 0 );
+        VideoCapture capture;
+        Mat frame;
 
-      Mat faceROI = frame_gray( faces[i] );
-      std::vector<Rect> eyes;
+        //-- 1. Load the cascades
+        if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading face cascade\n"); return -1; };
+        if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading eyes cascade\n"); return -1; };
 
-      //-- In each face, detect eyes
-      eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CV_HAAR_SCALE_IMAGE, Size(30, 30) );
+        //-- 2. Read the video stream
+        capture.open( -1 );
+        if ( ! capture.isOpened() ) { printf("--(!)Error opening video capture\n"); return -1; }
 
-      for( int j = 0; j < eyes.size(); j++ )
-       {
-         Point center( faces[i].x + eyes[j].x + eyes[j].width*0.5, faces[i].y + eyes[j].y + eyes[j].height*0.5 );
-         int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 );
-         circle( frame, center, radius, Scalar( 255, 0, 0 ), 4, 8, 0 );
-       }
+        while (  capture.read(frame) )
+        {
+            if( frame.empty() )
+            {
+                printf(" --(!) No captured frame -- Break!");
+                break;
+            }
+
+            //-- 3. Apply the classifier to the frame
+            detectAndDisplay( frame );
+
+            int c = waitKey(10);
+            if( (char)c == 27 ) { break; } // escape
+        }
+        return 0;
+    }
+
+    /** @function detectAndDisplay */
+    void detectAndDisplay( Mat frame )
+    {
+        std::vector<Rect> faces;
+        Mat frame_gray;
+
+        cvtColor( frame, frame_gray, COLOR_BGR2GRAY );
+        equalizeHist( frame_gray, frame_gray );
+
+        //-- Detect faces
+        face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CASCADE_SCALE_IMAGE, Size(30, 30) );
+
+        for( size_t i = 0; i < faces.size(); i++ )
+        {
+            Point center( faces[i].x + faces[i].width/2, faces[i].y + faces[i].height/2 );
+            ellipse( frame, center, Size( faces[i].width/2, faces[i].height/2), 0, 0, 360, Scalar( 255, 0, 255 ), 4, 8, 0 );
+
+            Mat faceROI = frame_gray( faces[i] );
+            std::vector<Rect> eyes;
+
+            //-- In each face, detect eyes
+            eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CASCADE_SCALE_IMAGE, Size(30, 30) );
+
+            for( size_t j = 0; j < eyes.size(); j++ )
+            {
+                Point eye_center( faces[i].x + eyes[j].x + eyes[j].width/2, faces[i].y + eyes[j].y + eyes[j].height/2 );
+                int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 );
+                circle( frame, eye_center, radius, Scalar( 255, 0, 0 ), 4, 8, 0 );
+            }
+        }
+        //-- Show what you got
+        imshow( window_name, frame );
     }
-    //-- Show what you got
-    imshow( window_name, frame );
-   }
 
 Explanation
 ============
@@ -131,4 +130,3 @@ Result
    .. image:: images/Cascade_Classifier_Tutorial_Result_LBP.jpg
       :align: center
       :height: 300pt
-
diff --git a/doc/tutorials/tutorials.rst b/doc/tutorials/tutorials.rst
index c57166b0e..822c54d56 100644
--- a/doc/tutorials/tutorials.rst
+++ b/doc/tutorials/tutorials.rst
@@ -171,17 +171,17 @@ As always, we would be happy to hear your comments and receive your contribution
                  :width:  80pt
                  :alt: gpu icon
 
-*  :ref:`Table-Of-Content-Contrib`
+*  :ref:`Table-Of-Content-Bioinspired`
 
    .. tabularcolumns:: m{100pt} m{300pt}
    .. cssclass:: toctableopencv
 
-   =========== =======================================================
-   |Contrib|       Discover additional contribution to OpenCV.
+   ============= =======================================================
+   |Bioinspired|       Algorithms inspired from biological models.
 
-   =========== =======================================================
+   ============= =======================================================
 
-   .. |Contrib| image:: images/retina.jpg
+   .. |Bioinspired| image:: images/retina.jpg
                  :height: 80pt
                  :width:  80pt
                  :alt: gpu icon
@@ -235,6 +235,6 @@ As always, we would be happy to hear your comments and receive your contribution
    ml/table_of_content_ml/table_of_content_ml
    photo/table_of_content_photo/table_of_content_photo
    gpu/table_of_content_gpu/table_of_content_gpu
-   contrib/table_of_content_contrib/table_of_content_contrib
+   bioinspired/table_of_content_bioinspired/table_of_content_bioinspired
    ios/table_of_content_ios/table_of_content_ios
    general/table_of_content_general/table_of_content_general
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index 1d5909616..ed3b85a8f 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -5,5 +5,3 @@ install(FILES ${old_hdrs}
 install(FILES "opencv2/opencv.hpp"
     DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2
     COMPONENT main)
-
-
diff --git a/include/opencv/cv.h b/include/opencv/cv.h
index 5a517dc73..1ed020a35 100644
--- a/include/opencv/cv.h
+++ b/include/opencv/cv.h
@@ -73,4 +73,3 @@
 #endif //CV_IMPL
 
 #endif // __OPENCV_OLD_CV_H_
-
diff --git a/include/opencv/cxeigen.hpp b/include/opencv/cxeigen.hpp
index c503f713d..1f04d1a3a 100644
--- a/include/opencv/cxeigen.hpp
+++ b/include/opencv/cxeigen.hpp
@@ -46,4 +46,3 @@
 #include "opencv2/core/eigen.hpp"
 
 #endif
-
diff --git a/include/opencv2/opencv.hpp b/include/opencv2/opencv.hpp
index 020a45373..3b96bdd36 100644
--- a/include/opencv2/opencv.hpp
+++ b/include/opencv2/opencv.hpp
@@ -52,6 +52,7 @@
 #include "opencv2/calib3d.hpp"
 #include "opencv2/highgui.hpp"
 #include "opencv2/contrib.hpp"
+#include "opencv2/bioinspired.hpp"
 #include "opencv2/ml.hpp"
 
 #endif
diff --git a/ios/configure-device_xcode.sh b/ios/configure-device_xcode.sh
deleted file mode 100755
index 8c28a3e90..000000000
--- a/ios/configure-device_xcode.sh
+++ /dev/null
@@ -1 +0,0 @@
-cmake -GXcode -DCMAKE_TOOLCHAIN_FILE=../opencv/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake -DCMAKE_INSTALL_PREFIX=../OpenCV_iPhoneOS ../opencv 
diff --git a/ios/configure-simulator_xcode.sh b/ios/configure-simulator_xcode.sh
deleted file mode 100755
index 50e00261d..000000000
--- a/ios/configure-simulator_xcode.sh
+++ /dev/null
@@ -1 +0,0 @@
-cmake -GXcode -DCMAKE_TOOLCHAIN_FILE=../opencv/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake -DCMAKE_INSTALL_PREFIX=../OpenCV_iPhoneSimulator ../opencv 
diff --git a/ios/readme.txt b/ios/readme.txt
deleted file mode 100644
index 1441b241b..000000000
--- a/ios/readme.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Assuming that your build directory is on the same level that opencv source,
-From the build directory run
-  ../opencv/ios/configure-device_xcode.sh
-or
-  ../opencv/ios/configure-simulator_xcode.sh
-
-Then from the same folder invoke
-
-xcodebuild -sdk iphoneos -configuration Release -target ALL_BUILD
-xcodebuild -sdk iphoneos -configuration Release -target install install
-
-or
-
-xcodebuild -sdk iphonesimulator -configuration Release -target ALL_BUILD
-xcodebuild -sdk iphonesimulator -configuration Release -target install install
\ No newline at end of file
diff --git a/modules/androidcamera/camera_wrapper/camera_wrapper.cpp b/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
index 2d0ebc7a1..ca631fc21 100644
--- a/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
+++ b/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
@@ -1,5 +1,8 @@
-#if !defined(ANDROID_r2_2_0) && !defined(ANDROID_r2_3_3) && !defined(ANDROID_r3_0_1) && !defined(ANDROID_r4_0_0) && !defined(ANDROID_r4_0_3) && !defined(ANDROID_r4_1_1) && !defined(ANDROID_r4_2_0)
-# error Building camera wrapper for your version of Android is not supported by OpenCV. You need to modify OpenCV sources in order to compile camera wrapper for your version of Android.
+#if !defined(ANDROID_r2_2_0) && !defined(ANDROID_r2_3_3) && !defined(ANDROID_r3_0_1) && \
+ !defined(ANDROID_r4_0_0) && !defined(ANDROID_r4_0_3) && !defined(ANDROID_r4_1_1) && \
+ !defined(ANDROID_r4_2_0) && !defined(ANDROID_r4_3_0)
+# error Building camera wrapper for your version of Android is not supported by OpenCV.\
+ You need to modify OpenCV sources in order to compile camera wrapper for your version of Android.
 #endif
 
 #include <camera/Camera.h>
@@ -16,17 +19,18 @@
 //Include SurfaceTexture.h file with the SurfaceTexture class
 # include <gui/SurfaceTexture.h>
 # define MAGIC_OPENCV_TEXTURE_ID (0x10)
-#else // defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
-//TODO: This is either 2.2 or 2.3. Include the headers for ISurface.h access
-#if defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
-#include <gui/ISurface.h>
-#include <gui/BufferQueue.h>
+#elif defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
+# include <gui/ISurface.h>
+# include <gui/BufferQueue.h>
+#elif defined(ANDROID_r4_3_0)
+# include <gui/IGraphicBufferProducer.h>
+# include <gui/BufferQueue.h>
 #else
 # include <surfaceflinger/ISurface.h>
-#endif  // defined(ANDROID_r4_1_1)
-#endif  // defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
+#endif
 
 #include <string>
+#include <fstream>
 
 //undef logging macro from /system/core/libcutils/loghack.h
 #ifdef LOGD
@@ -45,7 +49,6 @@
 # undef LOGE
 #endif
 
-
 // LOGGING
 #include <android/log.h>
 #define CAMERA_LOG_TAG "OpenCV_NativeCamera"
@@ -60,7 +63,7 @@ using namespace android;
 
 void debugShowFPS();
 
-#if defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
+#if defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
 class ConsumerListenerStub: public BufferQueue::ConsumerListener
 {
 public:
@@ -73,6 +76,29 @@ public:
 };
 #endif
 
+std::string getProcessName()
+{
+    std::string result;
+    std::ifstream f;
+
+    f.open("/proc/self/cmdline");
+    if (f.is_open())
+    {
+        std::string fullPath;
+        std::getline(f, fullPath, '\0');
+        if (!fullPath.empty())
+        {
+            int i = fullPath.size()-1;
+            while ((i >= 0) && (fullPath[i] != '/')) i--;
+            result = fullPath.substr(i+1, std::string::npos);
+        }
+    }
+
+    f.close();
+
+    return result;
+}
+
 void debugShowFPS()
 {
     static int mFrameCount = 0;
@@ -280,7 +306,7 @@ public:
     }
 
     virtual void postData(int32_t msgType, const sp<IMemory>& dataPtr
-    #if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
+    #if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
                           ,camera_frame_metadata_t*
 #endif
                           )
@@ -361,7 +387,9 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
     typedef sp<Camera> (*Android22ConnectFuncType)();
     typedef sp<Camera> (*Android23ConnectFuncType)(int);
     typedef sp<Camera> (*Android3DConnectFuncType)(int, int);
+    typedef sp<Camera> (*Android43ConnectFuncType)(int, const String16&, int);
 
+    const int ANY_CAMERA_INDEX = -1;
     const int BACK_CAMERA_INDEX = 99;
     const int FRONT_CAMERA_INDEX = 98;
 
@@ -372,14 +400,24 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
     CAMERA_SUPPORT_MODE_ZSL = 0x08 /* Camera Sensor supports ZSL mode. */
     };
 
+    // used for Android 4.3
+    enum {
+        USE_CALLING_UID = -1
+    };
+
     const char Android22ConnectName[] = "_ZN7android6Camera7connectEv";
     const char Android23ConnectName[] = "_ZN7android6Camera7connectEi";
     const char Android3DConnectName[] = "_ZN7android6Camera7connectEii";
+    const char Android43ConnectName[] = "_ZN7android6Camera7connectEiRKNS_8String16Ei";
 
     int localCameraIndex = cameraId;
 
+    if (cameraId == ANY_CAMERA_INDEX)
+    {
+        localCameraIndex = 0;
+    }
 #if !defined(ANDROID_r2_2_0)
-    if (cameraId == BACK_CAMERA_INDEX)
+    else if (cameraId == BACK_CAMERA_INDEX)
     {
         LOGD("Back camera selected");
         for (int i = 0; i < Camera::getNumberOfCameras(); i++)
@@ -450,6 +488,12 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
         LOGD("Connecting to CameraService v 3D");
         camera = Android3DConnect(localCameraIndex, CAMERA_SUPPORT_MODE_2D);
     }
+    else if (Android43ConnectFuncType Android43Connect = (Android43ConnectFuncType)dlsym(CameraHALHandle, Android43ConnectName))
+    {
+        std::string currentProcName = getProcessName();
+        LOGD("Current process name for camera init: %s", currentProcName.c_str());
+        camera = Android43Connect(localCameraIndex, String16(currentProcName.c_str()), USE_CALLING_UID);
+    }
     else
     {
         dlclose(CameraHALHandle);
@@ -471,7 +515,7 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
     handler->camera = camera;
     handler->cameraId = localCameraIndex;
 
-    if (prevCameraParameters != 0)
+    if (prevCameraParameters != NULL)
     {
         LOGI("initCameraConnect: Setting paramers from previous camera handler");
         camera->setParameters(prevCameraParameters->flatten());
@@ -503,11 +547,11 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
         const char* available_focus_modes = handler->params.get(CameraParameters::KEY_SUPPORTED_FOCUS_MODES);
         if (available_focus_modes != 0)
         {
-        if (strstr(available_focus_modes, "continuous-video") != NULL)
-        {
-        handler->params.set(CameraParameters::KEY_FOCUS_MODE, CameraParameters::FOCUS_MODE_CONTINUOUS_VIDEO);
+            if (strstr(available_focus_modes, "continuous-video") != NULL)
+            {
+                handler->params.set(CameraParameters::KEY_FOCUS_MODE, CameraParameters::FOCUS_MODE_CONTINUOUS_VIDEO);
 
-        status_t resParams = handler->camera->setParameters(handler->params.flatten());
+                status_t resParams = handler->camera->setParameters(handler->params.flatten());
 
                 if (resParams != 0)
                 {
@@ -517,8 +561,8 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
                 {
                     LOGD("initCameraConnect: autofocus is set to mode \"continuous-video\"");
                 }
+            }
         }
-    }
 #endif
 
         //check if yuv420sp format available. Set this format as preview format.
@@ -560,26 +604,25 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
         }
     }
 
-    status_t pdstatus;
+    status_t bufferStatus;
 #if defined(ANDROID_r2_2_0)
-    pdstatus = camera->setPreviewDisplay(sp<ISurface>(0 /*new DummySurface*/));
-    if (pdstatus != 0)
-        LOGE("initCameraConnect: failed setPreviewDisplay(0) call; camera migth not work correctly on some devices");
+    bufferStatus = camera->setPreviewDisplay(sp<ISurface>(0 /*new DummySurface*/));
+    if (bufferStatus != 0)
+        LOGE("initCameraConnect: failed setPreviewDisplay(0) call (status %d); camera might not work correctly on some devices", bufferStatus);
 #elif defined(ANDROID_r2_3_3)
     /* Do nothing in case of 2.3 for now */
-
 #elif defined(ANDROID_r3_0_1) || defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
     sp<SurfaceTexture> surfaceTexture = new SurfaceTexture(MAGIC_OPENCV_TEXTURE_ID);
-    pdstatus = camera->setPreviewTexture(surfaceTexture);
-    if (pdstatus != 0)
-        LOGE("initCameraConnect: failed setPreviewTexture call; camera migth not work correctly");
-#elif defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0)
+    bufferStatus = camera->setPreviewTexture(surfaceTexture);
+    if (bufferStatus != 0)
+        LOGE("initCameraConnect: failed setPreviewTexture call (status %d); camera might not work correctly", bufferStatus);
+#elif defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
     sp<BufferQueue> bufferQueue = new BufferQueue();
     sp<BufferQueue::ConsumerListener> queueListener = new ConsumerListenerStub();
     bufferQueue->consumerConnect(queueListener);
-    pdstatus = camera->setPreviewTexture(bufferQueue);
-    if (pdstatus != 0)
-    LOGE("initCameraConnect: failed setPreviewTexture call; camera migth not work correctly");
+    bufferStatus = camera->setPreviewTexture(bufferQueue);
+    if (bufferStatus != 0)
+        LOGE("initCameraConnect: failed setPreviewTexture call; camera might not work correctly");
 #endif
 
 #if (defined(ANDROID_r2_2_0) || defined(ANDROID_r2_3_3) || defined(ANDROID_r3_0_1))
@@ -595,9 +638,9 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
 #endif //!(defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3))
 
     LOGD("Starting preview");
-    status_t resStart = camera->startPreview();
+    status_t previewStatus = camera->startPreview();
 
-    if (resStart != 0)
+    if (previewStatus != 0)
     {
         LOGE("initCameraConnect: startPreview() fails. Closing camera connection...");
         handler->closeCameraConnect();
@@ -605,7 +648,7 @@ CameraHandler* CameraHandler::initCameraConnect(const CameraCallback& callback,
     }
     else
     {
-    LOGD("Preview started successfully");
+        LOGD("Preview started successfully");
     }
 
     return handler;
@@ -620,9 +663,11 @@ void CameraHandler::closeCameraConnect()
     }
 
     camera->stopPreview();
+#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
+    camera->setPreviewCallbackFlags(CAMERA_FRAME_CALLBACK_FLAG_NOOP);
+#endif
     camera->disconnect();
     camera.clear();
-
     camera=NULL;
     // ATTENTION!!!!!!!!!!!!!!!!!!!!!!!!!!
     // When we set
@@ -863,14 +908,60 @@ void CameraHandler::applyProperties(CameraHandler** ppcameraHandler)
 
     if (*ppcameraHandler == 0)
     {
-        LOGE("applyProperties: Passed null *ppcameraHandler");
+        LOGE("applyProperties: Passed NULL *ppcameraHandler");
         return;
     }
 
-    LOGD("CameraHandler::applyProperties()");
-    CameraHandler* previousCameraHandler=*ppcameraHandler;
-    CameraParameters curCameraParameters(previousCameraHandler->params.flatten());
+    CameraParameters curCameraParameters((*ppcameraHandler)->params.flatten());
 
+#if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3) || defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
+    CameraHandler* handler=*ppcameraHandler;
+
+    handler->camera->stopPreview();
+    handler->camera->setPreviewCallbackFlags(CAMERA_FRAME_CALLBACK_FLAG_NOOP);
+
+    status_t reconnectStatus = handler->camera->reconnect();
+    if (reconnectStatus != 0)
+    {
+        LOGE("applyProperties: failed to reconnect camera (status %d)", reconnectStatus);
+        return;
+    }
+
+    handler->camera->setParameters(curCameraParameters.flatten());
+    handler->params.unflatten(curCameraParameters.flatten());
+
+    status_t bufferStatus;
+# if defined(ANDROID_r4_0_0) || defined(ANDROID_r4_0_3)
+    sp<SurfaceTexture> surfaceTexture = new SurfaceTexture(MAGIC_OPENCV_TEXTURE_ID);
+    bufferStatus = handler->camera->setPreviewTexture(surfaceTexture);
+    if (bufferStatus != 0)
+        LOGE("applyProperties: failed setPreviewTexture call (status %d); camera might not work correctly", bufferStatus);
+# elif defined(ANDROID_r4_1_1) || defined(ANDROID_r4_2_0) || defined(ANDROID_r4_3_0)
+    sp<BufferQueue> bufferQueue = new BufferQueue();
+    sp<BufferQueue::ConsumerListener> queueListener = new ConsumerListenerStub();
+    bufferQueue->consumerConnect(queueListener);
+    bufferStatus = handler->camera->setPreviewTexture(bufferQueue);
+    if (bufferStatus != 0)
+        LOGE("applyProperties: failed setPreviewTexture call; camera might not work correctly");
+# endif
+
+    handler->camera->setPreviewCallbackFlags( CAMERA_FRAME_CALLBACK_FLAG_ENABLE_MASK | CAMERA_FRAME_CALLBACK_FLAG_COPY_OUT_MASK);//with copy
+
+    LOGD("Starting preview");
+    status_t previewStatus = handler->camera->startPreview();
+
+    if (previewStatus != 0)
+    {
+        LOGE("initCameraConnect: startPreview() fails. Closing camera connection...");
+        handler->closeCameraConnect();
+        handler = NULL;
+    }
+    else
+    {
+        LOGD("Preview started successfully");
+    }
+#else
+    CameraHandler* previousCameraHandler=*ppcameraHandler;
     CameraCallback cameraCallback=previousCameraHandler->cameraCallback;
     void* userData=previousCameraHandler->userData;
     int cameraId=previousCameraHandler->cameraId;
@@ -879,7 +970,6 @@ void CameraHandler::applyProperties(CameraHandler** ppcameraHandler)
     previousCameraHandler->closeCameraConnect();
     LOGD("CameraHandler::applyProperties(): after previousCameraHandler->closeCameraConnect");
 
-
     LOGD("CameraHandler::applyProperties(): before initCameraConnect");
     CameraHandler* handler=initCameraConnect(cameraCallback, cameraId, userData, &curCameraParameters);
     LOGD("CameraHandler::applyProperties(): after initCameraConnect, handler=0x%x", (int)handler);
@@ -892,6 +982,7 @@ void CameraHandler::applyProperties(CameraHandler** ppcameraHandler)
         }
     }
     (*ppcameraHandler)=handler;
+#endif
 }
 
 
diff --git a/modules/androidcamera/camera_wrapper/camera_wrapper.h b/modules/androidcamera/camera_wrapper/camera_wrapper.h
index 88c9c4100..430dea2dd 100644
--- a/modules/androidcamera/camera_wrapper/camera_wrapper.h
+++ b/modules/androidcamera/camera_wrapper/camera_wrapper.h
@@ -14,4 +14,3 @@ double getCameraPropertyC(void* camera, int propIdx);
 void setCameraPropertyC(void* camera, int propIdx, double value);
 void applyCameraPropertiesC(void** camera);
 }
-
diff --git a/modules/androidcamera/include/camera_activity.hpp b/modules/androidcamera/include/camera_activity.hpp
index 8aa25b620..2af7befe3 100644
--- a/modules/androidcamera/include/camera_activity.hpp
+++ b/modules/androidcamera/include/camera_activity.hpp
@@ -44,4 +44,4 @@ private:
     int frameHeight;
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/androidcamera/src/camera_activity.cpp b/modules/androidcamera/src/camera_activity.cpp
index 3ce4089be..a265d2e8d 100644
--- a/modules/androidcamera/src/camera_activity.cpp
+++ b/modules/androidcamera/src/camera_activity.cpp
@@ -434,14 +434,14 @@ void CameraActivity::applyProperties()
 int CameraActivity::getFrameWidth()
 {
     if (frameWidth <= 0)
-    frameWidth = getProperty(ANDROID_CAMERA_PROPERTY_FRAMEWIDTH);
+        frameWidth = getProperty(ANDROID_CAMERA_PROPERTY_FRAMEWIDTH);
     return frameWidth;
 }
 
 int CameraActivity::getFrameHeight()
 {
     if (frameHeight <= 0)
-    frameHeight = getProperty(ANDROID_CAMERA_PROPERTY_FRAMEHEIGHT);
+        frameHeight = getProperty(ANDROID_CAMERA_PROPERTY_FRAMEHEIGHT);
     return frameHeight;
 }
 
diff --git a/modules/bioinspired/CMakeLists.txt b/modules/bioinspired/CMakeLists.txt
new file mode 100644
index 000000000..b0f152cfc
--- /dev/null
+++ b/modules/bioinspired/CMakeLists.txt
@@ -0,0 +1,2 @@
+set(the_description "Biologically inspired algorithms")
+ocv_define_module(bioinspired opencv_core OPTIONAL opencv_highgui opencv_ocl)
diff --git a/modules/bioinspired/doc/bioinspired.rst b/modules/bioinspired/doc/bioinspired.rst
new file mode 100644
index 000000000..6bffcdcf2
--- /dev/null
+++ b/modules/bioinspired/doc/bioinspired.rst
@@ -0,0 +1,10 @@
+********************************************************************
+bioinspired. Biologically inspired vision models and derivated tools
+********************************************************************
+
+The module provides biological visual systems models (human visual system and others). It also provides derivated objects that take advantage of those bio-inspired models.
+
+.. toctree::
+    :maxdepth: 2
+
+    Human retina documentation <retina/index>
diff --git a/modules/contrib/doc/retina/images/retinaInput.jpg b/modules/bioinspired/doc/retina/images/retinaInput.jpg
similarity index 100%
rename from modules/contrib/doc/retina/images/retinaInput.jpg
rename to modules/bioinspired/doc/retina/images/retinaInput.jpg
diff --git a/modules/contrib/doc/retina/images/retinaOutput_default.jpg b/modules/bioinspired/doc/retina/images/retinaOutput_default.jpg
similarity index 100%
rename from modules/contrib/doc/retina/images/retinaOutput_default.jpg
rename to modules/bioinspired/doc/retina/images/retinaOutput_default.jpg
diff --git a/modules/contrib/doc/retina/images/retinaOutput_realistic.jpg b/modules/bioinspired/doc/retina/images/retinaOutput_realistic.jpg
similarity index 100%
rename from modules/contrib/doc/retina/images/retinaOutput_realistic.jpg
rename to modules/bioinspired/doc/retina/images/retinaOutput_realistic.jpg
diff --git a/modules/contrib/doc/retina/index.rst b/modules/bioinspired/doc/retina/index.rst
similarity index 87%
rename from modules/contrib/doc/retina/index.rst
rename to modules/bioinspired/doc/retina/index.rst
index 5671df77c..242416baa 100644
--- a/modules/contrib/doc/retina/index.rst
+++ b/modules/bioinspired/doc/retina/index.rst
@@ -7,6 +7,8 @@ Retina
 ======
 .. ocv:class:: Retina : public Algorithm
 
+**Note** : do not forget that the retina model is included in the following namespace : *cv::bioinspired*.
+
 Introduction
 ++++++++++++
 
@@ -17,7 +19,7 @@ Class which provides the main controls to the Gipsa/Listic labs human  retina mo
 * peripheral vision for sensitive transient signals detection (motion and events) : the magnocellular pathway.
 
 From a general point of view, this filter whitens the image spectrum and corrects luminance thanks to local adaptation. An other important property is its hability to filter out spatio-temporal noise while enhancing details.
-This model originates from Jeanny Herault work [Herault2010]_. It has been involved in Alexandre Benoit phd and his current research [Benoit2010]_ (he currently maintains this module within OpenCV). It includes the work of other Jeanny's phd student such as [Chaix2007]_ and the log polar transformations of Barthelemy Durette described in Jeanny's book.
+This model originates from Jeanny Herault work [Herault2010]_. It has been involved in Alexandre Benoit phd and his current research [Benoit2010]_, [Strat2013]_ (he currently maintains this module within OpenCV). It includes the work of other Jeanny's phd student such as [Chaix2007]_ and the log polar transformations of Barthelemy Durette described in Jeanny's book.
 
 **NOTES :**
 
@@ -55,15 +57,20 @@ As observed in this preliminary demo, the retina can be settled up with various
 
 Here is an overview of the abstract Retina interface, allocate one instance with the *createRetina* functions.::
 
+  namespace cv{namespace bioinspired{
+
   class Retina : public Algorithm
   {
   public:
     // parameters setup instance
     struct RetinaParameters; // this class is detailled later
 
-    // main method for input frame processing
+    // main method for input frame processing (all use method, can also perform High Dynamic Range tone mapping)
     void run (InputArray inputImage);
 
+    // specific method aiming at correcting luminance only (faster High Dynamic Range tone mapping)
+    void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)
+
     // output buffers retreival methods
     // -> foveal color vision details channel with luminance and noise correction
     void getParvo (OutputArray retinaOutput_parvo);
@@ -99,7 +106,13 @@ Here is an overview of the abstract Retina interface, allocate one instance with
     // Allocators
     cv::Ptr<Retina> createRetina (Size inputSize);
     cv::Ptr<Retina> createRetina (Size inputSize, const bool colorMode, RETINA_COLORSAMPLINGMETHOD colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
+    }} // cv and bioinspired namespaces end
 
+.. Sample code::
+
+   * An example on retina tone mapping can be found at opencv_source_code/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp
+   * An example on retina tone mapping on video input can be found at opencv_source_code/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp
+   * A complete example illustrating the retina interface can be found at opencv_source_code/samples/cpp/retinaDemo.cpp
 
 Description
 +++++++++++
@@ -120,12 +133,20 @@ Use : this model can be used basically for spatio-temporal video effects but als
 
 * performing motion analysis also taking benefit of the previously cited properties  (check out the magnocellular retina channel output, by using the provided **getMagno** methods)
 
+* general image/video sequence description using either one or both channels. An example of the use of Retina in a Bag of Words approach is given in [Strat2013]_.
+
 Literature
 ==========
 For more information, refer to the following papers :
 
+* Model description :
+
 .. [Benoit2010] Benoit A., Caplier A., Durette B., Herault, J., "Using Human Visual System Modeling For Bio-Inspired Low Level Image Processing", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773. DOI <http://dx.doi.org/10.1016/j.cviu.2010.01.011>
 
+* Model use in a Bag of Words approach :
+
+.. [Strat2013] Strat S., Benoit A., Lambert P., "Retina enhanced SIFT descriptors for video indexing", CBMI2013, Veszprém, Hungary, 2013.
+
 * Please have a look at the reference work of Jeanny Herault that you can read in his book :
 
 .. [Herault2010] Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
@@ -138,6 +159,10 @@ This retina filter code includes the research contributions of phd/research coll
 
 * take a look at *imagelogpolprojection.hpp* to discover retina spatial log sampling which originates from Barthelemy Durette phd with Jeanny Herault. A Retina / V1 cortex projection is also proposed and originates from Jeanny's discussions. More informations in the above cited Jeanny Heraults's book.
 
+* Meylan&al work on HDR tone mapping that is implemented as a specific method within the model :
+
+.. [Meylan2007] L. Meylan , D. Alleysson, S. Susstrunk, "A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images", Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+
 Demos and experiments !
 =======================
 
@@ -161,12 +186,14 @@ Take a look at the provided C++ examples provided with OpenCV :
 
    Then, take a HDR image using bracketing with your camera and generate an OpenEXR image and then process it using the demo.
 
-   Typical use, supposing that you have the OpenEXR image *memorial.exr* (present in the samples/cpp/ folder)
+   Typical use, supposing that you have the OpenEXR image such as *memorial.exr* (present in the samples/cpp/ folder)
 
-   **OpenCVReleaseFolder/bin/OpenEXRimages_HighDynamicRange_Retina_toneMapping memorial.exr**
+   **OpenCVReleaseFolder/bin/OpenEXRimages_HighDynamicRange_Retina_toneMapping memorial.exr [optionnal: 'fast']**
 
       Note that some sliders are made available to allow you to play with luminance compression.
 
+      If not using the 'fast' option, then, tone mapping is performed using the full retina model [Benoit2010]_. It includes spectral whitening that allows luminance energy to be reduced. When using the 'fast' option, then, a simpler method is used, it is an adaptation of the algorithm presented in [Meylan2007]_. This method gives also good results and is faster to process but it sometimes requires some more parameters adjustement.
+
 
 Methods description
 ===================
@@ -176,8 +203,8 @@ Here are detailled the main methods to control the retina model
 Ptr<Retina>::createRetina
 +++++++++++++++++++++++++
 
-.. ocv:function:: Ptr<Retina> createRetina(Size inputSize)
-.. ocv:function:: Ptr<Retina> createRetina(Size inputSize, const bool colorMode, RETINA_COLORSAMPLINGMETHOD colorSamplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0 )
+.. ocv:function:: Ptr<cv::bioinspired::Retina> createRetina(Size inputSize)
+.. ocv:function:: Ptr<cv::bioinspired::Retina> createRetina(Size inputSize, const bool colorMode, cv::bioinspired::RETINA_COLORSAMPLINGMETHOD colorSamplingMethod = cv::bioinspired::RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0 )
 
     Constructors from standardized interfaces : retreive a smart pointer to a Retina instance
 
@@ -185,11 +212,11 @@ Ptr<Retina>::createRetina
     :param colorMode: the chosen processing mode : with or without color processing
     :param colorSamplingMethod: specifies which kind of color sampling will be used :
 
-        * RETINA_COLOR_RANDOM: each pixel position is either R, G or B in a random choice
+        * cv::bioinspired::RETINA_COLOR_RANDOM: each pixel position is either R, G or B in a random choice
 
-        * RETINA_COLOR_DIAGONAL: color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR...
+        * cv::bioinspired::RETINA_COLOR_DIAGONAL: color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR...
 
-        * RETINA_COLOR_BAYER: standard bayer sampling
+        * cv::bioinspired::RETINA_COLOR_BAYER: standard bayer sampling
 
     :param useRetinaLogSampling: activate retina log sampling, if true, the 2 following parameters can be used
     :param reductionFactor: only usefull if param useRetinaLogSampling=true, specifies the reduction factor of the output frame (as the center (fovea) is high resolution and corners can be underscaled, then a reduction of the output is allowed without precision leak
@@ -275,7 +302,7 @@ Retina::printSetup
 
     Outputs a string showing the used parameters setup
 
-    :return: a string which contains formatted parameters information
+    :return: a string which contains formated parameters information
 
 Retina::run
 +++++++++++
@@ -286,6 +313,18 @@ Retina::run
 
     :param inputImage: the input Mat image to be processed, can be gray level or BGR coded in any format (from 8bit to 16bits)
 
+Retina::applyFastToneMapping
+++++++++++++++++++++++++++++
+
+.. ocv:function:: void Retina::applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)
+
+    Method which processes an image in the aim to correct its luminance : correct backlight problems, enhance details in shadows. This method is designed to perform High Dynamic Range image tone mapping (compress >8bit/pixel images to 8bit/pixel). This is a simplified version of the Retina Parvocellular model (simplified version of the run/getParvo methods call) since it does not include the spatio-temporal filter modelling the Outer Plexiform Layer of the retina that performs spectral whitening and many other stuff. However, it works great for tone mapping and in a faster way.
+
+    Check the demos and experiments section to see examples and the way to perform tone mapping using the original retina model and the method.
+
+    :param inputImage: the input image to process (should be coded in float format : CV_32F, CV_32FC1, CV_32F_C3, CV_32F_C4, the 4th channel won't be considered).
+    :param outputToneMappedImage: the output 8bit/channel tone mapped image (CV_8U or CV_8UC3 format).
+
 Retina::setColorSaturation
 ++++++++++++++++++++++++++
 
diff --git a/modules/core/src/precomp.cpp b/modules/bioinspired/include/opencv2/bioinspired.hpp
similarity index 91%
rename from modules/core/src/precomp.cpp
rename to modules/bioinspired/include/opencv2/bioinspired.hpp
index e540cc5e8..5f2f8644d 100644
--- a/modules/core/src/precomp.cpp
+++ b/modules/bioinspired/include/opencv2/bioinspired.hpp
@@ -40,6 +40,11 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#ifndef __OPENCV_BIOINSPIRED_HPP__
+#define __OPENCV_BIOINSPIRED_HPP__
 
-/* End of file. */
+#include "opencv2/core.hpp"
+#include "opencv2/bioinspired/retina.hpp"
+#include "opencv2/bioinspired/retinafasttonemapping.hpp"
+
+#endif
diff --git a/modules/bioinspired/include/opencv2/bioinspired/bioinspired.hpp b/modules/bioinspired/include/opencv2/bioinspired/bioinspired.hpp
new file mode 100644
index 000000000..40be2854e
--- /dev/null
+++ b/modules/bioinspired/include/opencv2/bioinspired/bioinspired.hpp
@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
+
+#include "opencv2/bioinspired.hpp"
diff --git a/modules/contrib/include/opencv2/contrib/retina.hpp b/modules/bioinspired/include/opencv2/bioinspired/retina.hpp
similarity index 90%
rename from modules/contrib/include/opencv2/contrib/retina.hpp
rename to modules/bioinspired/include/opencv2/bioinspired/retina.hpp
index 579c15d1d..b4fda7038 100644
--- a/modules/contrib/include/opencv2/contrib/retina.hpp
+++ b/modules/bioinspired/include/opencv2/bioinspired/retina.hpp
@@ -6,12 +6,12 @@
  ** copy or use the software.
  **
  **
- ** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+ ** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
  ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
  **
  ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
  **
- **  Creation - enhancement process 2007-2011
+ **  Creation - enhancement process 2007-2013
  **      Author: Alexandre Benoit (benoit.alexandre.vision@gmail.com), LISTIC lab, Annecy le vieux, France
  **
  ** Theses algorithm have been developped by Alexandre BENOIT since his thesis with Alice Caplier at Gipsa-Lab (www.gipsa-lab.inpg.fr) and the research he pursues at LISTIC Lab (www.listic.univ-savoie.fr).
@@ -32,7 +32,7 @@
  ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
  **
- **               For Human Visual System tools (hvstools)
+ **               For Human Visual System tools (bioinspired)
  ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
  **
  ** Third party copyrights are property of their respective owners.
@@ -62,8 +62,8 @@
  ** the use of this software, even if advised of the possibility of such damage.
  *******************************************************************************/
 
-#ifndef __OPENCV_CONTRIB_RETINA_HPP__
-#define __OPENCV_CONTRIB_RETINA_HPP__
+#ifndef __OPENCV_BIOINSPIRED_RETINA_HPP__
+#define __OPENCV_BIOINSPIRED_RETINA_HPP__
 
 /*
  * Retina.hpp
@@ -73,13 +73,12 @@
  */
 
 #include "opencv2/core.hpp" // for all OpenCV core functionalities access, including cv::Exception support
-#include <valarray>
 
-namespace cv
-{
 
-enum RETINA_COLORSAMPLINGMETHOD
-{
+namespace cv{
+namespace bioinspired{
+
+enum {
     RETINA_COLOR_RANDOM, //!< each pixel position is either R, G or B in a random choice
     RETINA_COLOR_DIAGONAL,//!< color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR...
     RETINA_COLOR_BAYER//!< standard bayer sampling
@@ -241,6 +240,14 @@ public:
      */
     virtual void run(InputArray inputImage)=0;
 
+    /**
+     * method that applies a luminance correction (initially High Dynamic Range (HDR) tone mapping) using only the 2 local adaptation stages of the retina parvo channel : photoreceptors level and ganlion cells level. Spatio temporal filtering is applied but limited to temporal smoothing and eventually high frequencies attenuation. This is a lighter method than the one available using the regular run method. It is then faster but it does not include complete temporal filtering nor retina spectral whitening. Then, it can have a more limited effect on images with a very high dynamic range. This is an adptation of the original still image HDR tone mapping algorithm of David Alleyson, Sabine Susstruck and Laurence Meylan's work, please cite:
+    * -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+     @param inputImage the input image to process RGB or gray levels
+     @param outputToneMappedImage the output tone mapped image
+     */
+    virtual void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)=0;
+
     /**
      * accessor of the details channel of the retina (models foveal vision)
      * @param retinaOutput_parvo : the output buffer (reallocated if necessary), this output is rescaled for standard 8bits image processing use in OpenCV
@@ -295,8 +302,10 @@ public:
     virtual void activateContoursProcessing(const bool activate)=0;
 };
 CV_EXPORTS Ptr<Retina> createRetina(Size inputSize);
-CV_EXPORTS Ptr<Retina> createRetina(Size inputSize, const bool colorMode, RETINA_COLORSAMPLINGMETHOD colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
+CV_EXPORTS Ptr<Retina> createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
 
+CV_EXPORTS Ptr<Retina> createRetina_OCL(Size inputSize);
+CV_EXPORTS Ptr<Retina> createRetina_OCL(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
 }
-#endif /* __OPENCV_CONTRIB_RETINA_HPP__ */
-
+}
+#endif /* __OPENCV_BIOINSPIRED_RETINA_HPP__ */
diff --git a/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp b/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp
new file mode 100644
index 000000000..6c83f885c
--- /dev/null
+++ b/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp
@@ -0,0 +1,121 @@
+
+/*#******************************************************************************
+ ** IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ **
+ ** By downloading, copying, installing or using the software you agree to this license.
+ ** If you do not agree to this license, do not download, install,
+ ** copy or use the software.
+ **
+ **
+ ** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+ **
+ ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
+ **
+ **  Creation - enhancement process 2007-2013
+ **      Author: Alexandre Benoit (benoit.alexandre.vision@gmail.com), LISTIC lab, Annecy le vieux, France
+ **
+ ** Theses algorithm have been developped by Alexandre BENOIT since his thesis with Alice Caplier at Gipsa-Lab (www.gipsa-lab.inpg.fr) and the research he pursues at LISTIC Lab (www.listic.univ-savoie.fr).
+ ** Refer to the following research paper for more information:
+ ** Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+ ** This work have been carried out thanks to Jeanny Herault who's research and great discussions are the basis of all this work, please take a look at his book:
+ ** Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+ **
+ **
+ **
+ **
+ **
+ ** This class is based on image processing tools of the author and already used within the Retina class (this is the same code as method retina::applyFastToneMapping, but in an independent class, it is ligth from a memory requirement point of view). It implements an adaptation of the efficient tone mapping algorithm propose by David Alleyson, Sabine Susstruck and Laurence Meylan's work, please cite:
+ ** -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+ **
+ **
+ **                          License Agreement
+ **               For Open Source Computer Vision Library
+ **
+ ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
+ **
+ **               For Human Visual System tools (bioinspired)
+ ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
+ **
+ ** Third party copyrights are property of their respective owners.
+ **
+ ** Redistribution and use in source and binary forms, with or without modification,
+ ** are permitted provided that the following conditions are met:
+ **
+ ** * Redistributions of source code must retain the above copyright notice,
+ **    this list of conditions and the following disclaimer.
+ **
+ ** * Redistributions in binary form must reproduce the above copyright notice,
+ **    this list of conditions and the following disclaimer in the documentation
+ **    and/or other materials provided with the distribution.
+ **
+ ** * The name of the copyright holders may not be used to endorse or promote products
+ **    derived from this software without specific prior written permission.
+ **
+ ** This software is provided by the copyright holders and contributors "as is" and
+ ** any express or implied warranties, including, but not limited to, the implied
+ ** warranties of merchantability and fitness for a particular purpose are disclaimed.
+ ** In no event shall the Intel Corporation or contributors be liable for any direct,
+ ** indirect, incidental, special, exemplary, or consequential damages
+ ** (including, but not limited to, procurement of substitute goods or services;
+ ** loss of use, data, or profits; or business interruption) however caused
+ ** and on any theory of liability, whether in contract, strict liability,
+ ** or tort (including negligence or otherwise) arising in any way out of
+ ** the use of this software, even if advised of the possibility of such damage.
+ *******************************************************************************/
+
+#ifndef __OPENCV_BIOINSPIRED_RETINAFASTTONEMAPPING_HPP__
+#define __OPENCV_BIOINSPIRED_RETINAFASTTONEMAPPING_HPP__
+
+/*
+ * retinafasttonemapping.hpp
+ *
+ *  Created on: May 26, 2013
+ *      Author: Alexandre Benoit
+ */
+
+#include "opencv2/core.hpp" // for all OpenCV core functionalities access, including cv::Exception support
+
+namespace cv{
+namespace bioinspired{
+
+/**
+ * @class RetinaFastToneMappingImpl a wrapper class which allows the tone mapping algorithm of Meylan&al(2007) to be used with OpenCV.
+ * This algorithm is already implemented in thre Retina class (retina::applyFastToneMapping) but used it does not require all the retina model to be allocated. This allows a light memory use for low memory devices (smartphones, etc.
+ * As a summary, these are the model properties:
+ * => 2 stages of local luminance adaptation with a different local neighborhood for each.
+ * => first stage models the retina photorecetors local luminance adaptation
+ * => second stage models th ganglion cells local information adaptation
+ * => compared to the initial publication, this class uses spatio-temporal low pass filters instead of spatial only filters.
+ * ====> this can help noise robustness and temporal stability for video sequence use cases.
+ * for more information, read to the following papers :
+ *  Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+ * regarding spatio-temporal filter and the bigger retina model :
+ * Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+ */
+class CV_EXPORTS RetinaFastToneMapping : public Algorithm
+{
+public:
+
+    /**
+     * method that applies a luminance correction (initially High Dynamic Range (HDR) tone mapping) using only the 2 local adaptation stages of the retina parvocellular channel : photoreceptors level and ganlion cells level. Spatio temporal filtering is applied but limited to temporal smoothing and eventually high frequencies attenuation. This is a lighter method than the one available using the regular retina::run method. It is then faster but it does not include complete temporal filtering nor retina spectral whitening. Then, it can have a more limited effect on images with a very high dynamic range. This is an adptation of the original still image HDR tone mapping algorithm of David Alleyson, Sabine Susstruck and Laurence Meylan's work, please cite:
+    * -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+     @param inputImage the input image to process RGB or gray levels
+     @param outputToneMappedImage the output tone mapped image
+     */
+    virtual void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)=0;
+
+    /**
+     * setup method that updates tone mapping behaviors by adjusing the local luminance computation area
+     * @param photoreceptorsNeighborhoodRadius the first stage local adaptation area
+     * @param ganglioncellsNeighborhoodRadius the second stage local adaptation area
+     * @param meanLuminanceModulatorK the factor applied to modulate the meanLuminance information (default is 1, see reference paper)
+     */
+    virtual void setup(const float photoreceptorsNeighborhoodRadius=3.f, const float ganglioncellsNeighborhoodRadius=1.f, const float meanLuminanceModulatorK=1.f)=0;
+};
+
+CV_EXPORTS Ptr<RetinaFastToneMapping> createRetinaFastToneMapping(Size inputSize);
+
+}
+}
+#endif /* __OPENCV_BIOINSPIRED_RETINAFASTTONEMAPPING_HPP__ */
diff --git a/modules/contrib/src/basicretinafilter.cpp b/modules/bioinspired/src/basicretinafilter.cpp
similarity index 98%
rename from modules/contrib/src/basicretinafilter.cpp
rename to modules/bioinspired/src/basicretinafilter.cpp
index 020b8f04e..7e7b467fa 100644
--- a/modules/contrib/src/basicretinafilter.cpp
+++ b/modules/bioinspired/src/basicretinafilter.cpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -72,7 +72,8 @@
 
 namespace cv
 {
-
+namespace bioinspired
+{
 // @author Alexandre BENOIT, benoit.alexandre.vision@gmail.com, LISTIC : www.listic.univ-savoie.fr Gipsa-Lab, France: www.gipsa-lab.inpg.fr/
 
 //////////////////////////////////////////////////////////
@@ -883,4 +884,5 @@ void BasicRetinaFilter::_verticalAnticausalFilter_Irregular_multGain(float *outp
     }
 
 }
-}
+}// end of namespace bioinspired
+}// end of namespace cv
diff --git a/modules/contrib/src/basicretinafilter.hpp b/modules/bioinspired/src/basicretinafilter.hpp
similarity index 98%
rename from modules/contrib/src/basicretinafilter.hpp
rename to modules/bioinspired/src/basicretinafilter.hpp
index a2ece415d..323bff940 100644
--- a/modules/contrib/src/basicretinafilter.hpp
+++ b/modules/bioinspired/src/basicretinafilter.hpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -113,6 +113,8 @@
 //#define __BASIC_RETINA_ELEMENT_DEBUG
 
 namespace cv
+{
+namespace bioinspired
 {
     class BasicRetinaFilter
     {
@@ -287,7 +289,7 @@ namespace cv
         * @param maxInputValue: the maximum amplitude value measured after local adaptation processing (c.f. function runFilter_LocalAdapdation & runFilter_LocalAdapdation_autonomous)
         * @param meanLuminance: the a priori meann luminance of the input data (should be 128 for 8bits images but can vary greatly in case of High Dynamic Range Images (HDRI)
         */
-        void setV0CompressionParameterToneMapping(const float v0, const float maxInputValue, const float meanLuminance=128.0f){ _v0=v0*maxInputValue; _localLuminanceFactor=1.0f; _localLuminanceAddon=meanLuminance*_v0; _maxInputValue=maxInputValue;};
+        void setV0CompressionParameterToneMapping(const float v0, const float maxInputValue, const float meanLuminance=128.0f){ _v0=v0*maxInputValue; _localLuminanceFactor=1.0f; _localLuminanceAddon=meanLuminance*v0; _maxInputValue=maxInputValue;};
 
         /**
         * update compression parameters while keeping v0 parameter value
@@ -650,7 +652,6 @@ namespace cv
 
     };
 
-}
+}// end of namespace bioinspired
+}// end of namespace cv
 #endif
-
-
diff --git a/modules/contrib/src/imagelogpolprojection.cpp b/modules/bioinspired/src/imagelogpolprojection.cpp
similarity index 98%
rename from modules/contrib/src/imagelogpolprojection.cpp
rename to modules/bioinspired/src/imagelogpolprojection.cpp
index 22f5214e7..0a4c1ed0d 100644
--- a/modules/contrib/src/imagelogpolprojection.cpp
+++ b/modules/bioinspired/src/imagelogpolprojection.cpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -72,7 +72,8 @@
 
 namespace cv
 {
-
+namespace bioinspired
+{
 // constructor
 ImageLogPolProjection::ImageLogPolProjection(const unsigned int nbRows, const unsigned int nbColumns, const PROJECTIONTYPE projection, const bool colorModeCapable)
 :BasicRetinaFilter(nbRows, nbColumns),
@@ -446,4 +447,5 @@ std::valarray<float> &ImageLogPolProjection::runProjection(const std::valarray<f
     return _sampledFrame;
 }
 
-}
+}// end of namespace bioinspired
+}// end of namespace cv
diff --git a/modules/contrib/src/imagelogpolprojection.hpp b/modules/bioinspired/src/imagelogpolprojection.hpp
similarity index 96%
rename from modules/contrib/src/imagelogpolprojection.hpp
rename to modules/bioinspired/src/imagelogpolprojection.hpp
index 1572ee83e..41ecd5eaf 100644
--- a/modules/contrib/src/imagelogpolprojection.hpp
+++ b/modules/bioinspired/src/imagelogpolprojection.hpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -103,6 +103,8 @@
 
 namespace cv
 {
+namespace bioinspired
+{
 
 class ImageLogPolProjection:public BasicRetinaFilter
 {
@@ -236,5 +238,6 @@ private:
 
 };
 
-}
+}// end of namespace bioinspired
+}// end of namespace cv
 #endif /*IMAGELOGPOLPROJECTION_H_*/
diff --git a/modules/contrib/src/magnoretinafilter.cpp b/modules/bioinspired/src/magnoretinafilter.cpp
similarity index 95%
rename from modules/contrib/src/magnoretinafilter.cpp
rename to modules/bioinspired/src/magnoretinafilter.cpp
index 48e10cf53..81fdb1df5 100644
--- a/modules/contrib/src/magnoretinafilter.cpp
+++ b/modules/bioinspired/src/magnoretinafilter.cpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -72,6 +72,8 @@
 
 namespace cv
 {
+namespace bioinspired
+{
 // Constructor and Desctructor of the OPL retina filter
 MagnoRetinaFilter::MagnoRetinaFilter(const unsigned int NBrows, const unsigned int NBcolumns)
 :BasicRetinaFilter(NBrows, NBcolumns, 2),
@@ -206,6 +208,5 @@ const std::valarray<float> &MagnoRetinaFilter::runFilter(const std::valarray<flo
 
     return (*_magnoYOutput);
 }
-}
-
-
+}// end of namespace bioinspired
+}// end of namespace cv
diff --git a/modules/contrib/src/magnoretinafilter.hpp b/modules/bioinspired/src/magnoretinafilter.hpp
similarity index 96%
rename from modules/contrib/src/magnoretinafilter.hpp
rename to modules/bioinspired/src/magnoretinafilter.hpp
index 991e75daf..e06d14ddc 100644
--- a/modules/contrib/src/magnoretinafilter.hpp
+++ b/modules/bioinspired/src/magnoretinafilter.hpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -99,7 +99,8 @@
 
 namespace cv
 {
-
+namespace bioinspired
+{
     class MagnoRetinaFilter: public BasicRetinaFilter
     {
     public:
@@ -238,8 +239,7 @@ namespace cv
 #endif
     };
 
-}
+}// end of namespace bioinspired
+}// end of namespace cv
 
 #endif /*MagnoRetinaFilter_H_*/
-
-
diff --git a/modules/bioinspired/src/opencl/retina_kernel.cl b/modules/bioinspired/src/opencl/retina_kernel.cl
new file mode 100644
index 000000000..169be4d27
--- /dev/null
+++ b/modules/bioinspired/src/opencl/retina_kernel.cl
@@ -0,0 +1,779 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+//data (which is float) is aligend in 32 bytes
+#define WIDTH_MULTIPLE (32 >> 2)
+
+/////////////////////////////////////////////////////////
+//*******************************************************
+// basicretinafilter
+//////////////// _spatiotemporalLPfilter ////////////////
+//_horizontalCausalFilter_addInput
+kernel void horizontalCausalFilter_addInput(
+    global const float * input,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const int in_offset,
+    const int out_offset,
+    const float _tau,
+    const float _a
+)
+{
+    int gid = get_global_id(0);
+    if(gid >= rows)
+    {
+        return;
+    }
+
+    global const float * iptr =
+        input  + mad24(gid, elements_per_row, in_offset / 4);
+    global float * optr =
+        output + mad24(gid, elements_per_row, out_offset / 4);
+
+    float res;
+    float4 in_v4, out_v4, res_v4 = (float4)(0);
+    //vectorize to increase throughput
+    for(int i = 0; i < cols / 4; ++i, iptr += 4, optr += 4)
+    {
+        in_v4  = vload4(0, iptr);
+        out_v4 = vload4(0, optr);
+
+        res_v4.x = in_v4.x + _tau * out_v4.x + _a * res_v4.w;
+        res_v4.y = in_v4.y + _tau * out_v4.y + _a * res_v4.x;
+        res_v4.z = in_v4.z + _tau * out_v4.z + _a * res_v4.y;
+        res_v4.w = in_v4.w + _tau * out_v4.w + _a * res_v4.z;
+
+        vstore4(res_v4, 0, optr);
+    }
+    res = res_v4.w;
+    // there may be left some
+    for(int i = 0; i < cols % 4;  ++i, ++iptr, ++optr)
+    {
+        res = *iptr + _tau * *optr + _a * res;
+        *optr = res;
+    }
+}
+
+//_horizontalAnticausalFilter
+kernel void horizontalAnticausalFilter(
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const int out_offset,
+    const float _a
+)
+{
+    int gid = get_global_id(0);
+    if(gid >= rows)
+    {
+        return;
+    }
+
+    global float * optr = output +
+                          mad24(gid + 1, elements_per_row, - 1 + out_offset / 4);
+
+    float4 result_v4 = (float4)(0), out_v4;
+    float result = 0;
+    // we assume elements_per_row is multple of WIDTH_MULTIPLE
+    for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr)
+    {
+        if(i >= elements_per_row - cols)
+        {
+            result = *optr + _a * result;
+        }
+        *optr = result;
+    }
+    result_v4.x = result;
+    optr -= 3;
+    for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4)
+    {
+        // shift left, `offset` is type `size_t` so it cannot be negative
+        out_v4 = vload4(0, optr);
+
+        result_v4.w = out_v4.w + _a * result_v4.x;
+        result_v4.z = out_v4.z + _a * result_v4.w;
+        result_v4.y = out_v4.y + _a * result_v4.z;
+        result_v4.x = out_v4.x + _a * result_v4.y;
+
+        vstore4(result_v4, 0, optr);
+    }
+}
+
+//_verticalCausalFilter
+kernel void verticalCausalFilter(
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const int out_offset,
+    const float _a
+)
+{
+    int gid = get_global_id(0);
+    if(gid >= cols)
+    {
+        return;
+    }
+
+    global float * optr = output + gid + out_offset / 4;
+    float result = 0;
+    for(int i = 0; i < rows; ++i, optr += elements_per_row)
+    {
+        result = *optr + _a * result;
+        *optr = result;
+    }
+}
+
+//_verticalCausalFilter
+kernel void verticalAnticausalFilter_multGain(
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const int out_offset,
+    const float _a,
+    const float _gain
+)
+{
+    int gid = get_global_id(0);
+    if(gid >= cols)
+    {
+        return;
+    }
+
+    global float * optr = output + (rows - 1) * elements_per_row + gid + out_offset / 4;
+    float result = 0;
+    for(int i = 0; i < rows; ++i, optr -= elements_per_row)
+    {
+        result = *optr + _a * result;
+        *optr = _gain * result;
+    }
+}
+//
+// end of _spatiotemporalLPfilter
+/////////////////////////////////////////////////////////////////////
+
+//////////////// horizontalAnticausalFilter_Irregular ////////////////
+kernel void horizontalAnticausalFilter_Irregular(
+    global float * output,
+    global float * buffer,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const int out_offset,
+    const int buffer_offset
+)
+{
+    int gid = get_global_id(0);
+    if(gid >= rows)
+    {
+        return;
+    }
+
+    global float * optr =
+        output + mad24(rows - gid, elements_per_row, -1 + out_offset / 4);
+    global float * bptr =
+        buffer + mad24(rows - gid, elements_per_row, -1 + buffer_offset / 4);
+
+    float4 buf_v4, out_v4, res_v4 = (float4)(0);
+    float result = 0;
+    // we assume elements_per_row is multple of WIDTH_MULTIPLE
+    for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr, -- bptr)
+    {
+        if(i >= elements_per_row - cols)
+        {
+            result = *optr + *bptr * result;
+        }
+        *optr = result;
+    }
+    res_v4.x = result;
+    optr -= 3;
+    bptr -= 3;
+    for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4, bptr -= 4)
+    {
+        buf_v4 = vload4(0, bptr);
+        out_v4 = vload4(0, optr);
+
+        res_v4.w = out_v4.w + buf_v4.w * res_v4.x;
+        res_v4.z = out_v4.z + buf_v4.z * res_v4.w;
+        res_v4.y = out_v4.y + buf_v4.y * res_v4.z;
+        res_v4.x = out_v4.x + buf_v4.x * res_v4.y;
+
+        vstore4(res_v4, 0, optr);
+    }
+}
+
+//////////////// verticalCausalFilter_Irregular ////////////////
+kernel void verticalCausalFilter_Irregular(
+    global float * output,
+    global float * buffer,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const int out_offset,
+    const int buffer_offset
+)
+{
+    int gid = get_global_id(0);
+    if(gid >= cols)
+    {
+        return;
+    }
+
+    global float * optr = output + gid + out_offset / 4;
+    global float * bptr = buffer + gid + buffer_offset / 4;
+    float result = 0;
+    for(int i = 0; i < rows; ++i, optr += elements_per_row, bptr += elements_per_row)
+    {
+        result = *optr + *bptr * result;
+        *optr = result;
+    }
+}
+
+//////////////// _adaptiveHorizontalCausalFilter_addInput ////////////////
+kernel void adaptiveHorizontalCausalFilter_addInput(
+    global const float * input,
+    global const float * gradient,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const int in_offset,
+    const int grad_offset,
+    const int out_offset
+)
+{
+    int gid = get_global_id(0);
+    if(gid >= rows)
+    {
+        return;
+    }
+
+    global const float * iptr =
+        input + mad24(gid, elements_per_row, in_offset / 4);
+    global const float * gptr =
+        gradient + mad24(gid, elements_per_row, grad_offset / 4);
+    global float * optr =
+        output + mad24(gid, elements_per_row, out_offset / 4);
+
+    float4 in_v4, grad_v4, out_v4, res_v4 = (float4)(0);
+    for(int i = 0; i < cols / 4; ++i, iptr += 4, gptr += 4, optr += 4)
+    {
+        in_v4   = vload4(0, iptr);
+        grad_v4 = vload4(0, gptr);
+
+        res_v4.x = in_v4.x + grad_v4.x * res_v4.w;
+        res_v4.y = in_v4.y + grad_v4.y * res_v4.x;
+        res_v4.z = in_v4.z + grad_v4.z * res_v4.y;
+        res_v4.w = in_v4.w + grad_v4.w * res_v4.z;
+
+        vstore4(res_v4, 0, optr);
+    }
+    for(int i = 0; i < cols % 4; ++i, ++iptr, ++gptr, ++optr)
+    {
+        res_v4.w = *iptr + *gptr * res_v4.w;
+        *optr = res_v4.w;
+    }
+}
+
+//////////////// _adaptiveVerticalAnticausalFilter_multGain ////////////////
+kernel void adaptiveVerticalAnticausalFilter_multGain(
+    global const float * gradient,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const int grad_offset,
+    const int out_offset,
+    const float gain
+)
+{
+    int gid = get_global_id(0);
+    if(gid >= cols)
+    {
+        return;
+    }
+
+    int start_idx = mad24(rows - 1, elements_per_row, gid);
+
+    global const float * gptr = gradient + start_idx + grad_offset / 4;
+    global float * optr = output + start_idx + out_offset / 4;
+
+    float result = 0;
+    for(int i = 0; i < rows; ++i, gptr -= elements_per_row, optr -= elements_per_row)
+    {
+        result = *optr + *gptr * result;
+        *optr = gain * result;
+    }
+}
+
+//////////////// _localLuminanceAdaptation ////////////////
+// FIXME:
+//  This kernel seems to have precision problem on GPU
+kernel void localLuminanceAdaptation(
+    global const float * luma,
+    global const float * input,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const float _localLuminanceAddon,
+    const float _localLuminanceFactor,
+    const float _maxInputValue
+)
+{
+    int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+    int offset = mad24(gidy, elements_per_row, gidx);
+
+    float X0 = luma[offset] * _localLuminanceFactor + _localLuminanceAddon;
+    float input_val = input[offset];
+    // output of the following line may be different between GPU and CPU
+    output[offset] = (_maxInputValue + X0) * input_val / (input_val + X0 + 0.00000000001f);
+}
+// end of basicretinafilter
+//*******************************************************
+/////////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////////
+//******************************************************
+// magno
+// TODO: this kernel has too many buffer accesses, better to make it
+//   vector read/write for fetch efficiency
+kernel void amacrineCellsComputing(
+    global const float * opl_on,
+    global const float * opl_off,
+    global float * prev_in_on,
+    global float * prev_in_off,
+    global float * out_on,
+    global float * out_off,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const float coeff
+)
+{
+    int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+
+    int offset = mad24(gidy, elements_per_row, gidx);
+    opl_on      += offset;
+    opl_off     += offset;
+    prev_in_on  += offset;
+    prev_in_off += offset;
+    out_on      += offset;
+    out_off     += offset;
+
+    float magnoXonPixelResult = coeff * (*out_on + *opl_on - *prev_in_on);
+    *out_on = fmax(magnoXonPixelResult, 0);
+    float magnoXoffPixelResult = coeff * (*out_off + *opl_off - *prev_in_off);
+    *out_off = fmax(magnoXoffPixelResult, 0);
+
+    *prev_in_on = *opl_on;
+    *prev_in_off = *opl_off;
+}
+
+/////////////////////////////////////////////////////////
+//******************************************************
+// parvo
+// TODO: this kernel has too many buffer accesses, needs optimization
+kernel void OPL_OnOffWaysComputing(
+    global float4 * photo_out,
+    global float4 * horiz_out,
+    global float4 * bipol_on,
+    global float4 * bipol_off,
+    global float4 * parvo_on,
+    global float4 * parvo_off,
+    const int cols,
+    const int rows,
+    const int elements_per_row
+)
+{
+    int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx * 4 >= cols || gidy >= rows)
+    {
+        return;
+    }
+    // we assume elements_per_row must be multiples of 4
+    int offset = mad24(gidy, elements_per_row >> 2, gidx);
+    photo_out += offset;
+    horiz_out += offset;
+    bipol_on  += offset;
+    bipol_off += offset;
+    parvo_on  += offset;
+    parvo_off += offset;
+
+    float4 diff = *photo_out - *horiz_out;
+    float4 isPositive;// = convert_float4(diff > (float4)(0.0f, 0.0f, 0.0f, 0.0f));
+    isPositive.x = diff.x > 0.0f;
+    isPositive.y = diff.y > 0.0f;
+    isPositive.z = diff.z > 0.0f;
+    isPositive.w = diff.w > 0.0f;
+    float4 res_on  = isPositive * diff;
+    float4 res_off = (isPositive - (float4)(1.0f)) * diff;
+
+    *bipol_on = res_on;
+    *parvo_on = res_on;
+
+    *bipol_off = res_off;
+    *parvo_off = res_off;
+}
+
+/////////////////////////////////////////////////////////
+//******************************************************
+// retinacolor
+inline int bayerSampleOffset(int step, int rows, int x, int y)
+{
+    return mad24(y, step, x) +
+           ((y % 2) + (x % 2)) * rows * step;
+}
+
+
+/////// colorMultiplexing //////
+kernel void runColorMultiplexingBayer(
+    global const float * input,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row
+)
+{
+    int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+
+    int offset = mad24(gidy, elements_per_row, gidx);
+    output[offset] = input[bayerSampleOffset(elements_per_row, rows, gidx, gidy)];
+}
+
+kernel void runColorDemultiplexingBayer(
+    global const float * input,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row
+)
+{
+    int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+
+    int offset = mad24(gidy, elements_per_row, gidx);
+    output[bayerSampleOffset(elements_per_row, rows, gidx, gidy)] = input[offset];
+}
+
+kernel void demultiplexAssign(
+    global const float * input,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row
+)
+{
+    int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+
+    int offset = bayerSampleOffset(elements_per_row, rows, gidx, gidy);
+    output[offset] = input[offset];
+}
+
+
+//// normalizeGrayOutputCentredSigmoide
+kernel void normalizeGrayOutputCentredSigmoide(
+    global const float * input,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const float meanval,
+    const float X0
+)
+
+{
+    int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+    int offset = mad24(gidy, elements_per_row, gidx);
+
+    float input_val = input[offset];
+    output[offset] = meanval +
+                     (meanval + X0) * (input_val - meanval) / (fabs(input_val - meanval) + X0);
+}
+
+//// normalize by photoreceptors density
+kernel void normalizePhotoDensity(
+    global const float * chroma,
+    global const float * colorDensity,
+    global const float * multiplex,
+    global float * luma,
+    global float * demultiplex,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const float pG
+)
+{
+    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+    const int offset = mad24(gidy, elements_per_row, gidx);
+    int index = offset;
+
+    float Cr = chroma[index] * colorDensity[index];
+    index += elements_per_row * rows;
+    float Cg = chroma[index] * colorDensity[index];
+    index += elements_per_row * rows;
+    float Cb = chroma[index] * colorDensity[index];
+
+    const float luma_res = (Cr + Cg + Cb) * pG;
+    luma[offset] = luma_res;
+    demultiplex[bayerSampleOffset(elements_per_row, rows, gidx, gidy)] =
+        multiplex[offset] - luma_res;
+}
+
+
+
+//////// computeGradient ///////
+// TODO:
+// this function maybe accelerated by image2d_t or lds
+kernel void computeGradient(
+    global const float * luma,
+    global float * gradient,
+    const int cols,
+    const int rows,
+    const int elements_per_row
+)
+{
+    int gidx = get_global_id(0) + 2, gidy = get_global_id(1) + 2;
+    if(gidx >= cols - 2 || gidy >= rows - 2)
+    {
+        return;
+    }
+    int offset = mad24(gidy, elements_per_row, gidx);
+    luma += offset;
+
+    // horizontal and vertical local gradients
+    const float v_grad = fabs(luma[elements_per_row] - luma[- elements_per_row]);
+    const float h_grad = fabs(luma[1] - luma[-1]);
+
+    // neighborhood horizontal and vertical gradients
+    const float cur_val  = luma[0];
+    const float v_grad_p = fabs(cur_val - luma[- 2 * elements_per_row]);
+    const float h_grad_p = fabs(cur_val - luma[- 2]);
+    const float v_grad_n = fabs(cur_val - luma[2 * elements_per_row]);
+    const float h_grad_n = fabs(cur_val - luma[2]);
+
+    const float horiz_grad = 0.5f * h_grad + 0.25f * (h_grad_p + h_grad_n);
+    const float verti_grad = 0.5f * v_grad + 0.25f * (v_grad_p + v_grad_n);
+    const bool is_vertical_greater = horiz_grad < verti_grad;
+
+    gradient[offset + elements_per_row * rows] = is_vertical_greater ? 0.06f : 0.57f;
+    gradient[offset                          ] = is_vertical_greater ? 0.57f : 0.06f;
+}
+
+
+/////// substractResidual ///////
+kernel void substractResidual(
+    global float * input,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const float pR,
+    const float pG,
+    const float pB
+)
+{
+    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+    int indices [3] =
+    {
+        mad24(gidy, elements_per_row, gidx),
+        mad24(gidy + rows, elements_per_row, gidx),
+        mad24(gidy + 2 * rows, elements_per_row, gidx)
+    };
+    float vals[3] = {input[indices[0]], input[indices[1]], input[indices[2]]};
+    float residu = pR * vals[0] + pG * vals[1] + pB * vals[2];
+
+    input[indices[0]] = vals[0] - residu;
+    input[indices[1]] = vals[1] - residu;
+    input[indices[2]] = vals[2] - residu;
+}
+
+///// clipRGBOutput_0_maxInputValue /////
+kernel void clipRGBOutput_0_maxInputValue(
+    global float * input,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const float maxVal
+)
+{
+    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+    const int offset = mad24(gidy, elements_per_row, gidx);
+    float val = input[offset];
+    val = clamp(val, 0.0f, maxVal);
+    input[offset] = val;
+}
+
+//// normalizeGrayOutputNearZeroCentreredSigmoide ////
+kernel void normalizeGrayOutputNearZeroCentreredSigmoide(
+    global float * input,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const float maxVal,
+    const float X0cube
+)
+{
+    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+    const int offset = mad24(gidy, elements_per_row, gidx);
+    float currentCubeLuminance = input[offset];
+    currentCubeLuminance = currentCubeLuminance * currentCubeLuminance * currentCubeLuminance;
+    output[offset] = currentCubeLuminance * X0cube / (X0cube + currentCubeLuminance);
+}
+
+//// centerReductImageLuminance ////
+kernel void centerReductImageLuminance(
+    global float * input,
+    const int cols,
+    const int rows,
+    const int elements_per_row,
+    const float mean,
+    const float std_dev
+)
+{
+    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+    const int offset = mad24(gidy, elements_per_row, gidx);
+
+    float val = input[offset];
+    input[offset] = (val - mean) / std_dev;
+}
+
+//// inverseValue ////
+kernel void inverseValue(
+    global float * input,
+    const int cols,
+    const int rows,
+    const int elements_per_row
+)
+{
+    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+    const int offset = mad24(gidy, elements_per_row, gidx);
+    input[offset] = 1.f / input[offset];
+}
+
+#define CV_PI 3.1415926535897932384626433832795
+
+//// _processRetinaParvoMagnoMapping ////
+kernel void processRetinaParvoMagnoMapping(
+    global float * parvo,
+    global float * magno,
+    global float * output,
+    const int cols,
+    const int rows,
+    const int halfCols,
+    const int halfRows,
+    const int elements_per_row,
+    const float minDistance
+)
+{
+    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    if(gidx >= cols || gidy >= rows)
+    {
+        return;
+    }
+    const int offset = mad24(gidy, elements_per_row, gidx);
+
+    float distanceToCenter =
+        sqrt(((float)(gidy - halfRows) * (gidy - halfRows) + (gidx - halfCols) * (gidx - halfCols)));
+
+    float a = distanceToCenter < minDistance ?
+              (0.5f + 0.5f * (float)cos(CV_PI * distanceToCenter / minDistance)) : 0;
+    float b = 1.f - a;
+
+    output[offset] = parvo[offset] * a + magno[offset] * b;
+}
diff --git a/modules/contrib/src/parvoretinafilter.cpp b/modules/bioinspired/src/parvoretinafilter.cpp
similarity index 96%
rename from modules/contrib/src/parvoretinafilter.cpp
rename to modules/bioinspired/src/parvoretinafilter.cpp
index 6043b7232..a276d97a4 100644
--- a/modules/contrib/src/parvoretinafilter.cpp
+++ b/modules/bioinspired/src/parvoretinafilter.cpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -73,6 +73,8 @@
 
 namespace cv
 {
+namespace bioinspired
+{
 //////////////////////////////////////////////////////////
 //                 OPL RETINA FILTER
 //////////////////////////////////////////////////////////
@@ -227,5 +229,5 @@ void ParvoRetinaFilter::_OPL_OnOffWaysComputing() // WARNING : this method requi
     }
 #endif
 }
-}
-
+}// end of namespace bioinspired
+}// end of namespace cv
diff --git a/modules/contrib/src/parvoretinafilter.hpp b/modules/bioinspired/src/parvoretinafilter.hpp
similarity index 97%
rename from modules/contrib/src/parvoretinafilter.hpp
rename to modules/bioinspired/src/parvoretinafilter.hpp
index 40f71ebe9..f5ffa1a06 100644
--- a/modules/contrib/src/parvoretinafilter.hpp
+++ b/modules/bioinspired/src/parvoretinafilter.hpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -103,6 +103,8 @@
 
 namespace cv
 {
+namespace bioinspired
+{
 //retina classes that derivate from the Basic Retrina class
 class ParvoRetinaFilter: public BasicRetinaFilter
 {
@@ -256,6 +258,6 @@ private:
 #endif
 
 };
-}
+}// end of namespace bioinspired
+}// end of namespace cv
 #endif
-
diff --git a/modules/bioinspired/src/precomp.hpp b/modules/bioinspired/src/precomp.hpp
new file mode 100644
index 000000000..541b97032
--- /dev/null
+++ b/modules/bioinspired/src/precomp.hpp
@@ -0,0 +1,67 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include "opencv2/opencv_modules.hpp"
+#include "opencv2/bioinspired.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/core/private.hpp"
+
+#include <valarray>
+
+#ifdef HAVE_OPENCV_OCL
+    #include "opencv2/ocl/private/util.hpp"
+#endif
+
+namespace cv
+{
+
+// special function to get pointer to constant valarray elements, since
+// simple &arr[0] does not compile on VS2005/VS2008.
+template<typename T> inline const T* get_data(const std::valarray<T>& arr)
+{ return &((std::valarray<T>&)arr)[0]; }
+
+}
+
+#endif
diff --git a/modules/contrib/src/retina.cpp b/modules/bioinspired/src/retina.cpp
similarity index 88%
rename from modules/contrib/src/retina.cpp
rename to modules/bioinspired/src/retina.cpp
index de752c024..460433107 100644
--- a/modules/contrib/src/retina.cpp
+++ b/modules/bioinspired/src/retina.cpp
@@ -6,7 +6,7 @@
  ** copy or use the software.
  **
  **
- ** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+ ** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
  ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
  **
  ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
  ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
  **
- **               For Human Visual System tools (hvstools)
+ **               For Human Visual System tools (bioinspired)
  ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
  **
  ** Third party copyrights are property of their respective owners.
@@ -72,9 +72,12 @@
 #include "retinafilter.hpp"
 #include <cstdio>
 #include <sstream>
+#include <valarray>
 
 namespace cv
 {
+namespace bioinspired
+{
 
 class RetinaImpl : public Retina
 {
@@ -94,7 +97,7 @@ public:
      * @param reductionFactor: only usefull if param useRetinaLogSampling=true, specifies the reduction factor of the output frame (as the center (fovea) is high resolution and corners can be underscaled, then a reduction of the output is allowed without precision leak
      * @param samplingStrenght: only usefull if param useRetinaLogSampling=true, specifies the strenght of the log scale that is applied
      */
-    RetinaImpl(Size inputSize, const bool colorMode, RETINA_COLORSAMPLINGMETHOD colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
+    RetinaImpl(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
 
     virtual ~RetinaImpl();
     /**
@@ -196,6 +199,14 @@ public:
      */
     void run(InputArray inputImage);
 
+    /**
+     * method that applies a luminance correction (initially High Dynamic Range (HDR) tone mapping) using only the 2 local adaptation stages of the retina parvo channel : photoreceptors level and ganlion cells level. Spatio temporal filtering is applied but limited to temporal smoothing and eventually high frequencies attenuation. This is a lighter method than the one available using the regular run method. It is then faster but it does not include complete temporal filtering nor retina spectral whitening. This is an adptation of the original still image HDR tone mapping algorithm of David Alleyson, Sabine Susstruck and Laurence Meylan's work, please cite:
+    * -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+     @param inputImage the input image to process RGB or gray levels
+     @param outputToneMappedImage the output tone mapped image
+     */
+    void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage);
+
     /**
      * accessor of the details channel of the retina (models foveal vision)
      * @param retinaOutput_parvo : the output buffer (reallocated if necessary), this output is rescaled for standard 8bits image processing use in OpenCV
@@ -253,14 +264,17 @@ private:
     // Parameteres setup members
     RetinaParameters _retinaParameters; // structure of parameters
 
-        // Retina model related modules
+    // Retina model related modules
     std::valarray<float> _inputBuffer; //!< buffer used to convert input cv::Mat to internal retina buffers format (valarrays)
 
     // pointer to retina model
     RetinaFilter* _retinaFilter; //!< the pointer to the retina module, allocated with instance construction
 
+    //! private method called by constructors, gathers their parameters and use them in a unified way
+    void _init(const Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
+
     /**
-     * exports a valarray buffer outing from HVStools objects to a cv::Mat in CV_8UC1 (gray level picture) or CV_8UC3 (color) format
+     * exports a valarray buffer outing from bioinspired objects to a cv::Mat in CV_8UC1 (gray level picture) or CV_8UC3 (color) format
      * @param grayMatrixToConvert the valarray to export to OpenCV
      * @param nbRows : the number of rows of the valarray flatten matrix
      * @param nbColumns : the number of rows of the valarray flatten matrix
@@ -270,22 +284,22 @@ private:
     void _convertValarrayBuffer2cvMat(const std::valarray<float> &grayMatrixToConvert, const unsigned int nbRows, const unsigned int nbColumns, const bool colorMode, OutputArray outBuffer);
 
     /**
-     *
+     * convert a cv::Mat to a valarray buffer in float format
      * @param inputMatToConvert : the OpenCV cv::Mat that has to be converted to gray or RGB valarray buffer that will be processed by the retina model
      * @param outputValarrayMatrix : the output valarray
      * @return the input image color mode (color=true, gray levels=false)
      */
-        bool _convertCvMat2ValarrayBuffer(InputArray inputMatToConvert, std::valarray<float> &outputValarrayMatrix);
-
-    //! private method called by constructors, gathers their parameters and use them in a unified way
-    void _init(const Size inputSize, const bool colorMode, RETINA_COLORSAMPLINGMETHOD colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
+    bool _convertCvMat2ValarrayBuffer(InputArray inputMatToConvert, std::valarray<float> &outputValarrayMatrix);
 
 
 };
 
 // smart pointers allocation :
-Ptr<Retina> createRetina(Size inputSize){ return new RetinaImpl(inputSize); }
-Ptr<Retina> createRetina(Size inputSize, const bool colorMode, RETINA_COLORSAMPLINGMETHOD colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght){return new RetinaImpl(inputSize, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght);}
+Ptr<Retina> createRetina(Size inputSize){ return makePtr<RetinaImpl>(inputSize); }
+Ptr<Retina> createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght){
+    return makePtr<RetinaImpl>(inputSize, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght);
+}
+
 
 // RetinaImpl code
 RetinaImpl::RetinaImpl(const cv::Size inputSz)
@@ -294,7 +308,7 @@ RetinaImpl::RetinaImpl(const cv::Size inputSz)
     _init(inputSz, true, RETINA_COLOR_BAYER, false);
 }
 
-RetinaImpl::RetinaImpl(const cv::Size inputSz, const bool colorMode, RETINA_COLORSAMPLINGMETHOD colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght)
+RetinaImpl::RetinaImpl(const cv::Size inputSz, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght)
 {
     _retinaFilter = 0;
     _init(inputSz, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght);
@@ -401,10 +415,10 @@ void RetinaImpl::setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailur
     printf("%s\n", printSetup().c_str());
 }
 
-void RetinaImpl::setup(cv::Retina::RetinaParameters newConfiguration)
+void RetinaImpl::setup(Retina::RetinaParameters newConfiguration)
 {
     // simply copy structures
-    memcpy(&_retinaParameters, &newConfiguration, sizeof(cv::Retina::RetinaParameters));
+    memcpy(&_retinaParameters, &newConfiguration, sizeof(Retina::RetinaParameters));
     // apply setup
     setupOPLandIPLParvoChannel(_retinaParameters.OPLandIplParvo.colorMode, _retinaParameters.OPLandIplParvo.normaliseOutput, _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity, _retinaParameters.OPLandIplParvo.photoreceptorsTemporalConstant, _retinaParameters.OPLandIplParvo.photoreceptorsSpatialConstant, _retinaParameters.OPLandIplParvo.horizontalCellsGain, _retinaParameters.OPLandIplParvo.hcellsTemporalConstant, _retinaParameters.OPLandIplParvo.hcellsSpatialConstant, _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity);
     setupIPLMagnoChannel(_retinaParameters.IplMagno.normaliseOutput, _retinaParameters.IplMagno.parasolCells_beta, _retinaParameters.IplMagno.parasolCells_tau, _retinaParameters.IplMagno.parasolCells_k, _retinaParameters.IplMagno.amacrinCellsTemporalCutFrequency,_retinaParameters.IplMagno.V0CompressionParameter, _retinaParameters.IplMagno.localAdaptintegration_tau, _retinaParameters.IplMagno.localAdaptintegration_k);
@@ -526,6 +540,27 @@ void RetinaImpl::run(InputArray inputMatToConvert)
         throw cv::Exception(-1, "RetinaImpl cannot be applied, wrong input buffer size", "RetinaImpl::run", "RetinaImpl.h", 0);
 }
 
+void RetinaImpl::applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)
+{
+    // first convert input image to the compatible format :
+    const bool colorMode = _convertCvMat2ValarrayBuffer(inputImage.getMat(), _inputBuffer);
+    const unsigned int nbPixels=_retinaFilter->getOutputNBrows()*_retinaFilter->getOutputNBcolumns();
+
+    // process tone mapping
+    if (colorMode)
+    {
+        std::valarray<float> imageOutput(nbPixels*3);
+        _retinaFilter->runRGBToneMapping(_inputBuffer, imageOutput, true, _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity, _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity);
+        _convertValarrayBuffer2cvMat(imageOutput, _retinaFilter->getOutputNBrows(), _retinaFilter->getOutputNBcolumns(), true, outputToneMappedImage);
+    }else
+    {
+        std::valarray<float> imageOutput(nbPixels);
+        _retinaFilter->runGrayToneMapping(_inputBuffer, imageOutput, _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity, _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity);
+        _convertValarrayBuffer2cvMat(imageOutput, _retinaFilter->getOutputNBrows(), _retinaFilter->getOutputNBcolumns(), false, outputToneMappedImage);
+    }
+
+}
+
 void RetinaImpl::getParvo(OutputArray retinaOutput_parvo)
 {
     if (_retinaFilter->getColorMode())
@@ -580,11 +615,11 @@ const Mat RetinaImpl::getParvoRAW() const {
 }
 
 // private method called by constructirs
-void RetinaImpl::_init(const cv::Size inputSz, const bool colorMode, RETINA_COLORSAMPLINGMETHOD colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght)
+void RetinaImpl::_init(const cv::Size inputSz, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght)
 {
     // basic error check
     if (inputSz.height*inputSz.width <= 0)
-        throw cv::Exception(-1, "Bad retina size setup : size height and with must be superior to zero", "RetinaImpl::setup", "RetinaImpl.h", 0);
+        throw cv::Exception(-1, "Bad retina size setup : size height and with must be superior to zero", "RetinaImpl::setup", "Retina.cpp", 0);
 
     unsigned int nbPixels=inputSz.height*inputSz.width;
     // resize buffers if size does not match
@@ -595,8 +630,9 @@ void RetinaImpl::_init(const cv::Size inputSz, const bool colorMode, RETINA_COLO
            delete _retinaFilter;
     _retinaFilter = new RetinaFilter(inputSz.height, inputSz.width, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght);
 
+    _retinaParameters.OPLandIplParvo.colorMode = colorMode;
     // prepare the default parameter XML file with default setup
-        setup(_retinaParameters);
+    setup(_retinaParameters);
 
     // init retina
     _retinaFilter->clearAllBuffers();
@@ -623,7 +659,8 @@ void RetinaImpl::_convertValarrayBuffer2cvMat(const std::valarray<float> &grayMa
         }
     }else
     {
-        const unsigned int doubleNBpixels=_retinaFilter->getOutputNBpixels()*2;
+        const unsigned int nbPixels=nbColumns*nbRows;
+        const unsigned int doubleNBpixels=nbColumns*nbRows*2;
         outBuffer.create(cv::Size(nbColumns, nbRows), CV_8UC3);
         Mat outMat = outBuffer.getMat();
         for (unsigned int i=0;i<nbRows;++i)
@@ -633,7 +670,7 @@ void RetinaImpl::_convertValarrayBuffer2cvMat(const std::valarray<float> &grayMa
                 cv::Point2d pixel(j,i);
                 cv::Vec3b pixelValues;
                 pixelValues[2]=(unsigned char)*(valarrayPTR);
-                pixelValues[1]=(unsigned char)*(valarrayPTR+_retinaFilter->getOutputNBpixels());
+                pixelValues[1]=(unsigned char)*(valarrayPTR+nbPixels);
                 pixelValues[0]=(unsigned char)*(valarrayPTR+doubleNBpixels);
 
                 outMat.at<cv::Vec3b>(pixel)=pixelValues;
@@ -656,15 +693,16 @@ bool RetinaImpl::_convertCvMat2ValarrayBuffer(InputArray inputMat, std::valarray
     typedef float T; // define here the target pixel format, here, float
     const int dsttype = DataType<T>::depth; // output buffer is float format
 
-    
+    const unsigned int nbPixels=inputMat.getMat().rows*inputMat.getMat().cols;
+    const unsigned int doubleNBpixels=inputMat.getMat().rows*inputMat.getMat().cols*2;
 
     if(imageNumberOfChannels==4)
     {
     // create a cv::Mat table (for RGBA planes)
         cv::Mat planes[4] =
         {
-            cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[_retinaFilter->getInputNBpixels()*2]),
-            cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[_retinaFilter->getInputNBpixels()]),
+            cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[doubleNBpixels]),
+            cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[nbPixels]),
             cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[0])
         };
         planes[3] = cv::Mat(inputMatToConvert.size(), dsttype);     // last channel (alpha) does not point on the valarray (not usefull in our case)
@@ -676,8 +714,8 @@ bool RetinaImpl::_convertCvMat2ValarrayBuffer(InputArray inputMat, std::valarray
         // create a cv::Mat table (for RGB planes)
         cv::Mat planes[] =
         {
-        cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[_retinaFilter->getInputNBpixels()*2]),
-        cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[_retinaFilter->getInputNBpixels()]),
+        cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[doubleNBpixels]),
+        cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[nbPixels]),
         cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[0])
         };
         // split color cv::Mat in 3 planes... it fills valarray directely
@@ -701,5 +739,5 @@ void RetinaImpl::activateMovingContoursProcessing(const bool activate){_retinaFi
 
 void RetinaImpl::activateContoursProcessing(const bool activate){_retinaFilter->activateContoursProcessing(activate);}
 
-} // end of namespace cv
-
+}// end of namespace bioinspired
+}// end of namespace cv
diff --git a/modules/bioinspired/src/retina_ocl.cpp b/modules/bioinspired/src/retina_ocl.cpp
new file mode 100644
index 000000000..c294ebfeb
--- /dev/null
+++ b/modules/bioinspired/src/retina_ocl.cpp
@@ -0,0 +1,1648 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include "retina_ocl.hpp"
+#include <iostream>
+#include <sstream>
+
+#ifdef HAVE_OPENCV_OCL
+
+#define NOT_IMPLEMENTED CV_Error(cv::Error::StsNotImplemented, "Not implemented")
+
+namespace cv
+{
+namespace ocl
+{
+//OpenCL kernel file string pointer
+extern const char * retina_kernel;
+}
+}
+
+namespace cv
+{
+namespace bioinspired
+{
+namespace ocl
+{
+using namespace cv::ocl;
+
+class RetinaOCLImpl : public Retina
+{
+public:
+    RetinaOCLImpl(Size getInputSize);
+    RetinaOCLImpl(Size getInputSize, const bool colorMode, int colorSamplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0);
+    virtual ~RetinaOCLImpl();
+
+    Size getInputSize();
+    Size getOutputSize();
+
+    void setup(String retinaParameterFile = "", const bool applyDefaultSetupOnFailure = true);
+    void setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailure = true);
+    void setup(RetinaParameters newParameters);
+
+    RetinaOCLImpl::RetinaParameters getParameters();
+
+    const String printSetup();
+    virtual void write( String fs ) const;
+    virtual void write( FileStorage& fs ) const;
+
+    void setupOPLandIPLParvoChannel(const bool colorMode = true, const bool normaliseOutput = true, const float photoreceptorsLocalAdaptationSensitivity = 0.7, const float photoreceptorsTemporalConstant = 0.5, const float photoreceptorsSpatialConstant = 0.53, const float horizontalCellsGain = 0, const float HcellsTemporalConstant = 1, const float HcellsSpatialConstant = 7, const float ganglionCellsSensitivity = 0.7);
+    void setupIPLMagnoChannel(const bool normaliseOutput = true, const float parasolCells_beta = 0, const float parasolCells_tau = 0, const float parasolCells_k = 7, const float amacrinCellsTemporalCutFrequency = 1.2, const float V0CompressionParameter = 0.95, const float localAdaptintegration_tau = 0, const float localAdaptintegration_k = 7);
+
+    void run(InputArray inputImage);
+    void getParvo(OutputArray retinaOutput_parvo);
+    void getMagno(OutputArray retinaOutput_magno);
+
+    void setColorSaturation(const bool saturateColors = true, const float colorSaturationValue = 4.0);
+    void clearBuffers();
+    void activateMovingContoursProcessing(const bool activate);
+    void activateContoursProcessing(const bool activate);
+
+    // unimplemented interfaces:
+    void applyFastToneMapping(InputArray /*inputImage*/, OutputArray /*outputToneMappedImage*/) { NOT_IMPLEMENTED; }
+    void getParvoRAW(OutputArray /*retinaOutput_parvo*/) { NOT_IMPLEMENTED; }
+    void getMagnoRAW(OutputArray /*retinaOutput_magno*/) { NOT_IMPLEMENTED; }
+    const Mat getMagnoRAW() const { NOT_IMPLEMENTED; return Mat(); }
+    const Mat getParvoRAW() const { NOT_IMPLEMENTED; return Mat(); }
+
+protected:
+    RetinaParameters _retinaParameters;
+    cv::ocl::oclMat _inputBuffer;
+    RetinaFilter* _retinaFilter;
+    bool convertToColorPlanes(const cv::ocl::oclMat& input, cv::ocl::oclMat &output);
+    void convertToInterleaved(const cv::ocl::oclMat& input, bool colorMode, cv::ocl::oclMat &output);
+    void _init(const Size getInputSize, const bool colorMode, int colorSamplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0);
+};
+
+RetinaOCLImpl::RetinaOCLImpl(const cv::Size inputSz)
+{
+    _retinaFilter = 0;
+    _init(inputSz, true, RETINA_COLOR_BAYER, false);
+}
+
+RetinaOCLImpl::RetinaOCLImpl(const cv::Size inputSz, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght)
+{
+    _retinaFilter = 0;
+    _init(inputSz, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght);
+};
+
+RetinaOCLImpl::~RetinaOCLImpl()
+{
+    if (_retinaFilter)
+    {
+        delete _retinaFilter;
+    }
+}
+
+/**
+* retreive retina input buffer size
+*/
+Size RetinaOCLImpl::getInputSize()
+{
+    return cv::Size(_retinaFilter->getInputNBcolumns(), _retinaFilter->getInputNBrows());
+}
+
+/**
+* retreive retina output buffer size
+*/
+Size RetinaOCLImpl::getOutputSize()
+{
+    return cv::Size(_retinaFilter->getOutputNBcolumns(), _retinaFilter->getOutputNBrows());
+}
+
+
+void RetinaOCLImpl::setColorSaturation(const bool saturateColors, const float colorSaturationValue)
+{
+    _retinaFilter->setColorSaturation(saturateColors, colorSaturationValue);
+}
+
+struct RetinaOCLImpl::RetinaParameters RetinaOCLImpl::getParameters()
+{
+    return _retinaParameters;
+}
+
+
+void RetinaOCLImpl::setup(String retinaParameterFile, const bool applyDefaultSetupOnFailure)
+{
+    try
+    {
+        // opening retinaParameterFile in read mode
+        cv::FileStorage fs(retinaParameterFile, cv::FileStorage::READ);
+        setup(fs, applyDefaultSetupOnFailure);
+    }
+    catch(Exception &e)
+    {
+        std::cout << "RetinaOCLImpl::setup: wrong/unappropriate xml parameter file : error report :`n=>" << e.what() << std::endl;
+        if (applyDefaultSetupOnFailure)
+        {
+            std::cout << "RetinaOCLImpl::setup: resetting retina with default parameters" << std::endl;
+            setupOPLandIPLParvoChannel();
+            setupIPLMagnoChannel();
+        }
+        else
+        {
+            std::cout << "=> keeping current parameters" << std::endl;
+        }
+    }
+}
+
+void RetinaOCLImpl::setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailure)
+{
+    try
+    {
+        // read parameters file if it exists or apply default setup if asked for
+        if (!fs.isOpened())
+        {
+            std::cout << "RetinaOCLImpl::setup: provided parameters file could not be open... skeeping configuration" << std::endl;
+            return;
+            // implicit else case : retinaParameterFile could be open (it exists at least)
+        }
+        // OPL and Parvo init first... update at the same time the parameters structure and the retina core
+        cv::FileNode rootFn = fs.root(), currFn = rootFn["OPLandIPLparvo"];
+        currFn["colorMode"] >> _retinaParameters.OPLandIplParvo.colorMode;
+        currFn["normaliseOutput"] >> _retinaParameters.OPLandIplParvo.normaliseOutput;
+        currFn["photoreceptorsLocalAdaptationSensitivity"] >> _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity;
+        currFn["photoreceptorsTemporalConstant"] >> _retinaParameters.OPLandIplParvo.photoreceptorsTemporalConstant;
+        currFn["photoreceptorsSpatialConstant"] >> _retinaParameters.OPLandIplParvo.photoreceptorsSpatialConstant;
+        currFn["horizontalCellsGain"] >> _retinaParameters.OPLandIplParvo.horizontalCellsGain;
+        currFn["hcellsTemporalConstant"] >> _retinaParameters.OPLandIplParvo.hcellsTemporalConstant;
+        currFn["hcellsSpatialConstant"] >> _retinaParameters.OPLandIplParvo.hcellsSpatialConstant;
+        currFn["ganglionCellsSensitivity"] >> _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity;
+        setupOPLandIPLParvoChannel(_retinaParameters.OPLandIplParvo.colorMode, _retinaParameters.OPLandIplParvo.normaliseOutput, _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity, _retinaParameters.OPLandIplParvo.photoreceptorsTemporalConstant, _retinaParameters.OPLandIplParvo.photoreceptorsSpatialConstant, _retinaParameters.OPLandIplParvo.horizontalCellsGain, _retinaParameters.OPLandIplParvo.hcellsTemporalConstant, _retinaParameters.OPLandIplParvo.hcellsSpatialConstant, _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity);
+
+        // init retina IPL magno setup... update at the same time the parameters structure and the retina core
+        currFn = rootFn["IPLmagno"];
+        currFn["normaliseOutput"] >> _retinaParameters.IplMagno.normaliseOutput;
+        currFn["parasolCells_beta"] >> _retinaParameters.IplMagno.parasolCells_beta;
+        currFn["parasolCells_tau"] >> _retinaParameters.IplMagno.parasolCells_tau;
+        currFn["parasolCells_k"] >> _retinaParameters.IplMagno.parasolCells_k;
+        currFn["amacrinCellsTemporalCutFrequency"] >> _retinaParameters.IplMagno.amacrinCellsTemporalCutFrequency;
+        currFn["V0CompressionParameter"] >> _retinaParameters.IplMagno.V0CompressionParameter;
+        currFn["localAdaptintegration_tau"] >> _retinaParameters.IplMagno.localAdaptintegration_tau;
+        currFn["localAdaptintegration_k"] >> _retinaParameters.IplMagno.localAdaptintegration_k;
+
+        setupIPLMagnoChannel(_retinaParameters.IplMagno.normaliseOutput, _retinaParameters.IplMagno.parasolCells_beta, _retinaParameters.IplMagno.parasolCells_tau, _retinaParameters.IplMagno.parasolCells_k, _retinaParameters.IplMagno.amacrinCellsTemporalCutFrequency, _retinaParameters.IplMagno.V0CompressionParameter, _retinaParameters.IplMagno.localAdaptintegration_tau, _retinaParameters.IplMagno.localAdaptintegration_k);
+
+    }
+    catch(Exception &e)
+    {
+        std::cout << "RetinaOCLImpl::setup: resetting retina with default parameters" << std::endl;
+        if (applyDefaultSetupOnFailure)
+        {
+            setupOPLandIPLParvoChannel();
+            setupIPLMagnoChannel();
+        }
+        std::cout << "RetinaOCLImpl::setup: wrong/unappropriate xml parameter file : error report :`n=>" << e.what() << std::endl;
+        std::cout << "=> keeping current parameters" << std::endl;
+    }
+}
+
+void RetinaOCLImpl::setup(cv::bioinspired::Retina::RetinaParameters newConfiguration)
+{
+    // simply copy structures
+    memcpy(&_retinaParameters, &newConfiguration, sizeof(cv::bioinspired::Retina::RetinaParameters));
+    // apply setup
+    setupOPLandIPLParvoChannel(_retinaParameters.OPLandIplParvo.colorMode, _retinaParameters.OPLandIplParvo.normaliseOutput, _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity, _retinaParameters.OPLandIplParvo.photoreceptorsTemporalConstant, _retinaParameters.OPLandIplParvo.photoreceptorsSpatialConstant, _retinaParameters.OPLandIplParvo.horizontalCellsGain, _retinaParameters.OPLandIplParvo.hcellsTemporalConstant, _retinaParameters.OPLandIplParvo.hcellsSpatialConstant, _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity);
+    setupIPLMagnoChannel(_retinaParameters.IplMagno.normaliseOutput, _retinaParameters.IplMagno.parasolCells_beta, _retinaParameters.IplMagno.parasolCells_tau, _retinaParameters.IplMagno.parasolCells_k, _retinaParameters.IplMagno.amacrinCellsTemporalCutFrequency, _retinaParameters.IplMagno.V0CompressionParameter, _retinaParameters.IplMagno.localAdaptintegration_tau, _retinaParameters.IplMagno.localAdaptintegration_k);
+}
+
+const String RetinaOCLImpl::printSetup()
+{
+    std::stringstream outmessage;
+
+    // displaying OPL and IPL parvo setup
+    outmessage << "Current Retina instance setup :"
+               << "\nOPLandIPLparvo" << "{"
+               << "\n==> colorMode : " << _retinaParameters.OPLandIplParvo.colorMode
+               << "\n==> normalizeParvoOutput :" << _retinaParameters.OPLandIplParvo.normaliseOutput
+               << "\n==> photoreceptorsLocalAdaptationSensitivity : " << _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity
+               << "\n==> photoreceptorsTemporalConstant : " << _retinaParameters.OPLandIplParvo.photoreceptorsTemporalConstant
+               << "\n==> photoreceptorsSpatialConstant : " << _retinaParameters.OPLandIplParvo.photoreceptorsSpatialConstant
+               << "\n==> horizontalCellsGain : " << _retinaParameters.OPLandIplParvo.horizontalCellsGain
+               << "\n==> hcellsTemporalConstant : " << _retinaParameters.OPLandIplParvo.hcellsTemporalConstant
+               << "\n==> hcellsSpatialConstant : " << _retinaParameters.OPLandIplParvo.hcellsSpatialConstant
+               << "\n==> parvoGanglionCellsSensitivity : " << _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity
+               << "}\n";
+
+    // displaying IPL magno setup
+    outmessage << "Current Retina instance setup :"
+               << "\nIPLmagno" << "{"
+               << "\n==> normaliseOutput : " << _retinaParameters.IplMagno.normaliseOutput
+               << "\n==> parasolCells_beta : " << _retinaParameters.IplMagno.parasolCells_beta
+               << "\n==> parasolCells_tau : " << _retinaParameters.IplMagno.parasolCells_tau
+               << "\n==> parasolCells_k : " << _retinaParameters.IplMagno.parasolCells_k
+               << "\n==> amacrinCellsTemporalCutFrequency : " << _retinaParameters.IplMagno.amacrinCellsTemporalCutFrequency
+               << "\n==> V0CompressionParameter : " << _retinaParameters.IplMagno.V0CompressionParameter
+               << "\n==> localAdaptintegration_tau : " << _retinaParameters.IplMagno.localAdaptintegration_tau
+               << "\n==> localAdaptintegration_k : " << _retinaParameters.IplMagno.localAdaptintegration_k
+               << "}";
+    return outmessage.str().c_str();
+}
+
+void RetinaOCLImpl::write( String fs ) const
+{
+    FileStorage parametersSaveFile(fs, cv::FileStorage::WRITE );
+    write(parametersSaveFile);
+}
+
+void RetinaOCLImpl::write( FileStorage& fs ) const
+{
+    if (!fs.isOpened())
+    {
+        return;    // basic error case
+    }
+    fs << "OPLandIPLparvo" << "{";
+    fs << "colorMode" << _retinaParameters.OPLandIplParvo.colorMode;
+    fs << "normaliseOutput" << _retinaParameters.OPLandIplParvo.normaliseOutput;
+    fs << "photoreceptorsLocalAdaptationSensitivity" << _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity;
+    fs << "photoreceptorsTemporalConstant" << _retinaParameters.OPLandIplParvo.photoreceptorsTemporalConstant;
+    fs << "photoreceptorsSpatialConstant" << _retinaParameters.OPLandIplParvo.photoreceptorsSpatialConstant;
+    fs << "horizontalCellsGain" << _retinaParameters.OPLandIplParvo.horizontalCellsGain;
+    fs << "hcellsTemporalConstant" << _retinaParameters.OPLandIplParvo.hcellsTemporalConstant;
+    fs << "hcellsSpatialConstant" << _retinaParameters.OPLandIplParvo.hcellsSpatialConstant;
+    fs << "ganglionCellsSensitivity" << _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity;
+    fs << "}";
+    fs << "IPLmagno" << "{";
+    fs << "normaliseOutput" << _retinaParameters.IplMagno.normaliseOutput;
+    fs << "parasolCells_beta" << _retinaParameters.IplMagno.parasolCells_beta;
+    fs << "parasolCells_tau" << _retinaParameters.IplMagno.parasolCells_tau;
+    fs << "parasolCells_k" << _retinaParameters.IplMagno.parasolCells_k;
+    fs << "amacrinCellsTemporalCutFrequency" << _retinaParameters.IplMagno.amacrinCellsTemporalCutFrequency;
+    fs << "V0CompressionParameter" << _retinaParameters.IplMagno.V0CompressionParameter;
+    fs << "localAdaptintegration_tau" << _retinaParameters.IplMagno.localAdaptintegration_tau;
+    fs << "localAdaptintegration_k" << _retinaParameters.IplMagno.localAdaptintegration_k;
+    fs << "}";
+}
+
+void RetinaOCLImpl::setupOPLandIPLParvoChannel(const bool colorMode, const bool normaliseOutput, const float photoreceptorsLocalAdaptationSensitivity, const float photoreceptorsTemporalConstant, const float photoreceptorsSpatialConstant, const float horizontalCellsGain, const float HcellsTemporalConstant, const float HcellsSpatialConstant, const float ganglionCellsSensitivity)
+{
+    // retina core parameters setup
+    _retinaFilter->setColorMode(colorMode);
+    _retinaFilter->setPhotoreceptorsLocalAdaptationSensitivity(photoreceptorsLocalAdaptationSensitivity);
+    _retinaFilter->setOPLandParvoParameters(0, photoreceptorsTemporalConstant, photoreceptorsSpatialConstant, horizontalCellsGain, HcellsTemporalConstant, HcellsSpatialConstant, ganglionCellsSensitivity);
+    _retinaFilter->setParvoGanglionCellsLocalAdaptationSensitivity(ganglionCellsSensitivity);
+    _retinaFilter->activateNormalizeParvoOutput_0_maxOutputValue(normaliseOutput);
+
+    // update parameters struture
+
+    _retinaParameters.OPLandIplParvo.colorMode = colorMode;
+    _retinaParameters.OPLandIplParvo.normaliseOutput = normaliseOutput;
+    _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity = photoreceptorsLocalAdaptationSensitivity;
+    _retinaParameters.OPLandIplParvo.photoreceptorsTemporalConstant = photoreceptorsTemporalConstant;
+    _retinaParameters.OPLandIplParvo.photoreceptorsSpatialConstant = photoreceptorsSpatialConstant;
+    _retinaParameters.OPLandIplParvo.horizontalCellsGain = horizontalCellsGain;
+    _retinaParameters.OPLandIplParvo.hcellsTemporalConstant = HcellsTemporalConstant;
+    _retinaParameters.OPLandIplParvo.hcellsSpatialConstant = HcellsSpatialConstant;
+    _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity = ganglionCellsSensitivity;
+}
+
+void RetinaOCLImpl::setupIPLMagnoChannel(const bool normaliseOutput, const float parasolCells_beta, const float parasolCells_tau, const float parasolCells_k, const float amacrinCellsTemporalCutFrequency, const float V0CompressionParameter, const float localAdaptintegration_tau, const float localAdaptintegration_k)
+{
+
+    _retinaFilter->setMagnoCoefficientsTable(parasolCells_beta, parasolCells_tau, parasolCells_k, amacrinCellsTemporalCutFrequency, V0CompressionParameter, localAdaptintegration_tau, localAdaptintegration_k);
+    _retinaFilter->activateNormalizeMagnoOutput_0_maxOutputValue(normaliseOutput);
+
+    // update parameters struture
+    _retinaParameters.IplMagno.normaliseOutput = normaliseOutput;
+    _retinaParameters.IplMagno.parasolCells_beta = parasolCells_beta;
+    _retinaParameters.IplMagno.parasolCells_tau = parasolCells_tau;
+    _retinaParameters.IplMagno.parasolCells_k = parasolCells_k;
+    _retinaParameters.IplMagno.amacrinCellsTemporalCutFrequency = amacrinCellsTemporalCutFrequency;
+    _retinaParameters.IplMagno.V0CompressionParameter = V0CompressionParameter;
+    _retinaParameters.IplMagno.localAdaptintegration_tau = localAdaptintegration_tau;
+    _retinaParameters.IplMagno.localAdaptintegration_k = localAdaptintegration_k;
+}
+
+void RetinaOCLImpl::run(const InputArray input)
+{
+    oclMat &inputMatToConvert = getOclMatRef(input);
+    bool colorMode = convertToColorPlanes(inputMatToConvert, _inputBuffer);
+    // first convert input image to the compatible format : std::valarray<float>
+    // process the retina
+    if (!_retinaFilter->runFilter(_inputBuffer, colorMode, false, _retinaParameters.OPLandIplParvo.colorMode && colorMode, false))
+    {
+        throw cv::Exception(-1, "Retina cannot be applied, wrong input buffer size", "RetinaOCLImpl::run", "Retina.h", 0);
+    }
+}
+
+void RetinaOCLImpl::getParvo(OutputArray output)
+{
+    oclMat &retinaOutput_parvo = getOclMatRef(output);
+    if (_retinaFilter->getColorMode())
+    {
+        // reallocate output buffer (if necessary)
+        convertToInterleaved(_retinaFilter->getColorOutput(), true, retinaOutput_parvo);
+    }
+    else
+    {
+        // reallocate output buffer (if necessary)
+        convertToInterleaved(_retinaFilter->getContours(), false, retinaOutput_parvo);
+    }
+    //retinaOutput_parvo/=255.0;
+}
+void RetinaOCLImpl::getMagno(OutputArray output)
+{
+    oclMat &retinaOutput_magno = getOclMatRef(output);
+    // reallocate output buffer (if necessary)
+    convertToInterleaved(_retinaFilter->getMovingContours(), false, retinaOutput_magno);
+    //retinaOutput_magno/=255.0;
+}
+// private method called by constructirs
+void RetinaOCLImpl::_init(const cv::Size inputSz, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght)
+{
+    // basic error check
+    if (inputSz.height*inputSz.width <= 0)
+    {
+        throw cv::Exception(-1, "Bad retina size setup : size height and with must be superior to zero", "RetinaOCLImpl::setup", "Retina.h", 0);
+    }
+
+    // allocate the retina model
+    if (_retinaFilter)
+    {
+        delete _retinaFilter;
+    }
+    _retinaFilter = new RetinaFilter(inputSz.height, inputSz.width, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght);
+
+    // prepare the default parameter XML file with default setup
+    setup(_retinaParameters);
+
+    // init retina
+    _retinaFilter->clearAllBuffers();
+}
+
+bool RetinaOCLImpl::convertToColorPlanes(const oclMat& input, oclMat &output)
+{
+    oclMat convert_input;
+    input.convertTo(convert_input, CV_32F);
+    if(convert_input.channels() == 3 || convert_input.channels() == 4)
+    {
+        ocl::ensureSizeIsEnough(int(_retinaFilter->getInputNBrows() * 4),
+                                int(_retinaFilter->getInputNBcolumns()), CV_32FC1, output);
+        oclMat channel_splits[4] =
+        {
+            output(Rect(Point(0, _retinaFilter->getInputNBrows() * 2), getInputSize())),
+            output(Rect(Point(0, _retinaFilter->getInputNBrows()), getInputSize())),
+            output(Rect(Point(0, 0), getInputSize())),
+            output(Rect(Point(0, _retinaFilter->getInputNBrows() * 3), getInputSize()))
+        };
+        ocl::split(convert_input, channel_splits);
+        return true;
+    }
+    else if(convert_input.channels() == 1)
+    {
+        convert_input.copyTo(output);
+        return false;
+    }
+    else
+    {
+        CV_Error(-1, "Retina ocl only support 1, 3, 4 channel input");
+        return false;
+    }
+}
+void RetinaOCLImpl::convertToInterleaved(const oclMat& input, bool colorMode, oclMat &output)
+{
+    input.convertTo(output, CV_8U);
+    if(colorMode)
+    {
+        int numOfSplits = input.rows / getInputSize().height;
+        std::vector<oclMat> channel_splits(numOfSplits);
+        for(int i = 0; i < static_cast<int>(channel_splits.size()); i ++)
+        {
+            channel_splits[i] =
+                output(Rect(Point(0, _retinaFilter->getInputNBrows() * (numOfSplits - i - 1)), getInputSize()));
+        }
+        merge(channel_splits, output);
+    }
+    else
+    {
+        //...
+    }
+}
+
+void RetinaOCLImpl::clearBuffers()
+{
+    _retinaFilter->clearAllBuffers();
+}
+
+void RetinaOCLImpl::activateMovingContoursProcessing(const bool activate)
+{
+    _retinaFilter->activateMovingContoursProcessing(activate);
+}
+
+void RetinaOCLImpl::activateContoursProcessing(const bool activate)
+{
+    _retinaFilter->activateContoursProcessing(activate);
+}
+
+///////////////////////////////////////
+///////// BasicRetinaFilter ///////////
+///////////////////////////////////////
+BasicRetinaFilter::BasicRetinaFilter(const unsigned int NBrows, const unsigned int NBcolumns, const unsigned int parametersListSize, const bool)
+    : _NBrows(NBrows), _NBcols(NBcolumns),
+      _filterOutput(NBrows, NBcolumns, CV_32FC1),
+      _localBuffer(NBrows, NBcolumns, CV_32FC1),
+      _filteringCoeficientsTable(3 * parametersListSize)
+{
+    _halfNBrows = _filterOutput.rows / 2;
+    _halfNBcolumns = _filterOutput.cols / 2;
+
+    // set default values
+    _maxInputValue = 256.0;
+
+    // reset all buffers
+    clearAllBuffers();
+}
+
+BasicRetinaFilter::~BasicRetinaFilter()
+{
+}
+
+void BasicRetinaFilter::resize(const unsigned int NBrows, const unsigned int NBcolumns)
+{
+    // resizing buffers
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _filterOutput);
+
+    // updating variables
+    _halfNBrows = _filterOutput.rows / 2;
+    _halfNBcolumns = _filterOutput.cols / 2;
+
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _localBuffer);
+    // reset buffers
+    clearAllBuffers();
+}
+
+void BasicRetinaFilter::setLPfilterParameters(const float beta, const float tau, const float desired_k, const unsigned int filterIndex)
+{
+    float _beta = beta + tau;
+    float k = desired_k;
+    // check if the spatial constant is correct (avoid 0 value to avoid division by 0)
+    if (desired_k <= 0)
+    {
+        k = 0.001f;
+        std::cerr << "BasicRetinaFilter::spatial constant of the low pass filter must be superior to zero !!! correcting parameter setting to 0,001" << std::endl;
+    }
+
+    float _alpha = k * k;
+    float _mu = 0.8f;
+    unsigned int tableOffset = filterIndex * 3;
+    if (k <= 0)
+    {
+        std::cerr << "BasicRetinaFilter::spatial filtering coefficient must be superior to zero, correcting value to 0.01" << std::endl;
+        _alpha = 0.0001f;
+    }
+
+    float _temp =  (1.0f + _beta) / (2.0f * _mu * _alpha);
+    float a = _filteringCoeficientsTable[tableOffset] = 1.0f + _temp - (float)sqrt( (1.0f + _temp) * (1.0f + _temp) - 1.0f);
+    _filteringCoeficientsTable[1 + tableOffset] = (1.0f - a) * (1.0f - a) * (1.0f - a) * (1.0f - a) / (1.0f + _beta);
+    _filteringCoeficientsTable[2 + tableOffset] = tau;
+}
+const oclMat &BasicRetinaFilter::runFilter_LocalAdapdation(const oclMat &inputFrame, const oclMat &localLuminance)
+{
+    _localLuminanceAdaptation(inputFrame, localLuminance, _filterOutput);
+    return _filterOutput;
+}
+
+
+void BasicRetinaFilter::runFilter_LocalAdapdation(const oclMat &inputFrame, const oclMat &localLuminance, oclMat &outputFrame)
+{
+    _localLuminanceAdaptation(inputFrame, localLuminance, outputFrame);
+}
+
+const oclMat &BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const oclMat &inputFrame)
+{
+    _spatiotemporalLPfilter(inputFrame, _filterOutput);
+    _localLuminanceAdaptation(inputFrame, _filterOutput, _filterOutput);
+    return _filterOutput;
+}
+void BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const oclMat &inputFrame, oclMat &outputFrame)
+{
+    _spatiotemporalLPfilter(inputFrame, _filterOutput);
+    _localLuminanceAdaptation(inputFrame, _filterOutput, outputFrame);
+}
+
+void BasicRetinaFilter::_localLuminanceAdaptation(oclMat &inputOutputFrame, const oclMat &localLuminance)
+{
+    _localLuminanceAdaptation(inputOutputFrame, localLuminance, inputOutputFrame, false);
+}
+
+void BasicRetinaFilter::_localLuminanceAdaptation(const oclMat &inputFrame, const oclMat &localLuminance, oclMat &outputFrame, const bool updateLuminanceMean)
+{
+    if (updateLuminanceMean)
+    {
+        float meanLuminance = saturate_cast<float>(ocl::sum(inputFrame)[0]) / getNBpixels();
+        updateCompressionParameter(meanLuminance);
+    }
+    int elements_per_row = static_cast<int>(inputFrame.step / inputFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {_NBcols, _NBrows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &localLuminance.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &inputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBcols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBrows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_float), &_localLuminanceAddon));
+    args.push_back(std::make_pair(sizeof(cl_float), &_localLuminanceFactor));
+    args.push_back(std::make_pair(sizeof(cl_float), &_maxInputValue));
+    openCLExecuteKernel(ctx, &retina_kernel, "localLuminanceAdaptation", globalSize, localSize, args, -1, -1);
+}
+
+const oclMat &BasicRetinaFilter::runFilter_LPfilter(const oclMat &inputFrame, const unsigned int filterIndex)
+{
+    _spatiotemporalLPfilter(inputFrame, _filterOutput, filterIndex);
+    return _filterOutput;
+}
+void BasicRetinaFilter::runFilter_LPfilter(const oclMat &inputFrame, oclMat &outputFrame, const unsigned int filterIndex)
+{
+    _spatiotemporalLPfilter(inputFrame, outputFrame, filterIndex);
+}
+
+void BasicRetinaFilter::_spatiotemporalLPfilter(const oclMat &inputFrame, oclMat &LPfilterOutput, const unsigned int filterIndex)
+{
+    unsigned int coefTableOffset = filterIndex * 3;
+
+    _a = _filteringCoeficientsTable[coefTableOffset];
+    _gain = _filteringCoeficientsTable[1 + coefTableOffset];
+    _tau = _filteringCoeficientsTable[2 + coefTableOffset];
+
+    _horizontalCausalFilter_addInput(inputFrame, LPfilterOutput);
+    _horizontalAnticausalFilter(LPfilterOutput);
+    _verticalCausalFilter(LPfilterOutput);
+    _verticalAnticausalFilter_multGain(LPfilterOutput);
+}
+
+void BasicRetinaFilter::_horizontalCausalFilter_addInput(const oclMat &inputFrame, oclMat &outputFrame)
+{
+    int elements_per_row = static_cast<int>(inputFrame.step / inputFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {_NBrows, 1, 1};
+    size_t localSize[]  = {256, 1, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &inputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBcols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBrows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_int),   &inputFrame.offset));
+    args.push_back(std::make_pair(sizeof(cl_int),   &inputFrame.offset));
+    args.push_back(std::make_pair(sizeof(cl_float), &_tau));
+    args.push_back(std::make_pair(sizeof(cl_float), &_a));
+    openCLExecuteKernel(ctx, &retina_kernel, "horizontalCausalFilter_addInput", globalSize, localSize, args, -1, -1);
+}
+
+void BasicRetinaFilter::_horizontalAnticausalFilter(oclMat &outputFrame)
+{
+    int elements_per_row = static_cast<int>(outputFrame.step / outputFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {_NBrows, 1, 1};
+    size_t localSize[]  = {256, 1, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBcols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBrows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.offset));
+    args.push_back(std::make_pair(sizeof(cl_float), &_a));
+    openCLExecuteKernel(ctx, &retina_kernel, "horizontalAnticausalFilter", globalSize, localSize, args, -1, -1);
+}
+
+void BasicRetinaFilter::_verticalCausalFilter(oclMat &outputFrame)
+{
+    int elements_per_row = static_cast<int>(outputFrame.step / outputFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {_NBcols, 1, 1};
+    size_t localSize[]  = {256, 1, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBcols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBrows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.offset));
+    args.push_back(std::make_pair(sizeof(cl_float), &_a));
+    openCLExecuteKernel(ctx, &retina_kernel, "verticalCausalFilter", globalSize, localSize, args, -1, -1);
+}
+
+void BasicRetinaFilter::_verticalAnticausalFilter_multGain(oclMat &outputFrame)
+{
+    int elements_per_row = static_cast<int>(outputFrame.step / outputFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {_NBcols, 1, 1};
+    size_t localSize[]  = {256, 1, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBcols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBrows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.offset));
+    args.push_back(std::make_pair(sizeof(cl_float), &_a));
+    args.push_back(std::make_pair(sizeof(cl_float), &_gain));
+    openCLExecuteKernel(ctx, &retina_kernel, "verticalAnticausalFilter_multGain", globalSize, localSize, args, -1, -1);
+}
+
+void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(oclMat &outputFrame, const oclMat &spatialConstantBuffer)
+{
+    int elements_per_row = static_cast<int>(outputFrame.step / outputFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {outputFrame.rows, 1, 1};
+    size_t localSize[]  = {256, 1, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &spatialConstantBuffer.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.offset));
+    args.push_back(std::make_pair(sizeof(cl_int),   &spatialConstantBuffer.offset));
+    openCLExecuteKernel(ctx, &retina_kernel, "horizontalAnticausalFilter_Irregular", globalSize, localSize, args, -1, -1);
+}
+
+//  vertical anticausal filter
+void BasicRetinaFilter::_verticalCausalFilter_Irregular(oclMat &outputFrame, const oclMat &spatialConstantBuffer)
+{
+    int elements_per_row = static_cast<int>(outputFrame.step / outputFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {outputFrame.cols, 1, 1};
+    size_t localSize[]  = {256, 1, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &spatialConstantBuffer.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.offset));
+    args.push_back(std::make_pair(sizeof(cl_int),   &spatialConstantBuffer.offset));
+    openCLExecuteKernel(ctx, &retina_kernel, "verticalCausalFilter_Irregular", globalSize, localSize, args, -1, -1);
+}
+
+void normalizeGrayOutput_0_maxOutputValue(oclMat &inputOutputBuffer, const float maxOutputValue)
+{
+    double min_val, max_val;
+    ocl::minMax(inputOutputBuffer, &min_val, &max_val);
+    float factor = maxOutputValue / static_cast<float>(max_val - min_val);
+    float offset = - static_cast<float>(min_val) * factor;
+    ocl::multiply(factor, inputOutputBuffer, inputOutputBuffer);
+    ocl::add(inputOutputBuffer, offset, inputOutputBuffer);
+}
+
+void normalizeGrayOutputCentredSigmoide(const float meanValue, const float sensitivity, oclMat &in, oclMat &out, const float maxValue)
+{
+    if (sensitivity == 1.0f)
+    {
+        std::cerr << "TemplateBuffer::TemplateBuffer<type>::normalizeGrayOutputCentredSigmoide error: 2nd parameter (sensitivity) must not equal 0, copying original data..." << std::endl;
+        in.copyTo(out);
+        return;
+    }
+
+    float X0 = maxValue / (sensitivity - 1.0f);
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {in.cols, out.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    int elements_per_row = static_cast<int>(out.step / out.elemSize());
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &in.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &out.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &in.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &in.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_float), &meanValue));
+    args.push_back(std::make_pair(sizeof(cl_float), &X0));
+    openCLExecuteKernel(ctx, &retina_kernel, "normalizeGrayOutputCentredSigmoide", globalSize, localSize, args, -1, -1);
+}
+
+void normalizeGrayOutputNearZeroCentreredSigmoide(oclMat &inputPicture, oclMat &outputBuffer, const float sensitivity, const float maxOutputValue)
+{
+    float X0cube = sensitivity * sensitivity * sensitivity;
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {inputPicture.cols, inputPicture.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    int elements_per_row = static_cast<int>(inputPicture.step / inputPicture.elemSize());
+    args.push_back(std::make_pair(sizeof(cl_mem),   &inputPicture.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputBuffer.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &inputPicture.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &inputPicture.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_float), &maxOutputValue));
+    args.push_back(std::make_pair(sizeof(cl_float), &X0cube));
+    openCLExecuteKernel(ctx, &retina_kernel, "normalizeGrayOutputNearZeroCentreredSigmoide", globalSize, localSize, args, -1, -1);
+}
+
+void centerReductImageLuminance(oclMat &inputoutput)
+{
+    Scalar mean, stddev;
+    cv::meanStdDev((Mat)inputoutput, mean, stddev);
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {inputoutput.cols, inputoutput.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    float f_mean = static_cast<float>(mean[0]);
+    float f_stddev = static_cast<float>(stddev[0]);
+    int elements_per_row = static_cast<int>(inputoutput.step / inputoutput.elemSize());
+    args.push_back(std::make_pair(sizeof(cl_mem),   &inputoutput.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &inputoutput.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &inputoutput.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_float), &f_mean));
+    args.push_back(std::make_pair(sizeof(cl_float), &f_stddev));
+    openCLExecuteKernel(ctx, &retina_kernel, "centerReductImageLuminance", globalSize, localSize, args, -1, -1);
+}
+
+///////////////////////////////////////
+///////// ParvoRetinaFilter ///////////
+///////////////////////////////////////
+ParvoRetinaFilter::ParvoRetinaFilter(const unsigned int NBrows, const unsigned int NBcolumns)
+    : BasicRetinaFilter(NBrows, NBcolumns, 3),
+      _photoreceptorsOutput(NBrows, NBcolumns, CV_32FC1),
+      _horizontalCellsOutput(NBrows, NBcolumns, CV_32FC1),
+      _parvocellularOutputON(NBrows, NBcolumns, CV_32FC1),
+      _parvocellularOutputOFF(NBrows, NBcolumns, CV_32FC1),
+      _bipolarCellsOutputON(NBrows, NBcolumns, CV_32FC1),
+      _bipolarCellsOutputOFF(NBrows, NBcolumns, CV_32FC1),
+      _localAdaptationOFF(NBrows, NBcolumns, CV_32FC1)
+{
+    // link to the required local parent adaptation buffers
+    _localAdaptationON = _localBuffer;
+    _parvocellularOutputONminusOFF = _filterOutput;
+
+    // init: set all the values to 0
+    clearAllBuffers();
+}
+
+ParvoRetinaFilter::~ParvoRetinaFilter()
+{
+}
+
+void ParvoRetinaFilter::clearAllBuffers()
+{
+    BasicRetinaFilter::clearAllBuffers();
+    _photoreceptorsOutput = 0;
+    _horizontalCellsOutput = 0;
+    _parvocellularOutputON = 0;
+    _parvocellularOutputOFF = 0;
+    _bipolarCellsOutputON = 0;
+    _bipolarCellsOutputOFF = 0;
+    _localAdaptationOFF = 0;
+}
+void ParvoRetinaFilter::resize(const unsigned int NBrows, const unsigned int NBcolumns)
+{
+    BasicRetinaFilter::resize(NBrows, NBcolumns);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _photoreceptorsOutput);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _horizontalCellsOutput);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _parvocellularOutputON);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _parvocellularOutputOFF);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _bipolarCellsOutputON);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _bipolarCellsOutputOFF);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _localAdaptationOFF);
+
+    // link to the required local parent adaptation buffers
+    _localAdaptationON = _localBuffer;
+    _parvocellularOutputONminusOFF = _filterOutput;
+
+    // clean buffers
+    clearAllBuffers();
+}
+
+void ParvoRetinaFilter::setOPLandParvoFiltersParameters(const float beta1, const float tau1, const float k1, const float beta2, const float tau2, const float k2)
+{
+    // init photoreceptors low pass filter
+    setLPfilterParameters(beta1, tau1, k1);
+    // init horizontal cells low pass filter
+    setLPfilterParameters(beta2, tau2, k2, 1);
+    // init parasol ganglion cells low pass filter (default parameters)
+    setLPfilterParameters(0, tau1, k1, 2);
+
+}
+const oclMat &ParvoRetinaFilter::runFilter(const oclMat &inputFrame, const bool useParvoOutput)
+{
+    _spatiotemporalLPfilter(inputFrame, _photoreceptorsOutput);
+    _spatiotemporalLPfilter(_photoreceptorsOutput, _horizontalCellsOutput, 1);
+    _OPL_OnOffWaysComputing();
+
+    if (useParvoOutput)
+    {
+        // local adaptation processes on ON and OFF ways
+        _spatiotemporalLPfilter(_bipolarCellsOutputON, _localAdaptationON, 2);
+        _localLuminanceAdaptation(_parvocellularOutputON, _localAdaptationON);
+        _spatiotemporalLPfilter(_bipolarCellsOutputOFF, _localAdaptationOFF, 2);
+        _localLuminanceAdaptation(_parvocellularOutputOFF, _localAdaptationOFF);
+        ocl::subtract(_parvocellularOutputON, _parvocellularOutputOFF, _parvocellularOutputONminusOFF);
+    }
+
+    return _parvocellularOutputONminusOFF;
+}
+void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
+{
+    int elements_per_row = static_cast<int>(_photoreceptorsOutput.step / _photoreceptorsOutput.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {(_photoreceptorsOutput.cols + 3) / 4, _photoreceptorsOutput.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_photoreceptorsOutput.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_horizontalCellsOutput.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_bipolarCellsOutputON.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_bipolarCellsOutputOFF.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_parvocellularOutputON.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_parvocellularOutputOFF.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_photoreceptorsOutput.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_photoreceptorsOutput.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    openCLExecuteKernel(ctx, &retina_kernel, "OPL_OnOffWaysComputing", globalSize, localSize, args, -1, -1);
+}
+
+///////////////////////////////////////
+//////////// MagnoFilter //////////////
+///////////////////////////////////////
+MagnoRetinaFilter::MagnoRetinaFilter(const unsigned int NBrows, const unsigned int NBcolumns)
+    : BasicRetinaFilter(NBrows, NBcolumns, 2),
+      _previousInput_ON(NBrows, NBcolumns, CV_32FC1),
+      _previousInput_OFF(NBrows, NBcolumns, CV_32FC1),
+      _amacrinCellsTempOutput_ON(NBrows, NBcolumns, CV_32FC1),
+      _amacrinCellsTempOutput_OFF(NBrows, NBcolumns, CV_32FC1),
+      _magnoXOutputON(NBrows, NBcolumns, CV_32FC1),
+      _magnoXOutputOFF(NBrows, NBcolumns, CV_32FC1),
+      _localProcessBufferON(NBrows, NBcolumns, CV_32FC1),
+      _localProcessBufferOFF(NBrows, NBcolumns, CV_32FC1)
+{
+    _magnoYOutput = _filterOutput;
+    _magnoYsaturated = _localBuffer;
+
+    clearAllBuffers();
+}
+
+MagnoRetinaFilter::~MagnoRetinaFilter()
+{
+}
+void MagnoRetinaFilter::clearAllBuffers()
+{
+    BasicRetinaFilter::clearAllBuffers();
+    _previousInput_ON = 0;
+    _previousInput_OFF = 0;
+    _amacrinCellsTempOutput_ON = 0;
+    _amacrinCellsTempOutput_OFF = 0;
+    _magnoXOutputON = 0;
+    _magnoXOutputOFF = 0;
+    _localProcessBufferON = 0;
+    _localProcessBufferOFF = 0;
+
+}
+void MagnoRetinaFilter::resize(const unsigned int NBrows, const unsigned int NBcolumns)
+{
+    BasicRetinaFilter::resize(NBrows, NBcolumns);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _previousInput_ON);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _previousInput_OFF);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _amacrinCellsTempOutput_ON);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _amacrinCellsTempOutput_OFF);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _magnoXOutputON);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _magnoXOutputOFF);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _localProcessBufferON);
+    ensureSizeIsEnough(NBrows, NBcolumns, CV_32FC1, _localProcessBufferOFF);
+
+    // to be sure, relink buffers
+    _magnoYOutput = _filterOutput;
+    _magnoYsaturated = _localBuffer;
+
+    // reset all buffers
+    clearAllBuffers();
+}
+
+void MagnoRetinaFilter::setCoefficientsTable(const float parasolCells_beta, const float parasolCells_tau, const float parasolCells_k, const float amacrinCellsTemporalCutFrequency, const float localAdaptIntegration_tau, const float localAdaptIntegration_k )
+{
+    _temporalCoefficient = (float)std::exp(-1.0f / amacrinCellsTemporalCutFrequency);
+    // the first set of parameters is dedicated to the low pass filtering property of the ganglion cells
+    BasicRetinaFilter::setLPfilterParameters(parasolCells_beta, parasolCells_tau, parasolCells_k, 0);
+    // the second set of parameters is dedicated to the ganglion cells output intergartion for their local adaptation property
+    BasicRetinaFilter::setLPfilterParameters(0, localAdaptIntegration_tau, localAdaptIntegration_k, 1);
+}
+
+void MagnoRetinaFilter::_amacrineCellsComputing(
+    const oclMat &OPL_ON,
+    const oclMat &OPL_OFF
+)
+{
+    int elements_per_row = static_cast<int>(OPL_ON.step / OPL_ON.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {OPL_ON.cols, OPL_ON.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &OPL_ON.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &OPL_OFF.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_previousInput_ON.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_previousInput_OFF.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_amacrinCellsTempOutput_ON.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &_amacrinCellsTempOutput_OFF.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &OPL_ON.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &OPL_ON.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_float), &_temporalCoefficient));
+    openCLExecuteKernel(ctx, &retina_kernel, "amacrineCellsComputing", globalSize, localSize, args, -1, -1);
+}
+
+const oclMat &MagnoRetinaFilter::runFilter(const oclMat &OPL_ON, const oclMat &OPL_OFF)
+{
+    // Compute the high pass temporal filter
+    _amacrineCellsComputing(OPL_ON, OPL_OFF);
+
+    // apply low pass filtering on ON and OFF ways after temporal high pass filtering
+    _spatiotemporalLPfilter(_amacrinCellsTempOutput_ON, _magnoXOutputON, 0);
+    _spatiotemporalLPfilter(_amacrinCellsTempOutput_OFF, _magnoXOutputOFF, 0);
+
+    // local adaptation of the ganglion cells to the local contrast of the moving contours
+    _spatiotemporalLPfilter(_magnoXOutputON, _localProcessBufferON, 1);
+    _localLuminanceAdaptation(_magnoXOutputON, _localProcessBufferON);
+
+    _spatiotemporalLPfilter(_magnoXOutputOFF, _localProcessBufferOFF, 1);
+    _localLuminanceAdaptation(_magnoXOutputOFF, _localProcessBufferOFF);
+
+    _magnoYOutput = _magnoXOutputON + _magnoXOutputOFF;
+
+    return _magnoYOutput;
+}
+
+///////////////////////////////////////
+//////////// RetinaColor //////////////
+///////////////////////////////////////
+
+// define an array of ROI headers of input x
+#define MAKE_OCLMAT_SLICES(x, n) \
+    oclMat x##_slices[n];\
+    for(int _SLICE_INDEX_ = 0; _SLICE_INDEX_ < n; _SLICE_INDEX_ ++)\
+    {\
+        x##_slices[_SLICE_INDEX_] = x(getROI(_SLICE_INDEX_));\
+    }
+
+RetinaColor::RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns, const int samplingMethod)
+    : BasicRetinaFilter(NBrows, NBcolumns, 3),
+      _RGBmosaic(NBrows * 3, NBcolumns, CV_32FC1),
+      _tempMultiplexedFrame(NBrows, NBcolumns, CV_32FC1),
+      _demultiplexedTempBuffer(NBrows * 3, NBcolumns, CV_32FC1),
+      _demultiplexedColorFrame(NBrows * 3, NBcolumns, CV_32FC1),
+      _chrominance(NBrows * 3, NBcolumns, CV_32FC1),
+      _colorLocalDensity(NBrows * 3, NBcolumns, CV_32FC1),
+      _imageGradient(NBrows * 3, NBcolumns, CV_32FC1)
+{
+    // link to parent buffers (let's recycle !)
+    _luminance = _filterOutput;
+    _multiplexedFrame = _localBuffer;
+
+    _objectInit = false;
+    _samplingMethod = samplingMethod;
+    _saturateColors = false;
+    _colorSaturationValue = 4.0;
+
+    // set default spatio-temporal filter parameters
+    setLPfilterParameters(0.0, 0.0, 1.5);
+    setLPfilterParameters(0.0, 0.0, 10.5, 1);// for the low pass filter dedicated to contours energy extraction (demultiplexing process)
+    setLPfilterParameters(0.f, 0.f, 0.9f, 2);
+
+    // init default value on image Gradient
+    _imageGradient = 0.57f;
+
+    // init color sampling map
+    _initColorSampling();
+
+    // flush all buffers
+    clearAllBuffers();
+}
+
+RetinaColor::~RetinaColor()
+{
+
+}
+
+void RetinaColor::clearAllBuffers()
+{
+    BasicRetinaFilter::clearAllBuffers();
+    _tempMultiplexedFrame = 0.f;
+    _demultiplexedTempBuffer = 0.f;
+
+    _demultiplexedColorFrame = 0.f;
+    _chrominance = 0.f;
+    _imageGradient = 0.57f;
+}
+
+void RetinaColor::resize(const unsigned int NBrows, const unsigned int NBcolumns)
+{
+    BasicRetinaFilter::clearAllBuffers();
+    ensureSizeIsEnough(NBrows,     NBcolumns, CV_32FC1, _tempMultiplexedFrame);
+    ensureSizeIsEnough(NBrows * 2, NBcolumns, CV_32FC1, _imageGradient);
+    ensureSizeIsEnough(NBrows * 3, NBcolumns, CV_32FC1, _RGBmosaic);
+    ensureSizeIsEnough(NBrows * 3, NBcolumns, CV_32FC1, _demultiplexedTempBuffer);
+    ensureSizeIsEnough(NBrows * 3, NBcolumns, CV_32FC1, _demultiplexedColorFrame);
+    ensureSizeIsEnough(NBrows * 3, NBcolumns, CV_32FC1, _chrominance);
+    ensureSizeIsEnough(NBrows * 3, NBcolumns, CV_32FC1, _colorLocalDensity);
+
+    // link to parent buffers (let's recycle !)
+    _luminance = _filterOutput;
+    _multiplexedFrame = _localBuffer;
+
+    // init color sampling map
+    _initColorSampling();
+
+    // clean buffers
+    clearAllBuffers();
+}
+
+static void inverseValue(oclMat &input)
+{
+    int elements_per_row = static_cast<int>(input.step / input.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {input.cols, input.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &input.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &input.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &input.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    openCLExecuteKernel(ctx, &retina_kernel, "inverseValue", globalSize, localSize, args, -1, -1);
+}
+
+void RetinaColor::_initColorSampling()
+{
+    CV_Assert(_samplingMethod == RETINA_COLOR_BAYER);
+    _pR = _pB = 0.25;
+    _pG = 0.5;
+    // filling the mosaic buffer:
+    _RGBmosaic = 0;
+    Mat tmp_mat(_NBrows * 3, _NBcols, CV_32FC1);
+    float * tmp_mat_ptr = tmp_mat.ptr<float>();
+    tmp_mat.setTo(0);
+    for (unsigned int index = 0 ; index < getNBpixels(); ++index)
+    {
+        tmp_mat_ptr[bayerSampleOffset(index)] = 1.0;
+    }
+    _RGBmosaic.upload(tmp_mat);
+    // computing photoreceptors local density
+    MAKE_OCLMAT_SLICES(_RGBmosaic, 3);
+    MAKE_OCLMAT_SLICES(_colorLocalDensity, 3);
+    _colorLocalDensity.setTo(0);
+    _spatiotemporalLPfilter(_RGBmosaic_slices[0], _colorLocalDensity_slices[0]);
+    _spatiotemporalLPfilter(_RGBmosaic_slices[1], _colorLocalDensity_slices[1]);
+    _spatiotemporalLPfilter(_RGBmosaic_slices[2], _colorLocalDensity_slices[2]);
+
+    //_colorLocalDensity = oclMat(_colorLocalDensity.size(), _colorLocalDensity.type(), 1.f) / _colorLocalDensity;
+    inverseValue(_colorLocalDensity);
+
+    _objectInit = true;
+}
+
+static void demultiplex(const oclMat &input, oclMat &ouput)
+{
+    int elements_per_row = static_cast<int>(input.step / input.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {input.cols, input.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &input.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &ouput.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &input.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &input.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    openCLExecuteKernel(ctx, &retina_kernel, "runColorDemultiplexingBayer", globalSize, localSize, args, -1, -1);
+}
+
+static void normalizePhotoDensity(
+    const oclMat &chroma,
+    const oclMat &colorDensity,
+    const oclMat &multiplex,
+    oclMat &ocl_luma,
+    oclMat &demultiplex,
+    const float pG
+)
+{
+    int elements_per_row = static_cast<int>(ocl_luma.step / ocl_luma.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {ocl_luma.cols, ocl_luma.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &chroma.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &colorDensity.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &multiplex.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &ocl_luma.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &demultiplex.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &ocl_luma.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &ocl_luma.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_float), &pG));
+    openCLExecuteKernel(ctx, &retina_kernel, "normalizePhotoDensity", globalSize, localSize, args, -1, -1);
+}
+
+static void substractResidual(
+    oclMat &colorDemultiplex,
+    float pR,
+    float pG,
+    float pB
+)
+{
+    int elements_per_row = static_cast<int>(colorDemultiplex.step / colorDemultiplex.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    int rows = colorDemultiplex.rows / 3, cols = colorDemultiplex.cols;
+    size_t globalSize[] = {cols, rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &colorDemultiplex.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_float), &pR));
+    args.push_back(std::make_pair(sizeof(cl_float), &pG));
+    args.push_back(std::make_pair(sizeof(cl_float), &pB));
+    openCLExecuteKernel(ctx, &retina_kernel, "substractResidual", globalSize, localSize, args, -1, -1);
+}
+
+static void demultiplexAssign(const oclMat& input, const oclMat& output)
+{
+    // only supports bayer
+    int elements_per_row = static_cast<int>(input.step / input.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    int rows = input.rows / 3, cols = input.cols;
+    size_t globalSize[] = {cols, rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &input.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &output.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    openCLExecuteKernel(ctx, &retina_kernel, "demultiplexAssign", globalSize, localSize, args, -1, -1);
+}
+
+void RetinaColor::runColorDemultiplexing(
+    const oclMat &ocl_multiplexed_input,
+    const bool adaptiveFiltering,
+    const float maxInputValue
+)
+{
+    MAKE_OCLMAT_SLICES(_demultiplexedTempBuffer, 3);
+    MAKE_OCLMAT_SLICES(_chrominance, 3);
+    MAKE_OCLMAT_SLICES(_RGBmosaic, 3);
+    MAKE_OCLMAT_SLICES(_demultiplexedColorFrame, 3);
+    MAKE_OCLMAT_SLICES(_colorLocalDensity, 3);
+
+    _demultiplexedTempBuffer.setTo(0);
+    demultiplex(ocl_multiplexed_input, _demultiplexedTempBuffer);
+
+    // interpolate the demultiplexed frame depending on the color sampling method
+    if (!adaptiveFiltering)
+    {
+        CV_Assert(adaptiveFiltering == false);
+    }
+
+    _spatiotemporalLPfilter(_demultiplexedTempBuffer_slices[0], _chrominance_slices[0]);
+    _spatiotemporalLPfilter(_demultiplexedTempBuffer_slices[1], _chrominance_slices[1]);
+    _spatiotemporalLPfilter(_demultiplexedTempBuffer_slices[2], _chrominance_slices[2]);
+
+    if (!adaptiveFiltering)// compute the gradient on the luminance
+    {
+        // TODO: implement me!
+        CV_Assert(adaptiveFiltering == false);
+    }
+    else
+    {
+        normalizePhotoDensity(_chrominance, _colorLocalDensity, ocl_multiplexed_input, _luminance, _demultiplexedTempBuffer, _pG);
+        // compute the gradient of the luminance
+        _computeGradient(_luminance, _imageGradient);
+
+        _adaptiveSpatialLPfilter(_RGBmosaic_slices[0], _imageGradient, _chrominance_slices[0]);
+        _adaptiveSpatialLPfilter(_RGBmosaic_slices[1], _imageGradient, _chrominance_slices[1]);
+        _adaptiveSpatialLPfilter(_RGBmosaic_slices[2], _imageGradient, _chrominance_slices[2]);
+
+        _adaptiveSpatialLPfilter(_demultiplexedTempBuffer_slices[0], _imageGradient, _demultiplexedColorFrame_slices[0]);
+        _adaptiveSpatialLPfilter(_demultiplexedTempBuffer_slices[1], _imageGradient, _demultiplexedColorFrame_slices[1]);
+        _adaptiveSpatialLPfilter(_demultiplexedTempBuffer_slices[2], _imageGradient, _demultiplexedColorFrame_slices[2]);
+
+        _demultiplexedColorFrame /= _chrominance; // per element division
+        substractResidual(_demultiplexedColorFrame, _pR, _pG, _pB);
+        runColorMultiplexing(_demultiplexedColorFrame, _tempMultiplexedFrame);
+
+        _demultiplexedTempBuffer.setTo(0);
+        _luminance = ocl_multiplexed_input - _tempMultiplexedFrame;
+        demultiplexAssign(_demultiplexedColorFrame, _demultiplexedTempBuffer);
+
+        for(int i = 0; i < 3; i ++)
+        {
+            _spatiotemporalLPfilter(_demultiplexedTempBuffer_slices[i], _demultiplexedTempBuffer_slices[i]);
+            _demultiplexedColorFrame_slices[i] = _demultiplexedTempBuffer_slices[i] * _colorLocalDensity_slices[i] + _luminance;
+        }
+    }
+    // eliminate saturated colors by simple clipping values to the input range
+    clipRGBOutput_0_maxInputValue(_demultiplexedColorFrame, maxInputValue);
+
+    if (_saturateColors)
+    {
+        ocl::normalizeGrayOutputCentredSigmoide(128, maxInputValue, _demultiplexedColorFrame, _demultiplexedColorFrame);
+    }
+}
+void RetinaColor::runColorMultiplexing(const oclMat &demultiplexedInputFrame, oclMat &multiplexedFrame)
+{
+    int elements_per_row = static_cast<int>(multiplexedFrame.step / multiplexedFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {multiplexedFrame.cols, multiplexedFrame.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &demultiplexedInputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &multiplexedFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &multiplexedFrame.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &multiplexedFrame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    openCLExecuteKernel(ctx, &retina_kernel, "runColorMultiplexingBayer", globalSize, localSize, args, -1, -1);
+}
+
+void RetinaColor::clipRGBOutput_0_maxInputValue(oclMat &inputOutputBuffer, const float maxInputValue)
+{
+    // the kernel is equivalent to:
+    //ocl::threshold(inputOutputBuffer, inputOutputBuffer, maxInputValue, maxInputValue, THRESH_TRUNC);
+    //ocl::threshold(inputOutputBuffer, inputOutputBuffer, 0, 0, THRESH_TOZERO);
+    int elements_per_row = static_cast<int>(inputOutputBuffer.step / inputOutputBuffer.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {_NBcols, inputOutputBuffer.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &inputOutputBuffer.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBcols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &inputOutputBuffer.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_float), &maxInputValue));
+    openCLExecuteKernel(ctx, &retina_kernel, "clipRGBOutput_0_maxInputValue", globalSize, localSize, args, -1, -1);
+}
+
+void RetinaColor::_adaptiveSpatialLPfilter(const oclMat &inputFrame, const oclMat &gradient, oclMat &outputFrame)
+{
+    /**********/
+    _gain = (1 - 0.57f) * (1 - 0.57f) * (1 - 0.06f) * (1 - 0.06f);
+
+    // launch the serie of 1D directional filters in order to compute the 2D low pass filter
+    // -> horizontal filters work with the first layer of imageGradient
+    _adaptiveHorizontalCausalFilter_addInput(inputFrame, gradient, outputFrame);
+    _horizontalAnticausalFilter_Irregular(outputFrame, gradient);
+    // -> horizontal filters work with the second layer of imageGradient
+    _verticalCausalFilter_Irregular(outputFrame, gradient(getROI(1)));
+    _adaptiveVerticalAnticausalFilter_multGain(gradient, outputFrame);
+}
+
+void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const oclMat &inputFrame, const oclMat &gradient, oclMat &outputFrame)
+{
+    int elements_per_row = static_cast<int>(inputFrame.step / inputFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {_NBrows, 1, 1};
+    size_t localSize[]  = {256, 1, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &inputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &gradient.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBcols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBrows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_int),   &inputFrame.offset));
+    args.push_back(std::make_pair(sizeof(cl_int),   &gradient.offset));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.offset));
+    openCLExecuteKernel(ctx, &retina_kernel, "adaptiveHorizontalCausalFilter_addInput", globalSize, localSize, args, -1, -1);
+}
+
+void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(const oclMat &gradient, oclMat &outputFrame)
+{
+    int elements_per_row = static_cast<int>(outputFrame.step / outputFrame.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {_NBcols, 1, 1};
+    size_t localSize[]  = {256, 1, 1};
+
+    int gradOffset = gradient.offset + static_cast<int>(gradient.step * _NBrows);
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &gradient.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &outputFrame.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBcols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBrows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_int),   &gradOffset));
+    args.push_back(std::make_pair(sizeof(cl_int),   &outputFrame.offset));
+    args.push_back(std::make_pair(sizeof(cl_float), &_gain));
+    openCLExecuteKernel(ctx, &retina_kernel, "adaptiveVerticalAnticausalFilter_multGain", globalSize, localSize, args, -1, -1);
+}
+void RetinaColor::_computeGradient(const oclMat &luminance, oclMat &gradient)
+{
+    int elements_per_row = static_cast<int>(luminance.step / luminance.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {_NBcols, _NBrows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &luminance.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &gradient.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBcols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &_NBrows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    openCLExecuteKernel(ctx, &retina_kernel, "computeGradient", globalSize, localSize, args, -1, -1);
+}
+
+///////////////////////////////////////
+//////////// RetinaFilter /////////////
+///////////////////////////////////////
+RetinaFilter::RetinaFilter(const unsigned int sizeRows, const unsigned int sizeColumns, const bool colorMode, const int samplingMethod, const bool useRetinaLogSampling, const double, const double)
+    :
+    _photoreceptorsPrefilter(sizeRows, sizeColumns, 4),
+    _ParvoRetinaFilter(sizeRows, sizeColumns),
+    _MagnoRetinaFilter(sizeRows, sizeColumns),
+    _colorEngine(sizeRows, sizeColumns, samplingMethod)
+{
+    CV_Assert(!useRetinaLogSampling);
+
+    // set default processing activities
+    _useParvoOutput = true;
+    _useMagnoOutput = true;
+
+    _useColorMode = colorMode;
+
+    // set default parameters
+    setGlobalParameters();
+
+    // stability controls values init
+    _setInitPeriodCount();
+    _globalTemporalConstant = 25;
+
+    // reset all buffers
+    clearAllBuffers();
+}
+
+RetinaFilter::~RetinaFilter()
+{
+}
+
+void RetinaFilter::clearAllBuffers()
+{
+    _photoreceptorsPrefilter.clearAllBuffers();
+    _ParvoRetinaFilter.clearAllBuffers();
+    _MagnoRetinaFilter.clearAllBuffers();
+    _colorEngine.clearAllBuffers();
+    // stability controls value init
+    _setInitPeriodCount();
+}
+
+void RetinaFilter::resize(const unsigned int NBrows, const unsigned int NBcolumns)
+{
+    unsigned int rows = NBrows, cols = NBcolumns;
+
+    // resize optionnal member and adjust other modules size if required
+    _photoreceptorsPrefilter.resize(rows, cols);
+    _ParvoRetinaFilter.resize(rows, cols);
+    _MagnoRetinaFilter.resize(rows, cols);
+    _colorEngine.resize(rows, cols);
+
+    // clean buffers
+    clearAllBuffers();
+
+}
+
+void RetinaFilter::_setInitPeriodCount()
+{
+    // find out the maximum temporal constant value and apply a security factor
+    // false value (obviously too long) but appropriate for simple use
+    _globalTemporalConstant = (unsigned int)(_ParvoRetinaFilter.getPhotoreceptorsTemporalConstant() + _ParvoRetinaFilter.getHcellsTemporalConstant() + _MagnoRetinaFilter.getTemporalConstant());
+    // reset frame counter
+    _ellapsedFramesSinceLastReset = 0;
+}
+
+void RetinaFilter::setGlobalParameters(const float OPLspatialResponse1, const float OPLtemporalresponse1, const float OPLassymetryGain, const float OPLspatialResponse2, const float OPLtemporalresponse2, const float LPfilterSpatialResponse, const float LPfilterGain, const float LPfilterTemporalresponse, const float MovingContoursExtractorCoefficient, const bool normalizeParvoOutput_0_maxOutputValue, const bool normalizeMagnoOutput_0_maxOutputValue, const float maxOutputValue, const float maxInputValue, const float meanValue)
+{
+    _normalizeParvoOutput_0_maxOutputValue = normalizeParvoOutput_0_maxOutputValue;
+    _normalizeMagnoOutput_0_maxOutputValue = normalizeMagnoOutput_0_maxOutputValue;
+    _maxOutputValue = maxOutputValue;
+    _photoreceptorsPrefilter.setV0CompressionParameter(0.9f, maxInputValue, meanValue);
+    _photoreceptorsPrefilter.setLPfilterParameters(0, 0, 10, 3); // keeps low pass filter with low cut frequency in memory (usefull for the tone mapping function)
+    _ParvoRetinaFilter.setOPLandParvoFiltersParameters(0, OPLtemporalresponse1, OPLspatialResponse1, OPLassymetryGain, OPLtemporalresponse2, OPLspatialResponse2);
+    _ParvoRetinaFilter.setV0CompressionParameter(0.9f, maxInputValue, meanValue);
+    _MagnoRetinaFilter.setCoefficientsTable(LPfilterGain, LPfilterTemporalresponse, LPfilterSpatialResponse, MovingContoursExtractorCoefficient, 0, 2.0f * LPfilterSpatialResponse);
+    _MagnoRetinaFilter.setV0CompressionParameter(0.7f, maxInputValue, meanValue);
+
+    // stability controls value init
+    _setInitPeriodCount();
+}
+
+bool RetinaFilter::checkInput(const oclMat &input, const bool)
+{
+    BasicRetinaFilter *inputTarget = &_photoreceptorsPrefilter;
+
+    bool test = (input.rows == static_cast<int>(inputTarget->getNBrows())
+                 || input.rows == static_cast<int>(inputTarget->getNBrows()) * 3
+                 || input.rows == static_cast<int>(inputTarget->getNBrows()) * 4)
+                && input.cols == static_cast<int>(inputTarget->getNBcolumns());
+    if (!test)
+    {
+        std::cerr << "RetinaFilter::checkInput: input buffer does not match retina buffer size, conversion aborted" << std::endl;
+        return false;
+    }
+
+    return true;
+}
+
+// main function that runs the filter for a given input frame
+bool RetinaFilter::runFilter(const oclMat &imageInput, const bool useAdaptiveFiltering, const bool processRetinaParvoMagnoMapping, const bool useColorMode, const bool inputIsColorMultiplexed)
+{
+    // preliminary check
+    bool processSuccess = true;
+    if (!checkInput(imageInput, useColorMode))
+    {
+        return false;
+    }
+
+    // run the color multiplexing if needed and compute each suub filter of the retina:
+    // -> local adaptation
+    // -> contours OPL extraction
+    // -> moving contours extraction
+
+    // stability controls value update
+    ++_ellapsedFramesSinceLastReset;
+
+    _useColorMode = useColorMode;
+
+    oclMat selectedPhotoreceptorsLocalAdaptationInput = imageInput;
+    oclMat selectedPhotoreceptorsColorInput = imageInput;
+
+    //********** Following is input data specific photoreceptors processing
+    if (useColorMode && (!inputIsColorMultiplexed)) // not multiplexed color input case
+    {
+        _colorEngine.runColorMultiplexing(selectedPhotoreceptorsColorInput);
+        selectedPhotoreceptorsLocalAdaptationInput = _colorEngine.getMultiplexedFrame();
+    }
+    //********** Following is generic Retina processing
+
+    // photoreceptors local adaptation
+    _photoreceptorsPrefilter.runFilter_LocalAdapdation(selectedPhotoreceptorsLocalAdaptationInput, _ParvoRetinaFilter.getHorizontalCellsOutput());
+
+    // run parvo filter
+    _ParvoRetinaFilter.runFilter(_photoreceptorsPrefilter.getOutput(), _useParvoOutput);
+
+    if (_useParvoOutput)
+    {
+        _ParvoRetinaFilter.normalizeGrayOutputCentredSigmoide(); // models the saturation of the cells, usefull for visualisation of the ON-OFF Parvo Output, Bipolar cells outputs do not change !!!
+        _ParvoRetinaFilter.centerReductImageLuminance(); // best for further spectrum analysis
+
+        if (_normalizeParvoOutput_0_maxOutputValue)
+        {
+            _ParvoRetinaFilter.normalizeGrayOutput_0_maxOutputValue(_maxOutputValue);
+        }
+    }
+
+    if (_useParvoOutput && _useMagnoOutput)
+    {
+        _MagnoRetinaFilter.runFilter(_ParvoRetinaFilter.getBipolarCellsON(), _ParvoRetinaFilter.getBipolarCellsOFF());
+        if (_normalizeMagnoOutput_0_maxOutputValue)
+        {
+            _MagnoRetinaFilter.normalizeGrayOutput_0_maxOutputValue(_maxOutputValue);
+        }
+        _MagnoRetinaFilter.normalizeGrayOutputNearZeroCentreredSigmoide();
+    }
+
+    if (_useParvoOutput && _useMagnoOutput && processRetinaParvoMagnoMapping)
+    {
+        _processRetinaParvoMagnoMapping();
+        if (_useColorMode)
+        {
+            _colorEngine.runColorDemultiplexing(_retinaParvoMagnoMappedFrame, useAdaptiveFiltering, _maxOutputValue);
+        }
+        return processSuccess;
+    }
+
+    if (_useParvoOutput && _useColorMode)
+    {
+        _colorEngine.runColorDemultiplexing(_ParvoRetinaFilter.getOutput(), useAdaptiveFiltering, _maxOutputValue);
+    }
+    return processSuccess;
+}
+
+const oclMat &RetinaFilter::getContours()
+{
+    if (_useColorMode)
+    {
+        return _colorEngine.getLuminance();
+    }
+    else
+    {
+        return _ParvoRetinaFilter.getOutput();
+    }
+}
+void RetinaFilter::_processRetinaParvoMagnoMapping()
+{
+    oclMat parvo = _ParvoRetinaFilter.getOutput();
+    oclMat magno = _MagnoRetinaFilter.getOutput();
+
+    int halfRows = parvo.rows / 2;
+    int halfCols = parvo.cols / 2;
+    float minDistance = MIN(halfRows, halfCols) * 0.7f;
+
+    int elements_per_row = static_cast<int>(parvo.step / parvo.elemSize());
+
+    Context * ctx = Context::getContext();
+    std::vector<std::pair<size_t, const void *> > args;
+    size_t globalSize[] = {parvo.cols, parvo.rows, 1};
+    size_t localSize[]  = {16, 16, 1};
+
+    args.push_back(std::make_pair(sizeof(cl_mem),   &parvo.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),   &magno.data));
+    args.push_back(std::make_pair(sizeof(cl_int),   &parvo.cols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &parvo.rows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &halfCols));
+    args.push_back(std::make_pair(sizeof(cl_int),   &halfRows));
+    args.push_back(std::make_pair(sizeof(cl_int),   &elements_per_row));
+    args.push_back(std::make_pair(sizeof(cl_float), &minDistance));
+    openCLExecuteKernel(ctx, &retina_kernel, "processRetinaParvoMagnoMapping", globalSize, localSize, args, -1, -1);
+}
+}  /* namespace ocl */
+
+Ptr<Retina> createRetina_OCL(Size getInputSize){ return makePtr<ocl::RetinaOCLImpl>(getInputSize); }
+Ptr<Retina> createRetina_OCL(Size getInputSize, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght)
+{
+    return makePtr<ocl::RetinaOCLImpl>(getInputSize, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght);
+}
+
+}  /* namespace bioinspired */
+}  /* namespace cv */
+
+#endif /* #ifdef HAVE_OPENCV_OCL */
diff --git a/modules/bioinspired/src/retina_ocl.hpp b/modules/bioinspired/src/retina_ocl.hpp
new file mode 100644
index 000000000..90df0601c
--- /dev/null
+++ b/modules/bioinspired/src/retina_ocl.hpp
@@ -0,0 +1,634 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OCL_RETINA_HPP__
+#define __OCL_RETINA_HPP__
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCV_OCL
+
+// please refer to c++ headers for API comments
+namespace cv
+{
+namespace bioinspired
+{
+namespace ocl
+{
+void normalizeGrayOutputCentredSigmoide(const float meanValue, const float sensitivity, cv::ocl::oclMat &in, cv::ocl::oclMat &out, const float maxValue = 255.f);
+void normalizeGrayOutput_0_maxOutputValue(cv::ocl::oclMat &inputOutputBuffer, const float maxOutputValue = 255.0);
+void normalizeGrayOutputNearZeroCentreredSigmoide(cv::ocl::oclMat &inputPicture, cv::ocl::oclMat &outputBuffer, const float sensitivity = 40, const float maxOutputValue = 255.0f);
+void centerReductImageLuminance(cv::ocl::oclMat &inputOutputBuffer);
+
+class BasicRetinaFilter
+{
+public:
+    BasicRetinaFilter(const unsigned int NBrows, const unsigned int NBcolumns, const unsigned int parametersListSize = 1, const bool useProgressiveFilter = false);
+    ~BasicRetinaFilter();
+    inline void clearOutputBuffer()
+    {
+        _filterOutput = 0;
+    };
+    inline void clearSecondaryBuffer()
+    {
+        _localBuffer = 0;
+    };
+    inline void clearAllBuffers()
+    {
+        clearOutputBuffer();
+        clearSecondaryBuffer();
+    };
+    void  resize(const unsigned int NBrows, const unsigned int NBcolumns);
+    const cv::ocl::oclMat &runFilter_LPfilter(const cv::ocl::oclMat &inputFrame, const unsigned int filterIndex = 0);
+    void  runFilter_LPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const unsigned int filterIndex = 0);
+    void  runFilter_LPfilter_Autonomous(cv::ocl::oclMat &inputOutputFrame, const unsigned int filterIndex = 0);
+    const cv::ocl::oclMat &runFilter_LocalAdapdation(const cv::ocl::oclMat &inputOutputFrame, const cv::ocl::oclMat &localLuminance);
+    void  runFilter_LocalAdapdation(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &localLuminance, cv::ocl::oclMat &outputFrame);
+    const cv::ocl::oclMat &runFilter_LocalAdapdation_autonomous(const cv::ocl::oclMat &inputFrame);
+    void  runFilter_LocalAdapdation_autonomous(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame);
+    void  setLPfilterParameters(const float beta, const float tau, const float k, const unsigned int filterIndex = 0);
+    inline void setV0CompressionParameter(const float v0, const float maxInputValue, const float)
+    {
+        _v0 = v0 * maxInputValue;
+        _localLuminanceFactor = v0;
+        _localLuminanceAddon = maxInputValue * (1.0f - v0);
+        _maxInputValue = maxInputValue;
+    };
+    inline void setV0CompressionParameter(const float v0, const float meanLuminance)
+    {
+        this->setV0CompressionParameter(v0, _maxInputValue, meanLuminance);
+    };
+    inline void setV0CompressionParameter(const float v0)
+    {
+        _v0 = v0 * _maxInputValue;
+        _localLuminanceFactor = v0;
+        _localLuminanceAddon = _maxInputValue * (1.0f - v0);
+    };
+    inline void setV0CompressionParameterToneMapping(const float v0, const float maxInputValue, const float meanLuminance = 128.0f)
+    {
+        _v0 = v0 * maxInputValue;
+        _localLuminanceFactor = 1.0f;
+        _localLuminanceAddon = meanLuminance * _v0;
+        _maxInputValue = maxInputValue;
+    };
+    inline void updateCompressionParameter(const float meanLuminance)
+    {
+        _localLuminanceFactor = 1;
+        _localLuminanceAddon = meanLuminance * _v0;
+    };
+    inline float getV0CompressionParameter()
+    {
+        return _v0 / _maxInputValue;
+    };
+    inline const cv::ocl::oclMat &getOutput() const
+    {
+        return _filterOutput;
+    };
+    inline unsigned int getNBrows()
+    {
+        return _filterOutput.rows;
+    };
+    inline unsigned int getNBcolumns()
+    {
+        return _filterOutput.cols;
+    };
+    inline unsigned int getNBpixels()
+    {
+        return _filterOutput.size().area();
+    };
+    inline void normalizeGrayOutput_0_maxOutputValue(const float maxValue)
+    {
+        ocl::normalizeGrayOutput_0_maxOutputValue(_filterOutput, maxValue);
+    };
+    inline void normalizeGrayOutputCentredSigmoide()
+    {
+        ocl::normalizeGrayOutputCentredSigmoide(0.0, 2.0, _filterOutput, _filterOutput);
+    };
+    inline void centerReductImageLuminance()
+    {
+        ocl::centerReductImageLuminance(_filterOutput);
+    };
+    inline float getMaxInputValue()
+    {
+        return this->_maxInputValue;
+    };
+    inline void setMaxInputValue(const float newMaxInputValue)
+    {
+        this->_maxInputValue = newMaxInputValue;
+    };
+
+protected:
+
+    int _NBrows;
+    int _NBcols;
+    unsigned int _halfNBrows;
+    unsigned int _halfNBcolumns;
+
+    cv::ocl::oclMat _filterOutput;
+    cv::ocl::oclMat _localBuffer;
+
+    std::valarray <float>_filteringCoeficientsTable;
+    float _v0;
+    float _maxInputValue;
+    float _meanInputValue;
+    float _localLuminanceFactor;
+    float _localLuminanceAddon;
+
+    float _a;
+    float _tau;
+    float _gain;
+
+    void _spatiotemporalLPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &LPfilterOutput, const unsigned int coefTableOffset = 0);
+    float _squaringSpatiotemporalLPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const unsigned int filterIndex = 0);
+    void _spatiotemporalLPfilter_Irregular(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const unsigned int filterIndex = 0);
+    void _localSquaringSpatioTemporalLPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex = 0);
+    void _localLuminanceAdaptation(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &localLuminance, cv::ocl::oclMat &outputFrame, const bool updateLuminanceMean = true);
+    void _localLuminanceAdaptation(cv::ocl::oclMat &inputOutputFrame, const cv::ocl::oclMat &localLuminance);
+    void _localLuminanceAdaptationPosNegValues(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &localLuminance, float *outputFrame);
+    void _horizontalCausalFilter_addInput(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame);
+    void _horizontalAnticausalFilter(cv::ocl::oclMat &outputFrame);
+    void _verticalCausalFilter(cv::ocl::oclMat &outputFrame);
+    void _horizontalAnticausalFilter_Irregular(cv::ocl::oclMat &outputFrame, const cv::ocl::oclMat &spatialConstantBuffer);
+    void _verticalCausalFilter_Irregular(cv::ocl::oclMat &outputFrame, const cv::ocl::oclMat &spatialConstantBuffer);
+    void _verticalAnticausalFilter_multGain(cv::ocl::oclMat &outputFrame);
+};
+
+class MagnoRetinaFilter: public BasicRetinaFilter
+{
+public:
+    MagnoRetinaFilter(const unsigned int NBrows, const unsigned int NBcolumns);
+    virtual ~MagnoRetinaFilter();
+    void clearAllBuffers();
+    void resize(const unsigned int NBrows, const unsigned int NBcolumns);
+    void setCoefficientsTable(const float parasolCells_beta, const float parasolCells_tau, const float parasolCells_k, const float amacrinCellsTemporalCutFrequency, const float localAdaptIntegration_tau, const float localAdaptIntegration_k);
+
+    const cv::ocl::oclMat &runFilter(const cv::ocl::oclMat &OPL_ON, const cv::ocl::oclMat &OPL_OFF);
+
+    inline const cv::ocl::oclMat &getMagnoON() const
+    {
+        return _magnoXOutputON;
+    };
+    inline const cv::ocl::oclMat &getMagnoOFF() const
+    {
+        return _magnoXOutputOFF;
+    };
+    inline const cv::ocl::oclMat &getMagnoYsaturated() const
+    {
+        return _magnoYsaturated;
+    };
+    inline void normalizeGrayOutputNearZeroCentreredSigmoide()
+    {
+        ocl::normalizeGrayOutputNearZeroCentreredSigmoide(_magnoYOutput, _magnoYsaturated);
+    };
+    inline float getTemporalConstant()
+    {
+        return this->_filteringCoeficientsTable[2];
+    };
+private:
+    cv::ocl::oclMat _previousInput_ON;
+    cv::ocl::oclMat _previousInput_OFF;
+    cv::ocl::oclMat _amacrinCellsTempOutput_ON;
+    cv::ocl::oclMat _amacrinCellsTempOutput_OFF;
+    cv::ocl::oclMat _magnoXOutputON;
+    cv::ocl::oclMat _magnoXOutputOFF;
+    cv::ocl::oclMat _localProcessBufferON;
+    cv::ocl::oclMat _localProcessBufferOFF;
+    cv::ocl::oclMat _magnoYOutput;
+    cv::ocl::oclMat _magnoYsaturated;
+
+    float _temporalCoefficient;
+    void _amacrineCellsComputing(const cv::ocl::oclMat &OPL_ON,  const cv::ocl::oclMat &OPL_OFF);
+};
+
+class ParvoRetinaFilter: public BasicRetinaFilter
+{
+public:
+    ParvoRetinaFilter(const unsigned int NBrows = 480, const unsigned int NBcolumns = 640);
+    virtual ~ParvoRetinaFilter();
+    void resize(const unsigned int NBrows, const unsigned int NBcolumns);
+    void clearAllBuffers();
+    void setOPLandParvoFiltersParameters(const float beta1, const float tau1, const float k1, const float beta2, const float tau2, const float k2);
+
+    inline void setGanglionCellsLocalAdaptationLPfilterParameters(const float tau, const float k)
+    {
+        BasicRetinaFilter::setLPfilterParameters(0, tau, k, 2);
+    };
+    const cv::ocl::oclMat &runFilter(const cv::ocl::oclMat &inputFrame, const bool useParvoOutput = true);
+
+    inline const cv::ocl::oclMat &getPhotoreceptorsLPfilteringOutput() const
+    {
+        return _photoreceptorsOutput;
+    };
+
+    inline const cv::ocl::oclMat &getHorizontalCellsOutput() const
+    {
+        return _horizontalCellsOutput;
+    };
+
+    inline const cv::ocl::oclMat &getParvoON() const
+    {
+        return _parvocellularOutputON;
+    };
+
+    inline const cv::ocl::oclMat &getParvoOFF() const
+    {
+        return _parvocellularOutputOFF;
+    };
+
+    inline const cv::ocl::oclMat &getBipolarCellsON() const
+    {
+        return _bipolarCellsOutputON;
+    };
+
+    inline const cv::ocl::oclMat &getBipolarCellsOFF() const
+    {
+        return _bipolarCellsOutputOFF;
+    };
+
+    inline float getPhotoreceptorsTemporalConstant()
+    {
+        return this->_filteringCoeficientsTable[2];
+    };
+
+    inline float getHcellsTemporalConstant()
+    {
+        return this->_filteringCoeficientsTable[5];
+    };
+private:
+    cv::ocl::oclMat _photoreceptorsOutput;
+    cv::ocl::oclMat _horizontalCellsOutput;
+    cv::ocl::oclMat _parvocellularOutputON;
+    cv::ocl::oclMat _parvocellularOutputOFF;
+    cv::ocl::oclMat _bipolarCellsOutputON;
+    cv::ocl::oclMat _bipolarCellsOutputOFF;
+    cv::ocl::oclMat _localAdaptationOFF;
+    cv::ocl::oclMat _localAdaptationON;
+    cv::ocl::oclMat _parvocellularOutputONminusOFF;
+    void _OPL_OnOffWaysComputing();
+};
+class RetinaColor: public BasicRetinaFilter
+{
+public:
+    RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns, const int samplingMethod = RETINA_COLOR_DIAGONAL);
+    virtual ~RetinaColor();
+
+    void clearAllBuffers();
+    void resize(const unsigned int NBrows, const unsigned int NBcolumns);
+    inline void runColorMultiplexing(const cv::ocl::oclMat &inputRGBFrame)
+    {
+        runColorMultiplexing(inputRGBFrame, _multiplexedFrame);
+    };
+    void runColorMultiplexing(const cv::ocl::oclMat &demultiplexedInputFrame, cv::ocl::oclMat &multiplexedFrame);
+    void runColorDemultiplexing(const cv::ocl::oclMat &multiplexedColorFrame, const bool adaptiveFiltering = false, const float maxInputValue = 255.0);
+
+    void setColorSaturation(const bool saturateColors = true, const float colorSaturationValue = 4.0)
+    {
+        _saturateColors = saturateColors;
+        _colorSaturationValue = colorSaturationValue;
+    };
+
+    void setChrominanceLPfilterParameters(const float beta, const float tau, const float k)
+    {
+        setLPfilterParameters(beta, tau, k);
+    };
+
+    bool applyKrauskopfLMS2Acr1cr2Transform(cv::ocl::oclMat &result);
+    bool applyLMS2LabTransform(cv::ocl::oclMat &result);
+    inline const cv::ocl::oclMat &getMultiplexedFrame() const
+    {
+        return _multiplexedFrame;
+    };
+
+    inline const cv::ocl::oclMat &getDemultiplexedColorFrame() const
+    {
+        return _demultiplexedColorFrame;
+    };
+
+    inline const cv::ocl::oclMat &getLuminance() const
+    {
+        return _luminance;
+    };
+    inline const cv::ocl::oclMat &getChrominance() const
+    {
+        return _chrominance;
+    };
+    void clipRGBOutput_0_maxInputValue(cv::ocl::oclMat &inputOutputBuffer, const float maxOutputValue = 255.0);
+    void normalizeRGBOutput_0_maxOutputValue(const float maxOutputValue = 255.0);
+    inline void setDemultiplexedColorFrame(const cv::ocl::oclMat &demultiplexedImage)
+    {
+        _demultiplexedColorFrame = demultiplexedImage;
+    };
+protected:
+    inline unsigned int bayerSampleOffset(unsigned int index)
+    {
+        return index + ((index / getNBcolumns()) % 2) * getNBpixels() + ((index % getNBcolumns()) % 2) * getNBpixels();
+    }
+    inline Rect getROI(int idx)
+    {
+        return Rect(0, idx * _NBrows, _NBcols, _NBrows);
+    }
+    int _samplingMethod;
+    bool _saturateColors;
+    float _colorSaturationValue;
+    cv::ocl::oclMat _luminance;
+    cv::ocl::oclMat _multiplexedFrame;
+    cv::ocl::oclMat _RGBmosaic;
+    cv::ocl::oclMat _tempMultiplexedFrame;
+    cv::ocl::oclMat _demultiplexedTempBuffer;
+    cv::ocl::oclMat _demultiplexedColorFrame;
+    cv::ocl::oclMat _chrominance;
+    cv::ocl::oclMat _colorLocalDensity;
+    cv::ocl::oclMat _imageGradient;
+
+    float _pR, _pG, _pB;
+    bool _objectInit;
+
+    void _initColorSampling();
+    void _adaptiveSpatialLPfilter(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &gradient, cv::ocl::oclMat &outputFrame);
+    void _adaptiveHorizontalCausalFilter_addInput(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &gradient, cv::ocl::oclMat &outputFrame);
+    void _adaptiveVerticalAnticausalFilter_multGain(const cv::ocl::oclMat &gradient, cv::ocl::oclMat &outputFrame);
+    void _computeGradient(const cv::ocl::oclMat &luminance, cv::ocl::oclMat &gradient);
+    void _normalizeOutputs_0_maxOutputValue(void);
+    void _applyImageColorSpaceConversion(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const float *transformTable);
+};
+class RetinaFilter
+{
+public:
+    RetinaFilter(const unsigned int sizeRows, const unsigned int sizeColumns, const bool colorMode = false, const int samplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0);
+    ~RetinaFilter();
+
+    void clearAllBuffers();
+    void resize(const unsigned int NBrows, const unsigned int NBcolumns);
+    bool checkInput(const cv::ocl::oclMat &input, const bool colorMode);
+    bool runFilter(const cv::ocl::oclMat &imageInput, const bool useAdaptiveFiltering = true, const bool processRetinaParvoMagnoMapping = false, const bool useColorMode = false, const bool inputIsColorMultiplexed = false);
+
+    void setGlobalParameters(const float OPLspatialResponse1 = 0.7, const float OPLtemporalresponse1 = 1, const float OPLassymetryGain = 0, const float OPLspatialResponse2 = 5, const float OPLtemporalresponse2 = 1, const float LPfilterSpatialResponse = 5, const float LPfilterGain = 0, const float LPfilterTemporalresponse = 0, const float MovingContoursExtractorCoefficient = 5, const bool normalizeParvoOutput_0_maxOutputValue = false, const bool normalizeMagnoOutput_0_maxOutputValue = false, const float maxOutputValue = 255.0, const float maxInputValue = 255.0, const float meanValue = 128.0);
+
+    inline void setPhotoreceptorsLocalAdaptationSensitivity(const float V0CompressionParameter)
+    {
+        _photoreceptorsPrefilter.setV0CompressionParameter(1 - V0CompressionParameter);
+        _setInitPeriodCount();
+    };
+
+    inline void setParvoGanglionCellsLocalAdaptationSensitivity(const float V0CompressionParameter)
+    {
+        _ParvoRetinaFilter.setV0CompressionParameter(V0CompressionParameter);
+        _setInitPeriodCount();
+    };
+
+    inline void setGanglionCellsLocalAdaptationLPfilterParameters(const float spatialResponse, const float temporalResponse)
+    {
+        _ParvoRetinaFilter.setGanglionCellsLocalAdaptationLPfilterParameters(temporalResponse, spatialResponse);
+        _setInitPeriodCount();
+    };
+
+    inline void setMagnoGanglionCellsLocalAdaptationSensitivity(const float V0CompressionParameter)
+    {
+        _MagnoRetinaFilter.setV0CompressionParameter(V0CompressionParameter);
+        _setInitPeriodCount();
+    };
+
+    void setOPLandParvoParameters(const float beta1, const float tau1, const float k1, const float beta2, const float tau2, const float k2, const float V0CompressionParameter)
+    {
+        _ParvoRetinaFilter.setOPLandParvoFiltersParameters(beta1, tau1, k1, beta2, tau2, k2);
+        _ParvoRetinaFilter.setV0CompressionParameter(V0CompressionParameter);
+        _setInitPeriodCount();
+    };
+
+    void setMagnoCoefficientsTable(const float parasolCells_beta, const float parasolCells_tau, const float parasolCells_k, const float amacrinCellsTemporalCutFrequency, const float V0CompressionParameter, const float localAdaptintegration_tau, const float localAdaptintegration_k)
+    {
+        _MagnoRetinaFilter.setCoefficientsTable(parasolCells_beta, parasolCells_tau, parasolCells_k, amacrinCellsTemporalCutFrequency, localAdaptintegration_tau, localAdaptintegration_k);
+        _MagnoRetinaFilter.setV0CompressionParameter(V0CompressionParameter);
+        _setInitPeriodCount();
+    };
+
+    inline void activateNormalizeParvoOutput_0_maxOutputValue(const bool normalizeParvoOutput_0_maxOutputValue)
+    {
+        _normalizeParvoOutput_0_maxOutputValue = normalizeParvoOutput_0_maxOutputValue;
+    };
+
+    inline void activateNormalizeMagnoOutput_0_maxOutputValue(const bool normalizeMagnoOutput_0_maxOutputValue)
+    {
+        _normalizeMagnoOutput_0_maxOutputValue = normalizeMagnoOutput_0_maxOutputValue;
+    };
+
+    inline void setMaxOutputValue(const float maxOutputValue)
+    {
+        _maxOutputValue = maxOutputValue;
+    };
+
+    void setColorMode(const bool desiredColorMode)
+    {
+        _useColorMode = desiredColorMode;
+    };
+    inline void setColorSaturation(const bool saturateColors = true, const float colorSaturationValue = 4.0)
+    {
+        _colorEngine.setColorSaturation(saturateColors, colorSaturationValue);
+    };
+    inline const cv::ocl::oclMat &getLocalAdaptation() const
+    {
+        return _photoreceptorsPrefilter.getOutput();
+    };
+    inline const cv::ocl::oclMat &getPhotoreceptors() const
+    {
+        return _ParvoRetinaFilter.getPhotoreceptorsLPfilteringOutput();
+    };
+
+    inline const cv::ocl::oclMat &getHorizontalCells() const
+    {
+        return _ParvoRetinaFilter.getHorizontalCellsOutput();
+    };
+    inline bool areContoursProcessed()
+    {
+        return _useParvoOutput;
+    };
+    bool getParvoFoveaResponse(cv::ocl::oclMat &parvoFovealResponse);
+    inline void activateContoursProcessing(const bool useParvoOutput)
+    {
+        _useParvoOutput = useParvoOutput;
+    };
+
+    const cv::ocl::oclMat &getContours();
+
+    inline const cv::ocl::oclMat &getContoursON() const
+    {
+        return _ParvoRetinaFilter.getParvoON();
+    };
+
+    inline const cv::ocl::oclMat &getContoursOFF() const
+    {
+        return _ParvoRetinaFilter.getParvoOFF();
+    };
+
+    inline bool areMovingContoursProcessed()
+    {
+        return _useMagnoOutput;
+    };
+
+    inline void activateMovingContoursProcessing(const bool useMagnoOutput)
+    {
+        _useMagnoOutput = useMagnoOutput;
+    };
+
+    inline const cv::ocl::oclMat &getMovingContours() const
+    {
+        return _MagnoRetinaFilter.getOutput();
+    };
+
+    inline const cv::ocl::oclMat &getMovingContoursSaturated() const
+    {
+        return _MagnoRetinaFilter.getMagnoYsaturated();
+    };
+
+    inline const cv::ocl::oclMat &getMovingContoursON() const
+    {
+        return _MagnoRetinaFilter.getMagnoON();
+    };
+
+    inline const cv::ocl::oclMat &getMovingContoursOFF() const
+    {
+        return _MagnoRetinaFilter.getMagnoOFF();
+    };
+
+    inline const cv::ocl::oclMat &getRetinaParvoMagnoMappedOutput() const
+    {
+        return _retinaParvoMagnoMappedFrame;
+    };
+
+    inline const cv::ocl::oclMat &getParvoContoursChannel() const
+    {
+        return _colorEngine.getLuminance();
+    };
+
+    inline const cv::ocl::oclMat &getParvoChrominance() const
+    {
+        return _colorEngine.getChrominance();
+    };
+    inline const cv::ocl::oclMat &getColorOutput() const
+    {
+        return _colorEngine.getDemultiplexedColorFrame();
+    };
+
+    inline bool isColorMode()
+    {
+        return _useColorMode;
+    };
+    bool getColorMode()
+    {
+        return _useColorMode;
+    };
+
+    inline bool isInitTransitionDone()
+    {
+        if (_ellapsedFramesSinceLastReset < _globalTemporalConstant)
+        {
+            return false;
+        }
+        return true;
+    };
+    inline float getRetinaSamplingBackProjection(const float projectedRadiusLength)
+    {
+        return projectedRadiusLength;
+    };
+
+    inline unsigned int getInputNBrows()
+    {
+        return _photoreceptorsPrefilter.getNBrows();
+    };
+
+    inline unsigned int getInputNBcolumns()
+    {
+        return _photoreceptorsPrefilter.getNBcolumns();
+    };
+
+    inline unsigned int getInputNBpixels()
+    {
+        return _photoreceptorsPrefilter.getNBpixels();
+    };
+
+    inline unsigned int getOutputNBrows()
+    {
+        return _photoreceptorsPrefilter.getNBrows();
+    };
+
+    inline unsigned int getOutputNBcolumns()
+    {
+        return _photoreceptorsPrefilter.getNBcolumns();
+    };
+
+    inline unsigned int getOutputNBpixels()
+    {
+        return _photoreceptorsPrefilter.getNBpixels();
+    };
+private:
+    bool _useParvoOutput;
+    bool _useMagnoOutput;
+
+    unsigned int _ellapsedFramesSinceLastReset;
+    unsigned int _globalTemporalConstant;
+
+    cv::ocl::oclMat _retinaParvoMagnoMappedFrame;
+    BasicRetinaFilter _photoreceptorsPrefilter;
+    ParvoRetinaFilter _ParvoRetinaFilter;
+    MagnoRetinaFilter _MagnoRetinaFilter;
+    RetinaColor       _colorEngine;
+
+    bool _useMinimalMemoryForToneMappingONLY;
+    bool _normalizeParvoOutput_0_maxOutputValue;
+    bool _normalizeMagnoOutput_0_maxOutputValue;
+    float _maxOutputValue;
+    bool _useColorMode;
+
+    void _setInitPeriodCount();
+    void _processRetinaParvoMagnoMapping();
+    void _runGrayToneMapping(const cv::ocl::oclMat &grayImageInput, cv::ocl::oclMat &grayImageOutput , const float PhotoreceptorsCompression = 0.6, const float ganglionCellsCompression = 0.6);
+};
+
+}  /* namespace ocl */
+}  /* namespace bioinspired */
+}  /* namespace cv */
+
+#endif  /* HAVE_OPENCV_OCL */
+#endif  /* __OCL_RETINA_HPP__ */
diff --git a/modules/contrib/src/retinacolor.cpp b/modules/bioinspired/src/retinacolor.cpp
similarity index 98%
rename from modules/contrib/src/retinacolor.cpp
rename to modules/bioinspired/src/retinacolor.cpp
index 92cba47af..3fbc55385 100644
--- a/modules/contrib/src/retinacolor.cpp
+++ b/modules/bioinspired/src/retinacolor.cpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -73,14 +73,15 @@
 
 namespace cv
 {
-
+namespace bioinspired
+{
 // init static values
 static float _LMStoACr1Cr2[]={1.0,  1.0, 0.0,  1.0, -1.0, 0.0,  -0.5, -0.5, 1.0};
 //static double _ACr1Cr2toLMS[]={0.5,  0.5, 0.0,   0.5, -0.5, 0.0,  0.5,  0.0, 1.0};
 static float _LMStoLab[]={0.5774f, 0.5774f, 0.5774f, 0.4082f, 0.4082f, -0.8165f, 0.7071f, -0.7071f, 0.f};
 
 // constructor/desctructor
-RetinaColor::RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns, const RETINA_COLORSAMPLINGMETHOD samplingMethod)
+RetinaColor::RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns, const int samplingMethod)
 :BasicRetinaFilter(NBrows, NBcolumns, 3),
  _colorSampling(NBrows*NBcolumns),
  _RGBmosaic(NBrows*NBcolumns*3),
@@ -720,4 +721,5 @@ void RetinaColor::_applyImageColorSpaceConversion(const std::valarray<float> &in
     }
 }
 
-}
+}// end of namespace bioinspired
+}// end of namespace cv
diff --git a/modules/contrib/src/retinacolor.hpp b/modules/bioinspired/src/retinacolor.hpp
similarity index 97%
rename from modules/contrib/src/retinacolor.hpp
rename to modules/bioinspired/src/retinacolor.hpp
index ff4a12e40..3fb6be8f9 100644
--- a/modules/contrib/src/retinacolor.hpp
+++ b/modules/bioinspired/src/retinacolor.hpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -85,7 +85,8 @@
 
 namespace cv
 {
-
+namespace bioinspired
+{
     class RetinaColor: public BasicRetinaFilter
     {
     public:
@@ -99,7 +100,7 @@ namespace cv
         * @param NBcolumns: number of columns of the input image
         * @param samplingMethod: the chosen color sampling method
         */
-        RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns, const RETINA_COLORSAMPLINGMETHOD samplingMethod=RETINA_COLOR_DIAGONAL);
+        RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns, const int samplingMethod=RETINA_COLOR_BAYER);
 
         /**
         * standard destructor
@@ -219,7 +220,7 @@ namespace cv
     protected:
 
         // private functions
-        RETINA_COLORSAMPLINGMETHOD _samplingMethod;
+        int _samplingMethod;
         bool _saturateColors;
         float _colorSaturationValue;
         // links to parent buffers (more convienient names
@@ -382,8 +383,7 @@ namespace cv
 
 #endif
     };
-}
+}// end of namespace bioinspired
+}// end of namespace cv
 
 #endif /*RETINACOLOR_HPP_*/
-
-
diff --git a/modules/bioinspired/src/retinafasttonemapping.cpp b/modules/bioinspired/src/retinafasttonemapping.cpp
new file mode 100644
index 000000000..2713d7449
--- /dev/null
+++ b/modules/bioinspired/src/retinafasttonemapping.cpp
@@ -0,0 +1,316 @@
+
+/*#******************************************************************************
+ ** IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ **
+ ** By downloading, copying, installing or using the software you agree to this license.
+ ** If you do not agree to this license, do not download, install,
+ ** copy or use the software.
+ **
+ **
+ ** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+ **
+ ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
+ **
+ **  Creation - enhancement process 2007-2013
+ **      Author: Alexandre Benoit (benoit.alexandre.vision@gmail.com), LISTIC lab, Annecy le vieux, France
+ **
+ ** Theses algorithm have been developped by Alexandre BENOIT since his thesis with Alice Caplier at Gipsa-Lab (www.gipsa-lab.inpg.fr) and the research he pursues at LISTIC Lab (www.listic.univ-savoie.fr).
+ ** Refer to the following research paper for more information:
+ ** Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+ ** This work have been carried out thanks to Jeanny Herault who's research and great discussions are the basis of all this work, please take a look at his book:
+ ** Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+ **
+ **
+ ** This class is based on image processing tools of the author and already used within the Retina class (this is the same code as method retina::applyFastToneMapping, but in an independent class, it is ligth from a memory requirement point of view). It implements an adaptation of the efficient tone mapping algorithm propose by David Alleyson, Sabine Susstruck and Laurence Meylan's work, please cite:
+ ** -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+ **
+ **
+ **                          License Agreement
+ **               For Open Source Computer Vision Library
+ **
+ ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
+ **
+ **               For Human Visual System tools (bioinspired)
+ ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
+ **
+ ** Third party copyrights are property of their respective owners.
+ **
+ ** Redistribution and use in source and binary forms, with or without modification,
+ ** are permitted provided that the following conditions are met:
+ **
+ ** * Redistributions of source code must retain the above copyright notice,
+ **    this list of conditions and the following disclaimer.
+ **
+ ** * Redistributions in binary form must reproduce the above copyright notice,
+ **    this list of conditions and the following disclaimer in the documentation
+ **    and/or other materials provided with the distribution.
+ **
+ ** * The name of the copyright holders may not be used to endorse or promote products
+ **    derived from this software without specific prior written permission.
+ **
+ ** This software is provided by the copyright holders and contributors "as is" and
+ ** any express or implied warranties, including, but not limited to, the implied
+ ** warranties of merchantability and fitness for a particular purpose are disclaimed.
+ ** In no event shall the Intel Corporation or contributors be liable for any direct,
+ ** indirect, incidental, special, exemplary, or consequential damages
+ ** (including, but not limited to, procurement of substitute goods or services;
+ ** loss of use, data, or profits; or business interruption) however caused
+ ** and on any theory of liability, whether in contract, strict liability,
+ ** or tort (including negligence or otherwise) arising in any way out of
+ ** the use of this software, even if advised of the possibility of such damage.
+ *******************************************************************************/
+
+/*
+ * retinafasttonemapping.cpp
+ *
+ *  Created on: May 26, 2013
+ *      Author: Alexandre Benoit
+ */
+
+#include "precomp.hpp"
+#include "basicretinafilter.hpp"
+#include "retinacolor.hpp"
+#include <cstdio>
+#include <sstream>
+#include <valarray>
+
+namespace cv
+{
+namespace bioinspired
+{
+/**
+ * @class RetinaFastToneMappingImpl a wrapper class which allows the tone mapping algorithm of Meylan&al(2007) to be used with OpenCV.
+ * This algorithm is already implemented in thre Retina class (retina::applyFastToneMapping) but used it does not require all the retina model to be allocated. This allows a light memory use for low memory devices (smartphones, etc.
+ * As a summary, these are the model properties:
+ * => 2 stages of local luminance adaptation with a different local neighborhood for each.
+ * => first stage models the retina photorecetors local luminance adaptation
+ * => second stage models th ganglion cells local information adaptation
+ * => compared to the initial publication, this class uses spatio-temporal low pass filters instead of spatial only filters.
+ * ====> this can help noise robustness and temporal stability for video sequence use cases.
+ * for more information, read to the following papers :
+ *  Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+ * regarding spatio-temporal filter and the bigger retina model :
+ * Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+ */
+
+class RetinaFastToneMappingImpl : public RetinaFastToneMapping
+{
+public:
+    /**
+     * constructor
+     * @param imageInput: the size of the images to process
+     */
+    RetinaFastToneMappingImpl(Size imageInput)
+    {
+        unsigned int nbPixels=imageInput.height*imageInput.width;
+
+        // basic error check
+        if (nbPixels <= 0)
+        throw cv::Exception(-1, "Bad retina size setup : size height and with must be superior to zero", "RetinaImpl::setup", "retinafasttonemapping.cpp", 0);
+
+        // resize buffers
+        _inputBuffer.resize(nbPixels*3); // buffer supports gray images but also 3 channels color buffers... (larger is better...)
+        _imageOutput.resize(nbPixels*3);
+        _temp2.resize(nbPixels);
+        // allocate the main filter with 2 setup sets properties (one for each low pass filter
+        _multiuseFilter = makePtr<BasicRetinaFilter>(imageInput.height, imageInput.width, 2);
+        // allocate the color manager (multiplexer/demultiplexer
+        _colorEngine = makePtr<RetinaColor>(imageInput.height, imageInput.width);
+        // setup filter behaviors with default values
+        setup();
+    }
+
+    /**
+     * basic destructor
+     */
+    virtual ~RetinaFastToneMappingImpl(){};
+
+    /**
+     * method that applies a luminance correction (initially High Dynamic Range (HDR) tone mapping) using only the 2 local adaptation stages of the retina parvocellular channel : photoreceptors level and ganlion cells level. Spatio temporal filtering is applied but limited to temporal smoothing and eventually high frequencies attenuation. This is a lighter method than the one available using the regular retina::run method. It is then faster but it does not include complete temporal filtering nor retina spectral whitening. Then, it can have a more limited effect on images with a very high dynamic range. This is an adptation of the original still image HDR tone mapping algorithm of David Alleyson, Sabine Susstruck and Laurence Meylan's work, please cite:
+    * -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+     @param inputImage the input image to process RGB or gray levels
+     @param outputToneMappedImage the output tone mapped image
+     */
+    virtual void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)
+    {
+        // first convert input image to the compatible format :
+        const bool colorMode = _convertCvMat2ValarrayBuffer(inputImage.getMat(), _inputBuffer);
+
+        // process tone mapping
+        if (colorMode)
+        {
+            _runRGBToneMapping(_inputBuffer, _imageOutput, true);
+            _convertValarrayBuffer2cvMat(_imageOutput, _multiuseFilter->getNBrows(), _multiuseFilter->getNBcolumns(), true, outputToneMappedImage);
+        }else
+        {
+            _runGrayToneMapping(_inputBuffer, _imageOutput);
+            _convertValarrayBuffer2cvMat(_imageOutput, _multiuseFilter->getNBrows(), _multiuseFilter->getNBcolumns(), false, outputToneMappedImage);
+        }
+
+    }
+
+    /**
+     * setup method that updates tone mapping behaviors by adjusing the local luminance computation area
+     * @param photoreceptorsNeighborhoodRadius the first stage local adaptation area
+     * @param ganglioncellsNeighborhoodRadius the second stage local adaptation area
+     * @param meanLuminanceModulatorK the factor applied to modulate the meanLuminance information (default is 1, see reference paper)
+     */
+    virtual void setup(const float photoreceptorsNeighborhoodRadius=3.f, const float ganglioncellsNeighborhoodRadius=1.f, const float meanLuminanceModulatorK=1.f)
+    {
+        // setup the spatio-temporal properties of each filter
+        _meanLuminanceModulatorK = meanLuminanceModulatorK;
+        _multiuseFilter->setV0CompressionParameter(1.f, 255.f, 128.f);
+        _multiuseFilter->setLPfilterParameters(0.f, 0.f, photoreceptorsNeighborhoodRadius, 1);
+        _multiuseFilter->setLPfilterParameters(0.f, 0.f, ganglioncellsNeighborhoodRadius, 2);
+    }
+
+private:
+    // a filter able to perform local adaptation and low pass spatio-temporal filtering
+    cv::Ptr <BasicRetinaFilter> _multiuseFilter;
+    cv::Ptr <RetinaColor> _colorEngine;
+
+    //!< buffer used to convert input cv::Mat to internal retina buffers format (valarrays)
+    std::valarray<float> _inputBuffer;
+    std::valarray<float> _imageOutput;
+    std::valarray<float> _temp2;
+    float _meanLuminanceModulatorK;
+
+
+void _convertValarrayBuffer2cvMat(const std::valarray<float> &grayMatrixToConvert, const unsigned int nbRows, const unsigned int nbColumns, const bool colorMode, OutputArray outBuffer)
+{
+    // fill output buffer with the valarray buffer
+    const float *valarrayPTR=get_data(grayMatrixToConvert);
+    if (!colorMode)
+    {
+        outBuffer.create(cv::Size(nbColumns, nbRows), CV_8U);
+        Mat outMat = outBuffer.getMat();
+        for (unsigned int i=0;i<nbRows;++i)
+        {
+            for (unsigned int j=0;j<nbColumns;++j)
+            {
+                cv::Point2d pixel(j,i);
+                outMat.at<unsigned char>(pixel)=(unsigned char)*(valarrayPTR++);
+            }
+        }
+    }else
+    {
+        const unsigned int nbPixels=nbColumns*nbRows;
+        const unsigned int doubleNBpixels=nbColumns*nbRows*2;
+        outBuffer.create(cv::Size(nbColumns, nbRows), CV_8UC3);
+        Mat outMat = outBuffer.getMat();
+        for (unsigned int i=0;i<nbRows;++i)
+        {
+            for (unsigned int j=0;j<nbColumns;++j,++valarrayPTR)
+            {
+                cv::Point2d pixel(j,i);
+                cv::Vec3b pixelValues;
+                pixelValues[2]=(unsigned char)*(valarrayPTR);
+                pixelValues[1]=(unsigned char)*(valarrayPTR+nbPixels);
+                pixelValues[0]=(unsigned char)*(valarrayPTR+doubleNBpixels);
+
+                outMat.at<cv::Vec3b>(pixel)=pixelValues;
+            }
+        }
+    }
+}
+
+bool _convertCvMat2ValarrayBuffer(InputArray inputMat, std::valarray<float> &outputValarrayMatrix)
+{
+    const Mat inputMatToConvert=inputMat.getMat();
+    // first check input consistency
+    if (inputMatToConvert.empty())
+        throw cv::Exception(-1, "RetinaImpl cannot be applied, input buffer is empty", "RetinaImpl::run", "RetinaImpl.h", 0);
+
+    // retreive color mode from image input
+    int imageNumberOfChannels = inputMatToConvert.channels();
+
+        // convert to float AND fill the valarray buffer
+    typedef float T; // define here the target pixel format, here, float
+    const int dsttype = DataType<T>::depth; // output buffer is float format
+
+    const unsigned int nbPixels=inputMat.getMat().rows*inputMat.getMat().cols;
+    const unsigned int doubleNBpixels=inputMat.getMat().rows*inputMat.getMat().cols*2;
+
+    if(imageNumberOfChannels==4)
+    {
+    // create a cv::Mat table (for RGBA planes)
+        cv::Mat planes[4] =
+        {
+            cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[doubleNBpixels]),
+            cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[nbPixels]),
+            cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[0])
+        };
+        planes[3] = cv::Mat(inputMatToConvert.size(), dsttype);     // last channel (alpha) does not point on the valarray (not usefull in our case)
+        // split color cv::Mat in 4 planes... it fills valarray directely
+        cv::split(Mat_<Vec<T, 4> >(inputMatToConvert), planes);
+    }
+    else if (imageNumberOfChannels==3)
+    {
+        // create a cv::Mat table (for RGB planes)
+        cv::Mat planes[] =
+        {
+        cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[doubleNBpixels]),
+        cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[nbPixels]),
+        cv::Mat(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[0])
+        };
+        // split color cv::Mat in 3 planes... it fills valarray directely
+        cv::split(cv::Mat_<Vec<T, 3> >(inputMatToConvert), planes);
+    }
+    else if(imageNumberOfChannels==1)
+    {
+        // create a cv::Mat header for the valarray
+        cv::Mat dst(inputMatToConvert.size(), dsttype, &outputValarrayMatrix[0]);
+        inputMatToConvert.convertTo(dst, dsttype);
+    }
+        else
+            CV_Error(Error::StsUnsupportedFormat, "input image must be single channel (gray levels), bgr format (color) or bgra (color with transparency which won't be considered");
+
+    return imageNumberOfChannels>1; // return bool : false for gray level image processing, true for color mode
+}
+
+
+    // run the initilized retina filter in order to perform gray image tone mapping, after this call all retina outputs are updated
+    void _runGrayToneMapping(const std::valarray<float> &grayImageInput, std::valarray<float> &grayImageOutput)
+    {
+         // apply tone mapping on the multiplexed image
+        // -> photoreceptors local adaptation (large area adaptation)
+        _multiuseFilter->runFilter_LPfilter(grayImageInput, grayImageOutput, 0); // compute low pass filtering modeling the horizontal cells filtering to acess local luminance
+        _multiuseFilter->setV0CompressionParameterToneMapping(1.f, grayImageOutput.max(), _meanLuminanceModulatorK*grayImageOutput.sum()/(float)_multiuseFilter->getNBpixels());
+        _multiuseFilter->runFilter_LocalAdapdation(grayImageInput, grayImageOutput, _temp2); // adapt contrast to local luminance
+
+        // -> ganglion cells local adaptation (short area adaptation)
+        _multiuseFilter->runFilter_LPfilter(_temp2, grayImageOutput, 1); // compute low pass filtering (high cut frequency (remove spatio-temporal noise)
+        _multiuseFilter->setV0CompressionParameterToneMapping(1.f, _temp2.max(), _meanLuminanceModulatorK*grayImageOutput.sum()/(float)_multiuseFilter->getNBpixels());
+        _multiuseFilter->runFilter_LocalAdapdation(_temp2, grayImageOutput, grayImageOutput); // adapt contrast to local luminance
+
+    }
+
+ // run the initilized retina filter in order to perform color tone mapping, after this call all retina outputs are updated
+    void _runRGBToneMapping(const std::valarray<float> &RGBimageInput, std::valarray<float> &RGBimageOutput, const bool useAdaptiveFiltering)
+    {
+        // multiplex the image with the color sampling method specified in the constructor
+        _colorEngine->runColorMultiplexing(RGBimageInput);
+
+        // apply tone mapping on the multiplexed image
+        _runGrayToneMapping(_colorEngine->getMultiplexedFrame(), RGBimageOutput);
+
+        // demultiplex tone maped image
+        _colorEngine->runColorDemultiplexing(RGBimageOutput, useAdaptiveFiltering, _multiuseFilter->getMaxInputValue());//_ColorEngine->getMultiplexedFrame());//_ParvoRetinaFilter->getPhotoreceptorsLPfilteringOutput());
+
+        // rescaling result between 0 and 255
+        _colorEngine->normalizeRGBOutput_0_maxOutputValue(255.0);
+
+        // return the result
+        RGBimageOutput=_colorEngine->getDemultiplexedColorFrame();
+    }
+
+};
+
+CV_EXPORTS Ptr<RetinaFastToneMapping> createRetinaFastToneMapping(Size inputSize)
+{
+    return makePtr<RetinaFastToneMappingImpl>(inputSize);
+}
+
+}// end of namespace bioinspired
+}// end of namespace cv
diff --git a/modules/contrib/src/retinafilter.cpp b/modules/bioinspired/src/retinafilter.cpp
similarity index 96%
rename from modules/contrib/src/retinafilter.cpp
rename to modules/bioinspired/src/retinafilter.cpp
index 4cf6019f5..e1e24c89b 100644
--- a/modules/contrib/src/retinafilter.cpp
+++ b/modules/bioinspired/src/retinafilter.cpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -72,9 +72,11 @@
 #include <cmath>
 
 namespace cv
+{
+namespace bioinspired
 {
     // standard constructor without any log sampling of the input frame
-    RetinaFilter::RetinaFilter(const unsigned int sizeRows, const unsigned int sizeColumns, const bool colorMode, const RETINA_COLORSAMPLINGMETHOD samplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght)
+    RetinaFilter::RetinaFilter(const unsigned int sizeRows, const unsigned int sizeColumns, const bool colorMode, const int samplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght)
         :
     _retinaParvoMagnoMappedFrame(0),
         _retinaParvoMagnoMapCoefTable(0),
@@ -375,21 +377,15 @@ namespace cv
         // apply tone mapping on the multiplexed image
         // -> photoreceptors local adaptation (large area adaptation)
         _photoreceptorsPrefilter.runFilter_LPfilter(grayImageInput, grayImageOutput, 2); // compute low pass filtering modeling the horizontal cells filtering to acess local luminance
-        _photoreceptorsPrefilter.setV0CompressionParameterToneMapping(PhotoreceptorsCompression, grayImageOutput.sum()/(float)_photoreceptorsPrefilter.getNBpixels());
+        _photoreceptorsPrefilter.setV0CompressionParameterToneMapping(1.f-PhotoreceptorsCompression, grayImageOutput.max(), 1.f*grayImageOutput.sum()/(float)_photoreceptorsPrefilter.getNBpixels());
         _photoreceptorsPrefilter.runFilter_LocalAdapdation(grayImageInput, grayImageOutput, temp2); // adapt contrast to local luminance
 
-        // high pass filter
-        //_spatiotemporalLPfilter(_localBuffer, _filterOutput, 2); // compute low pass filtering (high cut frequency (remove spatio-temporal noise)
-
-        //for (unsigned int i=0;i<_NBpixels;++i)
-        //  _localBuffer[i]-= _filterOutput[i]/2.0;
-
         // -> ganglion cells local adaptation (short area adaptation)
         _photoreceptorsPrefilter.runFilter_LPfilter(temp2, grayImageOutput, 1); // compute low pass filtering (high cut frequency (remove spatio-temporal noise)
-        _photoreceptorsPrefilter.setV0CompressionParameterToneMapping(ganglionCellsCompression, temp2.max(), temp2.sum()/(float)_photoreceptorsPrefilter.getNBpixels());
+        _photoreceptorsPrefilter.setV0CompressionParameterToneMapping(1.f-ganglionCellsCompression, temp2.max(), 1.f*temp2.sum()/(float)_photoreceptorsPrefilter.getNBpixels());
         _photoreceptorsPrefilter.runFilter_LocalAdapdation(temp2, grayImageOutput, grayImageOutput); // adapt contrast to local luminance
-
     }
+
     // run the initilized retina filter in order to perform color tone mapping, after this call all retina outputs are updated
     void RetinaFilter::runRGBToneMapping(const std::valarray<float> &RGBimageInput, std::valarray<float> &RGBimageOutput, const bool useAdaptiveFiltering, const float PhotoreceptorsCompression, const float ganglionCellsCompression)
     {
@@ -526,4 +522,5 @@ namespace cv
 
         return true;
     }
-}
+}// end of namespace bioinspired
+}// end of namespace cv
diff --git a/modules/contrib/src/retinafilter.hpp b/modules/bioinspired/src/retinafilter.hpp
similarity index 98%
rename from modules/contrib/src/retinafilter.hpp
rename to modules/bioinspired/src/retinafilter.hpp
index 7fa2a078c..3e204885f 100644
--- a/modules/contrib/src/retinafilter.hpp
+++ b/modules/bioinspired/src/retinafilter.hpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -110,7 +110,8 @@
 //#define __RETINADEBUG // define RETINADEBUG to display debug data
 namespace cv
 {
-
+namespace bioinspired
+{
 // retina class that process the 3 outputs of the retina filtering stages
 class RetinaFilter//: public BasicRetinaFilter
 {
@@ -126,7 +127,7 @@ public:
     * @param reductionFactor: only usefull if param useRetinaLogSampling=true, specifies the reduction factor of the output frame (as the center (fovea) is high resolution and corners can be underscaled, then a reduction of the output is allowed without precision leak
     * @param samplingStrenght: only usefull if param useRetinaLogSampling=true, specifies the strenght of the log scale that is applied
     */
-    RetinaFilter(const unsigned int sizeRows, const unsigned int sizeColumns, const bool colorMode=false, const RETINA_COLORSAMPLINGMETHOD samplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
+    RetinaFilter(const unsigned int sizeRows, const unsigned int sizeColumns, const bool colorMode=false, const int samplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
 
     /**
     * standard destructor
@@ -541,9 +542,7 @@ private:
 
 };
 
-}
+}// end of namespace bioinspired
+}// end of namespace cv
+
 #endif /*RETINACLASSES_H_*/
-
-
-
-
diff --git a/modules/contrib/src/templatebuffer.hpp b/modules/bioinspired/src/templatebuffer.hpp
similarity index 98%
rename from modules/contrib/src/templatebuffer.hpp
rename to modules/bioinspired/src/templatebuffer.hpp
index 21414b4da..827eb709f 100644
--- a/modules/contrib/src/templatebuffer.hpp
+++ b/modules/bioinspired/src/templatebuffer.hpp
@@ -6,7 +6,7 @@
 ** copy or use the software.
 **
 **
-** HVStools : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
 ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
 **
 ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
@@ -32,7 +32,7 @@
 ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
 **
-**               For Human Visual System tools (hvstools)
+**               For Human Visual System tools (bioinspired)
 ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
 **
 ** Third party copyrights are property of their respective owners.
@@ -71,6 +71,12 @@
 #include <cmath>
 
 
+//#define __TEMPLATEBUFFERDEBUG //define TEMPLATEBUFFERDEBUG in order to display debug information
+
+namespace cv
+{
+namespace bioinspired
+{
 //// If a parallelization method is available then, you should define MAKE_PARALLEL, in the other case, the classical serial code will be used
 #define MAKE_PARALLEL
 // ==> then include required includes
@@ -101,10 +107,6 @@ public:
 };
 #endif
 
-//#define __TEMPLATEBUFFERDEBUG //define TEMPLATEBUFFERDEBUG in order to display debug information
-
-namespace cv
-{
     /**
     * @class TemplateBuffer
     * @brief this class is a simple template memory buffer which contains basic functions to get information on or normalize the buffer content
@@ -548,8 +550,6 @@ namespace cv
         return std::fabs(x);
     }
 
-}
+}// end of namespace bioinspired
+}// end of namespace cv
 #endif
-
-
-
diff --git a/modules/calib3d/test/test_precomp.cpp b/modules/bioinspired/test/test_main.cpp
similarity index 58%
rename from modules/calib3d/test/test_precomp.cpp
rename to modules/bioinspired/test/test_main.cpp
index 5956e13e3..6b2499344 100644
--- a/modules/calib3d/test/test_precomp.cpp
+++ b/modules/bioinspired/test/test_main.cpp
@@ -1 +1,3 @@
 #include "test_precomp.hpp"
+
+CV_TEST_MAIN("cv")
diff --git a/modules/bioinspired/test/test_precomp.hpp b/modules/bioinspired/test/test_precomp.hpp
new file mode 100644
index 000000000..b1672149a
--- /dev/null
+++ b/modules/bioinspired/test/test_precomp.hpp
@@ -0,0 +1,16 @@
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/bioinspired.hpp"
+#include <iostream>
+
+#endif
diff --git a/modules/bioinspired/test/test_retina_ocl.cpp b/modules/bioinspired/test/test_retina_ocl.cpp
new file mode 100644
index 000000000..b09ce5036
--- /dev/null
+++ b/modules/bioinspired/test/test_retina_ocl.cpp
@@ -0,0 +1,144 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+#include "opencv2/opencv_modules.hpp"
+#include "opencv2/bioinspired.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+
+#if defined(HAVE_OPENCV_OCL)
+
+#include "opencv2/ocl.hpp"
+#define RETINA_ITERATIONS 5
+
+static double checkNear(const cv::Mat &m1, const cv::Mat &m2)
+{
+    return cv::norm(m1, m2, cv::NORM_INF);
+}
+
+#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
+#define GET_PARAM(k) std::tr1::get< k >(GetParam())
+
+static int oclInit = false;
+
+PARAM_TEST_CASE(Retina_OCL, bool, int, bool, double, double)
+{
+    bool colorMode;
+    int colorSamplingMethod;
+    bool useLogSampling;
+    double reductionFactor;
+    double samplingStrength;
+
+    std::vector<cv::ocl::Info> infos;
+
+    virtual void SetUp()
+    {
+        colorMode           = GET_PARAM(0);
+        colorSamplingMethod = GET_PARAM(1);
+        useLogSampling      = GET_PARAM(2);
+        reductionFactor     = GET_PARAM(3);
+        samplingStrength    = GET_PARAM(4);
+
+        if(!oclInit)
+        {
+            cv::ocl::getDevice(infos);
+            std::cout << "Device name:" << infos[0].DeviceName[0] << std::endl;
+            oclInit = true;
+        }
+    }
+};
+
+TEST_P(Retina_OCL, Accuracy)
+{
+    using namespace cv;
+    Mat input = imread(cvtest::TS::ptr()->get_data_path() + "shared/lena.png", colorMode);
+    CV_Assert(!input.empty());
+    ocl::oclMat ocl_input(input);
+
+    Ptr<bioinspired::Retina> ocl_retina = bioinspired::createRetina_OCL(
+        input.size(),
+        colorMode,
+        colorSamplingMethod,
+        useLogSampling,
+        reductionFactor,
+        samplingStrength);
+
+    Ptr<bioinspired::Retina> gold_retina = bioinspired::createRetina(
+        input.size(),
+        colorMode,
+        colorSamplingMethod,
+        useLogSampling,
+        reductionFactor,
+        samplingStrength);
+
+    Mat gold_parvo;
+    Mat gold_magno;
+    ocl::oclMat ocl_parvo;
+    ocl::oclMat ocl_magno;
+
+    for(int i = 0; i < RETINA_ITERATIONS; i ++)
+    {
+        ocl_retina->run(ocl_input);
+        gold_retina->run(input);
+
+        gold_retina->getParvo(gold_parvo);
+        gold_retina->getMagno(gold_magno);
+
+        ocl_retina->getParvo(ocl_parvo);
+        ocl_retina->getMagno(ocl_magno);
+
+        EXPECT_LE(checkNear(gold_parvo, (Mat)ocl_parvo), 1.0);
+        EXPECT_LE(checkNear(gold_magno, (Mat)ocl_magno), 1.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Contrib, Retina_OCL, testing::Combine(
+                            testing::Values(false, true),
+                            testing::Values((int)cv::bioinspired::RETINA_COLOR_BAYER),
+                            testing::Values(false/*,true*/),
+                            testing::Values(1.0, 0.5),
+                            testing::Values(10.0, 5.0)));
+#endif
diff --git a/modules/calib3d/doc/calib3d.rst b/modules/calib3d/doc/calib3d.rst
index 0ac8d5196..0c056843d 100644
--- a/modules/calib3d/doc/calib3d.rst
+++ b/modules/calib3d/doc/calib3d.rst
@@ -6,4 +6,3 @@ calib3d. Camera Calibration and 3D Reconstruction
     :maxdepth: 2
 
     camera_calibration_and_3d_reconstruction
-
diff --git a/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst b/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
index f2fbfd1d9..cb30dc36d 100644
--- a/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
+++ b/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
@@ -109,7 +109,16 @@ The functions below use the above model to do the following:
 
  * Estimate the relative position and orientation of the stereo camera "heads" and compute the *rectification* transformation that makes the camera optical axes parallel.
 
+.. note::
 
+   * A calibration sample for 3 cameras in horizontal position can be found at opencv_source_code/samples/cpp/3calibration.cpp
+   * A calibration sample based on a sequence of images can be found at opencv_source_code/samples/cpp/calibration.cpp
+   * A calibration sample in order to do 3D reconstruction can be found at opencv_source_code/samples/cpp/build3dmodel.cpp
+   * A calibration sample of an artificially generated camera and chessboard patterns can be found at opencv_source_code/samples/cpp/calibration_artificial.cpp
+   * A calibration example on stereo calibration can be found at opencv_source_code/samples/cpp/stereo_calib.cpp
+   * A calibration example on stereo matching can be found at opencv_source_code/samples/cpp/stereo_match.cpp
+
+   * (Python) A camera calibration sample can be found at opencv_source_code/samples/python2/calibrate.py
 
 calibrateCamera
 ---------------
@@ -277,6 +286,8 @@ For points in an image of a stereo pair, computes the corresponding epilines in
 
 .. ocv:cfunction:: void cvComputeCorrespondEpilines( const CvMat* points, int which_image, const CvMat* fundamental_matrix, CvMat* correspondent_lines )
 
+.. ocv:pyfunction:: cv2.computeCorrespondEpilines(points, whichImage, F[, lines]) -> lines
+
     :param points: Input points.  :math:`N \times 1`  or  :math:`1 \times N`  matrix of type  ``CV_32FC2``  or  ``vector<Point2f>`` .
 
     :param whichImage: Index of the image (1 or 2) that contains the  ``points`` .
@@ -504,7 +515,7 @@ findCirclesGrid
 -------------------
 Finds centers in the grid of circles.
 
-.. ocv:function:: bool findCirclesGrid( InputArray image, Size patternSize, OutputArray centers, int flags=CALIB_CB_SYMMETRIC_GRID, const Ptr<FeatureDetector> &blobDetector = new SimpleBlobDetector() )
+.. ocv:function:: bool findCirclesGrid( InputArray image, Size patternSize, OutputArray centers, int flags=CALIB_CB_SYMMETRIC_GRID, const Ptr<FeatureDetector> &blobDetector = makePtr<SimpleBlobDetector>() )
 
 .. ocv:pyfunction:: cv2.findCirclesGrid(image, patternSize[, centers[, flags[, blobDetector]]]) -> retval, centers
 
@@ -577,7 +588,9 @@ Finds an object pose from 3D-2D point correspondences.
 
 The function estimates the object pose given a set of object points, their corresponding image projections, as well as the camera matrix and the distortion coefficients.
 
+.. note::
 
+   * An example of how to use solvePNP for planar augmented reality can be found at opencv_source_code/samples/python2/plane_ar.py
 
 solvePnPRansac
 ------------------
@@ -879,6 +892,9 @@ Homography matrix is determined up to a scale. Thus, it is normalized so that
     :ocv:func:`warpPerspective`,
     :ocv:func:`perspectiveTransform`
 
+.. note::
+
+   * A example on calculating a homography for image matching can be found at opencv_source_code/samples/cpp/video_homography.cpp
 
 estimateAffine3D
 --------------------
@@ -1168,6 +1184,9 @@ StereoBM
 
 Class for computing stereo correspondence using the block matching algorithm, introduced and contributed to OpenCV by K. Konolige.
 
+.. Sample code:
+
+   (Ocl) An example for using the stereoBM matching algorithm can be found at opencv_source_code/samples/ocl/stereo_match.cpp
 
 createStereoBM
 ------------------
@@ -1199,6 +1218,9 @@ The class implements the modified H. Hirschmuller algorithm [HH08]_ that differs
 
  * Some pre- and post- processing steps from K. Konolige algorithm ``StereoBM``  are included, for example: pre-filtering (``StereoBM::PREFILTER_XSOBEL`` type) and post-filtering (uniqueness check, quadratic interpolation and speckle filtering).
 
+.. note::
+
+   * (Python) An example illustrating the use of the StereoSGBM matching algorithm can be found at opencv_source_code/samples/python2/stereo_match.py
 
 createStereoSGBM
 --------------------------
diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp
index 64462eea8..1a8777b93 100644
--- a/modules/calib3d/include/opencv2/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@@ -180,7 +180,7 @@ CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSiz
 //! finds circles' grid pattern of the specified size in the image
 CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize,
                                    OutputArray centers, int flags = CALIB_CB_SYMMETRIC_GRID,
-                                   const Ptr<FeatureDetector> &blobDetector = new SimpleBlobDetector());
+                                   const Ptr<FeatureDetector> &blobDetector = makePtr<SimpleBlobDetector>());
 
 //! finds intrinsic and extrinsic camera parameters from several fews of a known calibration pattern.
 CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints,
@@ -262,24 +262,24 @@ CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2,
                                    double param1 = 3., double param2 = 0.99 );
 
 //! finds essential matrix from a set of corresponding 2D points using five-point algorithm
-CV_EXPORTS Mat findEssentialMat( InputArray points1, InputArray points2,
+CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2,
                                  double focal = 1.0, Point2d pp = Point2d(0, 0),
                                  int method = RANSAC, double prob = 0.999,
                                  double threshold = 1.0, OutputArray mask = noArray() );
 
 //! decompose essential matrix to possible rotation matrix and one translation vector
-CV_EXPORTS void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t );
+CV_EXPORTS_W void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t );
 
 //! recover relative camera pose from a set of corresponding 2D points
-CV_EXPORTS int recoverPose( InputArray E, InputArray points1, InputArray points2,
+CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2,
                             OutputArray R, OutputArray t,
                             double focal = 1.0, Point2d pp = Point2d(0, 0),
                             InputOutputArray mask = noArray() );
 
 
 //! finds coordinates of epipolar lines corresponding the specified points
-CV_EXPORTS void computeCorrespondEpilines( InputArray points, int whichImage,
-                                           InputArray F, OutputArray lines );
+CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, int whichImage,
+                                             InputArray F, OutputArray lines );
 
 CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2,
                                      InputArray projPoints1, InputArray projPoints2,
diff --git a/modules/calib3d/include/opencv2/calib3d/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d/calib3d.hpp
index 302de229b..b3da45edd 100644
--- a/modules/calib3d/include/opencv2/calib3d/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d/calib3d.hpp
@@ -45,4 +45,4 @@
 #error this is a compatibility header which should not be used inside the OpenCV library
 #endif
 
-#include "opencv2/calib3d.hpp"
\ No newline at end of file
+#include "opencv2/calib3d.hpp"
diff --git a/modules/calib3d/perf/perf_pnp.cpp b/modules/calib3d/perf/perf_pnp.cpp
index e88155729..7a7acb052 100644
--- a/modules/calib3d/perf/perf_pnp.cpp
+++ b/modules/calib3d/perf/perf_pnp.cpp
@@ -130,7 +130,7 @@ PERF_TEST_P(PointsNum, DISABLED_SolvePnPRansac, testing::Values(4, 3*9, 7*13))
 
 #ifdef HAVE_TBB
     // limit concurrency to get determenistic result
-    cv::Ptr<tbb::task_scheduler_init> one_thread = new tbb::task_scheduler_init(1);
+    tbb::task_scheduler_init one_thread(1);
 #endif
 
     TEST_CYCLE()
diff --git a/modules/calib3d/perf/perf_precomp.cpp b/modules/calib3d/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/calib3d/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/calib3d/src/calib3d_init.cpp b/modules/calib3d/src/calib3d_init.cpp
index 06303bd98..6192c7998 100644
--- a/modules/calib3d/src/calib3d_init.cpp
+++ b/modules/calib3d/src/calib3d_init.cpp
@@ -47,7 +47,7 @@ using namespace cv;
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////                  
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
 
diff --git a/modules/calib3d/src/calibinit.cpp b/modules/calib3d/src/calibinit.cpp
index b93b4951e..844fde499 100644
--- a/modules/calib3d/src/calibinit.cpp
+++ b/modules/calib3d/src/calibinit.cpp
@@ -271,8 +271,8 @@ int cvFindChessboardCorners( const void* arr, CvSize pattern_size,
     if( !out_corners )
         CV_Error( CV_StsNullPtr, "Null pointer to corners" );
 
-    storage = cvCreateMemStorage(0);
-    thresh_img = cvCreateMat( img->rows, img->cols, CV_8UC1 );
+    storage.reset(cvCreateMemStorage(0));
+    thresh_img.reset(cvCreateMat( img->rows, img->cols, CV_8UC1 ));
 
 #ifdef DEBUG_CHESSBOARD
     dbg_img = cvCreateImage(cvGetSize(img), IPL_DEPTH_8U, 3 );
@@ -284,7 +284,7 @@ int cvFindChessboardCorners( const void* arr, CvSize pattern_size,
     {
         // equalize the input image histogram -
         // that should make the contrast between "black" and "white" areas big enough
-        norm_img = cvCreateMat( img->rows, img->cols, CV_8UC1 );
+        norm_img.reset(cvCreateMat( img->rows, img->cols, CV_8UC1 ));
 
         if( CV_MAT_CN(img->type) != 1 )
         {
@@ -541,12 +541,12 @@ int cvFindChessboardCorners( const void* arr, CvSize pattern_size,
         cv::Ptr<CvMat> gray;
         if( CV_MAT_CN(img->type) != 1 )
         {
-            gray = cvCreateMat(img->rows, img->cols, CV_8UC1);
+            gray.reset(cvCreateMat(img->rows, img->cols, CV_8UC1));
             cvCvtColor(img, gray, CV_BGR2GRAY);
         }
         else
         {
-            gray = cvCloneMat(img);
+            gray.reset(cvCloneMat(img));
         }
         int wsize = 2;
         cvFindCornerSubPix( gray, out_corners, pattern_size.width*pattern_size.height,
@@ -627,7 +627,7 @@ icvOrderFoundConnectedQuads( int quad_count, CvCBQuad **quads,
         int *all_count, CvCBQuad **all_quads, CvCBCorner **corners,
         CvSize pattern_size, CvMemStorage* storage )
 {
-    cv::Ptr<CvMemStorage> temp_storage = cvCreateChildMemStorage( storage );
+    cv::Ptr<CvMemStorage> temp_storage(cvCreateChildMemStorage( storage ));
     CvSeq* stack = cvCreateSeq( 0, sizeof(*stack), sizeof(void*), temp_storage );
 
     // first find an interior quad
@@ -1109,7 +1109,7 @@ icvCleanFoundConnectedQuads( int quad_count, CvCBQuad **quad_group, CvSize patte
 
     // create an array of quadrangle centers
     cv::AutoBuffer<CvPoint2D32f> centers( quad_count );
-    cv::Ptr<CvMemStorage> temp_storage = cvCreateMemStorage(0);
+    cv::Ptr<CvMemStorage> temp_storage(cvCreateMemStorage(0));
 
     for( i = 0; i < quad_count; i++ )
     {
@@ -1205,7 +1205,7 @@ static int
 icvFindConnectedQuads( CvCBQuad *quad, int quad_count, CvCBQuad **out_group,
                        int group_idx, CvMemStorage* storage )
 {
-    cv::Ptr<CvMemStorage> temp_storage = cvCreateChildMemStorage( storage );
+    cv::Ptr<CvMemStorage> temp_storage(cvCreateChildMemStorage( storage ));
     CvSeq* stack = cvCreateSeq( 0, sizeof(*stack), sizeof(void*), temp_storage );
     int i, count = 0;
 
@@ -1674,7 +1674,7 @@ icvGenerateQuads( CvCBQuad **out_quads, CvCBCorner **out_corners,
     min_size = 25; //cvRound( image->cols * image->rows * .03 * 0.01 * 0.92 );
 
     // create temporary storage for contours and the sequence of pointers to found quadrangles
-    temp_storage = cvCreateChildMemStorage( storage );
+    temp_storage.reset(cvCreateChildMemStorage( storage ));
     root = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvSeq*), temp_storage );
 
     // initialize contour retrieving routine
diff --git a/modules/calib3d/src/calibration.cpp b/modules/calib3d/src/calibration.cpp
index bb7863575..893c0e9a9 100644
--- a/modules/calib3d/src/calibration.cpp
+++ b/modules/calib3d/src/calibration.cpp
@@ -568,7 +568,7 @@ CV_IMPL void cvProjectPoints2( const CvMat* objectPoints,
         (objectPoints->rows == count && CV_MAT_CN(objectPoints->type)*objectPoints->cols == 3) ||
         (objectPoints->rows == 3 && CV_MAT_CN(objectPoints->type) == 1 && objectPoints->cols == count)))
     {
-        matM = cvCreateMat( objectPoints->rows, objectPoints->cols, CV_MAKETYPE(CV_64F,CV_MAT_CN(objectPoints->type)) );
+        matM.reset(cvCreateMat( objectPoints->rows, objectPoints->cols, CV_MAKETYPE(CV_64F,CV_MAT_CN(objectPoints->type)) ));
         cvConvert(objectPoints, matM);
     }
     else
@@ -584,7 +584,7 @@ CV_IMPL void cvProjectPoints2( const CvMat* objectPoints,
         (imagePoints->rows == count && CV_MAT_CN(imagePoints->type)*imagePoints->cols == 2) ||
         (imagePoints->rows == 2 && CV_MAT_CN(imagePoints->type) == 1 && imagePoints->cols == count)))
     {
-        _m = cvCreateMat( imagePoints->rows, imagePoints->cols, CV_MAKETYPE(CV_64F,CV_MAT_CN(imagePoints->type)) );
+        _m.reset(cvCreateMat( imagePoints->rows, imagePoints->cols, CV_MAKETYPE(CV_64F,CV_MAT_CN(imagePoints->type)) ));
         cvConvert(imagePoints, _m);
     }
     else
@@ -664,10 +664,10 @@ CV_IMPL void cvProjectPoints2( const CvMat* objectPoints,
 
         if( CV_MAT_TYPE(dpdr->type) == CV_64FC1 )
         {
-            _dpdr = cvCloneMat(dpdr);
+            _dpdr.reset(cvCloneMat(dpdr));
         }
         else
-            _dpdr = cvCreateMat( 2*count, 3, CV_64FC1 );
+            _dpdr.reset(cvCreateMat( 2*count, 3, CV_64FC1 ));
         dpdr_p = _dpdr->data.db;
         dpdr_step = _dpdr->step/sizeof(dpdr_p[0]);
     }
@@ -682,10 +682,10 @@ CV_IMPL void cvProjectPoints2( const CvMat* objectPoints,
 
         if( CV_MAT_TYPE(dpdt->type) == CV_64FC1 )
         {
-            _dpdt = cvCloneMat(dpdt);
+            _dpdt.reset(cvCloneMat(dpdt));
         }
         else
-            _dpdt = cvCreateMat( 2*count, 3, CV_64FC1 );
+            _dpdt.reset(cvCreateMat( 2*count, 3, CV_64FC1 ));
         dpdt_p = _dpdt->data.db;
         dpdt_step = _dpdt->step/sizeof(dpdt_p[0]);
     }
@@ -699,10 +699,10 @@ CV_IMPL void cvProjectPoints2( const CvMat* objectPoints,
 
         if( CV_MAT_TYPE(dpdf->type) == CV_64FC1 )
         {
-            _dpdf = cvCloneMat(dpdf);
+            _dpdf.reset(cvCloneMat(dpdf));
         }
         else
-            _dpdf = cvCreateMat( 2*count, 2, CV_64FC1 );
+            _dpdf.reset(cvCreateMat( 2*count, 2, CV_64FC1 ));
         dpdf_p = _dpdf->data.db;
         dpdf_step = _dpdf->step/sizeof(dpdf_p[0]);
     }
@@ -716,10 +716,10 @@ CV_IMPL void cvProjectPoints2( const CvMat* objectPoints,
 
         if( CV_MAT_TYPE(dpdc->type) == CV_64FC1 )
         {
-            _dpdc = cvCloneMat(dpdc);
+            _dpdc.reset(cvCloneMat(dpdc));
         }
         else
-            _dpdc = cvCreateMat( 2*count, 2, CV_64FC1 );
+            _dpdc.reset(cvCreateMat( 2*count, 2, CV_64FC1 ));
         dpdc_p = _dpdc->data.db;
         dpdc_step = _dpdc->step/sizeof(dpdc_p[0]);
     }
@@ -736,10 +736,10 @@ CV_IMPL void cvProjectPoints2( const CvMat* objectPoints,
 
         if( CV_MAT_TYPE(dpdk->type) == CV_64FC1 )
         {
-            _dpdk = cvCloneMat(dpdk);
+            _dpdk.reset(cvCloneMat(dpdk));
         }
         else
-            _dpdk = cvCreateMat( dpdk->rows, dpdk->cols, CV_64FC1 );
+            _dpdk.reset(cvCreateMat( dpdk->rows, dpdk->cols, CV_64FC1 ));
         dpdk_p = _dpdk->data.db;
         dpdk_step = _dpdk->step/sizeof(dpdk_p[0]);
     }
@@ -950,8 +950,8 @@ CV_IMPL void cvFindExtrinsicCameraParams2( const CvMat* objectPoints,
         CV_IS_MAT(A) && CV_IS_MAT(rvec) && CV_IS_MAT(tvec) );
 
     count = MAX(objectPoints->cols, objectPoints->rows);
-    matM = cvCreateMat( 1, count, CV_64FC3 );
-    _m = cvCreateMat( 1, count, CV_64FC2 );
+    matM.reset(cvCreateMat( 1, count, CV_64FC3 ));
+    _m.reset(cvCreateMat( 1, count, CV_64FC2 ));
 
     cvConvertPointsHomogeneous( objectPoints, matM );
     cvConvertPointsHomogeneous( imagePoints, _m );
@@ -963,8 +963,8 @@ CV_IMPL void cvFindExtrinsicCameraParams2( const CvMat* objectPoints,
     CV_Assert( (CV_MAT_DEPTH(tvec->type) == CV_64F || CV_MAT_DEPTH(tvec->type) == CV_32F) &&
         (tvec->rows == 1 || tvec->cols == 1) && tvec->rows*tvec->cols*CV_MAT_CN(tvec->type) == 3 );
 
-    _mn = cvCreateMat( 1, count, CV_64FC2 );
-    _Mxy = cvCreateMat( 1, count, CV_64FC2 );
+    _mn.reset(cvCreateMat( 1, count, CV_64FC2 ));
+    _Mxy.reset(cvCreateMat( 1, count, CV_64FC2 ));
 
     // normalize image points
     // (unapply the intrinsic matrix transformation and distortion)
@@ -1055,7 +1055,7 @@ CV_IMPL void cvFindExtrinsicCameraParams2( const CvMat* objectPoints,
             CvPoint3D64f* M = (CvPoint3D64f*)matM->data.db;
             CvPoint2D64f* mn = (CvPoint2D64f*)_mn->data.db;
 
-            matL = cvCreateMat( 2*count, 12, CV_64F );
+            matL.reset(cvCreateMat( 2*count, 12, CV_64F ));
             L = matL->data.db;
 
             for( i = 0; i < count; i++, L += 24 )
@@ -1162,11 +1162,11 @@ CV_IMPL void cvInitIntrinsicParams2D( const CvMat* objectPoints,
     if( objectPoints->rows != 1 || imagePoints->rows != 1 )
         CV_Error( CV_StsBadSize, "object points and image points must be a single-row matrices" );
 
-    matA = cvCreateMat( 2*nimages, 2, CV_64F );
-    _b = cvCreateMat( 2*nimages, 1, CV_64F );
+    matA.reset(cvCreateMat( 2*nimages, 2, CV_64F ));
+    _b.reset(cvCreateMat( 2*nimages, 1, CV_64F ));
     a[2] = (imageSize.width - 1)*0.5;
     a[5] = (imageSize.height - 1)*0.5;
-    _allH = cvCreateMat( nimages, 9, CV_64F );
+    _allH.reset(cvCreateMat( nimages, 9, CV_64F ));
 
     // extract vanishing points in order to obtain initial value for the focal length
     for( i = 0, pos = 0; i < nimages; i++, pos += ni )
@@ -1310,16 +1310,16 @@ CV_IMPL double cvCalibrateCamera2( const CvMat* objectPoints,
         total += ni;
     }
 
-    matM = cvCreateMat( 1, total, CV_64FC3 );
-    _m = cvCreateMat( 1, total, CV_64FC2 );
+    matM.reset(cvCreateMat( 1, total, CV_64FC3 ));
+    _m.reset(cvCreateMat( 1, total, CV_64FC2 ));
 
     cvConvertPointsHomogeneous( objectPoints, matM );
     cvConvertPointsHomogeneous( imagePoints, _m );
 
     nparams = NINTRINSIC + nimages*6;
-    _Ji = cvCreateMat( maxPoints*2, NINTRINSIC, CV_64FC1 );
-    _Je = cvCreateMat( maxPoints*2, 6, CV_64FC1 );
-    _err = cvCreateMat( maxPoints*2, 1, CV_64FC1 );
+    _Ji.reset(cvCreateMat( maxPoints*2, NINTRINSIC, CV_64FC1 ));
+    _Je.reset(cvCreateMat( maxPoints*2, 6, CV_64FC1 ));
+    _err.reset(cvCreateMat( maxPoints*2, 1, CV_64FC1 ));
     cvZero( _Ji );
 
     _k = cvMat( distCoeffs->rows, distCoeffs->cols, CV_MAKETYPE(CV_64F,CV_MAT_CN(distCoeffs->type)), k);
@@ -1403,6 +1403,8 @@ CV_IMPL double cvCalibrateCamera2( const CvMat* objectPoints,
     }
     if( !(flags & CV_CALIB_RATIONAL_MODEL) )
         flags |= CV_CALIB_FIX_K4 + CV_CALIB_FIX_K5 + CV_CALIB_FIX_K6;
+    if( !(flags & CV_CALIB_THIN_PRISM_MODEL))
+        flags |= CALIB_FIX_S1_S2_S3_S4;
     if( flags & CV_CALIB_FIX_K1 )
         mask[4] = 0;
     if( flags & CV_CALIB_FIX_K2 )
@@ -1415,8 +1417,6 @@ CV_IMPL double cvCalibrateCamera2( const CvMat* objectPoints,
         mask[10] = 0;
     if( flags & CV_CALIB_FIX_K6 )
         mask[11] = 0;
-    if(!(flags & CV_CALIB_THIN_PRISM_MODEL))
-        flags |= CALIB_FIX_S1_S2_S3_S4;
 
     if(flags & CALIB_FIX_S1_S2_S3_S4)
     {
@@ -1638,12 +1638,12 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
                         CvTermCriteria termCrit,
                         int flags )
 {
-    const int NINTRINSIC = 12;
+    const int NINTRINSIC = 16;
     Ptr<CvMat> npoints, err, J_LR, Je, Ji, imagePoints[2], objectPoints, RT0;
     CvLevMarq solver;
     double reprojErr = 0;
 
-    double A[2][9], dk[2][8]={{0,0,0,0,0,0,0,0},{0,0,0,0,0,0,0,0}}, rlr[9];
+    double A[2][9], dk[2][12]={{0,0,0,0,0,0,0,0,0,0,0,0},{0,0,0,0,0,0,0,0,0,0,0,0}}, rlr[9];
     CvMat K[2], Dist[2], om_LR, T_LR;
     CvMat R_LR = cvMat(3, 3, CV_64F, rlr);
     int i, k, p, ni = 0, ofs, nimages, pointsTotal, maxPoints = 0;
@@ -1662,7 +1662,7 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
                CV_MAT_TYPE(_npoints->type) == CV_32SC1 );
 
     nimages = _npoints->cols + _npoints->rows - 1;
-    npoints = cvCreateMat( _npoints->rows, _npoints->cols, _npoints->type );
+    npoints.reset(cvCreateMat( _npoints->rows, _npoints->cols, _npoints->type ));
     cvCopy( _npoints, npoints );
 
     for( i = 0, pointsTotal = 0; i < nimages; i++ )
@@ -1671,8 +1671,8 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
         pointsTotal += npoints->data.i[i];
     }
 
-    objectPoints = cvCreateMat( _objectPoints->rows, _objectPoints->cols,
-                                CV_64FC(CV_MAT_CN(_objectPoints->type)));
+    objectPoints.reset(cvCreateMat( _objectPoints->rows, _objectPoints->cols,
+                                    CV_64FC(CV_MAT_CN(_objectPoints->type))));
     cvConvert( _objectPoints, objectPoints );
     cvReshape( objectPoints, objectPoints, 3, 1 );
 
@@ -1689,9 +1689,9 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
                 (_imagePoints1->rows == 1 && _imagePoints1->cols == pointsTotal && cn == 2)) );
 
         K[k] = cvMat(3,3,CV_64F,A[k]);
-        Dist[k] = cvMat(1,8,CV_64F,dk[k]);
+        Dist[k] = cvMat(1,12,CV_64F,dk[k]);
 
-        imagePoints[k] = cvCreateMat( points->rows, points->cols, CV_64FC(CV_MAT_CN(points->type)));
+        imagePoints[k].reset(cvCreateMat( points->rows, points->cols, CV_64FC(CV_MAT_CN(points->type))));
         cvConvert( points, imagePoints[k] );
         cvReshape( imagePoints[k], imagePoints[k], 2, 1 );
 
@@ -1729,10 +1729,10 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
 
     recomputeIntrinsics = (flags & CV_CALIB_FIX_INTRINSIC) == 0;
 
-    err = cvCreateMat( maxPoints*2, 1, CV_64F );
-    Je = cvCreateMat( maxPoints*2, 6, CV_64F );
-    J_LR = cvCreateMat( maxPoints*2, 6, CV_64F );
-    Ji = cvCreateMat( maxPoints*2, NINTRINSIC, CV_64F );
+    err.reset(cvCreateMat( maxPoints*2, 1, CV_64F ));
+    Je.reset(cvCreateMat( maxPoints*2, 6, CV_64F ));
+    J_LR.reset(cvCreateMat( maxPoints*2, 6, CV_64F ));
+    Ji.reset(cvCreateMat( maxPoints*2, NINTRINSIC, CV_64F ));
     cvZero( Ji );
 
     // we optimize for the inter-camera R(3),t(3), then, optionally,
@@ -1740,7 +1740,7 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
     nparams = 6*(nimages+1) + (recomputeIntrinsics ? NINTRINSIC*2 : 0);
 
     // storage for initial [om(R){i}|t{i}] (in order to compute the median for each component)
-    RT0 = cvCreateMat( 6, nimages, CV_64F );
+    RT0.reset(cvCreateMat( 6, nimages, CV_64F ));
 
     solver.init( nparams, 0, termCrit );
     if( recomputeIntrinsics )
@@ -1748,6 +1748,8 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
         uchar* imask = solver.mask->data.ptr + nparams - NINTRINSIC*2;
         if( !(flags & CV_CALIB_RATIONAL_MODEL) )
             flags |= CV_CALIB_FIX_K4 | CV_CALIB_FIX_K5 | CV_CALIB_FIX_K6;
+        if( !(flags & CV_CALIB_THIN_PRISM_MODEL) )
+            flags |= CV_CALIB_FIX_S1_S2_S3_S4;
         if( flags & CV_CALIB_FIX_ASPECT_RATIO )
             imask[0] = imask[NINTRINSIC] = 0;
         if( flags & CV_CALIB_FIX_FOCAL_LENGTH )
@@ -1768,6 +1770,13 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
             imask[10] = imask[NINTRINSIC+10] = 0;
         if( flags & CV_CALIB_FIX_K6 )
             imask[11] = imask[NINTRINSIC+11] = 0;
+        if( flags & CV_CALIB_FIX_S1_S2_S3_S4 )
+        {
+            imask[12] = imask[NINTRINSIC+12] = 0;
+            imask[13] = imask[NINTRINSIC+13] = 0;
+            imask[14] = imask[NINTRINSIC+14] = 0;
+            imask[15] = imask[NINTRINSIC+15] = 0;
+        }
     }
 
     /*
@@ -1842,6 +1851,10 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
             iparam[4] = dk[k][0]; iparam[5] = dk[k][1]; iparam[6] = dk[k][2];
             iparam[7] = dk[k][3]; iparam[8] = dk[k][4]; iparam[9] = dk[k][5];
             iparam[10] = dk[k][6]; iparam[11] = dk[k][7];
+            iparam[12] = dk[k][8];
+            iparam[13] = dk[k][9];
+            iparam[14] = dk[k][10];
+            iparam[15] = dk[k][11];
         }
 
     om_LR = cvMat(3, 1, CV_64F, solver.param->data.db);
@@ -1908,6 +1921,10 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
                 dk[k][5] = iparam[k*NINTRINSIC+9];
                 dk[k][6] = iparam[k*NINTRINSIC+10];
                 dk[k][7] = iparam[k*NINTRINSIC+11];
+                dk[k][8] = iparam[k*NINTRINSIC+12];
+                dk[k][9] = iparam[k*NINTRINSIC+13];
+                dk[k][10] = iparam[k*NINTRINSIC+14];
+                dk[k][11] = iparam[k*NINTRINSIC+15];
             }
         }
 
@@ -2080,7 +2097,7 @@ icvGetRectangles( const CvMat* cameraMatrix, const CvMat* distCoeffs,
 {
     const int N = 9;
     int x, y, k;
-    cv::Ptr<CvMat> _pts = cvCreateMat(1, N*N, CV_32FC2);
+    cv::Ptr<CvMat> _pts(cvCreateMat(1, N*N, CV_32FC2));
     CvPoint2D32f* pts = (CvPoint2D32f*)(_pts->data.ptr);
 
     for( y = k = 0; y < N; y++ )
@@ -2439,10 +2456,10 @@ CV_IMPL int cvStereoRectifyUncalibrated(
 
     npoints = _points1->rows * _points1->cols * CV_MAT_CN(_points1->type) / 2;
 
-    _m1 = cvCreateMat( _points1->rows, _points1->cols, CV_64FC(CV_MAT_CN(_points1->type)) );
-    _m2 = cvCreateMat( _points2->rows, _points2->cols, CV_64FC(CV_MAT_CN(_points2->type)) );
-    _lines1 = cvCreateMat( 1, npoints, CV_64FC3 );
-    _lines2 = cvCreateMat( 1, npoints, CV_64FC3 );
+    _m1.reset(cvCreateMat( _points1->rows, _points1->cols, CV_64FC(CV_MAT_CN(_points1->type)) ));
+    _m2.reset(cvCreateMat( _points2->rows, _points2->cols, CV_64FC(CV_MAT_CN(_points2->type)) ));
+    _lines1.reset(cvCreateMat( 1, npoints, CV_64FC3 ));
+    _lines2.reset(cvCreateMat( 1, npoints, CV_64FC3 ));
 
     cvConvert( F0, &F );
 
@@ -3009,6 +3026,7 @@ static Mat prepareDistCoeffs(Mat& distCoeffs0, int rtype)
     if( distCoeffs0.size() == Size(1, 4) ||
        distCoeffs0.size() == Size(1, 5) ||
        distCoeffs0.size() == Size(1, 8) ||
+       distCoeffs0.size() == Size(1, 12) ||
        distCoeffs0.size() == Size(4, 1) ||
        distCoeffs0.size() == Size(5, 1) ||
        distCoeffs0.size() == Size(8, 1) ||
diff --git a/modules/calib3d/src/compat_ptsetreg.cpp b/modules/calib3d/src/compat_ptsetreg.cpp
index 74c9e0012..e8f410858 100644
--- a/modules/calib3d/src/compat_ptsetreg.cpp
+++ b/modules/calib3d/src/compat_ptsetreg.cpp
@@ -53,7 +53,6 @@ using cv::Ptr;
 
 CvLevMarq::CvLevMarq()
 {
-    mask = prevParam = param = J = err = JtJ = JtJN = JtErr = JtJV = JtJW = Ptr<CvMat>();
     lambdaLg10 = 0; state = DONE;
     criteria = cvTermCriteria(0,0,0);
     iters = 0;
@@ -62,7 +61,6 @@ CvLevMarq::CvLevMarq()
 
 CvLevMarq::CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria0, bool _completeSymmFlag )
 {
-    mask = prevParam = param = J = err = JtJ = JtJN = JtErr = JtJV = JtJW = Ptr<CvMat>();
     init(nparams, nerrs, criteria0, _completeSymmFlag);
 }
 
@@ -89,19 +87,19 @@ void CvLevMarq::init( int nparams, int nerrs, CvTermCriteria criteria0, bool _co
 {
     if( !param || param->rows != nparams || nerrs != (err ? err->rows : 0) )
         clear();
-    mask = cvCreateMat( nparams, 1, CV_8U );
+    mask.reset(cvCreateMat( nparams, 1, CV_8U ));
     cvSet(mask, cvScalarAll(1));
-    prevParam = cvCreateMat( nparams, 1, CV_64F );
-    param = cvCreateMat( nparams, 1, CV_64F );
-    JtJ = cvCreateMat( nparams, nparams, CV_64F );
-    JtJN = cvCreateMat( nparams, nparams, CV_64F );
-    JtJV = cvCreateMat( nparams, nparams, CV_64F );
-    JtJW = cvCreateMat( nparams, 1, CV_64F );
-    JtErr = cvCreateMat( nparams, 1, CV_64F );
+    prevParam.reset(cvCreateMat( nparams, 1, CV_64F ));
+    param.reset(cvCreateMat( nparams, 1, CV_64F ));
+    JtJ.reset(cvCreateMat( nparams, nparams, CV_64F ));
+    JtJN.reset(cvCreateMat( nparams, nparams, CV_64F ));
+    JtJV.reset(cvCreateMat( nparams, nparams, CV_64F ));
+    JtJW.reset(cvCreateMat( nparams, 1, CV_64F ));
+    JtErr.reset(cvCreateMat( nparams, 1, CV_64F ));
     if( nerrs > 0 )
     {
-        J = cvCreateMat( nerrs, nparams, CV_64F );
-        err = cvCreateMat( nerrs, 1, CV_64F );
+        J.reset(cvCreateMat( nerrs, nparams, CV_64F ));
+        err.reset(cvCreateMat( nerrs, 1, CV_64F ));
     }
     prevErrNorm = DBL_MAX;
     lambdaLg10 = -3;
@@ -196,7 +194,7 @@ bool CvLevMarq::updateAlt( const CvMat*& _param, CvMat*& _JtJ, CvMat*& _JtErr, d
 {
     double change;
 
-    CV_Assert( err.empty() );
+    CV_Assert( !err );
     if( state == DONE )
     {
         _param = param;
@@ -428,4 +426,3 @@ CV_IMPL void cvConvertPointsHomogeneous( const CvMat* _src, CvMat* _dst )
             dst.convertTo(dst0, dst0.type());
     }
 }
-
diff --git a/modules/calib3d/src/epnp.cpp b/modules/calib3d/src/epnp.cpp
index 7fb63254d..edbcaffd3 100644
--- a/modules/calib3d/src/epnp.cpp
+++ b/modules/calib3d/src/epnp.cpp
@@ -621,4 +621,3 @@ void epnp::qr_solve(CvMat * A, CvMat * b, CvMat * X)
     pX[i] = (pb[i] - sum) / A2[i];
   }
 }
-
diff --git a/modules/calib3d/src/five-point.cpp b/modules/calib3d/src/five-point.cpp
index 7eae2ebf8..992224700 100644
--- a/modules/calib3d/src/five-point.cpp
+++ b/modules/calib3d/src/five-point.cpp
@@ -436,9 +436,9 @@ cv::Mat cv::findEssentialMat( InputArray _points1, InputArray _points2, double f
 
     Mat E;
     if( method == RANSAC )
-        createRANSACPointSetRegistrator(new EMEstimatorCallback, 5, threshold, prob)->run(points1, points2, E, _mask);
+        createRANSACPointSetRegistrator(makePtr<EMEstimatorCallback>(), 5, threshold, prob)->run(points1, points2, E, _mask);
     else
-        createLMeDSPointSetRegistrator(new EMEstimatorCallback, 5, prob)->run(points1, points2, E, _mask);
+        createLMeDSPointSetRegistrator(makePtr<EMEstimatorCallback>(), 5, prob)->run(points1, points2, E, _mask);
 
     return E;
 }
@@ -529,16 +529,25 @@ int cv::recoverPose( InputArray E, InputArray _points1, InputArray _points2, Out
     mask4 = (Q.row(2) > 0) & mask4;
     mask4 = (Q.row(2) < dist) & mask4;
 
+    mask1 = mask1.t();
+    mask2 = mask2.t();
+    mask3 = mask3.t();
+    mask4 = mask4.t();
+
     // If _mask is given, then use it to filter outliers.
-    if (_mask.needed())
+    if (!_mask.empty())
     {
-        _mask.create(1, npoints, CV_8U, -1, true);
         Mat mask = _mask.getMat();
+        CV_Assert(mask.size() == mask1.size());
         bitwise_and(mask, mask1, mask1);
         bitwise_and(mask, mask2, mask2);
         bitwise_and(mask, mask3, mask3);
         bitwise_and(mask, mask4, mask4);
     }
+    if (_mask.empty() && _mask.needed())
+    {
+        _mask.create(mask1.size(), CV_8U);
+    }
 
     CV_Assert(_R.needed() && _t.needed());
     _R.create(3, 3, R1.type());
@@ -548,6 +557,7 @@ int cv::recoverPose( InputArray E, InputArray _points1, InputArray _points2, Out
     int good2 = countNonZero(mask2);
     int good3 = countNonZero(mask3);
     int good4 = countNonZero(mask4);
+
     if (good1 >= good2 && good1 >= good3 && good1 >= good4)
     {
         R1.copyTo(_R);
diff --git a/modules/calib3d/src/fundam.cpp b/modules/calib3d/src/fundam.cpp
index c58e8220a..d1c6e8cd0 100644
--- a/modules/calib3d/src/fundam.cpp
+++ b/modules/calib3d/src/fundam.cpp
@@ -259,6 +259,8 @@ public:
                 Jptr[8] = Jptr[9] = Jptr[10] = 0.;
                 Jptr[11] = Mx*ww; Jptr[12] = My*ww; Jptr[13] = ww;
                 Jptr[14] = -Mx*ww*yi; Jptr[15] = -My*ww*yi;
+
+                Jptr += 16;
             }
         }
 
@@ -305,7 +307,7 @@ cv::Mat cv::findHomography( InputArray _points1, InputArray _points2,
     if( ransacReprojThreshold <= 0 )
         ransacReprojThreshold = defaultRANSACReprojThreshold;
 
-    Ptr<PointSetRegistrator::Callback> cb = new HomographyEstimatorCallback;
+    Ptr<PointSetRegistrator::Callback> cb = makePtr<HomographyEstimatorCallback>();
 
     if( method == 0 || npoints == 4 )
     {
@@ -332,7 +334,7 @@ cv::Mat cv::findHomography( InputArray _points1, InputArray _points2,
             if( method == RANSAC || method == LMEDS )
                 cb->runKernel( src, dst, H );
             Mat H8(8, 1, CV_64F, H.ptr<double>());
-            createLMSolver(new HomographyRefineCallback(src, dst), 10)->run(H8);
+            createLMSolver(makePtr<HomographyRefineCallback>(src, dst), 10)->run(H8);
         }
     }
 
@@ -684,7 +686,7 @@ cv::Mat cv::findFundamentalMat( InputArray _points1, InputArray _points2,
     if( npoints < 7 )
         return Mat();
 
-    Ptr<PointSetRegistrator::Callback> cb = new FMEstimatorCallback;
+    Ptr<PointSetRegistrator::Callback> cb = makePtr<FMEstimatorCallback>();
     int result;
 
     if( npoints == 7 || method == FM_8POINT )
diff --git a/modules/calib3d/src/levmarq.cpp b/modules/calib3d/src/levmarq.cpp
index 539c804e2..55704132c 100644
--- a/modules/calib3d/src/levmarq.cpp
+++ b/modules/calib3d/src/levmarq.cpp
@@ -47,30 +47,30 @@
    This is translation to C++ of the Matlab's LMSolve package by Miroslav Balda.
    Here is the original copyright:
    ============================================================================
-   
+
    Copyright (c) 2007, Miroslav Balda
    All rights reserved.
 
-   Redistribution and use in source and binary forms, with or without 
-   modification, are permitted provided that the following conditions are 
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
    met:
 
-       * Redistributions of source code must retain the above copyright 
+       * Redistributions of source code must retain the above copyright
          notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright 
-         notice, this list of conditions and the following disclaimer in 
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in
          the documentation and/or other materials provided with the distribution
 
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.
 */
 
@@ -95,7 +95,7 @@ public:
         int ptype = param0.type();
 
         CV_Assert( (param0.cols == 1 || param0.rows == 1) && (ptype == CV_32F || ptype == CV_64F));
-        CV_Assert( !cb.empty() );
+        CV_Assert( cb );
 
         int lx = param0.rows + param0.cols - 1;
         param0.convertTo(x, CV_64F);
@@ -112,7 +112,7 @@ public:
         gemm(J, r, 1, noArray(), 0, v, GEMM_1_T);
 
         Mat D = A.diag().clone();
-        
+
         const double Rlo = 0.25, Rhi = 0.75;
         double lambda = 1, lc = 0.75;
         int i, iter = 0;
@@ -220,7 +220,7 @@ CV_INIT_ALGORITHM(LMSolverImpl, "LMSolver",
 Ptr<LMSolver> createLMSolver(const Ptr<LMSolver::Callback>& cb, int maxIters)
 {
     CV_Assert( !LMSolverImpl_info_auto.name().empty() );
-    return new LMSolverImpl(cb, maxIters);
+    return makePtr<LMSolverImpl>(cb, maxIters);
 }
-    
+
 }
diff --git a/modules/calib3d/src/p3p.cpp b/modules/calib3d/src/p3p.cpp
index 92e795472..882868d6b 100644
--- a/modules/calib3d/src/p3p.cpp
+++ b/modules/calib3d/src/p3p.cpp
@@ -411,4 +411,3 @@ bool p3p::jacobi_4x4(double * A, double * D, double * U)
 
     return false;
 }
-
diff --git a/modules/calib3d/src/p3p.h b/modules/calib3d/src/p3p.h
index a58b0fa14..57f8d7df8 100644
--- a/modules/calib3d/src/p3p.h
+++ b/modules/calib3d/src/p3p.h
@@ -59,4 +59,3 @@ class p3p
 };
 
 #endif // P3P_H
-
diff --git a/modules/calib3d/src/precomp.cpp b/modules/calib3d/src/precomp.cpp
deleted file mode 100644
index 3e0ec42de..000000000
--- a/modules/calib3d/src/precomp.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-/* End of file. */
diff --git a/modules/calib3d/src/ptsetreg.cpp b/modules/calib3d/src/ptsetreg.cpp
index c92d858dc..aa361a911 100644
--- a/modules/calib3d/src/ptsetreg.cpp
+++ b/modules/calib3d/src/ptsetreg.cpp
@@ -171,7 +171,7 @@ public:
 
         RNG rng((uint64)-1);
 
-        CV_Assert( !cb.empty() );
+        CV_Assert( cb );
         CV_Assert( confidence > 0 && confidence < 1 );
 
         CV_Assert( count >= 0 && count2 == count );
@@ -288,7 +288,7 @@ public:
 
         RNG rng((uint64)-1);
 
-        CV_Assert( !cb.empty() );
+        CV_Assert( cb );
         CV_Assert( confidence > 0 && confidence < 1 );
 
         CV_Assert( count >= 0 && count2 == count );
@@ -397,7 +397,8 @@ Ptr<PointSetRegistrator> createRANSACPointSetRegistrator(const Ptr<PointSetRegis
                                                          double _confidence, int _maxIters)
 {
     CV_Assert( !RANSACPointSetRegistrator_info_auto.name().empty() );
-    return new RANSACPointSetRegistrator(_cb, _modelPoints, _threshold, _confidence, _maxIters);
+    return Ptr<PointSetRegistrator>(
+        new RANSACPointSetRegistrator(_cb, _modelPoints, _threshold, _confidence, _maxIters));
 }
 
 
@@ -405,7 +406,8 @@ Ptr<PointSetRegistrator> createLMeDSPointSetRegistrator(const Ptr<PointSetRegist
                              int _modelPoints, double _confidence, int _maxIters)
 {
     CV_Assert( !LMeDSPointSetRegistrator_info_auto.name().empty() );
-    return new LMeDSPointSetRegistrator(_cb, _modelPoints, _confidence, _maxIters);
+    return Ptr<PointSetRegistrator>(
+        new LMeDSPointSetRegistrator(_cb, _modelPoints, _confidence, _maxIters));
 }
 
 class Affine3DEstimatorCallback : public PointSetRegistrator::Callback
@@ -532,6 +534,5 @@ int cv::estimateAffine3D(InputArray _from, InputArray _to,
     param1 = param1 <= 0 ? 3 : param1;
     param2 = (param2 < epsilon) ? 0.99 : (param2 > 1 - epsilon) ? 0.99 : param2;
 
-    return createRANSACPointSetRegistrator(new Affine3DEstimatorCallback, 4, param1, param2)->run(dFrom, dTo, _out, _inliers);
+    return createRANSACPointSetRegistrator(makePtr<Affine3DEstimatorCallback>(), 4, param1, param2)->run(dFrom, dTo, _out, _inliers);
 }
-
diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp
index e395b0dc5..698302b94 100644
--- a/modules/calib3d/src/solvepnp.cpp
+++ b/modules/calib3d/src/solvepnp.cpp
@@ -348,4 +348,3 @@ void cv::solvePnPRansac(InputArray _opoints, InputArray _ipoints,
     }
     return;
 }
-
diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp
index 1fc193a0a..ee131db76 100644
--- a/modules/calib3d/src/stereobm.cpp
+++ b/modules/calib3d/src/stereobm.cpp
@@ -991,7 +991,7 @@ const char* StereoBMImpl::name_ = "StereoMatcher.BM";
 
 cv::Ptr<cv::StereoBM> cv::createStereoBM(int _numDisparities, int _SADWindowSize)
 {
-    return new StereoBMImpl(_numDisparities, _SADWindowSize);
+    return makePtr<StereoBMImpl>(_numDisparities, _SADWindowSize);
 }
 
 /* End of file. */
diff --git a/modules/calib3d/src/stereosgbm.cpp b/modules/calib3d/src/stereosgbm.cpp
index 508eb59b1..6d75d8f53 100644
--- a/modules/calib3d/src/stereosgbm.cpp
+++ b/modules/calib3d/src/stereosgbm.cpp
@@ -947,11 +947,12 @@ Ptr<StereoSGBM> createStereoSGBM(int minDisparity, int numDisparities, int SADWi
                                  int speckleWindowSize, int speckleRange,
                                  int mode)
 {
-    return new StereoSGBMImpl(minDisparity, numDisparities, SADWindowSize,
-                              P1, P2, disp12MaxDiff,
-                              preFilterCap, uniquenessRatio,
-                              speckleWindowSize, speckleRange,
-                              mode);
+    return Ptr<StereoSGBM>(
+        new StereoSGBMImpl(minDisparity, numDisparities, SADWindowSize,
+                           P1, P2, disp12MaxDiff,
+                           preFilterCap, uniquenessRatio,
+                           speckleWindowSize, speckleRange,
+                           mode));
 }
 
 Rect getValidDisparityROI( Rect roi1, Rect roi2,
@@ -1169,4 +1170,3 @@ void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDis
         }
     }
 }
-
diff --git a/modules/calib3d/src/triangulate.cpp b/modules/calib3d/src/triangulate.cpp
index 59c7c0f2b..b0af3dc46 100644
--- a/modules/calib3d/src/triangulate.cpp
+++ b/modules/calib3d/src/triangulate.cpp
@@ -240,32 +240,32 @@ cvCorrectMatches(CvMat *F_, CvMat *points1_, CvMat *points2_, CvMat *new_points1
     }
 
     // Make sure F uses double precision
-    F = cvCreateMat(3,3,CV_64FC1);
+    F.reset(cvCreateMat(3,3,CV_64FC1));
     cvConvert(F_, F);
 
     // Make sure points1 uses double precision
-    points1 = cvCreateMat(points1_->rows,points1_->cols,CV_64FC2);
+    points1.reset(cvCreateMat(points1_->rows,points1_->cols,CV_64FC2));
     cvConvert(points1_, points1);
 
     // Make sure points2 uses double precision
-    points2 = cvCreateMat(points2_->rows,points2_->cols,CV_64FC2);
+    points2.reset(cvCreateMat(points2_->rows,points2_->cols,CV_64FC2));
     cvConvert(points2_, points2);
 
-    tmp33 = cvCreateMat(3,3,CV_64FC1);
-    tmp31 = cvCreateMat(3,1,CV_64FC1), tmp31_2 = cvCreateMat(3,1,CV_64FC1);
-    T1i = cvCreateMat(3,3,CV_64FC1), T2i = cvCreateMat(3,3,CV_64FC1);
-    R1 = cvCreateMat(3,3,CV_64FC1), R2 = cvCreateMat(3,3,CV_64FC1);
-    TFT = cvCreateMat(3,3,CV_64FC1), TFTt = cvCreateMat(3,3,CV_64FC1), RTFTR = cvCreateMat(3,3,CV_64FC1);
-    U = cvCreateMat(3,3,CV_64FC1);
-    S = cvCreateMat(3,3,CV_64FC1);
-    V = cvCreateMat(3,3,CV_64FC1);
-    e1 = cvCreateMat(3,1,CV_64FC1), e2 = cvCreateMat(3,1,CV_64FC1);
+    tmp33.reset(cvCreateMat(3,3,CV_64FC1));
+    tmp31.reset(cvCreateMat(3,1,CV_64FC1)), tmp31_2.reset(cvCreateMat(3,1,CV_64FC1));
+    T1i.reset(cvCreateMat(3,3,CV_64FC1)), T2i.reset(cvCreateMat(3,3,CV_64FC1));
+    R1.reset(cvCreateMat(3,3,CV_64FC1)), R2.reset(cvCreateMat(3,3,CV_64FC1));
+    TFT.reset(cvCreateMat(3,3,CV_64FC1)), TFTt.reset(cvCreateMat(3,3,CV_64FC1)), RTFTR.reset(cvCreateMat(3,3,CV_64FC1));
+    U.reset(cvCreateMat(3,3,CV_64FC1));
+    S.reset(cvCreateMat(3,3,CV_64FC1));
+    V.reset(cvCreateMat(3,3,CV_64FC1));
+    e1.reset(cvCreateMat(3,1,CV_64FC1)), e2.reset(cvCreateMat(3,1,CV_64FC1));
 
     double x1, y1, x2, y2;
     double scale;
     double f1, f2, a, b, c, d;
-    polynomial = cvCreateMat(1,7,CV_64FC1);
-    result = cvCreateMat(1,6,CV_64FC2);
+    polynomial.reset(cvCreateMat(1,7,CV_64FC1));
+    result.reset(cvCreateMat(1,6,CV_64FC2));
     double t_min, s_val, t, s;
     for (int p = 0; p < points1->cols; ++p) {
         // Replace F by T2-t * F * T1-t
diff --git a/modules/calib3d/test/test_affine3.cpp b/modules/calib3d/test/test_affine3.cpp
index 196d428e4..62326e920 100644
--- a/modules/calib3d/test/test_affine3.cpp
+++ b/modules/calib3d/test/test_affine3.cpp
@@ -52,30 +52,30 @@ TEST(Calib3d_Affine3f, accuracy)
 
     cv::Mat expected;
     cv::Rodrigues(rvec, expected);
-    
-    
+
+
     ASSERT_EQ(0, norm(cv::Mat(affine.matrix, false).colRange(0, 3).rowRange(0, 3) != expected));
     ASSERT_EQ(0, norm(cv::Mat(affine.linear()) != expected));
-    
-    
+
+
     cv::Matx33d R = cv::Matx33d::eye();
-    
+
     double angle = 50;
     R.val[0] = R.val[4] = std::cos(CV_PI*angle/180.0);
     R.val[3] = std::sin(CV_PI*angle/180.0);
     R.val[1] = -R.val[3];
-    
-    
+
+
     cv::Affine3d affine1(cv::Mat(cv::Vec3d(0.2, 0.5, 0.3)).reshape(1, 1), cv::Vec3d(4, 5, 6));
     cv::Affine3d affine2(R, cv::Vec3d(1, 1, 0.4));
-    
+
     cv::Affine3d result = affine1.inv() * affine2;
-    
+
     expected = cv::Mat(affine1.matrix.inv(cv::DECOMP_SVD)) * cv::Mat(affine2.matrix, false);
-    
+
 
     cv::Mat diff;
     cv::absdiff(expected, result.matrix, diff);
-    
+
     ASSERT_LT(cv::norm(diff, cv::NORM_INF), 1e-15);
 }
diff --git a/modules/calib3d/test/test_affine3d_estimator.cpp b/modules/calib3d/test/test_affine3d_estimator.cpp
index ff061aacf..f31e2e732 100644
--- a/modules/calib3d/test/test_affine3d_estimator.cpp
+++ b/modules/calib3d/test/test_affine3d_estimator.cpp
@@ -195,4 +195,3 @@ void CV_Affine3D_EstTest::run( int /* start_from */)
 }
 
 TEST(Calib3d_EstimateAffineTransform, accuracy) { CV_Affine3D_EstTest test; test.safe_run(); }
-
diff --git a/modules/calib3d/test/test_cameracalibration_badarg.cpp b/modules/calib3d/test/test_cameracalibration_badarg.cpp
index 91118e1ed..3edab8bec 100644
--- a/modules/calib3d/test/test_cameracalibration_badarg.cpp
+++ b/modules/calib3d/test/test_cameracalibration_badarg.cpp
@@ -735,5 +735,3 @@ protected:
 TEST(Calib3d_CalibrateCamera_C, badarg) { CV_CameraCalibrationBadArgTest test; test.safe_run(); }
 TEST(Calib3d_Rodrigues_C, badarg) { CV_Rodrigues2BadArgTest test; test.safe_run(); }
 TEST(Calib3d_ProjectPoints_C, badarg) { CV_ProjectPoints2BadArgTest test; test.safe_run(); }
-
-
diff --git a/modules/calib3d/test/test_chessboardgenerator.cpp b/modules/calib3d/test/test_chessboardgenerator.cpp
index 3a5ebbc3a..7c0bd34dc 100644
--- a/modules/calib3d/test/test_chessboardgenerator.cpp
+++ b/modules/calib3d/test/test_chessboardgenerator.cpp
@@ -329,4 +329,3 @@ Mat cv::ChessBoardGenerator::operator ()(const Mat& bg, const Mat& camMat, const
     return generateChessBoard(bg, camMat, distCoeffs, zero, pb1, pb2,
         squareSize.width, squareSize.height,  pts3d, corners);
 }
-
diff --git a/modules/calib3d/test/test_compose_rt.cpp b/modules/calib3d/test/test_compose_rt.cpp
index b71288e6d..577cc0627 100644
--- a/modules/calib3d/test/test_compose_rt.cpp
+++ b/modules/calib3d/test/test_compose_rt.cpp
@@ -212,4 +212,3 @@ protected:
 };
 
 TEST(Calib3d_ComposeRT, accuracy) { CV_composeRT_Test test; test.safe_run(); }
-
diff --git a/modules/calib3d/test/test_modelest.cpp b/modules/calib3d/test/test_modelest.cpp
index 5b0a86016..55cb79eba 100644
--- a/modules/calib3d/test/test_modelest.cpp
+++ b/modules/calib3d/test/test_modelest.cpp
@@ -229,4 +229,3 @@ void CV_ModelEstimator2_Test::run_func()
 TEST(Calib3d_ModelEstimator2, accuracy) { CV_ModelEstimator2_Test test; test.safe_run(); }
 
 #endif
-
diff --git a/modules/calib3d/test/test_precomp.hpp b/modules/calib3d/test/test_precomp.hpp
index 3fe4480d5..d670a4c22 100644
--- a/modules/calib3d/test/test_precomp.hpp
+++ b/modules/calib3d/test/test_precomp.hpp
@@ -21,4 +21,3 @@ namespace cvtest
 }
 
 #endif
-
diff --git a/modules/calib3d/test/test_reproject_image_to_3d.cpp b/modules/calib3d/test/test_reproject_image_to_3d.cpp
index 3b44566ab..daf89e0f0 100644
--- a/modules/calib3d/test/test_reproject_image_to_3d.cpp
+++ b/modules/calib3d/test/test_reproject_image_to_3d.cpp
@@ -173,4 +173,3 @@ protected:
 };
 
 TEST(Calib3d_ReprojectImageTo3D, accuracy) { CV_ReprojectImageTo3DTest test; test.safe_run(); }
-
diff --git a/modules/calib3d/test/test_solvepnp_ransac.cpp b/modules/calib3d/test/test_solvepnp_ransac.cpp
index c0aff188d..ae744a4d7 100644
--- a/modules/calib3d/test/test_solvepnp_ransac.cpp
+++ b/modules/calib3d/test/test_solvepnp_ransac.cpp
@@ -276,7 +276,7 @@ TEST(DISABLED_Calib3d_SolvePnPRansac, concurrency)
     {
         // limit concurrency to get determenistic result
         cv::theRNG().state = 20121010;
-        cv::Ptr<tbb::task_scheduler_init> one_thread = new tbb::task_scheduler_init(1);
+        tbb::task_scheduler_init one_thread(1);
         solvePnPRansac(object, image, camera_mat, dist_coef, rvec1, tvec1);
     }
 
@@ -295,7 +295,7 @@ TEST(DISABLED_Calib3d_SolvePnPRansac, concurrency)
     {
         // single thread again
         cv::theRNG().state = 20121010;
-        cv::Ptr<tbb::task_scheduler_init> one_thread = new tbb::task_scheduler_init(1);
+        tbb::task_scheduler_init one_thread(1);
         solvePnPRansac(object, image, camera_mat, dist_coef, rvec2, tvec2);
     }
 
@@ -306,4 +306,4 @@ TEST(DISABLED_Calib3d_SolvePnPRansac, concurrency)
     EXPECT_LT(tnorm, 1e-6);
 
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/calib3d/test/test_undistort_points.cpp b/modules/calib3d/test/test_undistort_points.cpp
index c1ace3d4d..5dabd213d 100644
--- a/modules/calib3d/test/test_undistort_points.cpp
+++ b/modules/calib3d/test/test_undistort_points.cpp
@@ -94,4 +94,4 @@ void CV_UndistortTest::run(int /* start_from */)
     }
 }
 
-TEST(Calib3d_Undistort, accuracy) { CV_UndistortTest test; test.safe_run(); }
\ No newline at end of file
+TEST(Calib3d_Undistort, accuracy) { CV_UndistortTest test; test.safe_run(); }
diff --git a/modules/contrib/doc/contrib.rst b/modules/contrib/doc/contrib.rst
index 798d38d5b..de14d33ef 100644
--- a/modules/contrib/doc/contrib.rst
+++ b/modules/contrib/doc/contrib.rst
@@ -9,5 +9,4 @@ The module contains some recently added functionality that has not been stabiliz
 
     stereo
     FaceRecognizer Documentation <facerec/index>
-    Retina Documentation <retina/index>
     openfabmap
diff --git a/modules/contrib/doc/facerec/facerec_api.rst b/modules/contrib/doc/facerec/facerec_api.rst
index e16d15e62..c2c7f56d0 100644
--- a/modules/contrib/doc/facerec/facerec_api.rst
+++ b/modules/contrib/doc/facerec/facerec_api.rst
@@ -3,6 +3,12 @@ FaceRecognizer
 
 .. highlight:: cpp
 
+.. Sample code::
+
+   * An example using the FaceRecognizer class can be found at opencv_source_code/samples/cpp/facerec_demo.cpp
+
+   * (Python)  An example using the FaceRecognizer class can be found at opencv_source_code/samples/python2/facerec_demo.py
+
 FaceRecognizer
 --------------
 
@@ -64,6 +70,8 @@ Moreover every :ocv:class:`FaceRecognizer` supports the:
 
 * **Loading/Saving** the model state from/to a given XML or YAML.
 
+.. note:: When using the FaceRecognizer interface in combination with Python, please stick to Python 2. Some underlying scripts like create_csv will not work in other versions, like Python 3.
+
 Setting the Thresholds
 +++++++++++++++++++++++
 
diff --git a/modules/contrib/doc/facerec/facerec_tutorial.rst b/modules/contrib/doc/facerec/facerec_tutorial.rst
index 16b425d7e..cbfb41797 100644
--- a/modules/contrib/doc/facerec/facerec_tutorial.rst
+++ b/modules/contrib/doc/facerec/facerec_tutorial.rst
@@ -626,5 +626,3 @@ CSV for the AT&T Facedatabase
 .. literalinclude:: etc/at.txt
    :language: none
    :linenos:
-
-
diff --git a/modules/contrib/doc/facerec/index.rst b/modules/contrib/doc/facerec/index.rst
index ce8a15e67..b871448c5 100644
--- a/modules/contrib/doc/facerec/index.rst
+++ b/modules/contrib/doc/facerec/index.rst
@@ -30,4 +30,3 @@ Indices and tables
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
-
diff --git a/modules/contrib/doc/facerec/src/CMakeLists.txt b/modules/contrib/doc/facerec/src/CMakeLists.txt
index e56762ea4..94aa36fbe 100644
--- a/modules/contrib/doc/facerec/src/CMakeLists.txt
+++ b/modules/contrib/doc/facerec/src/CMakeLists.txt
@@ -23,4 +23,3 @@ target_link_libraries(facerec_fisherfaces opencv_contrib opencv_core opencv_imgp
 
 add_executable(facerec_lbph facerec_lbph.cpp)
 target_link_libraries(facerec_lbph opencv_contrib opencv_core opencv_imgproc opencv_highgui)
-
diff --git a/modules/contrib/doc/facerec/tutorial/facerec_gender_classification.rst b/modules/contrib/doc/facerec/tutorial/facerec_gender_classification.rst
index 0c7398d2f..770083170 100644
--- a/modules/contrib/doc/facerec/tutorial/facerec_gender_classification.rst
+++ b/modules/contrib/doc/facerec/tutorial/facerec_gender_classification.rst
@@ -231,5 +231,3 @@ Here are some examples:
 +---------------------------------+----------------------------------------------------------------------------+
 | 0.2 (20%), 0.2 (20%), (70,70)   | .. image:: ../img/tutorial/gender_classification/arnie_20_20_70_70.jpg     |
 +---------------------------------+----------------------------------------------------------------------------+
-
-
diff --git a/modules/contrib/doc/facerec/tutorial/facerec_save_load.rst b/modules/contrib/doc/facerec/tutorial/facerec_save_load.rst
index 7948bcd73..2d0b65dff 100644
--- a/modules/contrib/doc/facerec/tutorial/facerec_save_load.rst
+++ b/modules/contrib/doc/facerec/tutorial/facerec_save_load.rst
@@ -44,4 +44,3 @@ And here is the Reconstruction, which is the same as the original:
 
 .. image:: ../img/eigenface_reconstruction_opencv.png
     :align: center
-
diff --git a/modules/contrib/doc/facerec/tutorial/facerec_video_recognition.rst b/modules/contrib/doc/facerec/tutorial/facerec_video_recognition.rst
index b692fe513..76e76eebe 100644
--- a/modules/contrib/doc/facerec/tutorial/facerec_video_recognition.rst
+++ b/modules/contrib/doc/facerec/tutorial/facerec_video_recognition.rst
@@ -205,5 +205,3 @@ Here are some examples:
 +---------------------------------+----------------------------------------------------------------------------+
 | 0.2 (20%), 0.2 (20%), (70,70)   | .. image:: ../img/tutorial/gender_classification/arnie_20_20_70_70.jpg     |
 +---------------------------------+----------------------------------------------------------------------------+
-
-
diff --git a/modules/contrib/doc/openfabmap.rst b/modules/contrib/doc/openfabmap.rst
index 2f2ad4074..e2f157a07 100644
--- a/modules/contrib/doc/openfabmap.rst
+++ b/modules/contrib/doc/openfabmap.rst
@@ -9,6 +9,10 @@ FAB-MAP is an approach to appearance-based place recognition. FAB-MAP compares i
 
 openFABMAP requires training data (e.g. a collection of images from a similar but not identical environment) to construct a visual vocabulary for the visual bag-of-words model, along with a Chow-Liu tree representation of feature likelihood and for use in the Sampled new place method (see below).
 
+.. note::
+
+   * An example using the openFABMAP package can be found at opencv_source_code/samples/cpp/fabmap_sample.cpp
+
 of2::FabMap
 --------------------
 
diff --git a/modules/contrib/doc/stereo.rst b/modules/contrib/doc/stereo.rst
index d5f2d0080..103bd0f3f 100644
--- a/modules/contrib/doc/stereo.rst
+++ b/modules/contrib/doc/stereo.rst
@@ -113,5 +113,3 @@ The method executes the variational algorithm on a rectified stereo pair. See ``
 **Note**:
 
 The method is not constant, so you should not use the same ``StereoVar`` instance from different threads simultaneously.
-
-
diff --git a/modules/contrib/include/opencv2/contrib.hpp b/modules/contrib/include/opencv2/contrib.hpp
index be83152db..75c6f3db0 100644
--- a/modules/contrib/include/opencv2/contrib.hpp
+++ b/modules/contrib/include/opencv2/contrib.hpp
@@ -633,7 +633,6 @@ CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap);
 CV_EXPORTS bool initModule_contrib();
 }
 
-#include "opencv2/contrib/retina.hpp"
 #include "opencv2/contrib/openfabmap.hpp"
 
 #endif
diff --git a/modules/contrib/src/adaptiveskindetector.cpp b/modules/contrib/src/adaptiveskindetector.cpp
index 1448e6384..092e48cb4 100644
--- a/modules/contrib/src/adaptiveskindetector.cpp
+++ b/modules/contrib/src/adaptiveskindetector.cpp
@@ -286,5 +286,3 @@ void CvAdaptiveSkinDetector::Histogram::mergeWith(CvAdaptiveSkinDetector::Histog
         }
     }
 };
-
-
diff --git a/modules/contrib/src/ba.cpp b/modules/contrib/src/ba.cpp
index ff58073fc..9a8002e96 100644
--- a/modules/contrib/src/ba.cpp
+++ b/modules/contrib/src/ba.cpp
@@ -746,7 +746,7 @@ static void fjac(int /*i*/, int /*j*/, CvMat *point_params, CvMat* cam_params, C
   CvMat* _mp = cvCreateMat(1, 1, CV_64FC2 ); //projection of the point
 
   //split camera params into different matrices
-  CvMat _ri, _ti, _k;
+  CvMat _ri, _ti, _k = cvMat(0, 0, CV_64F, NULL); // dummy initialization to fix warning of cl.exe
   cvGetRows( cam_params, &_ri, 0, 3 );
   cvGetRows( cam_params, &_ti, 3, 6 );
 
diff --git a/modules/contrib/src/bowmsctrainer.cpp b/modules/contrib/src/bowmsctrainer.cpp
index c1e945f9b..305769120 100644
--- a/modules/contrib/src/bowmsctrainer.cpp
+++ b/modules/contrib/src/bowmsctrainer.cpp
@@ -136,4 +136,3 @@ Mat BOWMSCTrainer::cluster(const Mat& _descriptors) const {
 }
 
 }
-
diff --git a/modules/contrib/src/chowliutree.cpp b/modules/contrib/src/chowliutree.cpp
index 8c6acabdc..d4aed5383 100644
--- a/modules/contrib/src/chowliutree.cpp
+++ b/modules/contrib/src/chowliutree.cpp
@@ -287,4 +287,3 @@ bool ChowLiuTree::reduceEdgesToMinSpan(std::list<info>& edges) {
 }
 
 }
-
diff --git a/modules/contrib/src/colortracker.cpp b/modules/contrib/src/colortracker.cpp
index c1d91bb9b..a3eeb052b 100644
--- a/modules/contrib/src/colortracker.cpp
+++ b/modules/contrib/src/colortracker.cpp
@@ -132,5 +132,3 @@ Point2f CvMeanShiftTracker::getTrackingCenter()
 {
     return prev_center;
 }
-
-
diff --git a/modules/contrib/src/contrib_init.cpp b/modules/contrib/src/contrib_init.cpp
index a80f6f5e1..317867a59 100644
--- a/modules/contrib/src/contrib_init.cpp
+++ b/modules/contrib/src/contrib_init.cpp
@@ -41,4 +41,3 @@
 //M*/
 
 #include "precomp.hpp"
-
diff --git a/modules/contrib/src/detection_based_tracker.cpp b/modules/contrib/src/detection_based_tracker.cpp
index 49d09d1bf..27807290a 100644
--- a/modules/contrib/src/detection_based_tracker.cpp
+++ b/modules/contrib/src/detection_based_tracker.cpp
@@ -128,7 +128,7 @@ cv::DetectionBasedTracker::SeparateDetectionWork::SeparateDetectionWork(Detectio
     stateThread(STATE_THREAD_STOPPED),
     timeWhenDetectingThreadStartedWork(-1)
 {
-    CV_Assert(!_detector.empty());
+    CV_Assert(_detector);
 
     cascadeInThread = _detector;
 
@@ -462,11 +462,11 @@ cv::DetectionBasedTracker::DetectionBasedTracker(cv::Ptr<IDetector> mainDetector
     cascadeForTracking(trackingDetector)
 {
     CV_Assert( (params.maxTrackLifetime >= 0)
-//            && (!mainDetector.empty())
-            && (!trackingDetector.empty()) );
+//            && mainDetector
+            && trackingDetector );
 
-    if (!mainDetector.empty()) {
-        separateDetectionWork = new SeparateDetectionWork(*this, mainDetector);
+    if (mainDetector) {
+        separateDetectionWork.reset(new SeparateDetectionWork(*this, mainDetector));
     }
 
     weightsPositionsSmoothing.push_back(1);
@@ -483,7 +483,7 @@ void DetectionBasedTracker::process(const Mat& imageGray)
 {
     CV_Assert(imageGray.type()==CV_8UC1);
 
-    if ( (!separateDetectionWork.empty()) && (!separateDetectionWork->isWorking()) ) {
+    if ( separateDetectionWork && !separateDetectionWork->isWorking() ) {
         separateDetectionWork->run();
     }
 
@@ -501,7 +501,7 @@ void DetectionBasedTracker::process(const Mat& imageGray)
 
     std::vector<Rect> rectsWhereRegions;
     bool shouldHandleResult=false;
-    if (!separateDetectionWork.empty()) {
+    if (separateDetectionWork) {
         shouldHandleResult = separateDetectionWork->communicateWithDetectingThread(imageGray, rectsWhereRegions);
     }
 
@@ -589,7 +589,7 @@ void cv::DetectionBasedTracker::getObjects(std::vector<ExtObject>& result) const
 
 bool cv::DetectionBasedTracker::run()
 {
-    if (!separateDetectionWork.empty()) {
+    if (separateDetectionWork) {
         return separateDetectionWork->run();
     }
     return false;
@@ -597,14 +597,14 @@ bool cv::DetectionBasedTracker::run()
 
 void cv::DetectionBasedTracker::stop()
 {
-    if (!separateDetectionWork.empty()) {
+    if (separateDetectionWork) {
         separateDetectionWork->stop();
     }
 }
 
 void cv::DetectionBasedTracker::resetTracking()
 {
-    if (!separateDetectionWork.empty()) {
+    if (separateDetectionWork) {
         separateDetectionWork->resetTracking();
     }
     trackedObjects.clear();
@@ -876,11 +876,11 @@ bool cv::DetectionBasedTracker::setParameters(const Parameters& params)
         return false;
     }
 
-    if (!separateDetectionWork.empty()) {
+    if (separateDetectionWork) {
         separateDetectionWork->lock();
     }
     parameters=params;
-    if (!separateDetectionWork.empty()) {
+    if (separateDetectionWork) {
         separateDetectionWork->unlock();
     }
     return true;
@@ -892,4 +892,3 @@ const cv::DetectionBasedTracker::Parameters& DetectionBasedTracker::getParameter
 }
 
 #endif
-
diff --git a/modules/contrib/src/facerec.cpp b/modules/contrib/src/facerec.cpp
index d1050ebb7..1bea74e89 100644
--- a/modules/contrib/src/facerec.cpp
+++ b/modules/contrib/src/facerec.cpp
@@ -851,18 +851,18 @@ int LBPH::predict(InputArray _src) const {
 
 Ptr<FaceRecognizer> createEigenFaceRecognizer(int num_components, double threshold)
 {
-    return new Eigenfaces(num_components, threshold);
+    return makePtr<Eigenfaces>(num_components, threshold);
 }
 
 Ptr<FaceRecognizer> createFisherFaceRecognizer(int num_components, double threshold)
 {
-    return new Fisherfaces(num_components, threshold);
+    return makePtr<Fisherfaces>(num_components, threshold);
 }
 
 Ptr<FaceRecognizer> createLBPHFaceRecognizer(int radius, int neighbors,
                                              int grid_x, int grid_y, double threshold)
 {
-    return new LBPH(radius, neighbors, grid_x, grid_y, threshold);
+    return makePtr<LBPH>(radius, neighbors, grid_x, grid_y, threshold);
 }
 
 CV_INIT_ALGORITHM(Eigenfaces, "FaceRecognizer.Eigenfaces",
@@ -894,7 +894,7 @@ CV_INIT_ALGORITHM(LBPH, "FaceRecognizer.LBPH",
 
 bool initModule_contrib()
 {
-    Ptr<Algorithm> efaces = createEigenfaces_hidden(), ffaces = createFisherfaces_hidden(), lbph = createLBPH_hidden();
+    Ptr<Algorithm> efaces = createEigenfaces_ptr_hidden(), ffaces = createFisherfaces_ptr_hidden(), lbph = createLBPH_ptr_hidden();
     return efaces->info() != 0 && ffaces->info() != 0 && lbph->info() != 0;
 }
 
diff --git a/modules/contrib/src/featuretracker.cpp b/modules/contrib/src/featuretracker.cpp
index 4350aec46..795c1a08f 100644
--- a/modules/contrib/src/featuretracker.cpp
+++ b/modules/contrib/src/featuretracker.cpp
@@ -54,23 +54,26 @@ CvFeatureTracker::CvFeatureTracker(CvFeatureTrackerParams _params) :
     {
     case CvFeatureTrackerParams::SIFT:
         dd = Algorithm::create<Feature2D>("Feature2D.SIFT");
-        if( dd.empty() )
+        if( !dd )
             CV_Error(CV_StsNotImplemented, "OpenCV has been compiled without SIFT support");
         dd->set("nOctaveLayers", 5);
         dd->set("contrastThreshold", 0.04);
         dd->set("edgeThreshold", 10.7);
+        break;
     case CvFeatureTrackerParams::SURF:
         dd = Algorithm::create<Feature2D>("Feature2D.SURF");
-        if( dd.empty() )
+        if( !dd )
             CV_Error(CV_StsNotImplemented, "OpenCV has been compiled without SURF support");
         dd->set("hessianThreshold", 400);
         dd->set("nOctaves", 3);
         dd->set("nOctaveLayers", 4);
+        break;
     default:
         CV_Error(CV_StsBadArg, "Unknown feature type");
+        break;
     }
 
-    matcher = new BFMatcher(NORM_L2);
+    matcher = makePtr<BFMatcher>(int(NORM_L2));
 }
 
 CvFeatureTracker::~CvFeatureTracker()
@@ -218,4 +221,3 @@ Point2f CvFeatureTracker::getTrackingCenter()
     center.y = (float)(prev_center.y + prev_trackwindow.height/2.0);
     return center;
 }
-
diff --git a/modules/contrib/src/fuzzymeanshifttracker.cpp b/modules/contrib/src/fuzzymeanshifttracker.cpp
index 7ad8cd838..5e5ebc7e9 100644
--- a/modules/contrib/src/fuzzymeanshifttracker.cpp
+++ b/modules/contrib/src/fuzzymeanshifttracker.cpp
@@ -721,4 +721,3 @@ void CvFuzzyMeanShiftTracker::track(IplImage *maskImage, IplImage *depthMap, int
                 searchMode = tsTracking;
     }
 };
-
diff --git a/modules/contrib/src/hybridtracker.cpp b/modules/contrib/src/hybridtracker.cpp
index 23a6ecbba..d93f76d0d 100644
--- a/modules/contrib/src/hybridtracker.cpp
+++ b/modules/contrib/src/hybridtracker.cpp
@@ -233,4 +233,3 @@ void CvHybridTracker::updateTrackerWithLowPassFilter(Mat) {
 Rect CvHybridTracker::getTrackingWindow() {
     return prev_window;
 }
-
diff --git a/modules/contrib/src/inputoutput.cpp b/modules/contrib/src/inputoutput.cpp
index f64502e43..7f583558e 100644
--- a/modules/contrib/src/inputoutput.cpp
+++ b/modules/contrib/src/inputoutput.cpp
@@ -1,7 +1,7 @@
-
 #include "opencv2/contrib.hpp"
+#include "cvconfig.h"
 
-#ifdef WIN32
+#if defined(WIN32) || defined(_WIN32)
     #include <windows.h>
     #include <tchar.h>
 #else
@@ -10,16 +10,27 @@
 
 namespace cv
 {
-    std::vector<String> Directory::GetListFiles(  const String& path, const String & exten, bool addPath )
+    std::vector<String> Directory::GetListFiles( const String& path, const String & exten, bool addPath )
     {
         std::vector<String> list;
         list.clear();
         String path_f = path + "/" + exten;
         #ifdef WIN32
-            WIN32_FIND_DATA FindFileData;
-            HANDLE hFind;
+        #ifdef HAVE_WINRT
+            WIN32_FIND_DATAW FindFileData;
+        #else
+            WIN32_FIND_DATAA FindFileData;
+        #endif
+        HANDLE hFind;
 
-            hFind = FindFirstFile((LPCSTR)path_f.c_str(), &FindFileData);
+        #ifdef HAVE_WINRT
+            wchar_t wpath[MAX_PATH];
+            size_t copied = mbstowcs(wpath, path_f.c_str(), MAX_PATH);
+            CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+            hFind = FindFirstFileExW(wpath, FindExInfoStandard, &FindFileData, FindExSearchNameMatch, NULL, 0);
+        #else
+            hFind = FindFirstFileA((LPCSTR)path_f.c_str(), &FindFileData);
+        #endif
             if (hFind == INVALID_HANDLE_VALUE)
             {
                 return list;
@@ -34,13 +45,26 @@ namespace cv
                         FindFileData.dwFileAttributes == FILE_ATTRIBUTE_SYSTEM  ||
                         FindFileData.dwFileAttributes == FILE_ATTRIBUTE_READONLY)
                     {
+                        char* fname;
+                    #ifdef HAVE_WINRT
+                        char fname_tmp[MAX_PATH] = {0};
+                        size_t copied = wcstombs(fname_tmp, FindFileData.cFileName, MAX_PATH);
+                        CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+                        fname = fname_tmp;
+                    #else
+                        fname = FindFileData.cFileName;
+                    #endif
                         if (addPath)
-                            list.push_back(path + "/" + FindFileData.cFileName);
+                            list.push_back(path + "/" + String(fname));
                         else
-                            list.push_back(FindFileData.cFileName);
+                            list.push_back(String(fname));
                     }
                 }
-                while(FindNextFile(hFind, &FindFileData));
+            #ifdef HAVE_WINRT
+                while(FindNextFileW(hFind, &FindFileData));
+            #else
+                while(FindNextFileA(hFind, &FindFileData));
+            #endif
                 FindClose(hFind);
             }
         #else
@@ -75,10 +99,22 @@ namespace cv
         String path_f = path + "/" + exten;
         list.clear();
         #ifdef WIN32
-            WIN32_FIND_DATA FindFileData;
+        #ifdef HAVE_WINRT
+            WIN32_FIND_DATAW FindFileData;
+        #else
+            WIN32_FIND_DATAA FindFileData;
+        #endif
             HANDLE hFind;
 
-            hFind = FindFirstFile((LPCSTR)path_f.c_str(), &FindFileData);
+        #ifdef HAVE_WINRT
+            wchar_t wpath [MAX_PATH];
+            size_t copied = mbstowcs(wpath, path_f.c_str(), path_f.size());
+            CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+
+            hFind = FindFirstFileExW(wpath, FindExInfoStandard, &FindFileData, FindExSearchNameMatch, NULL, 0);
+        #else
+            hFind = FindFirstFileA((LPCSTR)path_f.c_str(), &FindFileData);
+        #endif
             if (hFind == INVALID_HANDLE_VALUE)
             {
                 return list;
@@ -87,17 +123,37 @@ namespace cv
             {
                 do
                 {
+#ifdef HAVE_WINRT
+                    if (FindFileData.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY &&
+                        wcscmp(FindFileData.cFileName, L".") != 0 &&
+                        wcscmp(FindFileData.cFileName, L"..") != 0)
+#else
                     if (FindFileData.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY &&
                         strcmp(FindFileData.cFileName, ".") != 0 &&
                         strcmp(FindFileData.cFileName, "..") != 0)
+#endif
                     {
+                        char* fname;
+                    #ifdef HAVE_WINRT
+                        char fname_tmp[MAX_PATH];
+                        size_t copied = wcstombs(fname_tmp, FindFileData.cFileName, MAX_PATH);
+                        CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+                        fname = fname_tmp;
+                    #else
+                        fname = FindFileData.cFileName;
+                    #endif
+
                         if (addPath)
-                            list.push_back(path + "/" + FindFileData.cFileName);
+                            list.push_back(path + "/" + String(fname));
                         else
-                            list.push_back(FindFileData.cFileName);
+                            list.push_back(String(fname));
                     }
                 }
-                while(FindNextFile(hFind, &FindFileData));
+            #ifdef HAVE_WINRT
+                while(FindNextFileW(hFind, &FindFileData));
+            #else
+                while(FindNextFileA(hFind, &FindFileData));
+            #endif
                 FindClose(hFind);
             }
 
diff --git a/modules/contrib/src/lda.cpp b/modules/contrib/src/lda.cpp
index 60693fc77..fe60e8923 100644
--- a/modules/contrib/src/lda.cpp
+++ b/modules/contrib/src/lda.cpp
@@ -1106,4 +1106,3 @@ Mat LDA::reconstruct(InputArray src) {
 }
 
 }
-
diff --git a/modules/contrib/src/logpolar_bsm.cpp b/modules/contrib/src/logpolar_bsm.cpp
index 70c7437bb..8327b0a2d 100644
--- a/modules/contrib/src/logpolar_bsm.cpp
+++ b/modules/contrib/src/logpolar_bsm.cpp
@@ -649,4 +649,3 @@ LogPolar_Adjacent::~LogPolar_Adjacent()
 }
 
 }
-
diff --git a/modules/contrib/src/precomp.cpp b/modules/contrib/src/precomp.cpp
deleted file mode 100644
index 3e0ec42de..000000000
--- a/modules/contrib/src/precomp.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-/* End of file. */
diff --git a/modules/contrib/src/stereovar.cpp b/modules/contrib/src/stereovar.cpp
index 54dd82ac9..b2141b1f1 100644
--- a/modules/contrib/src/stereovar.cpp
+++ b/modules/contrib/src/stereovar.cpp
@@ -408,4 +408,4 @@ void StereoVar::operator ()( const Mat& left, const Mat& right, Mat& disp )
 
     u.release();
 }
-} // namespace
\ No newline at end of file
+} // namespace
diff --git a/modules/contrib/test/test_precomp.hpp b/modules/contrib/test/test_precomp.hpp
index de9e283ee..992ff28df 100644
--- a/modules/contrib/test/test_precomp.hpp
+++ b/modules/contrib/test/test_precomp.hpp
@@ -14,4 +14,3 @@
 #include <iostream>
 
 #endif
-
diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt
index 8b3c6c770..b83a59149 100644
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@@ -1,9 +1,13 @@
 set(the_description "The Core Functionality")
-ocv_add_module(core ${ZLIB_LIBRARIES})
-ocv_module_include_directories(${ZLIB_INCLUDE_DIR})
+ocv_add_module(core ${ZLIB_LIBRARIES} OPTIONAL opencv_cudev)
+ocv_module_include_directories(${ZLIB_INCLUDE_DIRS})
+
+if (HAVE_WINRT)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW /GS /Gm- /AI\"${WINDOWS_SDK_PATH}/References/CommonConfiguration/Neutral\" /AI\"${VISUAL_STUDIO_PATH}/vcpackages\"")
+endif()
 
 if(HAVE_CUDA)
-  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
+  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wenum-compare -Wunused-function)
 endif()
 
 file(GLOB lib_cuda_hdrs        "include/opencv2/${name}/cuda/*.hpp"        "include/opencv2/${name}/cuda/*.h")
diff --git a/modules/core/doc/basic_structures.rst b/modules/core/doc/basic_structures.rst
index 70c7c0ebe..93e7ca479 100644
--- a/modules/core/doc/basic_structures.rst
+++ b/modules/core/doc/basic_structures.rst
@@ -49,6 +49,43 @@ Point\_
 -------
 .. ocv:class:: Point_
 
+::
+
+    template<typename _Tp> class CV_EXPORTS Point_
+    {
+    public:
+        typedef _Tp value_type;
+
+        // various constructors
+        Point_();
+        Point_(_Tp _x, _Tp _y);
+        Point_(const Point_& pt);
+        Point_(const CvPoint& pt);
+        Point_(const CvPoint2D32f& pt);
+        Point_(const Size_<_Tp>& sz);
+        Point_(const Vec<_Tp, 2>& v);
+
+        Point_& operator = (const Point_& pt);
+        //! conversion to another data type
+        template<typename _Tp2> operator Point_<_Tp2>() const;
+
+        //! conversion to the old-style C structures
+        operator CvPoint() const;
+        operator CvPoint2D32f() const;
+        operator Vec<_Tp, 2>() const;
+
+        //! dot product
+        _Tp dot(const Point_& pt) const;
+        //! dot product computed in double-precision arithmetics
+        double ddot(const Point_& pt) const;
+        //! cross-product
+        double cross(const Point_& pt) const;
+        //! checks whether the point is inside the specified rectangle
+        bool inside(const Rect_<_Tp>& r) const;
+
+        _Tp x, y; //< the point coordinates
+    };
+
 Template class for 2D points specified by its coordinates
 :math:`x` and
 :math:`y` .
@@ -84,6 +121,39 @@ Point3\_
 --------
 .. ocv:class:: Point3_
 
+::
+
+    template<typename _Tp> class CV_EXPORTS Point3_
+    {
+    public:
+        typedef _Tp value_type;
+
+        // various constructors
+        Point3_();
+        Point3_(_Tp _x, _Tp _y, _Tp _z);
+        Point3_(const Point3_& pt);
+        explicit Point3_(const Point_<_Tp>& pt);
+        Point3_(const CvPoint3D32f& pt);
+        Point3_(const Vec<_Tp, 3>& v);
+
+        Point3_& operator = (const Point3_& pt);
+        //! conversion to another data type
+        template<typename _Tp2> operator Point3_<_Tp2>() const;
+        //! conversion to the old-style CvPoint...
+        operator CvPoint3D32f() const;
+        //! conversion to cv::Vec<>
+        operator Vec<_Tp, 3>() const;
+
+        //! dot product
+        _Tp dot(const Point3_& pt) const;
+        //! dot product computed in double-precision arithmetics
+        double ddot(const Point3_& pt) const;
+        //! cross product of the 2 3D points
+        Point3_ cross(const Point3_& pt) const;
+
+        _Tp x, y, z; //< the point coordinates
+    };
+
 Template class for 3D points specified by its coordinates
 :math:`x`,
 :math:`y` and
@@ -100,6 +170,35 @@ Size\_
 ------
 .. ocv:class:: Size_
 
+::
+
+    template<typename _Tp> class CV_EXPORTS Size_
+    {
+    public:
+        typedef _Tp value_type;
+
+        //! various constructors
+        Size_();
+        Size_(_Tp _width, _Tp _height);
+        Size_(const Size_& sz);
+        Size_(const CvSize& sz);
+        Size_(const CvSize2D32f& sz);
+        Size_(const Point_<_Tp>& pt);
+
+        Size_& operator = (const Size_& sz);
+        //! the area (width*height)
+        _Tp area() const;
+
+        //! conversion of another data type.
+        template<typename _Tp2> operator Size_<_Tp2>() const;
+
+        //! conversion to the old-style OpenCV types
+        operator CvSize() const;
+        operator CvSize2D32f() const;
+
+        _Tp width, height; // the width and the height
+    };
+
 Template class for specifying the size of an image or rectangle. The class includes two members called ``width`` and ``height``. The structure can be converted to and from the old OpenCV structures
 ``CvSize`` and ``CvSize2D32f`` . The same set of arithmetic and comparison operations as for ``Point_`` is available.
 
@@ -113,6 +212,43 @@ Rect\_
 ------
 .. ocv:class:: Rect_
 
+::
+
+    template<typename _Tp> class CV_EXPORTS Rect_
+    {
+    public:
+        typedef _Tp value_type;
+
+        //! various constructors
+        Rect_();
+        Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height);
+        Rect_(const Rect_& r);
+        Rect_(const CvRect& r);
+        Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz);
+        Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2);
+
+        Rect_& operator = ( const Rect_& r );
+        //! the top-left corner
+        Point_<_Tp> tl() const;
+        //! the bottom-right corner
+        Point_<_Tp> br() const;
+
+        //! size (width, height) of the rectangle
+        Size_<_Tp> size() const;
+        //! area (width*height) of the rectangle
+        _Tp area() const;
+
+        //! conversion to another data type
+        template<typename _Tp2> operator Rect_<_Tp2>() const;
+        //! conversion to the old-style CvRect
+        operator CvRect() const;
+
+        //! checks whether the rectangle contains the point
+        bool contains(const Point_<_Tp>& pt) const;
+
+        _Tp x, y, width, height; //< the top-left corner, as well as width and height of the rectangle
+    };
+
 Template class for 2D rectangles, described by the following parameters:
 
 * Coordinates of the top-left corner. This is a default interpretation of ``Rect_::x`` and ``Rect_::y`` in OpenCV. Though, in your algorithms you may count ``x`` and ``y`` from the bottom-left corner.
@@ -171,6 +307,28 @@ RotatedRect
 -----------
 .. ocv:class:: RotatedRect
 
+::
+
+    class CV_EXPORTS RotatedRect
+    {
+    public:
+        //! various constructors
+        RotatedRect();
+        RotatedRect(const Point2f& center, const Size2f& size, float angle);
+        RotatedRect(const CvBox2D& box);
+
+        //! returns 4 vertices of the rectangle
+        void points(Point2f pts[]) const;
+        //! returns the minimal up-right rectangle containing the rotated rectangle
+        Rect boundingRect() const;
+        //! conversion to the old-style CvBox2D structure
+        operator CvBox2D() const;
+
+        Point2f center; //< the rectangle mass center
+        Size2f size;    //< width and height of the rectangle
+        float angle;    //< the rotation angle. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle.
+    };
+
 The class represents rotated (i.e. not up-right) rectangles on a plane. Each rectangle is specified by the center point (mass center), length of each side (represented by cv::Size2f structure) and the rotation angle in degrees.
 
     .. ocv:function:: RotatedRect::RotatedRect()
@@ -217,7 +375,33 @@ TermCriteria
 ------------
 .. ocv:class:: TermCriteria
 
-  The class defining termination criteria for iterative algorithms. You can initialize it by default constructor and then override any parameters, or the structure may be fully initialized using the advanced variant of the constructor.
+::
+
+    class CV_EXPORTS TermCriteria
+    {
+    public:
+        enum
+        {
+            COUNT=1, //!< the maximum number of iterations or elements to compute
+            MAX_ITER=COUNT, //!< ditto
+            EPS=2 //!< the desired accuracy or change in parameters at which the iterative algorithm stops
+        };
+
+        //! default constructor
+        TermCriteria();
+        //! full constructor
+        TermCriteria(int type, int maxCount, double epsilon);
+        //! conversion from CvTermCriteria
+        TermCriteria(const CvTermCriteria& criteria);
+        //! conversion to CvTermCriteria
+        operator CvTermCriteria() const;
+
+        int type; //!< the type of termination criteria: COUNT, EPS or COUNT + EPS
+        int maxCount; // the maximum number of iterations/elements
+        double epsilon; // the desired accuracy
+    };
+
+The class defining termination criteria for iterative algorithms. You can initialize it by default constructor and then override any parameters, or the structure may be fully initialized using the advanced variant of the constructor.
 
 TermCriteria::TermCriteria
 --------------------------
@@ -321,9 +505,36 @@ Scalar\_
 --------
 .. ocv:class:: Scalar_
 
-Template class for a 4-element vector derived from Vec. ::
+Template class for a 4-element vector derived from Vec.
 
-    template<typename _Tp> class Scalar_ : public Vec<_Tp, 4> { ... };
+::
+
+    template<typename _Tp> class CV_EXPORTS Scalar_ : public Vec<_Tp, 4>
+    {
+    public:
+        //! various constructors
+        Scalar_();
+        Scalar_(_Tp v0, _Tp v1, _Tp v2=0, _Tp v3=0);
+        Scalar_(const CvScalar& s);
+        Scalar_(_Tp v0);
+
+        //! returns a scalar with all elements set to v0
+        static Scalar_<_Tp> all(_Tp v0);
+        //! conversion to the old-style CvScalar
+        operator CvScalar() const;
+
+        //! conversion to another data type
+        template<typename T2> operator Scalar_<T2>() const;
+
+        //! per-element product
+        Scalar_<_Tp> mul(const Scalar_<_Tp>& t, double scale=1 ) const;
+
+        // returns (v0, -v1, -v2, -v3)
+        Scalar_<_Tp> conj() const;
+
+        // returns true iff v1 == v2 == v3 == 0
+        bool isReal() const;
+    };
 
     typedef Scalar_<double> Scalar;
 
@@ -333,12 +544,21 @@ Range
 -----
 .. ocv:class:: Range
 
-Template class specifying a continuous subsequence (slice) of a sequence. ::
+Template class specifying a continuous subsequence (slice) of a sequence.
 
-    class Range
+::
+
+    class CV_EXPORTS Range
     {
     public:
-        ...
+        Range();
+        Range(int _start, int _end);
+        Range(const CvSlice& slice);
+        int size() const;
+        bool empty() const;
+        static Range all();
+        operator CvSlice() const;
+
         int start, end;
     };
 
@@ -418,6 +638,48 @@ The keypoint constructors
     :param _class_id: object id
 
 
+KeyPoint::convert
+--------------------
+
+This method converts vector of keypoints to vector of points or the reverse, where each keypoint is assigned the same size and the same orientation.
+
+.. ocv:function:: void KeyPoint::convert(const std::vector<KeyPoint>& keypoints, std::vector<Point2f>& points2f, const std::vector<int>& keypointIndexes=std::vector<int>())
+
+.. ocv:function:: void KeyPoint::convert(const std::vector<Point2f>& points2f, std::vector<KeyPoint>& keypoints, float size=1, float response=1, int octave=0, int class_id=-1)
+
+.. ocv:pyfunction:: cv2.KeyPoint_convert(keypoints[, keypointIndexes]) -> points2f
+
+.. ocv:pyfunction:: cv2.KeyPoint_convert(points2f[, size[, response[, octave[, class_id]]]]) -> keypoints
+
+    :param keypoints: Keypoints obtained from any feature detection algorithm like SIFT/SURF/ORB
+
+    :param points2f: Array of (x,y) coordinates of each keypoint
+
+    :param keypointIndexes: Array of indexes of keypoints to be converted to points. (Acts like a mask to convert only specified keypoints)
+
+    :param _size: keypoint diameter
+
+    :param _response: keypoint detector response on the keypoint (that is, strength of the keypoint)
+
+    :param _octave: pyramid octave in which the keypoint has been detected
+
+    :param _class_id: object id
+
+
+KeyPoint::overlap
+--------------------
+
+This method computes overlap for pair of keypoints. Overlap is the ratio between area of keypoint regions' intersection and area of keypoint regions' union (considering keypoint region as circle). If they don't overlap, we get zero. If they coincide at same location with same size, we get 1.
+
+.. ocv:function:: float KeyPoint::overlap(const KeyPoint& kp1, const KeyPoint& kp2)
+
+.. ocv:pyfunction:: cv2.KeyPoint_overlap(kp1, kp2) -> retval
+
+    :param kp1: First keypoint
+
+    :param kp2: Second keypoint
+
+
 DMatch
 ------
 .. ocv:class:: DMatch
@@ -448,188 +710,335 @@ train descriptor index, train image index, and distance between descriptors. ::
     };
 
 
-
-.. _Ptr:
-
 Ptr
 ---
 .. ocv:class:: Ptr
 
-Template class for smart reference-counting pointers ::
+Template class for smart pointers with shared ownership. ::
 
-    template<typename _Tp> class Ptr
+    template<typename T>
+    struct Ptr
     {
-    public:
-        // default constructor
+        typedef T element_type;
+
         Ptr();
-        // constructor that wraps the object pointer
-        Ptr(_Tp* _obj);
-        // destructor: calls release()
+
+        template<typename Y>
+        explicit Ptr(Y* p);
+        template<typename Y, typename D>
+        Ptr(Y* p, D d);
+
+        Ptr(const Ptr& o);
+        template<typename Y>
+        Ptr(const Ptr<Y>& o);
+        template<typename Y>
+        Ptr(const Ptr<Y>& o, T* p);
+
         ~Ptr();
-        // copy constructor; increments ptr's reference counter
-        Ptr(const Ptr& ptr);
-        // assignment operator; decrements own reference counter
-        // (with release()) and increments ptr's reference counter
-        Ptr& operator = (const Ptr& ptr);
-        // increments reference counter
-        void addref();
-        // decrements reference counter; when it becomes 0,
-        // delete_obj() is called
+
+        Ptr& operator = (const Ptr& o);
+        template<typename Y>
+        Ptr& operator = (const Ptr<Y>& o);
+
         void release();
-        // user-specified custom object deletion operation.
-        // by default, "delete obj;" is called
-        void delete_obj();
-        // returns true if obj == 0;
+
+        template<typename Y>
+        void reset(Y* p);
+        template<typename Y, typename D>
+        void reset(Y* p, D d);
+
+        void swap(Ptr& o);
+
+        T* get() const;
+
+        T& operator * () const;
+        T* operator -> () const;
+        operator T* () const;
+
         bool empty() const;
 
-        // provide access to the object fields and methods
-        _Tp* operator -> ();
-        const _Tp* operator -> () const;
-
-        // return the underlying object pointer;
-        // thanks to the methods, the Ptr<_Tp> can be
-        // used instead of _Tp*
-        operator _Tp* ();
-        operator const _Tp*() const;
-    protected:
-        // the encapsulated object pointer
-        _Tp* obj;
-        // the associated reference counter
-        int* refcount;
+        template<typename Y>
+        Ptr<Y> staticCast() const;
+        template<typename Y>
+        Ptr<Y> constCast() const;
+        template<typename Y>
+        Ptr<Y> dynamicCast() const;
     };
 
 
-The ``Ptr<_Tp>`` class is a template class that wraps pointers of the corresponding type. It is
-similar to ``shared_ptr`` that is part of the Boost library
-(http://www.boost.org/doc/libs/1_40_0/libs/smart_ptr/shared_ptr.htm) and also part of the
-`C++0x <http://en.wikipedia.org/wiki/C++0x>`_ standard.
+A ``Ptr<T>`` pretends to be a pointer to an object of type T.
+Unlike an ordinary pointer, however, the object will be automatically
+cleaned up once all ``Ptr`` instances pointing to it are destroyed.
 
-This class provides the following options:
+``Ptr`` is similar to ``boost::shared_ptr`` that is part of the Boost library
+(http://www.boost.org/doc/libs/release/libs/smart_ptr/shared_ptr.htm)
+and ``std::shared_ptr`` from the `C++11 <http://en.wikipedia.org/wiki/C++11>`_ standard.
+
+This class provides the following advantages:
 
 *
     Default constructor, copy constructor, and assignment operator for an arbitrary C++ class
-    or a C structure. For some objects, like files, windows, mutexes, sockets, and others, a copy
+    or C structure. For some objects, like files, windows, mutexes, sockets, and others, a copy
     constructor or an assignment operator are difficult to define. For some other objects, like
     complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally,
     some of complex OpenCV and your own data structures may be written in C.
-    However, copy constructors and default constructors can simplify programming a lot.Besides,
-    they are often required (for example, by STL containers). By wrapping a pointer to such a
-    complex object ``TObj`` to ``Ptr<TObj>``, you automatically get all of the necessary
+    However, copy constructors and default constructors can simplify programming a lot. Besides,
+    they are often required (for example, by STL containers). By using a ``Ptr`` to such an
+    object instead of the object itself, you automatically get all of the necessary
     constructors and the assignment operator.
 
 *
     *O(1)* complexity of the above-mentioned operations. While some structures, like ``std::vector``,
     provide a copy constructor and an assignment operator, the operations may take a considerable
-    amount of time if the data structures are large. But if the structures are put into ``Ptr<>``,
+    amount of time if the data structures are large. But if the structures are put into a ``Ptr``,
     the overhead is small and independent of the data size.
 
 *
-    Automatic destruction, even for C structures. See the example below with ``FILE*``.
+    Automatic and customizable cleanup, even for C structures. See the example below with ``FILE*``.
 
 *
     Heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers
     can store only objects of the same type and the same size. The classical solution to store objects
-    of different types in the same container is to store pointers to the base class ``base_class_t*``
-    instead but then you loose the automatic memory management. Again, by using ``Ptr<base_class_t>()``
-    instead of the raw pointers, you can solve the problem.
+    of different types in the same container is to store pointers to the base class (``Base*``)
+    instead but then you lose the automatic memory management. Again, by using ``Ptr<Base>``
+    instead of raw pointers, you can solve the problem.
 
-The ``Ptr`` class treats the wrapped object as a black box. The reference counter is allocated and
-managed separately. The only thing the pointer class needs to know about the object is how to
-deallocate it. This knowledge is encapsulated in the ``Ptr::delete_obj()`` method that is called when
-the reference counter becomes 0. If the object is a C++ class instance, no additional coding is
-needed, because the default implementation of this method calls ``delete obj;``. However, if the
-object is deallocated in a different way, the specialized method should be created. For example,
-if you want to wrap ``FILE``, the ``delete_obj`` may be implemented as follows: ::
+A ``Ptr`` is said to *own* a pointer - that is, for each ``Ptr`` there is a pointer that will be deleted
+once all ``Ptr`` instances that own it are destroyed. The owned pointer may be null, in which case nothing is deleted.
+Each ``Ptr`` also *stores* a pointer. The stored pointer is the pointer the ``Ptr`` pretends to be;
+that is, the one you get when you use :ocv:func:`Ptr::get` or the conversion to ``T*``. It's usually
+the same as the owned pointer, but if you use casts or the general shared-ownership constructor, the two may diverge:
+the ``Ptr`` will still own the original pointer, but will itself point to something else.
 
-    template<> inline void Ptr<FILE>::delete_obj()
-    {
-        fclose(obj); // no need to clear the pointer afterwards,
-                     // it is done externally.
-    }
-    ...
+The owned pointer is treated as a black box. The only thing ``Ptr`` needs to know about it is how to
+delete it. This knowledge is encapsulated in the *deleter* - an auxiliary object that is associated
+with the owned pointer and shared between all ``Ptr`` instances that own it. The default deleter is
+an instance of ``DefaultDeleter``, which uses the standard C++ ``delete`` operator; as such it
+will work with any pointer allocated with the standard ``new`` operator.
 
-    // now use it:
-    Ptr<FILE> f(fopen("myfile.txt", "r"));
-    if(f.empty())
-        throw ...;
+However, if the pointer must be deleted in a different way, you must specify a custom deleter upon
+``Ptr`` construction. A deleter is simply a callable object that accepts the pointer as its sole argument.
+For example, if you want to wrap ``FILE``, you may do so as follows::
+
+    Ptr<FILE> f(fopen("myfile.txt", "w"), fclose);
+    if(!f) throw ...;
     fprintf(f, ....);
     ...
-    // the file will be closed automatically by the Ptr<FILE> destructor.
+    // the file will be closed automatically by f's destructor.
 
+Alternatively, if you want all pointers of a particular type to be deleted the same way,
+you can specialize ``DefaultDeleter<T>::operator()`` for that type, like this::
 
-.. note:: The reference increment/decrement operations are implemented as atomic operations,
-          and therefore it is normally safe to use the classes in multi-threaded applications.
-          The same is true for :ocv:class:`Mat` and other C++ OpenCV classes that operate on
-          the reference counters.
+    namespace cv {
+    template<> void DefaultDeleter<FILE>::operator ()(FILE * obj) const
+    {
+        fclose(obj);
+    }
+    }
 
-Ptr::Ptr
---------
-Various Ptr constructors.
+For convenience, the following types from the OpenCV C API already have such a specialization
+that calls the appropriate release function:
+
+* ``CvCapture``
+* :ocv:struct:`CvDTreeSplit`
+* :ocv:struct:`CvFileStorage`
+* ``CvHaarClassifierCascade``
+* :ocv:struct:`CvMat`
+* :ocv:struct:`CvMatND`
+* :ocv:struct:`CvMemStorage`
+* :ocv:struct:`CvSparseMat`
+* ``CvVideoWriter``
+* :ocv:struct:`IplImage`
+
+.. note:: The shared ownership mechanism is implemented with reference counting. As such,
+          cyclic ownership (e.g. when object ``a`` contains a ``Ptr`` to object ``b``, which
+          contains a ``Ptr`` to object ``a``) will lead to all involved objects never being
+          cleaned up. Avoid such situations.
+
+.. note:: It is safe to concurrently read (but not write) a ``Ptr`` instance from multiple threads
+          and therefore it is normally safe to use it in multi-threaded applications.
+          The same is true for :ocv:class:`Mat` and other C++ OpenCV classes that use internal
+          reference counts.
+
+Ptr::Ptr (null)
+------------------
 
 .. ocv:function:: Ptr::Ptr()
-.. ocv:function:: Ptr::Ptr(_Tp* _obj)
-.. ocv:function:: Ptr::Ptr(const Ptr& ptr)
+
+    The default constructor creates a null ``Ptr`` - one that owns and stores a null pointer.
+
+Ptr::Ptr (assuming ownership)
+-----------------------------
+
+.. ocv:function:: template<typename Y> Ptr::Ptr(Y* p)
+.. ocv:function:: template<typename Y, typename D> Ptr::Ptr(Y* p, D d)
+
+    :param d: Deleter to use for the owned pointer.
+    :param p: Pointer to own.
+
+    If ``p`` is null, these are equivalent to the default constructor.
+
+    Otherwise, these constructors assume ownership of ``p`` - that is, the created ``Ptr`` owns
+    and stores ``p`` and assumes it is the sole owner of it. Don't use them if ``p`` is already
+    owned by another ``Ptr``, or else ``p`` will get deleted twice.
+
+    With the first constructor, ``DefaultDeleter<Y>()`` becomes the associated deleter (so ``p``
+    will eventually be deleted with the standard ``delete`` operator). ``Y`` must be a complete
+    type at the point of invocation.
+
+    With the second constructor, ``d`` becomes the associated deleter.
+
+    ``Y*`` must be convertible to ``T*``.
+
+    .. note:: It is often easier to use :ocv:func:`makePtr` instead.
+
+Ptr::Ptr (sharing ownership)
+----------------------------
+
+.. ocv:function:: Ptr::Ptr(const Ptr& o)
+.. ocv:function:: template<typename Y> Ptr::Ptr(const Ptr<Y>& o)
+.. ocv:function:: template<typename Y> Ptr::Ptr(const Ptr<Y>& o, T* p)
+
+    :param o: ``Ptr`` to share ownership with.
+    :param p: Pointer to store.
+
+    These constructors create a ``Ptr`` that shares ownership with another ``Ptr`` - that is,
+    own the same pointer as ``o``.
+
+    With the first two, the same pointer is stored, as well; for the second, ``Y*`` must be convertible to ``T*``.
+
+    With the third, ``p`` is stored, and ``Y`` may be any type. This constructor allows to have completely
+    unrelated owned and stored pointers, and should be used with care to avoid confusion. A relatively
+    benign use is to create a non-owning ``Ptr``, like this::
+
+        ptr = Ptr<T>(Ptr<T>(), dont_delete_me); // owns nothing; will not delete the pointer.
 
 Ptr::~Ptr
 ---------
-The Ptr destructor.
 
 .. ocv:function:: Ptr::~Ptr()
 
+    The destructor is equivalent to calling :ocv:func:`Ptr::release`.
+
 Ptr::operator =
 ----------------
-Assignment operator.
 
-.. ocv:function:: Ptr& Ptr::operator = (const Ptr& ptr)
+.. ocv:function:: Ptr& Ptr::operator = (const Ptr& o)
+.. ocv:function:: template<typename Y> Ptr& Ptr::operator = (const Ptr<Y>& o)
 
-Decrements own reference counter (with ``release()``) and increments ptr's reference counter.
+    :param o: ``Ptr`` to share ownership with.
 
-Ptr::addref
------------
-Increments reference counter.
+    Assignment replaces the current ``Ptr`` instance with one that owns and stores same
+    pointers as ``o`` and then destroys the old instance.
 
-.. ocv:function:: void Ptr::addref()
 
 Ptr::release
 ------------
-Decrements reference counter; when it becomes 0, ``delete_obj()`` is called.
 
 .. ocv:function:: void Ptr::release()
 
-Ptr::delete_obj
----------------
-User-specified custom object deletion operation. By default, ``delete obj;`` is called.
+    If no other ``Ptr`` instance owns the owned pointer, deletes it with the associated deleter.
+    Then sets both the owned and the stored pointers to ``NULL``.
 
-.. ocv:function:: void Ptr::delete_obj()
+
+Ptr::reset
+----------
+
+.. ocv:function:: template<typename Y> void Ptr::reset(Y* p)
+.. ocv:function:: template<typename Y, typename D> void Ptr::reset(Y* p, D d)
+
+    :param d: Deleter to use for the owned pointer.
+    :param p: Pointer to own.
+
+    ``ptr.reset(...)`` is equivalent to ``ptr = Ptr<T>(...)``.
+
+Ptr::swap
+---------
+
+.. ocv:function:: void Ptr::swap(Ptr& o)
+
+    :param o: ``Ptr`` to swap with.
+
+    Swaps the owned and stored pointers (and deleters, if any) of this and ``o``.
+
+Ptr::get
+--------
+
+.. ocv:function:: T* Ptr::get() const
+
+    Returns the stored pointer.
+
+Ptr pointer emulation
+---------------------
+
+.. ocv:function:: T& Ptr::operator * () const
+.. ocv:function:: T* Ptr::operator -> () const
+.. ocv:function:: Ptr::operator T* () const
+
+    These operators are what allows ``Ptr`` to pretend to be a pointer.
+
+    If ``ptr`` is a ``Ptr<T>``, then ``*ptr`` is equivalent to ``*ptr.get()``
+    and ``ptr->foo`` is equivalent to ``ptr.get()->foo``. In addition, ``ptr``
+    is implicitly convertible to ``T*``, and such conversion is equivalent to
+    ``ptr.get()``. As a corollary, ``if (ptr)`` is equivalent to ``if (ptr.get())``.
+    In other words, a ``Ptr`` behaves as if it was its own stored pointer.
 
 Ptr::empty
 ----------
-Returns true if obj == 0;
 
-bool empty() const;
+.. ocv:function:: bool Ptr::empty() const
 
-Ptr::operator ->
-----------------
-Provide access to the object fields and methods.
+    ``ptr.empty()`` is equivalent to ``!ptr.get()``.
 
- .. ocv:function:: template<typename _Tp> _Tp* Ptr::operator -> ()
- .. ocv:function:: template<typename _Tp> const _Tp* Ptr::operator -> () const
+Ptr casts
+---------
 
+.. ocv:function:: template<typename Y> Ptr<Y> Ptr::staticCast() const
+.. ocv:function:: template<typename Y> Ptr<Y> Ptr::constCast() const
+.. ocv:function:: template<typename Y> Ptr<Y> Ptr::dynamicCast() const
 
-Ptr::operator _Tp*
-------------------
-Returns the underlying object pointer. Thanks to the methods, the ``Ptr<_Tp>`` can be used instead
-of ``_Tp*``.
+    If ``ptr`` is a ``Ptr``, then ``ptr.fooCast<Y>()`` is equivalent to
+    ``Ptr<Y>(ptr, foo_cast<Y>(ptr.get()))``. That is, these functions create
+    a new ``Ptr`` with the same owned pointer and a cast stored pointer.
 
- .. ocv:function:: template<typename _Tp> Ptr::operator _Tp* ()
- .. ocv:function:: template<typename _Tp> Ptr::operator const _Tp*() const
+Ptr global swap
+---------------
 
+.. ocv:function:: template<typename T> void swap(Ptr<T>& ptr1, Ptr<T>& ptr2)
+
+    Equivalent to ``ptr1.swap(ptr2)``. Provided to help write generic algorithms.
+
+Ptr comparisons
+---------------
+
+.. ocv:function:: template<typename T> bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
+.. ocv:function:: template<typename T> bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
+
+    Return whether ``ptr1.get()`` and ``ptr2.get()`` are equal and not equal, respectively.
+
+makePtr
+-------
+
+.. ocv:function:: template<typename T> Ptr<T> makePtr()
+.. ocv:function:: template<typename T, typename A1> Ptr<T> makePtr(const A1& a1)
+.. ocv:function:: template<typename T, typename A1, typename A2> Ptr<T> makePtr(const A1& a1, const A2& a2)
+.. ocv:function:: template<typename T, typename A1, typename A2, typename A3> Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3)
+
+    (and so on...)
+
+    ``makePtr<T>(...)`` is equivalent to ``Ptr<T>(new T(...))``. It is shorter than the latter, and
+    it's marginally safer than using a constructor or :ocv:func:`Ptr::reset`, since it ensures that
+    the owned pointer is new and thus not owned by any other ``Ptr`` instance.
+
+    Unfortunately, perfect forwarding is impossible to implement in C++03, and so ``makePtr`` is limited
+    to constructors of ``T`` that have up to 10 arguments, none of which are non-const references.
 
 Mat
 ---
 .. ocv:class:: Mat
 
-OpenCV C++ n-dimensional dense array class ::
+OpenCV C++ n-dimensional dense array class
+::
 
     class CV_EXPORTS Mat
     {
@@ -659,7 +1068,6 @@ OpenCV C++ n-dimensional dense array class ::
         ...
     };
 
-
 The class ``Mat`` represents an n-dimensional dense numerical single-channel or multi-channel array. It can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel volumes, vector fields, point clouds, tensors, histograms (though, very high-dimensional histograms may be better stored in a ``SparseMat`` ). The data layout of the array
 :math:`M` is defined by the array ``M.step[]``, so that the address of element
 :math:`(i_0,...,i_{M.dims-1})`, where
@@ -879,6 +1287,9 @@ Finally, there are STL-style iterators that are smart enough to skip gaps betwee
 
 The matrix iterators are random-access iterators, so they can be passed to any STL algorithm, including ``std::sort()`` .
 
+.. note::
+
+   * An example demonstrating the serial out capabilities of cv::Mat can be found at opencv_source_code/samples/cpp/cout_mat.cpp
 
 .. _MatrixExpressions:
 
@@ -1141,7 +1552,7 @@ The method makes a new header for the specified row span of the matrix. Similarl
 
 Mat::colRange
 -------------
-Creates a matrix header for the specified row span.
+Creates a matrix header for the specified column span.
 
 .. ocv:function:: Mat Mat::colRange(int startcol, int endcol) const
 
@@ -1542,6 +1953,7 @@ Adds elements to the bottom of the matrix.
 .. ocv:function:: void Mat::push_back( const Mat& m )
 
     :param elem: Added element(s).
+    :param m: Added line(s).
 
 The methods add one or more elements to the bottom of the matrix. They emulate the corresponding method of the STL vector class. When ``elem`` is ``Mat`` , its type and the number of columns must be the same as in the container matrix.
 
@@ -2209,7 +2621,6 @@ Various SparseMat constructors.
     :param dims: Array dimensionality.
     :param _sizes: Sparce matrix size on all dementions.
     :param _type: Sparse matrix data type.
-    :param try1d: if try1d is true and matrix is a single-column matrix (Nx1), then the sparse matrix will be 1-dimensional.
 
 SparseMat::~SparseMat
 ---------------------
@@ -2224,6 +2635,8 @@ Provides sparse matrix assignment operators.
 .. ocv:function:: SparseMat& SparseMat::operator = (const SparseMat& m)
 .. ocv:function:: SparseMat& SparseMat::operator = (const Mat& m)
 
+    :param m: Matrix for assignment.
+
 The last variant is equivalent to the corresponding constructor with try1d=false.
 
 
@@ -2251,6 +2664,10 @@ Convert sparse matrix with possible type change and scaling.
 .. ocv:function:: void SparseMat::convertTo( SparseMat& m, int rtype, double alpha=1 ) const
 .. ocv:function:: void SparseMat::convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const
 
+    :param m: Destination matrix.
+    :param rtype: Destination matrix type.
+    :param alpha: Conversion multiplier.
+
 The first version converts arbitrary sparse matrix to dense matrix and multiplies all the matrix elements by the specified scalar.
 The second versiob converts sparse matrix to dense matrix with optional type conversion and scaling.
 When rtype=-1, the destination element type will be the same as the sparse matrix element type.
@@ -2343,7 +2760,7 @@ The method returns the number of matrix channels.
 
 SparseMat::size
 ---------------
-Returns the array of sizes or matrix size by i dimention and 0 if the matrix is not allocated.
+Returns the array of sizes or matrix size by i dimension and 0 if the matrix is not allocated.
 
 .. ocv:function:: const int* SparseMat::size() const
 .. ocv:function:: int SparseMat::size(int i) const
@@ -2371,6 +2788,11 @@ Compute element hash value from the element indices.
 .. ocv:function:: size_t SparseMat::hash(int i0, int i1, int i2) const
 .. ocv:function:: size_t SparseMat::hash(const int* idx) const
 
+    :param i0: The first dimension index.
+    :param i1: The second dimension index.
+    :param i2: The third dimension index.
+    :param idx: Array of element indices for multidimensional matices.
+
 SparseMat::ptr
 --------------
 Low-level element-access functions, special variants for 1D, 2D, 3D cases, and the generic one for n-D case.
@@ -2380,6 +2802,12 @@ Low-level element-access functions, special variants for 1D, 2D, 3D cases, and t
 .. ocv:function:: uchar* SparseMat::ptr(int i0, int i1, int i2, bool createMissing, size_t* hashval=0)
 .. ocv:function:: uchar* SparseMat::ptr(const int* idx, bool createMissing, size_t* hashval=0)
 
+    :param i0: The first dimension index.
+    :param i1: The second dimension index.
+    :param i2: The third dimension index.
+    :param idx: Array of element indices for multidimensional matices.
+    :param createMissing: Create new element with 0 value if it does not exist in SparseMat.
+
 Return pointer to the matrix element. If the element is there (it is non-zero), the pointer to it is returned.
 If it is not there and ``createMissing=false``, NULL pointer is returned. If it is not there and ``createMissing=true``,
 the new elementis created and initialized with 0. Pointer to it is returned. If the optional hashval pointer is not ``NULL``,
@@ -2393,6 +2821,11 @@ Erase the specified matrix element. When there is no such an element, the method
 .. ocv:function:: void SparseMat::erase(int i0, int i1, int i2, size_t* hashval=0)
 .. ocv:function:: void SparseMat::erase(const int* idx, size_t* hashval=0)
 
+    :param i0: The first dimension index.
+    :param i1: The second dimension index.
+    :param i2: The third dimension index.
+    :param idx: Array of element indices for multidimensional matices.
+
 SparseMat\_
 -----------
 .. ocv:class:: SparseMat_
@@ -2461,6 +2894,82 @@ Algorithm
 ---------
 .. ocv:class:: Algorithm
 
+::
+
+    class CV_EXPORTS_W Algorithm
+    {
+    public:
+        Algorithm();
+        virtual ~Algorithm();
+        string name() const;
+
+        template<typename _Tp> typename ParamType<_Tp>::member_type get(const string& name) const;
+        template<typename _Tp> typename ParamType<_Tp>::member_type get(const char* name) const;
+
+        CV_WRAP int getInt(const string& name) const;
+        CV_WRAP double getDouble(const string& name) const;
+        CV_WRAP bool getBool(const string& name) const;
+        CV_WRAP string getString(const string& name) const;
+        CV_WRAP Mat getMat(const string& name) const;
+        CV_WRAP vector<Mat> getMatVector(const string& name) const;
+        CV_WRAP Ptr<Algorithm> getAlgorithm(const string& name) const;
+
+        void set(const string& name, int value);
+        void set(const string& name, double value);
+        void set(const string& name, bool value);
+        void set(const string& name, const string& value);
+        void set(const string& name, const Mat& value);
+        void set(const string& name, const vector<Mat>& value);
+        void set(const string& name, const Ptr<Algorithm>& value);
+        template<typename _Tp> void set(const string& name, const Ptr<_Tp>& value);
+
+        CV_WRAP void setInt(const string& name, int value);
+        CV_WRAP void setDouble(const string& name, double value);
+        CV_WRAP void setBool(const string& name, bool value);
+        CV_WRAP void setString(const string& name, const string& value);
+        CV_WRAP void setMat(const string& name, const Mat& value);
+        CV_WRAP void setMatVector(const string& name, const vector<Mat>& value);
+        CV_WRAP void setAlgorithm(const string& name, const Ptr<Algorithm>& value);
+        template<typename _Tp> void setAlgorithm(const string& name, const Ptr<_Tp>& value);
+
+        void set(const char* name, int value);
+        void set(const char* name, double value);
+        void set(const char* name, bool value);
+        void set(const char* name, const string& value);
+        void set(const char* name, const Mat& value);
+        void set(const char* name, const vector<Mat>& value);
+        void set(const char* name, const Ptr<Algorithm>& value);
+        template<typename _Tp> void set(const char* name, const Ptr<_Tp>& value);
+
+        void setInt(const char* name, int value);
+        void setDouble(const char* name, double value);
+        void setBool(const char* name, bool value);
+        void setString(const char* name, const string& value);
+        void setMat(const char* name, const Mat& value);
+        void setMatVector(const char* name, const vector<Mat>& value);
+        void setAlgorithm(const char* name, const Ptr<Algorithm>& value);
+        template<typename _Tp> void setAlgorithm(const char* name, const Ptr<_Tp>& value);
+
+        CV_WRAP string paramHelp(const string& name) const;
+        int paramType(const char* name) const;
+        CV_WRAP int paramType(const string& name) const;
+        CV_WRAP void getParams(CV_OUT vector<string>& names) const;
+
+
+        virtual void write(FileStorage& fs) const;
+        virtual void read(const FileNode& fn);
+
+        typedef Algorithm* (*Constructor)(void);
+        typedef int (Algorithm::*Getter)() const;
+        typedef void (Algorithm::*Setter)(int);
+
+        CV_WRAP static void getList(CV_OUT vector<string>& algorithms);
+        CV_WRAP static Ptr<Algorithm> _create(const string& name);
+        template<typename _Tp> static Ptr<_Tp> create(const string& name);
+
+        virtual AlgorithmInfo* info() const /* TODO: make it = 0;*/ { return 0; }
+    };
+
 This is a base class for all more or less complex algorithms in OpenCV, especially for classes of algorithms, for which there can be multiple implementations. The examples are stereo correspondence (for which there are algorithms like block matching, semi-global block matching, graph-cut etc.), background subtraction (which can be done using mixture-of-gaussians models, codebook-based algorithm etc.), optical flow (block matching, Lucas-Kanade, Horn-Schunck etc.).
 
 The class provides the following features for all derived classes:
@@ -2599,7 +3108,7 @@ Creates algorithm instance by name
 
     :param name: The algorithm name, one of the names returned by ``Algorithm::getList()``.
 
-This static method creates a new instance of the specified algorithm. If there is no such algorithm, the method will silently return null pointer (that can be checked by ``Ptr::empty()`` method). Also, you should specify the particular ``Algorithm`` subclass as ``_Tp`` (or simply ``Algorithm`` if you do not know it at that point). ::
+This static method creates a new instance of the specified algorithm. If there is no such algorithm, the method will silently return a null pointer. Also, you should specify the particular ``Algorithm`` subclass as ``_Tp`` (or simply ``Algorithm`` if you do not know it at that point). ::
 
     Ptr<BackgroundSubtractor> bgfg = Algorithm::create<BackgroundSubtractor>("BackgroundSubtractor.MOG2");
 
@@ -2615,4 +3124,3 @@ The above methods are usually enough for users. If you want to make your own alg
  * Add public virtual method ``AlgorithmInfo* info() const;`` to your class.
  * Add constructor function, ``AlgorithmInfo`` instance and implement the ``info()`` method. The simplest way is to take  http://code.opencv.org/projects/opencv/repository/revisions/master/entry/modules/ml/src/ml_init.cpp as the reference and modify it according to the list of your parameters.
  * Add some public function (e.g. ``initModule_<mymodule>()``) that calls info() of your algorithm and put it into the same source file as ``info()`` implementation. This is to force C++ linker to include this object file into the target application. See ``Algorithm::create()`` for details.
-
diff --git a/modules/core/doc/clustering.rst b/modules/core/doc/clustering.rst
index 557e92eb4..dc9b28799 100644
--- a/modules/core/doc/clustering.rst
+++ b/modules/core/doc/clustering.rst
@@ -15,12 +15,18 @@ Finds centers of clusters and groups input samples around the clusters.
 
     :param samples: Floating-point matrix of input samples, one row per sample.
 
+    :param data: Data for clustering.
+
     :param cluster_count: Number of clusters to split the set by.
 
+    :param K: Number of clusters to split the set by.
+
     :param labels: Input/output integer array that stores the cluster indices for every sample.
 
     :param criteria: The algorithm termination criteria, that is, the maximum number of iterations and/or the desired accuracy. The accuracy is specified as ``criteria.epsilon``. As soon as each of the cluster centers moves by less than ``criteria.epsilon`` on some iteration, the algorithm stops.
 
+    :param termcrit: The algorithm termination criteria, that is, the maximum number of iterations and/or the desired accuracy.
+
     :param attempts: Flag to specify the number of times the algorithm is executed using different initial labellings. The algorithm returns the labels that yield the best compactness (see the last function parameter).
 
     :param rng: CvRNG state initialized by RNG().
@@ -35,6 +41,8 @@ Finds centers of clusters and groups input samples around the clusters.
 
     :param centers: Output matrix of the cluster centers, one row per each cluster center.
 
+    :param _centers: Output matrix of the cluster centers, one row per each cluster center.
+
     :param compactness: The returned value that is described below.
 
 The function ``kmeans`` implements a k-means algorithm that finds the
@@ -56,6 +64,12 @@ Basically, you can use only the core of the function, set the number of
 attempts to 1, initialize labels each time using a custom algorithm, pass them with the
 ( ``flags`` = ``KMEANS_USE_INITIAL_LABELS`` ) flag, and then choose the best (most-compact) clustering.
 
+.. note::
+
+   * An example on K-means clustering can be found at opencv_source_code/samples/cpp/kmeans.cpp
+
+   * (Python) An example on K-means clustering can be found at opencv_source_code/samples/python2/kmeans.py
+
 partition
 -------------
 Splits an element set into equivalency classes.
diff --git a/modules/core/doc/command_line_parser.rst b/modules/core/doc/command_line_parser.rst
index 64c72020a..c1f69f4e5 100644
--- a/modules/core/doc/command_line_parser.rst
+++ b/modules/core/doc/command_line_parser.rst
@@ -98,4 +98,3 @@ Use:
     # ./app -fps=aaa
     ERRORS:
     Exception: can not convert: [aaa] to [double]
-
diff --git a/modules/core/doc/core.rst b/modules/core/doc/core.rst
index 90caeebc7..73bfc3536 100644
--- a/modules/core/doc/core.rst
+++ b/modules/core/doc/core.rst
@@ -15,4 +15,4 @@ core. The Core Functionality
     old_xml_yaml_persistence
     clustering
     utility_and_system_functions_and_macros
-
+    opengl_interop
diff --git a/modules/core/doc/drawing_functions.rst b/modules/core/doc/drawing_functions.rst
index 17e983b6b..258cfe6ef 100644
--- a/modules/core/doc/drawing_functions.rst
+++ b/modules/core/doc/drawing_functions.rst
@@ -26,6 +26,10 @@ If a drawn figure is partially or completely outside the image, the drawing func
 
 .. note:: The functions do not support alpha-transparency when the target image is 4-channel. In this case, the ``color[3]`` is simply copied to the repainted pixels. Thus, if you want to paint semi-transparent shapes, you can paint them in a separate buffer and then blend it with the main image.
 
+.. note::
+
+   * An example on using variate drawing functions like line, rectangle, ... can be found at opencv_source_code/samples/cpp/drawing.cpp
+
 circle
 ----------
 Draws a circle.
@@ -225,6 +229,8 @@ Calculates the width and height of a text string.
 
     :param text: Input text string.
 
+    :param text_string: Input text string in C format.
+
     :param fontFace: Font to use. See the  :ocv:func:`putText` for details.
 
     :param fontScale: Font scale. See the  :ocv:func:`putText`  for details.
@@ -233,6 +239,12 @@ Calculates the width and height of a text string.
 
     :param baseLine: Output parameter - y-coordinate of the baseline relative to the bottom-most text point.
 
+    :param baseline: Output parameter - y-coordinate of the baseline relative to the bottom-most text point.
+
+    :param font: Font description in terms of old C API.
+
+    :param text_size: Output parameter - The size of a box that contains the specified text.
+
 The function ``getTextSize`` calculates and returns the size of a box that contains the specified text.
 That is, the following code renders some text, the tight box surrounding it, and the baseline: ::
 
@@ -547,6 +559,12 @@ The function draws contour outlines in the image if
         waitKey(0);
     }
 
+.. note::
+
+   * An example using the drawContour functionality can be found at opencv_source_code/samples/cpp/contours2.cpp
+   * An example using drawContours to clean up a background segmentation result at opencv_source_code/samples/cpp/segment_objects.cpp
+
+   * (Python) An example using the drawContour functionality can be found at opencv_source/samples/python2/contours.py
 
 
 putText
@@ -584,4 +602,3 @@ The function ``putText`` renders the specified text string in the image.
 Symbols that cannot be rendered using the specified font are
 replaced by question marks. See
 :ocv:func:`getTextSize` for a text rendering code example.
-
diff --git a/modules/core/doc/dynamic_structures.rst b/modules/core/doc/dynamic_structures.rst
index 1f2abd480..0f3df5d12 100644
--- a/modules/core/doc/dynamic_structures.rst
+++ b/modules/core/doc/dynamic_structures.rst
@@ -1580,4 +1580,3 @@ Gathers all node pointers to a single sequence.
     :param storage: Container for the sequence
 
 The function puts pointers of all nodes reachable from  ``first`` into a single sequence. The pointers are written sequentially in the depth-first order.
-
diff --git a/modules/core/doc/intro.rst b/modules/core/doc/intro.rst
index 582f1d00d..6d9fdfca5 100644
--- a/modules/core/doc/intro.rst
+++ b/modules/core/doc/intro.rst
@@ -83,17 +83,22 @@ First of all, ``std::vector``, ``Mat``, and other data structures used by the fu
     // matrix will be deallocated, since it is not referenced by anyone
     C = C.clone();
 
-You see that the use of ``Mat`` and other basic structures is simple. But what about high-level classes or even user data types created without taking automatic memory management into account? For them, OpenCV offers the ``Ptr<>`` template class that is similar to ``std::shared_ptr`` from C++ TR1. So, instead of using plain pointers::
+You see that the use of ``Mat`` and other basic structures is simple. But what about high-level classes or even user
+data types created without taking automatic memory management into account? For them, OpenCV offers the :ocv:class:`Ptr`
+template class that is similar to ``std::shared_ptr`` from C++11. So, instead of using plain pointers::
 
    T* ptr = new T(...);
 
 you can use::
 
-   Ptr<T> ptr = new T(...);
+   Ptr<T> ptr(new T(...));
 
-That is, ``Ptr<T> ptr`` encapsulates a pointer to a ``T`` instance and a reference counter associated with the pointer. See the
-:ocv:class:`Ptr`
-description for details.
+or::
+
+   Ptr<T> ptr = makePtr<T>(...);
+
+``Ptr<T>`` encapsulates a pointer to a ``T`` instance and a reference counter associated with the pointer. See the
+:ocv:class:`Ptr` description for details.
 
 .. _AutomaticAllocation:
 
diff --git a/modules/core/doc/old_basic_structures.rst b/modules/core/doc/old_basic_structures.rst
index d86daa4a5..906306dcb 100644
--- a/modules/core/doc/old_basic_structures.rst
+++ b/modules/core/doc/old_basic_structures.rst
@@ -1677,4 +1677,3 @@ For example, `NumPy <http://numpy.scipy.org/>`_ arrays support the array interfa
     (480, 640, 3) 1
 
 .. note:: In the new Python wrappers (**cv2** module) the function is not needed, since cv2 can process  Numpy arrays (and this is the only supported array type).
-
diff --git a/modules/core/doc/opengl_interop.rst b/modules/core/doc/opengl_interop.rst
new file mode 100644
index 000000000..5267c4d45
--- /dev/null
+++ b/modules/core/doc/opengl_interop.rst
@@ -0,0 +1,543 @@
+OpenGL interoperability
+=======================
+
+.. highlight:: cpp
+
+
+
+General Information
+-------------------
+This section describes OpenGL interoperability.
+
+To enable OpenGL support, configure OpenCV using ``CMake`` with ``WITH_OPENGL=ON`` .
+Currently OpenGL is supported only with WIN32, GTK and Qt backends on Windows and Linux (MacOS and Android are not supported).
+For GTK backend ``gtkglext-1.0`` library is required.
+
+To use OpenGL functionality you should first create OpenGL context (window or frame buffer).
+You can do this with :ocv:func:`namedWindow` function or with other OpenGL toolkit (GLUT, for example).
+
+
+
+ogl::Buffer
+-----------
+Smart pointer for OpenGL buffer object with reference counting.
+
+.. ocv:class:: ogl::Buffer
+
+Buffer Objects are OpenGL objects that store an array of unformatted memory allocated by the OpenGL context.
+These can be used to store vertex data, pixel data retrieved from images or the framebuffer, and a variety of other things.
+
+``ogl::Buffer`` has interface similar with :ocv:class:`Mat` interface and represents 2D array memory.
+
+``ogl::Buffer`` supports memory transfers between host and device and also can be mapped to CUDA memory.
+
+
+
+ogl::Buffer::Target
+-------------------
+The target defines how you intend to use the buffer object.
+
+.. ocv:enum:: ogl::Buffer::Target
+
+    .. ocv:emember:: ARRAY_BUFFER
+
+        The buffer will be used as a source for vertex data.
+
+    .. ocv:emember:: ELEMENT_ARRAY_BUFFER
+
+        The buffer will be used for indices (in ``glDrawElements`` or :ocv:func:`ogl::render`, for example).
+
+    .. ocv:emember:: PIXEL_PACK_BUFFER
+
+        The buffer will be used for reading from OpenGL textures.
+
+    .. ocv:emember:: PIXEL_UNPACK_BUFFER
+
+        The buffer will be used for writing to OpenGL textures.
+
+
+
+ogl::Buffer::Buffer
+-------------------
+The constructors.
+
+.. ocv:function:: ogl::Buffer::Buffer()
+
+.. ocv:function:: ogl::Buffer::Buffer(int arows, int acols, int atype, unsigned int abufId, bool autoRelease = false)
+
+.. ocv:function:: ogl::Buffer::Buffer(Size asize, int atype, unsigned int abufId, bool autoRelease = false)
+
+.. ocv:function:: ogl::Buffer::Buffer(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false)
+
+.. ocv:function:: ogl::Buffer::Buffer(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false)
+
+.. ocv:function:: ogl::Buffer::Buffer(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false)
+
+    :param arows: Number of rows in a 2D array.
+
+    :param acols: Number of columns in a 2D array.
+
+    :param asize: 2D array size.
+
+    :param atype: Array type ( ``CV_8UC1, ..., CV_64FC4`` ). See :ocv:class:`Mat` for details.
+
+    :param abufId: Buffer object name.
+
+    :param arr: Input array (host or device memory, it can be :ocv:class:`Mat` , :ocv:class:`gpu::GpuMat` or ``std::vector`` ).
+
+    :param target: Buffer usage. See :ocv:enum:`ogl::Buffer::Target` .
+
+    :param autoRelease: Auto release mode (if true, release will be called in object's destructor).
+
+Creates empty ``ogl::Buffer`` object, creates ``ogl::Buffer`` object from existed buffer ( ``abufId`` parameter),
+allocates memory for ``ogl::Buffer`` object or copies from host/device memory.
+
+
+
+ogl::Buffer::create
+-------------------
+Allocates memory for ``ogl::Buffer`` object.
+
+.. ocv:function:: void ogl::Buffer::create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false)
+
+.. ocv:function:: void ogl::Buffer::create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false)
+
+    :param arows: Number of rows in a 2D array.
+
+    :param acols: Number of columns in a 2D array.
+
+    :param asize: 2D array size.
+
+    :param atype: Array type ( ``CV_8UC1, ..., CV_64FC4`` ). See :ocv:class:`Mat` for details.
+
+    :param target: Buffer usage. See :ocv:enum:`ogl::Buffer::Target` .
+
+    :param autoRelease: Auto release mode (if true, release will be called in object's destructor).
+
+
+
+ogl::Buffer::release
+--------------------
+Decrements the reference counter and destroys the buffer object if needed.
+
+.. ocv:function:: void ogl::Buffer::release()
+
+The function will call `setAutoRelease(true)` .
+
+
+
+ogl::Buffer::setAutoRelease
+---------------------------
+Sets auto release mode.
+
+.. ocv:function:: void ogl::Buffer::setAutoRelease(bool flag)
+
+    :param flag: Auto release mode (if true, release will be called in object's destructor).
+
+The lifetime of the OpenGL object is tied to the lifetime of the context.
+If OpenGL context was bound to a window it could be released at any time (user can close a window).
+If object's destructor is called after destruction of the context it will cause an error.
+Thus ``ogl::Buffer`` doesn't destroy OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL context).
+This function can force ``ogl::Buffer`` destructor to destroy OpenGL object.
+
+
+
+ogl::Buffer::copyFrom
+---------------------
+Copies from host/device memory to OpenGL buffer.
+
+.. ocv:function:: void ogl::Buffer::copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false)
+
+    :param arr: Input array (host or device memory, it can be :ocv:class:`Mat` , :ocv:class:`gpu::GpuMat` or ``std::vector`` ).
+
+    :param target: Buffer usage. See :ocv:enum:`ogl::Buffer::Target` .
+
+    :param autoRelease: Auto release mode (if true, release will be called in object's destructor).
+
+
+
+ogl::Buffer::copyTo
+-------------------
+Copies from OpenGL buffer to host/device memory or another OpenGL buffer object.
+
+.. ocv:function:: void ogl::Buffer::copyTo(OutputArray arr) const
+
+    :param arr: Destination array (host or device memory, can be :ocv:class:`Mat` , :ocv:class:`gpu::GpuMat` , ``std::vector`` or ``ogl::Buffer`` ).
+
+
+
+ogl::Buffer::clone
+------------------
+Creates a full copy of the buffer object and the underlying data.
+
+.. ocv:function:: Buffer ogl::Buffer::clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const
+
+    :param target: Buffer usage for destination buffer.
+
+    :param autoRelease: Auto release mode for destination buffer.
+
+
+
+ogl::Buffer::bind
+-----------------
+Binds OpenGL buffer to the specified buffer binding point.
+
+.. ocv:function:: void ogl::Buffer::bind(Target target) const
+
+    :param target: Binding point. See :ocv:enum:`ogl::Buffer::Target` .
+
+
+
+ogl::Buffer::unbind
+-------------------
+Unbind any buffers from the specified binding point.
+
+.. ocv:function:: static void ogl::Buffer::unbind(Target target)
+
+    :param target: Binding point. See :ocv:enum:`ogl::Buffer::Target` .
+
+
+
+ogl::Buffer::mapHost
+--------------------
+Maps OpenGL buffer to host memory.
+
+.. ocv:function:: Mat ogl::Buffer::mapHost(Access access)
+
+    :param access: Access policy, indicating whether it will be possible to read from, write to, or both read from and write to the buffer object's mapped data store. The symbolic constant must be ``ogl::Buffer::READ_ONLY`` , ``ogl::Buffer::WRITE_ONLY`` or ``ogl::Buffer::READ_WRITE`` .
+
+``mapHost`` maps to the client's address space the entire data store of the buffer object.
+The data can then be directly read and/or written relative to the returned pointer, depending on the specified ``access`` policy.
+
+A mapped data store must be unmapped with :ocv:func:`ogl::Buffer::unmapHost` before its buffer object is used.
+
+This operation can lead to memory transfers between host and device.
+
+Only one buffer object can be mapped at a time.
+
+
+
+ogl::Buffer::unmapHost
+----------------------
+Unmaps OpenGL buffer.
+
+.. ocv:function:: void ogl::Buffer::unmapHost()
+
+
+
+ogl::Buffer::mapDevice
+----------------------
+Maps OpenGL buffer to CUDA device memory.
+
+.. ocv:function:: gpu::GpuMat ogl::Buffer::mapDevice()
+
+This operatation doesn't copy data.
+Several buffer objects can be mapped to CUDA memory at a time.
+
+A mapped data store must be unmapped with :ocv:func:`ogl::Buffer::unmapDevice` before its buffer object is used.
+
+
+
+ogl::Buffer::unmapDevice
+------------------------
+Unmaps OpenGL buffer.
+
+.. ocv:function:: void ogl::Buffer::unmapDevice()
+
+
+
+ogl::Texture2D
+--------------
+Smart pointer for OpenGL 2D texture memory with reference counting.
+
+.. ocv:class:: ogl::Texture2D
+
+
+
+ogl::Texture2D::Format
+----------------------
+An Image Format describes the way that the images in Textures store their data.
+
+.. ocv:enum:: ogl::Texture2D::Format
+
+    .. ocv:emember:: NONE
+    .. ocv:emember:: DEPTH_COMPONENT
+    .. ocv:emember:: RGB
+    .. ocv:emember:: RGBA
+
+
+
+ogl::Texture2D::Texture2D
+-------------------------
+The constructors.
+
+.. ocv:function:: ogl::Texture2D::Texture2D()
+
+.. ocv:function:: ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, unsigned int atexId, bool autoRelease = false)
+
+.. ocv:function:: ogl::Texture2D::Texture2D(Size asize, Format aformat, unsigned int atexId, bool autoRelease = false)
+
+.. ocv:function:: ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease = false)
+
+.. ocv:function:: ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease = false)
+
+.. ocv:function:: ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease = false)
+
+    :param arows: Number of rows.
+
+    :param acols: Number of columns.
+
+    :param asize: 2D array size.
+
+    :param aformat: Image format. See :ocv:enum:`ogl::Texture2D::Format` .
+
+    :param arr: Input array (host or device memory, it can be :ocv:class:`Mat` , :ocv:class:`gpu::GpuMat` or :ocv:class:`ogl::Buffer` ).
+
+    :param autoRelease: Auto release mode (if true, release will be called in object's destructor).
+
+Creates empty ``ogl::Texture2D`` object, allocates memory for ``ogl::Texture2D`` object or copies from host/device memory.
+
+
+
+ogl::Texture2D::create
+----------------------
+Allocates memory for ``ogl::Texture2D`` object.
+
+.. ocv:function:: void ogl::Texture2D::create(int arows, int acols, Format aformat, bool autoRelease = false)
+
+.. ocv:function:: void ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease = false)
+
+    :param arows: Number of rows.
+
+    :param acols: Number of columns.
+
+    :param asize: 2D array size.
+
+    :param aformat: Image format. See :ocv:enum:`ogl::Texture2D::Format` .
+
+    :param autoRelease: Auto release mode (if true, release will be called in object's destructor).
+
+
+
+ogl::Texture2D::release
+-----------------------
+Decrements the reference counter and destroys the texture object if needed.
+
+.. ocv:function:: void ogl::Texture2D::release()
+
+The function will call `setAutoRelease(true)` .
+
+
+
+ogl::Texture2D::setAutoRelease
+------------------------------
+Sets auto release mode.
+
+.. ocv:function:: void ogl::Texture2D::setAutoRelease(bool flag)
+
+    :param flag: Auto release mode (if true, release will be called in object's destructor).
+
+The lifetime of the OpenGL object is tied to the lifetime of the context.
+If OpenGL context was bound to a window it could be released at any time (user can close a window).
+If object's destructor is called after destruction of the context it will cause an error.
+Thus ``ogl::Texture2D`` doesn't destroy OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL context).
+This function can force ``ogl::Texture2D`` destructor to destroy OpenGL object.
+
+
+
+ogl::Texture2D::copyFrom
+------------------------
+Copies from host/device memory to OpenGL texture.
+
+.. ocv:function:: void ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease = false)
+
+    :param arr: Input array (host or device memory, it can be :ocv:class:`Mat` , :ocv:class:`gpu::GpuMat` or :ocv:class:`ogl::Buffer` ).
+
+    :param autoRelease: Auto release mode (if true, release will be called in object's destructor).
+
+
+
+ogl::Texture2D::copyTo
+----------------------
+Copies from OpenGL texture to host/device memory or another OpenGL texture object.
+
+.. ocv:function:: void ogl::Texture2D::copyTo(OutputArray arr, int ddepth = CV_32F, bool autoRelease = false) const
+
+    :param arr: Destination array (host or device memory, can be :ocv:class:`Mat` , :ocv:class:`gpu::GpuMat` , :ocv:class:`ogl::Buffer` or ``ogl::Texture2D`` ).
+
+    :param ddepth: Destination depth.
+
+    :param autoRelease: Auto release mode for destination buffer (if ``arr`` is OpenGL buffer or texture).
+
+
+
+ogl::Texture2D::bind
+--------------------
+Binds texture to current active texture unit for ``GL_TEXTURE_2D`` target.
+
+.. ocv:function:: void ogl::Texture2D::bind() const
+
+
+
+ogl::Arrays
+-----------
+Wrapper for OpenGL Client-Side Vertex arrays.
+
+.. ocv:class:: ogl::Arrays
+
+``ogl::Arrays`` stores vertex data in :ocv:class:`ogl::Buffer` objects.
+
+
+
+ogl::Arrays::setVertexArray
+---------------------------
+Sets an array of vertex coordinates.
+
+.. ocv:function:: void ogl::Arrays::setVertexArray(InputArray vertex)
+
+    :param vertex: array with vertex coordinates, can be both host and device memory.
+
+
+
+ogl::Arrays::resetVertexArray
+-----------------------------
+Resets vertex coordinates.
+
+.. ocv:function:: void ogl::Arrays::resetVertexArray()
+
+
+
+ogl::Arrays::setColorArray
+--------------------------
+Sets an array of vertex colors.
+
+.. ocv:function:: void ogl::Arrays::setColorArray(InputArray color)
+
+    :param color: array with vertex colors, can be both host and device memory.
+
+
+
+ogl::Arrays::resetColorArray
+----------------------------
+Resets vertex colors.
+
+.. ocv:function:: void ogl::Arrays::resetColorArray()
+
+
+
+ogl::Arrays::setNormalArray
+---------------------------
+Sets an array of vertex normals.
+
+.. ocv:function:: void ogl::Arrays::setNormalArray(InputArray normal)
+
+    :param normal: array with vertex normals, can be both host and device memory.
+
+
+
+ogl::Arrays::resetNormalArray
+-----------------------------
+Resets vertex normals.
+
+.. ocv:function:: void ogl::Arrays::resetNormalArray()
+
+
+
+ogl::Arrays::setTexCoordArray
+-----------------------------
+Sets an array of vertex texture coordinates.
+
+.. ocv:function:: void ogl::Arrays::setTexCoordArray(InputArray texCoord)
+
+    :param texCoord: array with vertex texture coordinates, can be both host and device memory.
+
+
+
+ogl::Arrays::resetTexCoordArray
+-------------------------------
+Resets vertex texture coordinates.
+
+.. ocv:function:: void ogl::Arrays::resetTexCoordArray()
+
+
+
+ogl::Arrays::release
+--------------------
+Releases all inner buffers.
+
+.. ocv:function:: void ogl::Arrays::release()
+
+
+
+ogl::Arrays::setAutoRelease
+---------------------------
+Sets auto release mode all inner buffers.
+
+.. ocv:function:: void ogl::Arrays::setAutoRelease(bool flag)
+
+    :param flag: Auto release mode.
+
+
+
+ogl::Arrays::bind
+-----------------
+Binds all vertex arrays.
+
+.. ocv:function:: void ogl::Arrays::bind() const
+
+
+
+ogl::Arrays::size
+-----------------
+Returns the vertex count.
+
+.. ocv:function:: int ogl::Arrays::size() const
+
+
+
+ogl::render
+-----------
+Render OpenGL texture or primitives.
+
+.. ocv:function:: void ogl::render(const Texture2D& tex, Rect_<double> wndRect = Rect_<double>(0.0, 0.0, 1.0, 1.0), Rect_<double> texRect = Rect_<double>(0.0, 0.0, 1.0, 1.0))
+
+.. ocv:function:: void ogl::render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255))
+
+.. ocv:function:: void ogl::render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255))
+
+    :param tex: Texture to draw.
+
+    :param wndRect: Region of window, where to draw a texture (normalized coordinates).
+
+    :param texRect: Region of texture to draw (normalized coordinates).
+
+    :param arr: Array of privitives vertices.
+
+    :param indices: Array of vertices indices (host or device memory).
+
+    :param mode: Render mode. Available options:
+
+        * **POINTS**
+        * **LINES**
+        * **LINE_LOOP**
+        * **LINE_STRIP**
+        * **TRIANGLES**
+        * **TRIANGLE_STRIP**
+        * **TRIANGLE_FAN**
+        * **QUADS**
+        * **QUAD_STRIP**
+        * **POLYGON**
+
+    :param color: Color for all vertices. Will be used if ``arr`` doesn't contain color array.
+
+
+
+gpu::setGlDevice
+----------------
+Sets a CUDA device and initializes it for the current thread with OpenGL interoperability.
+
+.. ocv:function:: void gpu::setGlDevice( int device = 0 )
+
+    :param device: System index of a GPU device starting with 0.
+
+This function should be explicitly called after OpenGL context creation and before any CUDA calls.
diff --git a/modules/core/doc/operations_on_arrays.rst b/modules/core/doc/operations_on_arrays.rst
index d2c0bba14..d38f57ac3 100644
--- a/modules/core/doc/operations_on_arrays.rst
+++ b/modules/core/doc/operations_on_arrays.rst
@@ -512,7 +512,7 @@ Performs the per-element comparison of two arrays or an array and scalar value.
 
     :param value: scalar value.
 
-    :param dst: output array that has the same size as the input arrays and type= ``CV_8UC1`` .
+    :param dst: output array that has the same size and type as the input arrays.
 
     :param cmpop: a flag, that specifies correspondence between the arrays:
 
@@ -971,6 +971,12 @@ All of the above improvements have been implemented in :ocv:func:`matchTemplate`
 
 .. seealso:: :ocv:func:`dct` , :ocv:func:`getOptimalDFTSize` , :ocv:func:`mulSpectrums`, :ocv:func:`filter2D` , :ocv:func:`matchTemplate` , :ocv:func:`flip` , :ocv:func:`cartToPolar` , :ocv:func:`magnitude` , :ocv:func:`phase`
 
+.. note::
+
+   * An example using the discrete fourier transform can be found at opencv_source_code/samples/cpp/dft.cpp
+
+   * (Python) An example using the dft functionality to perform Wiener deconvolution can be found at opencv_source/samples/python2/deconvolution.py
+   * (Python) An example rearranging the quadrants of a Fourier image can be found at opencv_source/samples/python2/dft.py
 
 
 divide
@@ -1033,6 +1039,8 @@ Returns the determinant of a square floating-point matrix.
 
     :param mtx: input matrix that must have ``CV_32FC1`` or ``CV_64FC1`` type and square size.
 
+    :param mat: input matrix that must have ``CV_32FC1`` or ``CV_64FC1`` type and square size.
+
 The function ``determinant`` calculates and returns the determinant of the specified matrix. For small matrices ( ``mtx.cols=mtx.rows<=3`` ),
 the direct method is used. For larger matrices, the function uses LU factorization with partial pivoting.
 
@@ -2159,7 +2167,9 @@ The sample below is the function that takes two matrices. The first function sto
     :ocv:func:`dft`,
     :ocv:func:`dct`
 
+.. note::
 
+   * An example using PCA for dimensionality reduction while maintaining an amount of variance can be found at opencv_source_code/samples/cpp/pca.cpp
 
 PCA::PCA
 --------
diff --git a/modules/core/doc/utility_and_system_functions_and_macros.rst b/modules/core/doc/utility_and_system_functions_and_macros.rst
index 9cecb11a0..d5052fa5a 100644
--- a/modules/core/doc/utility_and_system_functions_and_macros.rst
+++ b/modules/core/doc/utility_and_system_functions_and_macros.rst
@@ -165,6 +165,8 @@ Checks a condition at runtime and throws exception if it fails
 
 .. ocv:function:: CV_Assert(expr)
 
+    :param expr: Expression for check.
+
 The macros ``CV_Assert`` (and ``CV_DbgAssert``) evaluate the specified expression. If it is 0, the macros raise an error (see :ocv:func:`error` ). The macro ``CV_Assert`` checks the condition in both Debug and Release configurations while ``CV_DbgAssert`` is only retained in the Debug configuration.
 
 
@@ -180,8 +182,14 @@ Signals an error and raises an exception.
 
     :param status: Error code. Normally, it is a negative value. The list of pre-defined error codes can be found in  ``cxerror.h`` .
 
+    :param func_name: The function name where error occurs.
+
     :param err_msg: Text of the error message.
 
+    :param file_name: The file name where error occurs.
+
+    :param line: The line number where error occurs.
+
     :param args: ``printf`` -like formatted error message in parentheses.
 
 The function and the helper macros ``CV_Error`` and ``CV_Error_``: ::
@@ -241,6 +249,7 @@ Allocates an aligned memory buffer.
 .. ocv:cfunction:: void* cvAlloc( size_t size )
 
     :param size: Allocated buffer size.
+    :param bufSize: Allocated buffer size.
 
 The function allocates the buffer of the specified size and returns it. When the buffer size is 16 bytes or more, the returned buffer is aligned to 16 bytes.
 
diff --git a/modules/core/doc/xml_yaml_persistence.rst b/modules/core/doc/xml_yaml_persistence.rst
index b3938b75c..cf3cfb447 100644
--- a/modules/core/doc/xml_yaml_persistence.rst
+++ b/modules/core/doc/xml_yaml_persistence.rst
@@ -11,7 +11,7 @@ You can store and then restore various OpenCV data structures to/from XML (http:
 
 Use the following procedure to write something to XML or YAML:
  #. Create new :ocv:class:`FileStorage` and open it for writing. It can be done with a single call to :ocv:func:`FileStorage::FileStorage` constructor that takes a filename, or you can use the default constructor and then call :ocv:func:`FileStorage::open`. Format of the file (XML or YAML) is determined from the filename extension (".xml" and ".yml"/".yaml", respectively)
- #. Write all the data you want using the streaming operator ``>>``, just like in the case of STL streams.
+ #. Write all the data you want using the streaming operator ``<<``, just like in the case of STL streams.
  #. Close the file using :ocv:func:`FileStorage::release`. ``FileStorage`` destructor also closes the file.
 
 Here is an example: ::
@@ -91,6 +91,10 @@ Several things can be noted by looking at the sample code and the output:
  *
    In YAML (but not XML), mappings and sequences can be written in a compact Python-like inline form. In the sample above matrix elements, as well as each feature, including its lbp value, is stored in such inline form. To store a mapping/sequence in a compact form, put ":" after the opening character, e.g. use **"{:"** instead of **"{"** and **"[:"** instead of **"["**. When the data is written to XML, those extra ":" are ignored.
 
+.. note::
+
+   * A complete example using the FileStorage interface can be found at opencv_source_code/samples/cpp/filestorage.cpp
+
 
 Reading data from a file storage.
 ---------------------------------
@@ -181,6 +185,17 @@ Opens a file.
 
 .. ocv:function:: bool FileStorage::open(const String& filename, int flags, const String& encoding=String())
 
+    :param filename: Name of the file to open or the text string to read the data from.
+                     Extension of the file (``.xml`` or ``.yml``/``.yaml``) determines its format (XML or YAML respectively).
+                     Also you can append ``.gz`` to work with compressed files, for example ``myHugeMatrix.xml.gz``.
+                     If both ``FileStorage::WRITE`` and ``FileStorage::MEMORY`` flags are specified, ``source``
+                     is used just to specify the output file format (e.g. ``mydata.xml``, ``.yml`` etc.).
+
+    :param flags: Mode of operation. See FileStorage constructor for more details.
+
+    :param encoding: Encoding of the file. Note that UTF-16 XML encoding is not supported currently and you should use 8-bit encoding instead of it.
+
+
 See description of parameters in :ocv:func:`FileStorage::FileStorage`. The method calls :ocv:func:`FileStorage::release` before opening the file.
 
 
diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
index 9833315d5..c7f07ed45 100644
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -670,6 +670,10 @@ public:
     //! reconstructs the original vector from the projection
     void backProject(InputArray vec, OutputArray result) const;
 
+    //! write and load PCA matrix
+    void write(FileStorage& fs ) const;
+    void read(const FileNode& fs);
+
     Mat eigenvectors; //!< eigenvectors of the covariation matrix
     Mat eigenvalues; //!< eigenvalues of the covariation matrix
     Mat mean; //!< mean value subtracted before the projection and added after the back projection
diff --git a/modules/core/include/opencv2/core/affine.hpp b/modules/core/include/opencv2/core/affine.hpp
index cf7b29cad..ed8bd29a1 100644
--- a/modules/core/include/opencv2/core/affine.hpp
+++ b/modules/core/include/opencv2/core/affine.hpp
@@ -430,5 +430,3 @@ cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
 #endif /* __cplusplus */
 
 #endif /* __OPENCV_CORE_AFFINE3_HPP__ */
-
-
diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp
index 637ecdf51..013f573a6 100644
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -449,15 +449,15 @@ _AccTp normInf(const _Tp* a, const _Tp* b, int n)
 
 ////////////////// forward declarations for important OpenCV types //////////////////
 
-template<typename _Tp, int cn> class CV_EXPORTS Vec;
-template<typename _Tp, int m, int n> class CV_EXPORTS Matx;
+template<typename _Tp, int cn> class Vec;
+template<typename _Tp, int m, int n> class Matx;
 
-template<typename _Tp> class CV_EXPORTS Complex;
-template<typename _Tp> class CV_EXPORTS Point_;
-template<typename _Tp> class CV_EXPORTS Point3_;
-template<typename _Tp> class CV_EXPORTS Size_;
-template<typename _Tp> class CV_EXPORTS Rect_;
-template<typename _Tp> class CV_EXPORTS Scalar_;
+template<typename _Tp> class Complex;
+template<typename _Tp> class Point_;
+template<typename _Tp> class Point3_;
+template<typename _Tp> class Size_;
+template<typename _Tp> class Rect_;
+template<typename _Tp> class Scalar_;
 
 class CV_EXPORTS RotatedRect;
 class CV_EXPORTS Range;
@@ -472,16 +472,16 @@ class CV_EXPORTS MatExpr;
 class CV_EXPORTS SparseMat;
 typedef Mat MatND;
 
-template<typename _Tp> class CV_EXPORTS Mat_;
-template<typename _Tp> class CV_EXPORTS SparseMat_;
+template<typename _Tp> class Mat_;
+template<typename _Tp> class SparseMat_;
 
 class CV_EXPORTS MatConstIterator;
 class CV_EXPORTS SparseMatIterator;
 class CV_EXPORTS SparseMatConstIterator;
-template<typename _Tp> class CV_EXPORTS MatIterator_;
-template<typename _Tp> class CV_EXPORTS MatConstIterator_;
-template<typename _Tp> class CV_EXPORTS SparseMatIterator_;
-template<typename _Tp> class CV_EXPORTS SparseMatConstIterator_;
+template<typename _Tp> class MatIterator_;
+template<typename _Tp> class MatConstIterator_;
+template<typename _Tp> class SparseMatIterator_;
+template<typename _Tp> class SparseMatConstIterator_;
 
 namespace ogl
 {
@@ -498,6 +498,11 @@ namespace gpu
     class CV_EXPORTS Event;
 }
 
+namespace cudev
+{
+    template <typename _Tp> class GpuMat_;
+}
+
 } // cv
 
 #endif //__OPENCV_CORE_BASE_HPP__
diff --git a/modules/core/include/opencv2/core/core_c.h b/modules/core/include/opencv2/core/core_c.h
index 0ed3edec6..ca8413ee2 100644
--- a/modules/core/include/opencv2/core/core_c.h
+++ b/modules/core/include/opencv2/core/core_c.h
@@ -1882,13 +1882,13 @@ CV_EXPORTS void insertImageCOI(InputArray coiimg, CvArr* arr, int coi=-1);
 
 
 
-//////// specializied implementations of Ptr::delete_obj() for classic OpenCV types ////////
+////// specialized implementations of DefaultDeleter::operator() for classic OpenCV types //////
 
-template<> CV_EXPORTS void Ptr<CvMat>::delete_obj();
-template<> CV_EXPORTS void Ptr<IplImage>::delete_obj();
-template<> CV_EXPORTS void Ptr<CvMatND>::delete_obj();
-template<> CV_EXPORTS void Ptr<CvSparseMat>::delete_obj();
-template<> CV_EXPORTS void Ptr<CvMemStorage>::delete_obj();
+template<> CV_EXPORTS void DefaultDeleter<CvMat>::operator ()(CvMat* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<IplImage>::operator ()(IplImage* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvMatND>::operator ()(CvMatND* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvSparseMat>::operator ()(CvSparseMat* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvMemStorage>::operator ()(CvMemStorage* obj) const;
 
 ////////////// convenient wrappers for operating old-style dynamic structures //////////////
 
@@ -1906,7 +1906,7 @@ typedef Ptr<CvMemStorage> MemStorage;
     i.e. no constructors or destructors
     are called for the sequence elements.
 */
-template<typename _Tp> class CV_EXPORTS Seq
+template<typename _Tp> class Seq
 {
 public:
     typedef SeqIterator<_Tp> iterator;
@@ -1989,7 +1989,7 @@ public:
 /*!
  STL-style Sequence Iterator inherited from the CvSeqReader structure
 */
-template<typename _Tp> class CV_EXPORTS SeqIterator : public CvSeqReader
+template<typename _Tp> class SeqIterator : public CvSeqReader
 {
 public:
     //! the default constructor
diff --git a/modules/core/include/opencv2/core/cuda/color.hpp b/modules/core/include/opencv2/core/cuda/color.hpp
index a2b772d8b..d5f94c92a 100644
--- a/modules/core/include/opencv2/core/cuda/color.hpp
+++ b/modules/core/include/opencv2/core/cuda/color.hpp
@@ -107,25 +107,25 @@ namespace cv { namespace gpu { namespace cudev
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS
 
-    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 0)
-    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 0)
-    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 0)
-    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 0)
-    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 2)
-    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 2)
-    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 2)
-    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 2)
+    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
+    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
+    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
+    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
+    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
+    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
+    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
+    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS
 
-    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 0)
-    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 0)
-    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 0)
-    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 0)
-    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 2)
-    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 2)
-    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 2)
-    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 2)
+    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
+    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
+    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
+    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
+    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
+    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
+    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
+    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS
 
diff --git a/modules/core/include/opencv2/core/cuda/limits.hpp b/modules/core/include/opencv2/core/cuda/limits.hpp
index 4b265da0e..0439de795 100644
--- a/modules/core/include/opencv2/core/cuda/limits.hpp
+++ b/modules/core/include/opencv2/core/cuda/limits.hpp
@@ -43,193 +43,80 @@
 #ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
 #define __OPENCV_GPU_LIMITS_GPU_HPP__
 
-#include <limits>
+#include <limits.h>
+#include <float.h>
 #include "common.hpp"
 
 namespace cv { namespace gpu { namespace cudev
 {
-    template<class T> struct numeric_limits
-    {
-        typedef T type;
-        __device__ __forceinline__ static type min()  { return type(); };
-        __device__ __forceinline__ static type max() { return type(); };
-        __device__ __forceinline__ static type epsilon() { return type(); }
-        __device__ __forceinline__ static type round_error() { return type(); }
-        __device__ __forceinline__ static type denorm_min()  { return type(); }
-        __device__ __forceinline__ static type infinity() { return type(); }
-        __device__ __forceinline__ static type quiet_NaN() { return type(); }
-        __device__ __forceinline__ static type signaling_NaN() { return T(); }
-        static const bool is_signed;
-    };
 
-    template<> struct numeric_limits<bool>
-    {
-        typedef bool type;
-        __device__ __forceinline__ static type min() { return false; };
-        __device__ __forceinline__ static type max() { return true;  };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = false;
-    };
+template <class T> struct numeric_limits;
 
-    template<> struct numeric_limits<char>
-    {
-        typedef char type;
-        __device__ __forceinline__ static type min() { return CHAR_MIN; };
-        __device__ __forceinline__ static type max() { return CHAR_MAX; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = (char)-1 == -1;
-    };
+template <> struct numeric_limits<bool>
+{
+    __device__ __forceinline__ static bool min() { return false; }
+    __device__ __forceinline__ static bool max() { return true;  }
+    static const bool is_signed = false;
+};
 
-    template<> struct numeric_limits<signed char>
-    {
-        typedef char type;
-        __device__ __forceinline__ static type min() { return SCHAR_MIN; };
-        __device__ __forceinline__ static type max() { return SCHAR_MAX; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = (signed char)-1 == -1;
-    };
+template <> struct numeric_limits<signed char>
+{
+    __device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
+    __device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
+    static const bool is_signed = true;
+};
 
-    template<> struct numeric_limits<unsigned char>
-    {
-        typedef unsigned char type;
-        __device__ __forceinline__ static type min() { return 0; };
-        __device__ __forceinline__ static type max() { return UCHAR_MAX; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = false;
-    };
+template <> struct numeric_limits<unsigned char>
+{
+    __device__ __forceinline__ static unsigned char min() { return 0; }
+    __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
+    static const bool is_signed = false;
+};
 
-    template<> struct numeric_limits<short>
-    {
-        typedef short type;
-        __device__ __forceinline__ static type min() { return SHRT_MIN; };
-        __device__ __forceinline__ static type max() { return SHRT_MAX; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = true;
-    };
+template <> struct numeric_limits<short>
+{
+    __device__ __forceinline__ static short min() { return SHRT_MIN; }
+    __device__ __forceinline__ static short max() { return SHRT_MAX; }
+    static const bool is_signed = true;
+};
 
-    template<> struct numeric_limits<unsigned short>
-    {
-        typedef unsigned short type;
-        __device__ __forceinline__ static type min() { return 0; };
-        __device__ __forceinline__ static type max() { return USHRT_MAX; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = false;
-    };
+template <> struct numeric_limits<unsigned short>
+{
+    __device__ __forceinline__ static unsigned short min() { return 0; }
+    __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
+    static const bool is_signed = false;
+};
 
-    template<> struct numeric_limits<int>
-    {
-        typedef int type;
-        __device__ __forceinline__ static type min() { return INT_MIN; };
-        __device__ __forceinline__ static type max() { return INT_MAX; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = true;
-    };
+template <> struct numeric_limits<int>
+{
+    __device__ __forceinline__ static int min() { return INT_MIN; }
+    __device__ __forceinline__ static int max() { return INT_MAX; }
+    static const bool is_signed = true;
+};
 
+template <> struct numeric_limits<unsigned int>
+{
+    __device__ __forceinline__ static unsigned int min() { return 0; }
+    __device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
+    static const bool is_signed = false;
+};
 
-    template<> struct numeric_limits<unsigned int>
-    {
-        typedef unsigned int type;
-        __device__ __forceinline__ static type min() { return 0; };
-        __device__ __forceinline__ static type max() { return UINT_MAX; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = false;
-    };
+template <> struct numeric_limits<float>
+{
+    __device__ __forceinline__ static float min() { return FLT_MIN; }
+    __device__ __forceinline__ static float max() { return FLT_MAX; }
+    __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
+    static const bool is_signed = true;
+};
 
-    template<> struct numeric_limits<long>
-    {
-        typedef long type;
-        __device__ __forceinline__ static type min() { return LONG_MIN; };
-        __device__ __forceinline__ static type max() { return LONG_MAX; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = true;
-    };
+template <> struct numeric_limits<double>
+{
+    __device__ __forceinline__ static double min() { return DBL_MIN; }
+    __device__ __forceinline__ static double max() { return DBL_MAX; }
+    __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
+    static const bool is_signed = true;
+};
 
-    template<> struct numeric_limits<unsigned long>
-    {
-        typedef unsigned long type;
-        __device__ __forceinline__ static type min() { return 0; };
-        __device__ __forceinline__ static type max() { return ULONG_MAX; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = false;
-    };
-
-    template<> struct numeric_limits<float>
-    {
-        typedef float type;
-        __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
-        __device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; };
-        __device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; };
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = true;
-    };
-
-    template<> struct numeric_limits<double>
-    {
-        typedef double type;
-        __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
-        __device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; };
-        __device__ __forceinline__ static type epsilon();
-        __device__ __forceinline__ static type round_error();
-        __device__ __forceinline__ static type denorm_min();
-        __device__ __forceinline__ static type infinity();
-        __device__ __forceinline__ static type quiet_NaN();
-        __device__ __forceinline__ static type signaling_NaN();
-        static const bool is_signed = true;
-    };
 }}} // namespace cv { namespace gpu { namespace cudev {
 
 #endif // __OPENCV_GPU_LIMITS_GPU_HPP__
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index 2fba1351f..46f294962 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -84,7 +84,7 @@
 #ifndef CV_INLINE
 #  if defined __cplusplus
 #    define CV_INLINE static inline
-#  elif (defined WIN32 || defined _WIN32 || defined WINCE) && !defined __GNUC__
+#  elif defined _MSC_VER
 #    define CV_INLINE __inline
 #  else
 #    define CV_INLINE static
@@ -149,7 +149,12 @@
 #  endif
 #endif
 
-#ifdef __ARM_NEON__
+#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
+# include <Intrin.h>
+# include "arm_neon.h"
+# define CV_NEON 1
+# define CPU_HAS_NEON_FEATURE (true)
+#elif defined(__ARM_NEON__)
 #  include <arm_neon.h>
 #  define CV_NEON 1
 #endif
@@ -196,8 +201,10 @@
 #if !defined _MSC_VER && !defined __BORLANDC__
 #  if defined __cplusplus && __cplusplus >= 201103L
 #    include <cstdint>
+     typedef std::uint32_t uint;
 #  else
 #    include <stdint.h>
+     typedef uint32_t uint;
 #  endif
 #else
    typedef unsigned uint;
@@ -364,7 +371,7 @@ CV_INLINE int cvRound( double value )
     return t;
 #elif defined _MSC_VER && defined _M_ARM && defined HAVE_TEGRA_OPTIMIZATION
     TEGRA_ROUND(value);
-#elif defined HAVE_LRINT || defined CV_ICC || defined __GNUC__
+#elif defined CV_ICC || defined __GNUC__
 #  ifdef HAVE_TEGRA_OPTIMIZATION
     TEGRA_ROUND(value);
 #  else
diff --git a/modules/core/include/opencv2/core/cvstd.hpp b/modules/core/include/opencv2/core/cvstd.hpp
index 0232c6d08..afdeb2549 100644
--- a/modules/core/include/opencv2/core/cvstd.hpp
+++ b/modules/core/include/opencv2/core/cvstd.hpp
@@ -127,7 +127,7 @@ CV_EXPORTS void fastFree(void* ptr);
 /*!
   The STL-compilant memory Allocator based on cv::fastMalloc() and cv::fastFree()
 */
-template<typename _Tp> class CV_EXPORTS Allocator
+template<typename _Tp> class Allocator
 {
 public:
     typedef _Tp value_type;
@@ -158,69 +158,176 @@ public:
     size_type max_size() const { return cv::max(static_cast<_Tp>(-1)/sizeof(_Tp), 1); }
 };
 
-
-
-//////////////////// generic_type ref-counting pointer class for C/C++ objects ////////////////////////
-
-/*!
-  Smart pointer to dynamically allocated objects.
-
-  This is template pointer-wrapping class that stores the associated reference counter along with the
-  object pointer. The class is similar to std::smart_ptr<> from the recent addons to the C++ standard,
-  but is shorter to write :) and self-contained (i.e. does add any dependency on the compiler or an external library).
-
-  Basically, you can use "Ptr<MyObjectType> ptr" (or faster "const Ptr<MyObjectType>& ptr" for read-only access)
-  everywhere instead of "MyObjectType* ptr", where MyObjectType is some C structure or a C++ class.
-  To make it all work, you need to specialize Ptr<>::delete_obj(), like:
-
-  \code
-  template<> CV_EXPORTS void Ptr<MyObjectType>::delete_obj() { call_destructor_func(obj); }
-  \endcode
-
-  \note{if MyObjectType is a C++ class with a destructor, you do not need to specialize delete_obj(),
-  since the default implementation calls "delete obj;"}
-
-  \note{Another good property of the class is that the operations on the reference counter are atomic,
-  i.e. it is safe to use the class in multi-threaded applications}
-*/
-template<typename _Tp> class CV_EXPORTS Ptr
+namespace detail
 {
-public:
-    //! empty constructor
-    Ptr();
-    //! take ownership of the pointer. The associated reference counter is allocated and set to 1
-    Ptr(_Tp* _obj);
-    //! calls release()
-    ~Ptr();
-    //! copy constructor. Copies the members and calls addref()
-    Ptr(const Ptr& ptr);
-    template<typename _Tp2> Ptr(const Ptr<_Tp2>& ptr);
-    //! copy operator. Calls ptr.addref() and release() before copying the members
-    Ptr& operator = (const Ptr& ptr);
-    //! increments the reference counter
-    void addref();
-    //! decrements the reference counter. If it reaches 0, delete_obj() is called
-    void release();
-    //! deletes the object. Override if needed
-    void delete_obj();
-    //! returns true iff obj==NULL
-    bool empty() const;
 
-    //! cast pointer to another type
-    template<typename _Tp2> Ptr<_Tp2> ptr();
-    template<typename _Tp2> const Ptr<_Tp2> ptr() const;
+// Metafunction to avoid taking a reference to void.
+template<typename T>
+struct RefOrVoid { typedef T& type; };
 
-    //! helper operators making "Ptr<T> ptr" use very similar to "T* ptr".
-    _Tp* operator -> ();
-    const _Tp* operator -> () const;
+template<>
+struct RefOrVoid<void>{ typedef void type; };
 
-    operator _Tp* ();
-    operator const _Tp*() const;
+template<>
+struct RefOrVoid<const void>{ typedef const void type; };
 
-    _Tp* obj; //< the object pointer.
-    int* refcount; //< the associated reference counter
+template<>
+struct RefOrVoid<volatile void>{ typedef volatile void type; };
+
+template<>
+struct RefOrVoid<const volatile void>{ typedef const volatile void type; };
+
+// This class would be private to Ptr, if it didn't have to be a non-template.
+struct PtrOwner;
+
+}
+
+template<typename Y>
+struct DefaultDeleter
+{
+    void operator () (Y* p) const;
 };
 
+/*
+  A smart shared pointer class with reference counting.
+
+  A Ptr<T> stores a pointer and owns a (potentially different) pointer.
+  The stored pointer has type T and is the one returned by get() et al,
+  while the owned pointer can have any type and is the one deleted
+  when there are no more Ptrs that own it. You can't directly obtain the
+  owned pointer.
+
+  The interface of this class is mostly a subset of that of C++11's
+  std::shared_ptr.
+*/
+template<typename T>
+struct Ptr
+{
+    /* Generic programming support. */
+    typedef T element_type;
+
+    /* Ptr that owns NULL and stores NULL. */
+    Ptr();
+
+    /* Ptr that owns p and stores p. The owned pointer will be deleted with
+       DefaultDeleter<Y>. Y must be a complete type and Y* must be
+       convertible to T*. */
+    template<typename Y>
+    explicit Ptr(Y* p);
+
+    /* Ptr that owns p and stores p. The owned pointer will be deleted by
+       calling d(p). Y* must be convertible to T*. */
+    template<typename Y, typename D>
+    Ptr(Y* p, D d);
+
+    /* Same as the constructor below; it exists to suppress the generation
+       of the implicit copy constructor. */
+    Ptr(const Ptr& o);
+
+    /* Ptr that owns the same pointer as o and stores the same pointer as o,
+       converted to T*. Naturally, Y* must be convertible to T*. */
+    template<typename Y>
+    Ptr(const Ptr<Y>& o);
+
+    /* Ptr that owns same pointer as o, and stores p. Useful for casts and
+       creating non-owning Ptrs. */
+    template<typename Y>
+    Ptr(const Ptr<Y>& o, T* p);
+
+    /* Equivalent to release(). */
+    ~Ptr();
+
+    /* Same as assignment below; exists to suppress the generation of the
+       implicit assignment operator. */
+    Ptr& operator = (const Ptr& o);
+
+    template<typename Y>
+    Ptr& operator = (const Ptr<Y>& o);
+
+    /* Resets both the owned and stored pointers to NULL. Deletes the owned
+       pointer with the associated deleter if it's not owned by any other
+       Ptr and is non-zero. It's called reset() in std::shared_ptr; here
+       it is release() for compatibility with old OpenCV versions. */
+    void release();
+
+    /* Equivalent to assigning from Ptr<T>(p). */
+    template<typename Y>
+    void reset(Y* p);
+
+    /* Equivalent to assigning from Ptr<T>(p, d). */
+    template<typename Y, typename D>
+    void reset(Y* p, D d);
+
+    /* Swaps the stored and owned pointers of this and o. */
+    void swap(Ptr& o);
+
+    /* Returns the stored pointer. */
+    T* get() const;
+
+    /* Ordinary pointer emulation. */
+    typename detail::RefOrVoid<T>::type operator * () const;
+    T* operator -> () const;
+
+    /* Equivalent to get(). */
+    operator T* () const;
+
+    /* Equivalent to !*this. */
+    bool empty() const;
+
+    /* Returns a Ptr that owns the same pointer as this, and stores the same
+       pointer as this, except converted via static_cast to Y*. */
+    template<typename Y>
+    Ptr<Y> staticCast() const;
+
+    /* Ditto for const_cast. */
+    template<typename Y>
+    Ptr<Y> constCast() const;
+
+    /* Ditto for dynamic_cast. */
+    template<typename Y>
+    Ptr<Y> dynamicCast() const;
+
+private:
+    detail::PtrOwner* owner;
+    T* stored;
+
+    template<typename Y>
+    friend struct Ptr; // have to do this for the cross-type copy constructor
+};
+
+/* Overload of the generic swap. */
+template<typename T>
+void swap(Ptr<T>& ptr1, Ptr<T>& ptr2);
+
+/* Obvious comparisons. */
+template<typename T>
+bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2);
+template<typename T>
+bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2);
+
+/* Convenience creation functions. In the far future, there may be variadic templates here. */
+template<typename T>
+Ptr<T> makePtr();
+template<typename T, typename A1>
+Ptr<T> makePtr(const A1& a1);
+template<typename T, typename A1, typename A2>
+Ptr<T> makePtr(const A1& a1, const A2& a2);
+template<typename T, typename A1, typename A2, typename A3>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3);
+template<typename T, typename A1, typename A2, typename A3, typename A4>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4);
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5);
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6);
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7);
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8);
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9);
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10);
 
 
 //////////////////////////////// string class ////////////////////////////////
@@ -324,176 +431,6 @@ private:
 };
 
 
-
-/////////////////////////// cv::Ptr implementation ///////////////////////////
-
-template<typename _Tp> inline
-Ptr<_Tp>::Ptr()
-    : obj(0), refcount(0) {}
-
-template<typename _Tp> inline
-Ptr<_Tp>::Ptr(_Tp* _obj)
-    : obj(_obj)
-{
-    if(obj)
-    {
-        refcount = (int*)fastMalloc(sizeof(*refcount));
-        *refcount = 1;
-    }
-    else
-        refcount = 0;
-}
-
-template<typename _Tp> template<typename _Tp2>
-Ptr<_Tp>::Ptr(const Ptr<_Tp2>& p)
-    : obj(0), refcount(0)
-{
-    if (p.empty())
-        return;
-
-    _Tp* p_casted = dynamic_cast<_Tp*>(p.obj);
-    if (!p_casted)
-        return;
-
-    obj = p_casted;
-    refcount = p.refcount;
-    addref();
-}
-
-template<typename _Tp> inline
-Ptr<_Tp>::~Ptr()
-{
-    release();
-}
-
-template<typename _Tp> inline
-void Ptr<_Tp>::addref()
-{
-    if( refcount )
-        CV_XADD(refcount, 1);
-}
-
-template<typename _Tp> inline
-void Ptr<_Tp>::release()
-{
-    if( refcount && CV_XADD(refcount, -1) == 1 )
-    {
-        delete_obj();
-        fastFree(refcount);
-    }
-    refcount = 0;
-    obj = 0;
-}
-
-template<typename _Tp> inline
-void Ptr<_Tp>::delete_obj()
-{
-    if( obj )
-        delete obj;
-}
-
-template<typename _Tp> inline
-Ptr<_Tp>::Ptr(const Ptr<_Tp>& _ptr)
-{
-    obj = _ptr.obj;
-    refcount = _ptr.refcount;
-    addref();
-}
-
-template<typename _Tp> inline
-Ptr<_Tp>& Ptr<_Tp>::operator = (const Ptr<_Tp>& _ptr)
-{
-    int* _refcount = _ptr.refcount;
-    if( _refcount )
-        CV_XADD(_refcount, 1);
-    release();
-    obj = _ptr.obj;
-    refcount = _refcount;
-    return *this;
-}
-
-template<typename _Tp> inline
-_Tp* Ptr<_Tp>::operator -> ()
-{
-    return obj;
-}
-
-template<typename _Tp> inline
-const _Tp* Ptr<_Tp>::operator -> () const
-{
-    return obj;
-}
-
-template<typename _Tp> inline
-Ptr<_Tp>::operator _Tp* ()
-{
-    return obj;
-}
-
-template<typename _Tp> inline
-Ptr<_Tp>::operator const _Tp*() const
-{
-    return obj;
-}
-
-template<typename _Tp> inline
-bool Ptr<_Tp>::empty() const
-{
-    return obj == 0;
-}
-
-template<typename _Tp> template<typename _Tp2> inline
-Ptr<_Tp2> Ptr<_Tp>::ptr()
-{
-    Ptr<_Tp2> p;
-    if( !obj )
-        return p;
-
-    _Tp2* obj_casted = dynamic_cast<_Tp2*>(obj);
-    if (!obj_casted)
-        return p;
-
-    if( refcount )
-        CV_XADD(refcount, 1);
-
-    p.obj = obj_casted;
-    p.refcount = refcount;
-    return p;
-}
-
-template<typename _Tp> template<typename _Tp2> inline
-const Ptr<_Tp2> Ptr<_Tp>::ptr() const
-{
-    Ptr<_Tp2> p;
-    if( !obj )
-        return p;
-
-    _Tp2* obj_casted = dynamic_cast<_Tp2*>(obj);
-    if (!obj_casted)
-        return p;
-
-    if( refcount )
-        CV_XADD(refcount, 1);
-
-    p.obj = obj_casted;
-    p.refcount = refcount;
-    return p;
-}
-
-template<class _Tp, class _Tp2> static inline
-bool operator == (const Ptr<_Tp>& a, const Ptr<_Tp2>& b)
-{
-    return a.refcount == b.refcount;
-}
-
-template<class _Tp, class _Tp2> static inline
-bool operator != (const Ptr<_Tp>& a, const Ptr<_Tp2>& b)
-{
-    return a.refcount != b.refcount;
-}
-
-
-
 ////////////////////////// cv::String implementation /////////////////////////
 
 inline
@@ -580,6 +517,8 @@ String::~String()
 inline
 String& String::operator=(const String& str)
 {
+    if (&str == this) return *this;
+
     deallocate();
     if (str.cstr_) CV_XADD(((int*)str.cstr_)-1, 1);
     cstr_ = str.cstr_;
@@ -938,4 +877,6 @@ namespace cv
     }
 }
 
+#include "opencv2/core/ptr.inl.hpp"
+
 #endif //__OPENCV_CORE_CVSTD_HPP__
diff --git a/modules/core/include/opencv2/core/cvstd.inl.hpp b/modules/core/include/opencv2/core/cvstd.inl.hpp
index 3f29a1b67..8642b7442 100644
--- a/modules/core/include/opencv2/core/cvstd.inl.hpp
+++ b/modules/core/include/opencv2/core/cvstd.inl.hpp
@@ -247,4 +247,4 @@ std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
 #endif // OPENCV_NOSTL
 } // cv
 
-#endif // __OPENCV_CORE_CVSTDINL_HPP__
\ No newline at end of file
+#endif // __OPENCV_CORE_CVSTDINL_HPP__
diff --git a/modules/core/include/opencv2/core/gpu.hpp b/modules/core/include/opencv2/core/gpu.hpp
index 775a9d026..dfd4dd92a 100644
--- a/modules/core/include/opencv2/core/gpu.hpp
+++ b/modules/core/include/opencv2/core/gpu.hpp
@@ -375,19 +375,6 @@ public:
     //! returns true if stream object is not default (!= 0)
     operator bool_type() const;
 
-    // obsolete methods
-
-    void enqueueDownload(const GpuMat& src, OutputArray dst);
-
-    void enqueueUpload(InputArray src, GpuMat& dst);
-
-    void enqueueCopy(const GpuMat& src, OutputArray dst);
-
-    void enqueueMemSet(GpuMat& src, Scalar val);
-    void enqueueMemSet(GpuMat& src, Scalar val, InputArray mask);
-
-    void enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha = 1.0, double beta = 0.0);
-
     class Impl;
 
 private:
@@ -529,10 +516,10 @@ public:
     size_t totalConstMem() const;
 
     //! major compute capability
-    int major() const;
+    int majorVersion() const;
 
     //! minor compute capability
-    int minor() const;
+    int minorVersion() const;
 
     //! alignment requirement for textures
     size_t textureAlignment() const;
@@ -679,12 +666,6 @@ CV_EXPORTS void printShortCudaDeviceInfo(int device);
 
 }} // namespace cv { namespace gpu {
 
-namespace cv {
-
-template <> CV_EXPORTS void Ptr<cv::gpu::Stream::Impl>::delete_obj();
-template <> CV_EXPORTS void Ptr<cv::gpu::Event::Impl>::delete_obj();
-
-}
 
 #include "opencv2/core/gpu.inl.hpp"
 
diff --git a/modules/core/include/opencv2/core/gpu.inl.hpp b/modules/core/include/opencv2/core/gpu.inl.hpp
index b44c2b151..13861170c 100644
--- a/modules/core/include/opencv2/core/gpu.inl.hpp
+++ b/modules/core/include/opencv2/core/gpu.inl.hpp
@@ -525,42 +525,6 @@ void swap(CudaMem& a, CudaMem& b)
 
 //////////////////////////////// Stream ///////////////////////////////
 
-inline
-void Stream::enqueueDownload(const GpuMat& src, OutputArray dst)
-{
-    src.download(dst, *this);
-}
-
-inline
-void Stream::enqueueUpload(InputArray src, GpuMat& dst)
-{
-    dst.upload(src, *this);
-}
-
-inline
-void Stream::enqueueCopy(const GpuMat& src, OutputArray dst)
-{
-    src.copyTo(dst, *this);
-}
-
-inline
-void Stream::enqueueMemSet(GpuMat& src, Scalar val)
-{
-    src.setTo(val, *this);
-}
-
-inline
-void Stream::enqueueMemSet(GpuMat& src, Scalar val, InputArray mask)
-{
-    src.setTo(val, mask, *this);
-}
-
-inline
-void Stream::enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha, double beta)
-{
-    src.convertTo(dst, dtype, alpha, beta, *this);
-}
-
 inline
 Stream::Stream(const Ptr<Impl>& impl)
     : impl_(impl)
@@ -619,7 +583,7 @@ size_t DeviceInfo::totalMemory() const
 inline
 bool DeviceInfo::supports(FeatureSet feature_set) const
 {
-    int version = major() * 10 + minor();
+    int version = majorVersion() * 10 + minorVersion();
     return version >= feature_set;
 }
 
diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index 8c9b10cea..c132be945 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -78,7 +78,8 @@ public:
         EXPR              = 6 << KIND_SHIFT,
         OPENGL_BUFFER     = 7 << KIND_SHIFT,
         CUDA_MEM          = 8 << KIND_SHIFT,
-        GPU_MAT           = 9 << KIND_SHIFT
+        GPU_MAT           = 9 << KIND_SHIFT,
+        OCL_MAT           =10 << KIND_SHIFT
     };
 
     _InputArray();
@@ -95,6 +96,7 @@ public:
     _InputArray(const gpu::GpuMat& d_mat);
     _InputArray(const ogl::Buffer& buf);
     _InputArray(const gpu::CudaMem& cuda_mem);
+    template<typename _Tp> _InputArray(const cudev::GpuMat_<_Tp>& m);
 
     virtual Mat getMat(int i=-1) const;
     virtual void getMatVector(std::vector<Mat>& mv) const;
@@ -143,6 +145,7 @@ public:
     _OutputArray(gpu::GpuMat& d_mat);
     _OutputArray(ogl::Buffer& buf);
     _OutputArray(gpu::CudaMem& cuda_mem);
+    template<typename _Tp> _OutputArray(cudev::GpuMat_<_Tp>& m);
     template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
     template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
     template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
@@ -155,6 +158,7 @@ public:
     _OutputArray(const gpu::GpuMat& d_mat);
     _OutputArray(const ogl::Buffer& buf);
     _OutputArray(const gpu::CudaMem& cuda_mem);
+    template<typename _Tp> _OutputArray(const cudev::GpuMat_<_Tp>& m);
     template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
     template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
     template<typename _Tp> _OutputArray(const std::vector<Mat_<_Tp> >& vec);
@@ -827,7 +831,7 @@ protected:
        img(i,j)[2] ^= (uchar)(i ^ j); // img(y,x)[c] accesses c-th channel of the pixel (x,y)
  \endcode
 */
-template<typename _Tp> class CV_EXPORTS Mat_ : public Mat
+template<typename _Tp> class Mat_ : public Mat
 {
 public:
     typedef _Tp value_type;
@@ -1129,8 +1133,6 @@ public:
     //! converts dense 2d matrix to the sparse form
     /*!
      \param m the input matrix
-     \param try1d if true and m is a single-column matrix (Nx1),
-            then the sparse matrix will be 1-dimensional.
     */
     explicit SparseMat(const Mat& m);
     //! converts old-style sparse matrix to the new-style. All the data is copied
@@ -1356,7 +1358,7 @@ public:
  m_.ref(2) += m_(3); // equivalent to m.ref<int>(2) += m.value<int>(3);
  \endcode
 */
-template<typename _Tp> class CV_EXPORTS SparseMat_ : public SparseMat
+template<typename _Tp> class SparseMat_ : public SparseMat
 {
 public:
     typedef SparseMatIterator_<_Tp> iterator;
@@ -1728,7 +1730,7 @@ public:
  This is the derived from cv::SparseMatConstIterator_ class that
  introduces more convenient operator *() for accessing the current element.
 */
-template<typename _Tp> class CV_EXPORTS SparseMatIterator_ : public SparseMatConstIterator_<_Tp>
+template<typename _Tp> class SparseMatIterator_ : public SparseMatConstIterator_<_Tp>
 {
 public:
 
diff --git a/modules/core/include/opencv2/core/matx.hpp b/modules/core/include/opencv2/core/matx.hpp
index 6115e3de1..86a35cd75 100644
--- a/modules/core/include/opencv2/core/matx.hpp
+++ b/modules/core/include/opencv2/core/matx.hpp
@@ -77,10 +77,11 @@ struct CV_EXPORTS Matx_AddOp {};
 struct CV_EXPORTS Matx_SubOp {};
 struct CV_EXPORTS Matx_ScaleOp {};
 struct CV_EXPORTS Matx_MulOp {};
+struct CV_EXPORTS Matx_DivOp {};
 struct CV_EXPORTS Matx_MatMulOp {};
 struct CV_EXPORTS Matx_TOp {};
 
-template<typename _Tp, int m, int n> class CV_EXPORTS Matx
+template<typename _Tp, int m, int n> class Matx
 {
 public:
     enum { depth    = DataType<_Tp>::depth,
@@ -162,6 +163,9 @@ public:
     //! multiply two matrices element-wise
     Matx<_Tp, m, n> mul(const Matx<_Tp, m, n>& a) const;
 
+    //! divide two matrices element-wise
+    Matx<_Tp, m, n> div(const Matx<_Tp, m, n>& a) const;
+
     //! element access
     const _Tp& operator ()(int i, int j) const;
     _Tp& operator ()(int i, int j);
@@ -174,6 +178,7 @@ public:
     Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp);
     template<typename _T2> Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp);
     Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp);
+    Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp);
     template<int l> Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp);
     Matx(const Matx<_Tp, n, m>& a, Matx_TOp);
 
@@ -281,7 +286,7 @@ template<typename _Tp, int m, int n> static double norm(const Matx<_Tp, m, n>& M
   In addition to the universal notation like Vec<float, 3>, you can use shorter aliases
   for the most popular specialized variants of Vec, e.g. Vec3f ~ Vec<float, 3>.
 */
-template<typename _Tp, int cn> class CV_EXPORTS Vec : public Matx<_Tp, cn, 1>
+template<typename _Tp, int cn> class Vec : public Matx<_Tp, cn, 1>
 {
 public:
     typedef _Tp value_type;
@@ -746,6 +751,13 @@ Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_Mul
         val[i] = saturate_cast<_Tp>(a.val[i] * b.val[i]);
 }
 
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] / b.val[i]);
+}
+
 template<typename _Tp, int m, int n> template<int l> inline
 Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp)
 {
@@ -773,6 +785,12 @@ Matx<_Tp, m, n> Matx<_Tp, m, n>::mul(const Matx<_Tp, m, n>& a) const
     return Matx<_Tp, m, n>(*this, a, Matx_MulOp());
 }
 
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n> Matx<_Tp, m, n>::div(const Matx<_Tp, m, n>& a) const
+{
+    return Matx<_Tp, m, n>(*this, a, Matx_DivOp());
+}
+
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, n, m> Matx<_Tp, m, n>::t() const
 {
@@ -1337,4 +1355,4 @@ template<typename _Tp> inline Vec<_Tp, 4>& operator *= (Vec<_Tp, 4>& v1, const V
 
 } // cv
 
-#endif // __OPENCV_CORE_MATX_HPP__
\ No newline at end of file
+#endif // __OPENCV_CORE_MATX_HPP__
diff --git a/modules/core/include/opencv2/core/opengl.hpp b/modules/core/include/opencv2/core/opengl.hpp
index a4ee091f7..0a4b454c8 100644
--- a/modules/core/include/opencv2/core/opengl.hpp
+++ b/modules/core/include/opencv2/core/opengl.hpp
@@ -283,12 +283,6 @@ CV_EXPORTS void setGlDevice(int device = 0);
 
 }}
 
-namespace cv {
-
-template <> CV_EXPORTS void Ptr<cv::ogl::Buffer::Impl>::delete_obj();
-template <> CV_EXPORTS void Ptr<cv::ogl::Texture2D::Impl>::delete_obj();
-
-}
 
 ////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////
diff --git a/modules/core/include/opencv2/core/operations.hpp b/modules/core/include/opencv2/core/operations.hpp
index 7d39154a1..f8aeddfb1 100644
--- a/modules/core/include/opencv2/core/operations.hpp
+++ b/modules/core/include/opencv2/core/operations.hpp
@@ -445,14 +445,14 @@ int print(const Matx<_Tp, m, n>& matx, FILE* stream = stdout)
 template<typename _Tp> inline
 Ptr<_Tp> Algorithm::create(const String& name)
 {
-    return _create(name).ptr<_Tp>();
+    return _create(name).dynamicCast<_Tp>();
 }
 
 template<typename _Tp> inline
 void Algorithm::set(const char* _name, const Ptr<_Tp>& value)
 {
-    Ptr<Algorithm> algo_ptr = value. template ptr<cv::Algorithm>();
-    if (algo_ptr.empty()) {
+    Ptr<Algorithm> algo_ptr = value. template dynamicCast<cv::Algorithm>();
+    if (!algo_ptr) {
         CV_Error( Error::StsUnsupportedFormat, "unknown/unsupported Ptr type of the second parameter of the method Algorithm::set");
     }
     info()->set(this, _name, ParamType<Algorithm>::type, &algo_ptr);
@@ -468,7 +468,7 @@ template<typename _Tp> inline
 void Algorithm::setAlgorithm(const char* _name, const Ptr<_Tp>& value)
 {
     Ptr<Algorithm> algo_ptr = value. template ptr<cv::Algorithm>();
-    if (algo_ptr.empty()) {
+    if (!algo_ptr) {
         CV_Error( Error::StsUnsupportedFormat, "unknown/unsupported Ptr type of the second parameter of the method Algorithm::set");
     }
     info()->set(this, _name, ParamType<Algorithm>::type, &algo_ptr);
diff --git a/modules/core/include/opencv2/core/persistence.hpp b/modules/core/include/opencv2/core/persistence.hpp
index e81401cae..f5687614b 100644
--- a/modules/core/include/opencv2/core/persistence.hpp
+++ b/modules/core/include/opencv2/core/persistence.hpp
@@ -186,7 +186,7 @@ public:
     //! the full constructor that opens file storage for reading or writing
     CV_WRAP FileStorage(const String& source, int flags, const String& encoding=String());
     //! the constructor that takes pointer to the C FileStorage structure
-    FileStorage(CvFileStorage* fs);
+    FileStorage(CvFileStorage* fs, bool owning=true);
     //! the destructor. calls release()
     virtual ~FileStorage();
 
@@ -209,9 +209,9 @@ public:
     CV_WRAP FileNode operator[](const char* nodename) const;
 
     //! returns pointer to the underlying C FileStorage structure
-    CvFileStorage* operator *() { return fs; }
+    CvFileStorage* operator *() { return fs.get(); }
     //! returns pointer to the underlying C FileStorage structure
-    const CvFileStorage* operator *() const { return fs; }
+    const CvFileStorage* operator *() const { return fs.get(); }
     //! writes one or more numbers of the specified format to the currently written structure
     void writeRaw( const String& fmt, const uchar* vec, size_t len );
     //! writes the registered C structure (CvMat, CvMatND, CvSeq). See cvWrite()
@@ -226,7 +226,7 @@ public:
     int state; //!< the writer state
 };
 
-template<> CV_EXPORTS void Ptr<CvFileStorage>::delete_obj();
+template<> CV_EXPORTS void DefaultDeleter<CvFileStorage>::operator ()(CvFileStorage* obj) const;
 
 /*!
  File Storage Node class
@@ -406,6 +406,59 @@ CV_EXPORTS void read(const FileNode& node, Mat& mat, const Mat& default_mat = Ma
 CV_EXPORTS void read(const FileNode& node, SparseMat& mat, const SparseMat& default_mat = SparseMat() );
 CV_EXPORTS void read(const FileNode& node, std::vector<KeyPoint>& keypoints);
 
+template<typename _Tp> static inline void read(const FileNode& node, Point_<_Tp>& value, const Point_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Point_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Point3_<_Tp>& value, const Point3_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 3 ? default_value : Point3_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                            saturate_cast<_Tp>(temp[2]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Size_<_Tp>& value, const Size_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Size_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Complex<_Tp>& value, const Complex<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Complex<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Rect_<_Tp>& value, const Rect_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 4 ? default_value : Rect_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                          saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3]));
+}
+
+template<typename _Tp, int cn> static inline void read(const FileNode& node, Vec<_Tp, cn>& value, const Vec<_Tp, cn>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != cn ? default_value : Vec<_Tp, cn>(&temp[0]);
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Scalar_<_Tp>& value, const Scalar_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 4 ? default_value : Scalar_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                            saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3]));
+}
+
+static inline void read(const FileNode& node, Range& value, const Range& default_value)
+{
+    Point2i temp(value.start, value.end); const Point2i default_temp = Point2i(default_value.start, default_value.end);
+    read(node, temp, default_temp);
+    value.start = temp.x; value.end = temp.y;
+}
+
+
 CV_EXPORTS FileStorage& operator << (FileStorage& fs, const String& str);
 
 
diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp
index 12961b32c..a1cc0e5c8 100644
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@@ -121,15 +121,24 @@ namespace cv
         body(range);
     }
 #endif
+
+    // Returns a static string if there is a parallel framework,
+    // NULL otherwise.
+    CV_EXPORTS const char* currentParallelFramework();
 } //namespace cv
 
 #define CV_INIT_ALGORITHM(classname, algname, memberinit) \
-    static ::cv::Algorithm* create##classname##_hidden() \
+    static inline ::cv::Algorithm* create##classname##_hidden() \
     { \
         return new classname; \
     } \
     \
-    static ::cv::AlgorithmInfo& classname##_info() \
+    static inline ::cv::Ptr< ::cv::Algorithm> create##classname##_ptr_hidden() \
+    { \
+        return ::cv::makePtr<classname>(); \
+    } \
+    \
+    static inline ::cv::AlgorithmInfo& classname##_info() \
     { \
         static ::cv::AlgorithmInfo classname##_info_var(algname, create##classname##_hidden); \
         return classname##_info_var; \
diff --git a/modules/core/include/opencv2/core/ptr.inl.hpp b/modules/core/include/opencv2/core/ptr.inl.hpp
new file mode 100644
index 000000000..989724281
--- /dev/null
+++ b/modules/core/include/opencv2/core/ptr.inl.hpp
@@ -0,0 +1,338 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CORE_PTR_INL_HPP__
+#define __OPENCV_CORE_PTR_INL_HPP__
+
+#include <algorithm>
+
+namespace cv {
+
+template<typename Y>
+void DefaultDeleter<Y>::operator () (Y* p) const
+{
+    delete p;
+}
+
+namespace detail
+{
+
+struct PtrOwner
+{
+    PtrOwner() : refCount(1)
+    {}
+
+    void incRef()
+    {
+        CV_XADD(&refCount, 1);
+    }
+
+    void decRef()
+    {
+        if (CV_XADD(&refCount, -1) == 1) deleteSelf();
+    }
+
+protected:
+    /* This doesn't really need to be virtual, since PtrOwner is never deleted
+       directly, but it doesn't hurt and it helps avoid warnings. */
+    virtual ~PtrOwner()
+    {}
+
+    virtual void deleteSelf() = 0;
+
+private:
+    unsigned int refCount;
+
+    // noncopyable
+    PtrOwner(const PtrOwner&);
+    PtrOwner& operator = (const PtrOwner&);
+};
+
+template<typename Y, typename D>
+struct PtrOwnerImpl : PtrOwner
+{
+    PtrOwnerImpl(Y* p, D d) : owned(p), deleter(d)
+    {}
+
+    void deleteSelf()
+    {
+        deleter(owned);
+        delete this;
+    }
+
+private:
+    Y* owned;
+    D deleter;
+};
+
+
+}
+
+template<typename T>
+Ptr<T>::Ptr() : owner(NULL), stored(NULL)
+{}
+
+template<typename T>
+template<typename Y>
+Ptr<T>::Ptr(Y* p)
+  : owner(p
+      ? new detail::PtrOwnerImpl<Y, DefaultDeleter<Y> >(p, DefaultDeleter<Y>())
+      : NULL),
+    stored(p)
+{}
+
+template<typename T>
+template<typename Y, typename D>
+Ptr<T>::Ptr(Y* p, D d)
+  : owner(p
+      ? new detail::PtrOwnerImpl<Y, D>(p, d)
+      : NULL),
+    stored(p)
+{}
+
+template<typename T>
+Ptr<T>::Ptr(const Ptr& o) : owner(o.owner), stored(o.stored)
+{
+    if (owner) owner->incRef();
+}
+
+template<typename T>
+template<typename Y>
+Ptr<T>::Ptr(const Ptr<Y>& o) : owner(o.owner), stored(o.stored)
+{
+    if (owner) owner->incRef();
+}
+
+template<typename T>
+template<typename Y>
+Ptr<T>::Ptr(const Ptr<Y>& o, T* p) : owner(o.owner), stored(p)
+{
+    if (owner) owner->incRef();
+}
+
+template<typename T>
+Ptr<T>::~Ptr()
+{
+    release();
+}
+
+template<typename T>
+Ptr<T>& Ptr<T>::operator = (const Ptr<T>& o)
+{
+    Ptr(o).swap(*this);
+    return *this;
+}
+
+template<typename T>
+template<typename Y>
+Ptr<T>& Ptr<T>::operator = (const Ptr<Y>& o)
+{
+    Ptr(o).swap(*this);
+    return *this;
+}
+
+template<typename T>
+void Ptr<T>::release()
+{
+    if (owner) owner->decRef();
+    owner = NULL;
+    stored = NULL;
+}
+
+template<typename T>
+template<typename Y>
+void Ptr<T>::reset(Y* p)
+{
+    Ptr(p).swap(*this);
+}
+
+template<typename T>
+template<typename Y, typename D>
+void Ptr<T>::reset(Y* p, D d)
+{
+    Ptr(p, d).swap(*this);
+}
+
+template<typename T>
+void Ptr<T>::swap(Ptr<T>& o)
+{
+    std::swap(owner, o.owner);
+    std::swap(stored, o.stored);
+}
+
+template<typename T>
+T* Ptr<T>::get() const
+{
+    return stored;
+}
+
+template<typename T>
+typename detail::RefOrVoid<T>::type Ptr<T>::operator * () const
+{
+    return *stored;
+}
+
+template<typename T>
+T* Ptr<T>::operator -> () const
+{
+    return stored;
+}
+
+template<typename T>
+Ptr<T>::operator T* () const
+{
+    return stored;
+}
+
+
+template<typename T>
+bool Ptr<T>::empty() const
+{
+    return !stored;
+}
+
+template<typename T>
+template<typename Y>
+Ptr<Y> Ptr<T>::staticCast() const
+{
+    return Ptr<Y>(*this, static_cast<Y*>(stored));
+}
+
+template<typename T>
+template<typename Y>
+Ptr<Y> Ptr<T>::constCast() const
+{
+    return Ptr<Y>(*this, const_cast<Y*>(stored));
+}
+
+template<typename T>
+template<typename Y>
+Ptr<Y> Ptr<T>::dynamicCast() const
+{
+    return Ptr<Y>(*this, dynamic_cast<Y*>(stored));
+}
+
+template<typename T>
+void swap(Ptr<T>& ptr1, Ptr<T>& ptr2){
+    ptr1.swap(ptr2);
+}
+
+template<typename T>
+bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
+{
+    return ptr1.get() == ptr2.get();
+}
+
+template<typename T>
+bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
+{
+    return ptr1.get() != ptr2.get();
+}
+
+template<typename T>
+Ptr<T> makePtr()
+{
+    return Ptr<T>(new T());
+}
+
+template<typename T, typename A1>
+Ptr<T> makePtr(const A1& a1)
+{
+    return Ptr<T>(new T(a1));
+}
+
+template<typename T, typename A1, typename A2>
+Ptr<T> makePtr(const A1& a1, const A2& a2)
+{
+    return Ptr<T>(new T(a1, a2));
+}
+
+template<typename T, typename A1, typename A2, typename A3>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3)
+{
+    return Ptr<T>(new T(a1, a2, a3));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10));
+}
+
+} // namespace cv
+
+#endif // __OPENCV_CORE_PTR_INL_HPP__
diff --git a/modules/core/include/opencv2/core/types.hpp b/modules/core/include/opencv2/core/types.hpp
index 05cf5052f..9252cad14 100644
--- a/modules/core/include/opencv2/core/types.hpp
+++ b/modules/core/include/opencv2/core/types.hpp
@@ -68,7 +68,7 @@ namespace cv
   more convenient access to the real and imaginary parts using through the simple field access, as opposite
   to std::complex::real() and std::complex::imag().
 */
-template<typename _Tp> class CV_EXPORTS Complex
+template<typename _Tp> class Complex
 {
 public:
 
@@ -120,7 +120,7 @@ public:
   as a template parameter. There are a few shorter aliases available for user convenience.
   See cv::Point, cv::Point2i, cv::Point2f and cv::Point2d.
 */
-template<typename _Tp> class CV_EXPORTS Point_
+template<typename _Tp> class Point_
 {
 public:
     typedef _Tp value_type;
@@ -191,7 +191,7 @@ public:
 
   \see cv::Point3i, cv::Point3f and cv::Point3d
 */
-template<typename _Tp> class CV_EXPORTS Point3_
+template<typename _Tp> class Point3_
 {
 public:
     typedef _Tp value_type;
@@ -256,7 +256,7 @@ public:
   The class represents the size of a 2D rectangle, image size, matrix size etc.
   Normally, cv::Size ~ cv::Size_<int> is used.
 */
-template<typename _Tp> class CV_EXPORTS Size_
+template<typename _Tp> class Size_
 {
 public:
     typedef _Tp value_type;
@@ -314,7 +314,7 @@ public:
   The class represents a 2D rectangle with coordinates of the specified data type.
   Normally, cv::Rect ~ cv::Rect_<int> is used.
 */
-template<typename _Tp> class CV_EXPORTS Rect_
+template<typename _Tp> class Rect_
 {
 public:
     typedef _Tp value_type;
@@ -470,7 +470,7 @@ public:
    This is partially specialized cv::Vec class with the number of elements = 4, i.e. a short vector of four elements.
    Normally, cv::Scalar ~ cv::Scalar_<double> is used.
 */
-template<typename _Tp> class CV_EXPORTS Scalar_ : public Vec<_Tp, 4>
+template<typename _Tp> class Scalar_ : public Vec<_Tp, 4>
 {
 public:
     //! various constructors
@@ -551,18 +551,18 @@ public:
     size_t hash() const;
 
     //! converts vector of keypoints to vector of points
-    static void convert(const std::vector<KeyPoint>& keypoints,
-                        CV_OUT std::vector<Point2f>& points2f,
-                        const std::vector<int>& keypointIndexes=std::vector<int>());
+    CV_WRAP static void convert(const std::vector<KeyPoint>& keypoints,
+                                CV_OUT std::vector<Point2f>& points2f,
+                                const std::vector<int>& keypointIndexes=std::vector<int>());
     //! converts vector of points to the vector of keypoints, where each keypoint is assigned the same size and the same orientation
-    static void convert(const std::vector<Point2f>& points2f,
-                        CV_OUT std::vector<KeyPoint>& keypoints,
-                        float size=1, float response=1, int octave=0, int class_id=-1);
+    CV_WRAP static void convert(const std::vector<Point2f>& points2f,
+                                CV_OUT std::vector<KeyPoint>& keypoints,
+                                float size=1, float response=1, int octave=0, int class_id=-1);
 
     //! computes overlap for pair of keypoints;
     //! overlap is a ratio between area of keypoint regions intersection and
     //! area of keypoint regions union (now keypoint region is circle)
-    static float overlap(const KeyPoint& kp1, const KeyPoint& kp2);
+    CV_WRAP static float overlap(const KeyPoint& kp1, const KeyPoint& kp2);
 
     CV_PROP_RW Point2f pt; //!< coordinates of the keypoints
     CV_PROP_RW float size; //!< diameter of the meaningful keypoint neighborhood
@@ -1922,4 +1922,4 @@ TermCriteria::TermCriteria(int _type, int _maxCount, double _epsilon)
 
 } // cv
 
-#endif //__OPENCV_CORE_TYPES_HPP__
\ No newline at end of file
+#endif //__OPENCV_CORE_TYPES_HPP__
diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp
index d2942f8c5..c96a0aaf9 100644
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@@ -80,7 +80,7 @@ namespace cv
  }
  \endcode
 */
-template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class CV_EXPORTS AutoBuffer
+template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
 {
 public:
     typedef _Tp value_type;
@@ -116,8 +116,8 @@ protected:
     _Tp* ptr;
     //! size of the real buffer
     size_t sz;
-    //! pre-allocated buffer
-    _Tp buf[fixed_size];
+    //! pre-allocated buffer. At least 1 element to confirm C++ standard reqirements
+    _Tp buf[(fixed_size > 0) ? fixed_size : 1];
 };
 
 //! Sets/resets the break-on-error mode.
diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp
index 916d173ba..203503eca 100644
--- a/modules/core/include/opencv2/core/version.hpp
+++ b/modules/core/include/opencv2/core/version.hpp
@@ -11,6 +11,7 @@
 //                For Open Source Computer Vision Library
 //
 // Copyright( C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2011-2013, NVIDIA Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -47,18 +48,22 @@
 #ifndef __OPENCV_VERSION_HPP__
 #define __OPENCV_VERSION_HPP__
 
-#define CV_VERSION_EPOCH    2
-#define CV_VERSION_MAJOR    4
-#define CV_VERSION_MINOR    9
+#define CV_VERSION_EPOCH    3
+#define CV_VERSION_MAJOR    0
+#define CV_VERSION_MINOR    0
 #define CV_VERSION_REVISION 0
+#define CV_VERSION_STATUS   "-dev"
 
 #define CVAUX_STR_EXP(__A)  #__A
 #define CVAUX_STR(__A)      CVAUX_STR_EXP(__A)
 
+#define CVAUX_STRW_EXP(__A)  L#__A
+#define CVAUX_STRW(__A)      CVAUX_STRW_EXP(__A)
+
 #if CV_VERSION_REVISION
-#  define CV_VERSION        CVAUX_STR(CV_VERSION_EPOCH) "." CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION)
+#  define CV_VERSION        CVAUX_STR(CV_VERSION_EPOCH) "." CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS
 #else
-#  define CV_VERSION        CVAUX_STR(CV_VERSION_EPOCH) "." CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR)
+#  define CV_VERSION        CVAUX_STR(CV_VERSION_EPOCH) "." CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) CV_VERSION_STATUS
 #endif
 
 /* old  style version constants*/
diff --git a/modules/core/perf/perf_abs.cpp b/modules/core/perf/perf_abs.cpp
index 691c6f7c4..63cb06b63 100644
--- a/modules/core/perf/perf_abs.cpp
+++ b/modules/core/perf/perf_abs.cpp
@@ -24,4 +24,3 @@ PERF_TEST_P(Size_MatType, abs, TYPICAL_MATS_ABS)
 
     SANITY_CHECK(c);
 }
-
diff --git a/modules/core/perf/perf_bitwise.cpp b/modules/core/perf/perf_bitwise.cpp
index 64a8dd8bd..1308b7bf3 100644
--- a/modules/core/perf/perf_bitwise.cpp
+++ b/modules/core/perf/perf_bitwise.cpp
@@ -73,4 +73,3 @@ PERF_TEST_P(Size_MatType, bitwise_xor, TYPICAL_MATS_BITW_ARITHM)
 
     SANITY_CHECK(c);
 }
-
diff --git a/modules/core/perf/perf_main.cpp b/modules/core/perf/perf_main.cpp
index 79c28a645..7c899c244 100644
--- a/modules/core/perf/perf_main.cpp
+++ b/modules/core/perf/perf_main.cpp
@@ -1,3 +1,8 @@
 #include "perf_precomp.hpp"
+#ifdef _MSC_VER
+# if _MSC_VER >= 1700
+#  pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
+# endif
+#endif
 
 CV_PERF_TEST_MAIN(core)
diff --git a/modules/core/perf/perf_merge.cpp b/modules/core/perf/perf_merge.cpp
index d82941a92..e7e8d2fe3 100644
--- a/modules/core/perf/perf_merge.cpp
+++ b/modules/core/perf/perf_merge.cpp
@@ -34,4 +34,4 @@ PERF_TEST_P( Size_SrcDepth_DstChannels, merge,
     TEST_CYCLE_MULTIRUN(runs) merge( (vector<Mat> &)mv, dst );
 
     SANITY_CHECK(dst, 1e-12);
-}
\ No newline at end of file
+}
diff --git a/modules/core/perf/perf_precomp.cpp b/modules/core/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/core/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/core/perf/perf_stat.cpp b/modules/core/perf/perf_stat.cpp
index b7fc43d12..6b5f0ff52 100644
--- a/modules/core/perf/perf_stat.cpp
+++ b/modules/core/perf/perf_stat.cpp
@@ -33,7 +33,7 @@ PERF_TEST_P(Size_MatType, mean, TYPICAL_MATS)
 
     TEST_CYCLE() s = mean(src);
 
-    SANITY_CHECK(s, 1e-6);
+    SANITY_CHECK(s, 1e-5);
 }
 
 PERF_TEST_P(Size_MatType, mean_mask, TYPICAL_MATS)
@@ -49,7 +49,7 @@ PERF_TEST_P(Size_MatType, mean_mask, TYPICAL_MATS)
 
     TEST_CYCLE() s = mean(src, mask);
 
-    SANITY_CHECK(s, 1e-6);
+    SANITY_CHECK(s, 5e-5);
 }
 
 PERF_TEST_P(Size_MatType, meanStdDev, TYPICAL_MATS)
@@ -83,8 +83,8 @@ PERF_TEST_P(Size_MatType, meanStdDev_mask, TYPICAL_MATS)
 
     TEST_CYCLE() meanStdDev(src, mean, dev, mask);
 
-    SANITY_CHECK(mean, 1e-6);
-    SANITY_CHECK(dev, 1e-6);
+    SANITY_CHECK(mean, 1e-5);
+    SANITY_CHECK(dev, 1e-5);
 }
 
 PERF_TEST_P(Size_MatType, countNonZero, testing::Combine( testing::Values( TYPICAL_MAT_SIZES ), testing::Values( CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1 ) ))
diff --git a/modules/core/src/algorithm.cpp b/modules/core/src/algorithm.cpp
index d0c6f5c92..ff67a5df1 100644
--- a/modules/core/src/algorithm.cpp
+++ b/modules/core/src/algorithm.cpp
@@ -163,7 +163,7 @@ Ptr<Algorithm> Algorithm::_create(const String& name)
     Algorithm::Constructor c = 0;
     if( !alglist().find(name, c) )
         return Ptr<Algorithm>();
-    return c();
+    return Ptr<Algorithm>(c());
 }
 
 Algorithm::Algorithm()
@@ -490,7 +490,7 @@ void AlgorithmInfo::read(Algorithm* algo, const FileNode& fn) const
         else if( p.type == Param::ALGORITHM )
         {
             Ptr<Algorithm> nestedAlgo = Algorithm::_create((String)n["name"]);
-            CV_Assert( !nestedAlgo.empty() );
+            CV_Assert( nestedAlgo );
             nestedAlgo->read(n);
             info->set(algo, pname.c_str(), p.type, &nestedAlgo, true);
         }
diff --git a/modules/core/src/alloc.cpp b/modules/core/src/alloc.cpp
index c830df23c..8e218738b 100644
--- a/modules/core/src/alloc.cpp
+++ b/modules/core/src/alloc.cpp
@@ -94,9 +94,20 @@ void fastFree(void* ptr)
 #define STAT(stmt)
 
 #ifdef WIN32
+#if (_WIN32_WINNT >= 0x0602)
+#include <synchapi.h>
+#endif
+
 struct CriticalSection
 {
-    CriticalSection() { InitializeCriticalSection(&cs); }
+    CriticalSection()
+    {
+#if (_WIN32_WINNT >= 0x0600)
+        InitializeCriticalSectionEx(&cs, 1000, 0);
+#else
+        InitializeCriticalSection(&cs);
+#endif
+    }
     ~CriticalSection() { DeleteCriticalSection(&cs); }
     void lock() { EnterCriticalSection(&cs); }
     void unlock() { LeaveCriticalSection(&cs); }
diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index 98b67aabf..313d06d88 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -1131,23 +1131,33 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
     }
 }
 
-static BinaryFunc maxTab[] =
+static BinaryFunc* getMaxTab()
 {
-    (BinaryFunc)GET_OPTIMIZED(max8u), (BinaryFunc)GET_OPTIMIZED(max8s),
-    (BinaryFunc)GET_OPTIMIZED(max16u), (BinaryFunc)GET_OPTIMIZED(max16s),
-    (BinaryFunc)GET_OPTIMIZED(max32s),
-    (BinaryFunc)GET_OPTIMIZED(max32f), (BinaryFunc)max64f,
-    0
-};
+    static BinaryFunc maxTab[] =
+    {
+        (BinaryFunc)GET_OPTIMIZED(max8u), (BinaryFunc)GET_OPTIMIZED(max8s),
+        (BinaryFunc)GET_OPTIMIZED(max16u), (BinaryFunc)GET_OPTIMIZED(max16s),
+        (BinaryFunc)GET_OPTIMIZED(max32s),
+        (BinaryFunc)GET_OPTIMIZED(max32f), (BinaryFunc)max64f,
+        0
+    };
 
-static BinaryFunc minTab[] =
+    return maxTab;
+}
+
+static BinaryFunc* getMinTab()
 {
-    (BinaryFunc)GET_OPTIMIZED(min8u), (BinaryFunc)GET_OPTIMIZED(min8s),
-    (BinaryFunc)GET_OPTIMIZED(min16u), (BinaryFunc)GET_OPTIMIZED(min16s),
-    (BinaryFunc)GET_OPTIMIZED(min32s),
-    (BinaryFunc)GET_OPTIMIZED(min32f), (BinaryFunc)min64f,
-    0
-};
+    static BinaryFunc minTab[] =
+    {
+        (BinaryFunc)GET_OPTIMIZED(min8u), (BinaryFunc)GET_OPTIMIZED(min8s),
+        (BinaryFunc)GET_OPTIMIZED(min16u), (BinaryFunc)GET_OPTIMIZED(min16s),
+        (BinaryFunc)GET_OPTIMIZED(min32s),
+        (BinaryFunc)GET_OPTIMIZED(min32f), (BinaryFunc)min64f,
+        0
+    };
+
+    return minTab;
+}
 
 }
 
@@ -1177,24 +1187,24 @@ void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask)
 
 void cv::max( InputArray src1, InputArray src2, OutputArray dst )
 {
-    binary_op(src1, src2, dst, noArray(), maxTab, false );
+    binary_op(src1, src2, dst, noArray(), getMaxTab(), false );
 }
 
 void cv::min( InputArray src1, InputArray src2, OutputArray dst )
 {
-    binary_op(src1, src2, dst, noArray(), minTab, false );
+    binary_op(src1, src2, dst, noArray(), getMinTab(), false );
 }
 
 void cv::max(const Mat& src1, const Mat& src2, Mat& dst)
 {
     OutputArray _dst(dst);
-    binary_op(src1, src2, _dst, noArray(), maxTab, false );
+    binary_op(src1, src2, _dst, noArray(), getMaxTab(), false );
 }
 
 void cv::min(const Mat& src1, const Mat& src2, Mat& dst)
 {
     OutputArray _dst(dst);
-    binary_op(src1, src2, _dst, noArray(), minTab, false );
+    binary_op(src1, src2, _dst, noArray(), getMinTab(), false );
 }
 
 
@@ -1482,39 +1492,54 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
     }
 }
 
-static BinaryFunc addTab[] =
+static BinaryFunc* getAddTab()
 {
-    (BinaryFunc)GET_OPTIMIZED(add8u), (BinaryFunc)GET_OPTIMIZED(add8s),
-    (BinaryFunc)GET_OPTIMIZED(add16u), (BinaryFunc)GET_OPTIMIZED(add16s),
-    (BinaryFunc)GET_OPTIMIZED(add32s),
-    (BinaryFunc)GET_OPTIMIZED(add32f), (BinaryFunc)add64f,
-    0
-};
+    static BinaryFunc addTab[] =
+    {
+        (BinaryFunc)GET_OPTIMIZED(add8u), (BinaryFunc)GET_OPTIMIZED(add8s),
+        (BinaryFunc)GET_OPTIMIZED(add16u), (BinaryFunc)GET_OPTIMIZED(add16s),
+        (BinaryFunc)GET_OPTIMIZED(add32s),
+        (BinaryFunc)GET_OPTIMIZED(add32f), (BinaryFunc)add64f,
+        0
+    };
 
-static BinaryFunc subTab[] =
-{
-    (BinaryFunc)GET_OPTIMIZED(sub8u), (BinaryFunc)GET_OPTIMIZED(sub8s),
-    (BinaryFunc)GET_OPTIMIZED(sub16u), (BinaryFunc)GET_OPTIMIZED(sub16s),
-    (BinaryFunc)GET_OPTIMIZED(sub32s),
-    (BinaryFunc)GET_OPTIMIZED(sub32f), (BinaryFunc)sub64f,
-    0
-};
+    return addTab;
+}
 
-static BinaryFunc absdiffTab[] =
+static BinaryFunc* getSubTab()
 {
-    (BinaryFunc)GET_OPTIMIZED(absdiff8u), (BinaryFunc)GET_OPTIMIZED(absdiff8s),
-    (BinaryFunc)GET_OPTIMIZED(absdiff16u), (BinaryFunc)GET_OPTIMIZED(absdiff16s),
-    (BinaryFunc)GET_OPTIMIZED(absdiff32s),
-    (BinaryFunc)GET_OPTIMIZED(absdiff32f), (BinaryFunc)absdiff64f,
-    0
-};
+    static BinaryFunc subTab[] =
+    {
+        (BinaryFunc)GET_OPTIMIZED(sub8u), (BinaryFunc)GET_OPTIMIZED(sub8s),
+        (BinaryFunc)GET_OPTIMIZED(sub16u), (BinaryFunc)GET_OPTIMIZED(sub16s),
+        (BinaryFunc)GET_OPTIMIZED(sub32s),
+        (BinaryFunc)GET_OPTIMIZED(sub32f), (BinaryFunc)sub64f,
+        0
+    };
+
+    return subTab;
+}
+
+static BinaryFunc* getAbsDiffTab()
+{
+    static BinaryFunc absDiffTab[] =
+    {
+        (BinaryFunc)GET_OPTIMIZED(absdiff8u), (BinaryFunc)GET_OPTIMIZED(absdiff8s),
+        (BinaryFunc)GET_OPTIMIZED(absdiff16u), (BinaryFunc)GET_OPTIMIZED(absdiff16s),
+        (BinaryFunc)GET_OPTIMIZED(absdiff32s),
+        (BinaryFunc)GET_OPTIMIZED(absdiff32f), (BinaryFunc)absdiff64f,
+        0
+    };
+
+    return absDiffTab;
+}
 
 }
 
 void cv::add( InputArray src1, InputArray src2, OutputArray dst,
           InputArray mask, int dtype )
 {
-    arithm_op(src1, src2, dst, mask, dtype, addTab );
+    arithm_op(src1, src2, dst, mask, dtype, getAddTab() );
 }
 
 void cv::subtract( InputArray src1, InputArray src2, OutputArray dst,
@@ -1549,12 +1574,12 @@ void cv::subtract( InputArray src1, InputArray src2, OutputArray dst,
         }
     }
 #endif
-    arithm_op(src1, src2, dst, mask, dtype, subTab );
+    arithm_op(src1, src2, dst, mask, dtype, getSubTab() );
 }
 
 void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
 {
-    arithm_op(src1, src2, dst, noArray(), -1, absdiffTab);
+    arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab());
 }
 
 /****************************************************************************************\
@@ -1844,46 +1869,60 @@ static void recip64f( const double* src1, size_t step1, const double* src2, size
 }
 
 
-static BinaryFunc mulTab[] =
+static BinaryFunc* getMulTab()
 {
-    (BinaryFunc)mul8u, (BinaryFunc)mul8s, (BinaryFunc)mul16u,
-    (BinaryFunc)mul16s, (BinaryFunc)mul32s, (BinaryFunc)mul32f,
-    (BinaryFunc)mul64f, 0
-};
+    static BinaryFunc mulTab[] =
+    {
+        (BinaryFunc)mul8u, (BinaryFunc)mul8s, (BinaryFunc)mul16u,
+        (BinaryFunc)mul16s, (BinaryFunc)mul32s, (BinaryFunc)mul32f,
+        (BinaryFunc)mul64f, 0
+    };
 
-static BinaryFunc divTab[] =
+    return mulTab;
+}
+
+static BinaryFunc* getDivTab()
 {
-    (BinaryFunc)div8u, (BinaryFunc)div8s, (BinaryFunc)div16u,
-    (BinaryFunc)div16s, (BinaryFunc)div32s, (BinaryFunc)div32f,
-    (BinaryFunc)div64f, 0
-};
+    static BinaryFunc divTab[] =
+    {
+        (BinaryFunc)div8u, (BinaryFunc)div8s, (BinaryFunc)div16u,
+        (BinaryFunc)div16s, (BinaryFunc)div32s, (BinaryFunc)div32f,
+        (BinaryFunc)div64f, 0
+    };
 
-static BinaryFunc recipTab[] =
+    return divTab;
+}
+
+static BinaryFunc* getRecipTab()
 {
-    (BinaryFunc)recip8u, (BinaryFunc)recip8s, (BinaryFunc)recip16u,
-    (BinaryFunc)recip16s, (BinaryFunc)recip32s, (BinaryFunc)recip32f,
-    (BinaryFunc)recip64f, 0
-};
+    static BinaryFunc recipTab[] =
+    {
+        (BinaryFunc)recip8u, (BinaryFunc)recip8s, (BinaryFunc)recip16u,
+        (BinaryFunc)recip16s, (BinaryFunc)recip32s, (BinaryFunc)recip32f,
+        (BinaryFunc)recip64f, 0
+    };
 
+    return recipTab;
+}
 
 }
 
 void cv::multiply(InputArray src1, InputArray src2,
                   OutputArray dst, double scale, int dtype)
 {
-    arithm_op(src1, src2, dst, noArray(), dtype, mulTab, true, &scale);
+    arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(), true, &scale);
 }
 
 void cv::divide(InputArray src1, InputArray src2,
                 OutputArray dst, double scale, int dtype)
 {
-    arithm_op(src1, src2, dst, noArray(), dtype, divTab, true, &scale);
+    arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale);
 }
 
 void cv::divide(double scale, InputArray src2,
                 OutputArray dst, int dtype)
 {
-    arithm_op(src2, src2, dst, noArray(), dtype, recipTab, true, &scale);
+    arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale);
 }
 
 /****************************************************************************************\
@@ -2026,12 +2065,17 @@ static void addWeighted64f( const double* src1, size_t step1, const double* src2
     addWeighted_<double, double>(src1, step1, src2, step2, dst, step, sz, scalars);
 }
 
-static BinaryFunc addWeightedTab[] =
+static BinaryFunc* getAddWeightedTab()
 {
-    (BinaryFunc)GET_OPTIMIZED(addWeighted8u), (BinaryFunc)GET_OPTIMIZED(addWeighted8s), (BinaryFunc)GET_OPTIMIZED(addWeighted16u),
-    (BinaryFunc)GET_OPTIMIZED(addWeighted16s), (BinaryFunc)GET_OPTIMIZED(addWeighted32s), (BinaryFunc)addWeighted32f,
-    (BinaryFunc)addWeighted64f, 0
-};
+    static BinaryFunc addWeightedTab[] =
+    {
+        (BinaryFunc)GET_OPTIMIZED(addWeighted8u), (BinaryFunc)GET_OPTIMIZED(addWeighted8s), (BinaryFunc)GET_OPTIMIZED(addWeighted16u),
+        (BinaryFunc)GET_OPTIMIZED(addWeighted16s), (BinaryFunc)GET_OPTIMIZED(addWeighted32s), (BinaryFunc)addWeighted32f,
+        (BinaryFunc)addWeighted64f, 0
+    };
+
+    return addWeightedTab;
+}
 
 }
 
@@ -2039,7 +2083,7 @@ void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
                       double beta, double gamma, OutputArray dst, int dtype )
 {
     double scalars[] = {alpha, beta, gamma};
-    arithm_op(src1, src2, dst, noArray(), dtype, addWeightedTab, true, scalars);
+    arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars);
 }
 
 
@@ -2109,10 +2153,30 @@ cmp_(const T* src1, size_t step1, const T* src2, size_t step2,
     }
 }
 
+#if ARITHM_USE_IPP
+inline static IppCmpOp convert_cmp(int _cmpop)
+{
+    return _cmpop == CMP_EQ ? ippCmpEq :
+        _cmpop == CMP_GT ? ippCmpGreater :
+        _cmpop == CMP_GE ? ippCmpGreaterEq :
+        _cmpop == CMP_LT ? ippCmpLess :
+        _cmpop == CMP_LE ? ippCmpLessEq :
+        (IppCmpOp)-1;
+}
+#endif
 
 static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size size, void* _cmpop)
 {
+#if ARITHM_USE_IPP
+    IppCmpOp op = convert_cmp(*(int *)_cmpop);
+    if( op  >= 0 )
+    {
+        fixSteps(size, sizeof(dst[0]), step1, step2, step);
+        if( ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
+            return;
+    }
+#endif
   //vz optimized  cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
     int code = *(int*)_cmpop;
     step1 /= sizeof(src1[0]);
@@ -2187,12 +2251,30 @@ static void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t ste
 static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                   uchar* dst, size_t step, Size size, void* _cmpop)
 {
+#if ARITHM_USE_IPP
+    IppCmpOp op = convert_cmp(*(int *)_cmpop);
+    if( op  >= 0 )
+    {
+        fixSteps(size, sizeof(dst[0]), step1, step2, step);
+        if( ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
+            return;
+    }
+#endif
     cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
 }
 
 static void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
                   uchar* dst, size_t step, Size size, void* _cmpop)
 {
+#if ARITHM_USE_IPP
+    IppCmpOp op = convert_cmp(*(int *)_cmpop);
+    if( op  > 0 )
+    {
+        fixSteps(size, sizeof(dst[0]), step1, step2, step);
+        if( ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
+            return;
+    }
+#endif
    //vz optimized cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
 
     int code = *(int*)_cmpop;
@@ -2290,6 +2372,15 @@ static void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2,
 static void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
                   uchar* dst, size_t step, Size size, void* _cmpop)
 {
+#if ARITHM_USE_IPP
+    IppCmpOp op = convert_cmp(*(int *)_cmpop);
+    if( op  >= 0 )
+    {
+        fixSteps(size, sizeof(dst[0]), step1, step2, step);
+        if( ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
+            return;
+    }
+#endif
     cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
 }
 
@@ -2299,15 +2390,19 @@ static void cmp64f(const double* src1, size_t step1, const double* src2, size_t
     cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
 }
 
-static BinaryFunc cmpTab[] =
+static BinaryFunc getCmpFunc(int depth)
 {
-    (BinaryFunc)GET_OPTIMIZED(cmp8u), (BinaryFunc)GET_OPTIMIZED(cmp8s),
-    (BinaryFunc)GET_OPTIMIZED(cmp16u), (BinaryFunc)GET_OPTIMIZED(cmp16s),
-    (BinaryFunc)GET_OPTIMIZED(cmp32s),
-    (BinaryFunc)GET_OPTIMIZED(cmp32f), (BinaryFunc)cmp64f,
-    0
-};
+    static BinaryFunc cmpTab[] =
+    {
+        (BinaryFunc)GET_OPTIMIZED(cmp8u), (BinaryFunc)GET_OPTIMIZED(cmp8s),
+        (BinaryFunc)GET_OPTIMIZED(cmp16u), (BinaryFunc)GET_OPTIMIZED(cmp16s),
+        (BinaryFunc)GET_OPTIMIZED(cmp32s),
+        (BinaryFunc)GET_OPTIMIZED(cmp32f), (BinaryFunc)cmp64f,
+        0
+    };
 
+    return cmpTab[depth];
+}
 
 static double getMinVal(int depth)
 {
@@ -2337,7 +2432,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
         _dst.create(src1.size(), CV_8UC(cn));
         Mat dst = _dst.getMat();
         Size sz = getContinuousSize(src1, src2, dst, src1.channels());
-        cmpTab[src1.depth()](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, &op);
+        getCmpFunc(src1.depth())(src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, &op);
         return;
     }
 
@@ -2369,7 +2464,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
 
     size_t esz = src1.elemSize();
     size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
-    BinaryFunc func = cmpTab[depth1];
+    BinaryFunc func = getCmpFunc(depth1);
 
     if( !haveScalar )
     {
@@ -2546,12 +2641,17 @@ static void inRangeReduce(const uchar* src, uchar* dst, size_t len, int cn)
 typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                              const uchar* src3, size_t step3, uchar* dst, size_t step, Size sz );
 
-static InRangeFunc inRangeTab[] =
+static InRangeFunc getInRangeFunc(int depth)
 {
-    (InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u),
-    (InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f),
-    (InRangeFunc)inRange64f, 0
-};
+    static InRangeFunc inRangeTab[] =
+    {
+        (InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u),
+        (InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f),
+        (InRangeFunc)inRange64f, 0
+    };
+
+    return inRangeTab[depth];
+}
 
 }
 
@@ -2590,7 +2690,7 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
 
     _dst.create(src.dims, src.size, CV_8U);
     Mat dst = _dst.getMat();
-    InRangeFunc func = inRangeTab[depth];
+    InRangeFunc func = getInRangeFunc(depth);
 
     const Mat* arrays_sc[] = { &src, &dst, 0 };
     const Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 };
diff --git a/modules/core/src/array.cpp b/modules/core/src/array.cpp
index 60ac84865..2ad7b1216 100644
--- a/modules/core/src/array.cpp
+++ b/modules/core/src/array.cpp
@@ -3190,22 +3190,22 @@ cvCheckTermCriteria( CvTermCriteria criteria, double default_eps,
 namespace cv
 {
 
-template<> void Ptr<CvMat>::delete_obj()
+template<> void DefaultDeleter<CvMat>::operator ()(CvMat* obj) const
 { cvReleaseMat(&obj); }
 
-template<> void Ptr<IplImage>::delete_obj()
+template<> void DefaultDeleter<IplImage>::operator ()(IplImage* obj) const
 { cvReleaseImage(&obj); }
 
-template<> void Ptr<CvMatND>::delete_obj()
+template<> void DefaultDeleter<CvMatND>::operator ()(CvMatND* obj) const
 { cvReleaseMatND(&obj); }
 
-template<> void Ptr<CvSparseMat>::delete_obj()
+template<> void DefaultDeleter<CvSparseMat>::operator ()(CvSparseMat* obj) const
 { cvReleaseSparseMat(&obj); }
 
-template<> void Ptr<CvMemStorage>::delete_obj()
+template<> void DefaultDeleter<CvMemStorage>::operator ()(CvMemStorage* obj) const
 { cvReleaseMemStorage(&obj); }
 
-template<> void Ptr<CvFileStorage>::delete_obj()
+template<> void DefaultDeleter<CvFileStorage>::operator ()(CvFileStorage* obj) const
 { cvReleaseFileStorage(&obj); }
 
 }
diff --git a/modules/core/src/command_line_parser.cpp b/modules/core/src/command_line_parser.cpp
index 4f4c8db4a..b082faee6 100644
--- a/modules/core/src/command_line_parser.cpp
+++ b/modules/core/src/command_line_parser.cpp
@@ -136,18 +136,13 @@ void CommandLineParser::getByIndex(int index, bool space_delete, int type, void*
 
 static bool cmp_params(const CommandLineParserParams & p1, const CommandLineParserParams & p2)
 {
+    if (p1.number < p2.number)
+        return true;
+
     if (p1.number > p2.number)
         return false;
 
-    if (p1.number == -1 && p2.number == -1)
-    {
-        if (p1.keys[0].compare(p2.keys[0]) > 0)
-        {
-            return false;
-        }
-    }
-
-    return true;
+    return p1.keys[0].compare(p2.keys[0]) < 0;
 }
 
 CommandLineParser::CommandLineParser(int argc, const char* const argv[], const String& keys)
diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index f5d6e9998..5cd689460 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -194,17 +194,27 @@ static void merge64s(const int64** src, int64* dst, int len, int cn )
 typedef void (*SplitFunc)(const uchar* src, uchar** dst, int len, int cn);
 typedef void (*MergeFunc)(const uchar** src, uchar* dst, int len, int cn);
 
-static SplitFunc splitTab[] =
+static SplitFunc getSplitFunc(int depth)
 {
-    (SplitFunc)GET_OPTIMIZED(split8u), (SplitFunc)GET_OPTIMIZED(split8u), (SplitFunc)GET_OPTIMIZED(split16u), (SplitFunc)GET_OPTIMIZED(split16u),
-    (SplitFunc)GET_OPTIMIZED(split32s), (SplitFunc)GET_OPTIMIZED(split32s), (SplitFunc)GET_OPTIMIZED(split64s), 0
-};
+    static SplitFunc splitTab[] =
+    {
+        (SplitFunc)GET_OPTIMIZED(split8u), (SplitFunc)GET_OPTIMIZED(split8u), (SplitFunc)GET_OPTIMIZED(split16u), (SplitFunc)GET_OPTIMIZED(split16u),
+        (SplitFunc)GET_OPTIMIZED(split32s), (SplitFunc)GET_OPTIMIZED(split32s), (SplitFunc)GET_OPTIMIZED(split64s), 0
+    };
 
-static MergeFunc mergeTab[] =
+    return splitTab[depth];
+}
+
+static MergeFunc getMergeFunc(int depth)
 {
-    (MergeFunc)GET_OPTIMIZED(merge8u), (MergeFunc)GET_OPTIMIZED(merge8u), (MergeFunc)GET_OPTIMIZED(merge16u), (MergeFunc)GET_OPTIMIZED(merge16u),
-    (MergeFunc)GET_OPTIMIZED(merge32s), (MergeFunc)GET_OPTIMIZED(merge32s), (MergeFunc)GET_OPTIMIZED(merge64s), 0
-};
+    static MergeFunc mergeTab[] =
+    {
+        (MergeFunc)GET_OPTIMIZED(merge8u), (MergeFunc)GET_OPTIMIZED(merge8u), (MergeFunc)GET_OPTIMIZED(merge16u), (MergeFunc)GET_OPTIMIZED(merge16u),
+        (MergeFunc)GET_OPTIMIZED(merge32s), (MergeFunc)GET_OPTIMIZED(merge32s), (MergeFunc)GET_OPTIMIZED(merge64s), 0
+    };
+
+    return mergeTab[depth];
+}
 
 }
 
@@ -217,7 +227,7 @@ void cv::split(const Mat& src, Mat* mv)
         return;
     }
 
-    SplitFunc func = splitTab[depth];
+    SplitFunc func = getSplitFunc(depth);
     CV_Assert( func != 0 );
 
     int esz = (int)src.elemSize(), esz1 = (int)src.elemSize1();
@@ -323,7 +333,7 @@ void cv::merge(const Mat* mv, size_t n, OutputArray _dst)
 
     NAryMatIterator it(arrays, ptrs, cn+1);
     int total = (int)it.size, blocksize = cn <= 4 ? total : std::min(total, blocksize0);
-    MergeFunc func = mergeTab[depth];
+    MergeFunc func = getMergeFunc(depth);
 
     for( i = 0; i < it.nplanes; i++, ++it )
     {
@@ -419,12 +429,17 @@ static void mixChannels64s( const int64** src, const int* sdelta,
 typedef void (*MixChannelsFunc)( const uchar** src, const int* sdelta,
         uchar** dst, const int* ddelta, int len, int npairs );
 
-static MixChannelsFunc mixchTab[] =
+static MixChannelsFunc getMixchFunc(int depth)
 {
-    (MixChannelsFunc)mixChannels8u, (MixChannelsFunc)mixChannels8u, (MixChannelsFunc)mixChannels16u,
-    (MixChannelsFunc)mixChannels16u, (MixChannelsFunc)mixChannels32s, (MixChannelsFunc)mixChannels32s,
-    (MixChannelsFunc)mixChannels64s, 0
-};
+    static MixChannelsFunc mixchTab[] =
+    {
+        (MixChannelsFunc)mixChannels8u, (MixChannelsFunc)mixChannels8u, (MixChannelsFunc)mixChannels16u,
+        (MixChannelsFunc)mixChannels16u, (MixChannelsFunc)mixChannels32s, (MixChannelsFunc)mixChannels32s,
+        (MixChannelsFunc)mixChannels64s, 0
+    };
+
+    return mixchTab[depth];
+}
 
 }
 
@@ -479,7 +494,7 @@ void cv::mixChannels( const Mat* src, size_t nsrcs, Mat* dst, size_t ndsts, cons
 
     NAryMatIterator it(arrays, ptrs, (int)(nsrcs + ndsts));
     int total = (int)it.size, blocksize = std::min(total, (int)((BLOCK_SIZE + esz1-1)/esz1));
-    MixChannelsFunc func = mixchTab[depth];
+    MixChannelsFunc func = getMixchFunc(depth);
 
     for( i = 0; i < it.nplanes; i++, ++it )
     {
@@ -947,104 +962,109 @@ DEF_CVT_FUNC(32s64f, int, double);
 DEF_CVT_FUNC(32f64f, float, double);
 DEF_CPY_FUNC(64s,    int64);
 
-static BinaryFunc cvtScaleAbsTab[] =
+static BinaryFunc getCvtScaleAbsFunc(int depth)
 {
-    (BinaryFunc)cvtScaleAbs8u, (BinaryFunc)cvtScaleAbs8s8u, (BinaryFunc)cvtScaleAbs16u8u,
-    (BinaryFunc)cvtScaleAbs16s8u, (BinaryFunc)cvtScaleAbs32s8u, (BinaryFunc)cvtScaleAbs32f8u,
-    (BinaryFunc)cvtScaleAbs64f8u, 0
-};
+    static BinaryFunc cvtScaleAbsTab[] =
+    {
+        (BinaryFunc)cvtScaleAbs8u, (BinaryFunc)cvtScaleAbs8s8u, (BinaryFunc)cvtScaleAbs16u8u,
+        (BinaryFunc)cvtScaleAbs16s8u, (BinaryFunc)cvtScaleAbs32s8u, (BinaryFunc)cvtScaleAbs32f8u,
+        (BinaryFunc)cvtScaleAbs64f8u, 0
+    };
 
-static BinaryFunc cvtScaleTab[][8] =
-{
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvtScale8u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8u),
-        (BinaryFunc)GET_OPTIMIZED(cvtScale16s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8u),
-        (BinaryFunc)cvtScale64f8u, 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvtScale8u8s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8s),
-        (BinaryFunc)GET_OPTIMIZED(cvtScale16s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8s),
-        (BinaryFunc)cvtScale64f8s, 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvtScale8u16u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u),
-        (BinaryFunc)GET_OPTIMIZED(cvtScale16s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16u),
-        (BinaryFunc)cvtScale64f16u, 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvtScale8u16s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u16s),
-        (BinaryFunc)GET_OPTIMIZED(cvtScale16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16s),
-        (BinaryFunc)cvtScale64f16s, 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvtScale8u32s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32s),
-        (BinaryFunc)GET_OPTIMIZED(cvtScale16s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f32s),
-        (BinaryFunc)cvtScale64f32s, 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvtScale8u32f), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32f),
-        (BinaryFunc)GET_OPTIMIZED(cvtScale16s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32f),
-        (BinaryFunc)cvtScale64f32f, 0
-    },
-    {
-        (BinaryFunc)cvtScale8u64f, (BinaryFunc)cvtScale8s64f, (BinaryFunc)cvtScale16u64f,
-        (BinaryFunc)cvtScale16s64f, (BinaryFunc)cvtScale32s64f, (BinaryFunc)cvtScale32f64f,
-        (BinaryFunc)cvtScale64f, 0
-    },
-    {
-        0, 0, 0, 0, 0, 0, 0, 0
-    }
-};
-
-static BinaryFunc cvtTab[][8] =
-{
-    {
-        (BinaryFunc)(cvt8u), (BinaryFunc)GET_OPTIMIZED(cvt8s8u), (BinaryFunc)GET_OPTIMIZED(cvt16u8u),
-        (BinaryFunc)GET_OPTIMIZED(cvt16s8u), (BinaryFunc)GET_OPTIMIZED(cvt32s8u), (BinaryFunc)GET_OPTIMIZED(cvt32f8u),
-        (BinaryFunc)GET_OPTIMIZED(cvt64f8u), 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvt8u8s), (BinaryFunc)cvt8u, (BinaryFunc)GET_OPTIMIZED(cvt16u8s),
-        (BinaryFunc)GET_OPTIMIZED(cvt16s8s), (BinaryFunc)GET_OPTIMIZED(cvt32s8s), (BinaryFunc)GET_OPTIMIZED(cvt32f8s),
-        (BinaryFunc)GET_OPTIMIZED(cvt64f8s), 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvt8u16u), (BinaryFunc)GET_OPTIMIZED(cvt8s16u), (BinaryFunc)cvt16u,
-        (BinaryFunc)GET_OPTIMIZED(cvt16s16u), (BinaryFunc)GET_OPTIMIZED(cvt32s16u), (BinaryFunc)GET_OPTIMIZED(cvt32f16u),
-        (BinaryFunc)GET_OPTIMIZED(cvt64f16u), 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvt8u16s), (BinaryFunc)GET_OPTIMIZED(cvt8s16s), (BinaryFunc)GET_OPTIMIZED(cvt16u16s),
-        (BinaryFunc)cvt16u, (BinaryFunc)GET_OPTIMIZED(cvt32s16s), (BinaryFunc)GET_OPTIMIZED(cvt32f16s),
-        (BinaryFunc)GET_OPTIMIZED(cvt64f16s), 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvt8u32s), (BinaryFunc)GET_OPTIMIZED(cvt8s32s), (BinaryFunc)GET_OPTIMIZED(cvt16u32s),
-        (BinaryFunc)GET_OPTIMIZED(cvt16s32s), (BinaryFunc)cvt32s, (BinaryFunc)GET_OPTIMIZED(cvt32f32s),
-        (BinaryFunc)GET_OPTIMIZED(cvt64f32s), 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvt8u32f), (BinaryFunc)GET_OPTIMIZED(cvt8s32f), (BinaryFunc)GET_OPTIMIZED(cvt16u32f),
-        (BinaryFunc)GET_OPTIMIZED(cvt16s32f), (BinaryFunc)GET_OPTIMIZED(cvt32s32f), (BinaryFunc)cvt32s,
-        (BinaryFunc)GET_OPTIMIZED(cvt64f32f), 0
-    },
-    {
-        (BinaryFunc)GET_OPTIMIZED(cvt8u64f), (BinaryFunc)GET_OPTIMIZED(cvt8s64f), (BinaryFunc)GET_OPTIMIZED(cvt16u64f),
-        (BinaryFunc)GET_OPTIMIZED(cvt16s64f), (BinaryFunc)GET_OPTIMIZED(cvt32s64f), (BinaryFunc)GET_OPTIMIZED(cvt32f64f),
-        (BinaryFunc)(cvt64s), 0
-    },
-    {
-        0, 0, 0, 0, 0, 0, 0, 0
-    }
-};
+    return cvtScaleAbsTab[depth];
+}
 
 BinaryFunc getConvertFunc(int sdepth, int ddepth)
 {
+    static BinaryFunc cvtTab[][8] =
+    {
+        {
+            (BinaryFunc)(cvt8u), (BinaryFunc)GET_OPTIMIZED(cvt8s8u), (BinaryFunc)GET_OPTIMIZED(cvt16u8u),
+            (BinaryFunc)GET_OPTIMIZED(cvt16s8u), (BinaryFunc)GET_OPTIMIZED(cvt32s8u), (BinaryFunc)GET_OPTIMIZED(cvt32f8u),
+            (BinaryFunc)GET_OPTIMIZED(cvt64f8u), 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvt8u8s), (BinaryFunc)cvt8u, (BinaryFunc)GET_OPTIMIZED(cvt16u8s),
+            (BinaryFunc)GET_OPTIMIZED(cvt16s8s), (BinaryFunc)GET_OPTIMIZED(cvt32s8s), (BinaryFunc)GET_OPTIMIZED(cvt32f8s),
+            (BinaryFunc)GET_OPTIMIZED(cvt64f8s), 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvt8u16u), (BinaryFunc)GET_OPTIMIZED(cvt8s16u), (BinaryFunc)cvt16u,
+            (BinaryFunc)GET_OPTIMIZED(cvt16s16u), (BinaryFunc)GET_OPTIMIZED(cvt32s16u), (BinaryFunc)GET_OPTIMIZED(cvt32f16u),
+            (BinaryFunc)GET_OPTIMIZED(cvt64f16u), 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvt8u16s), (BinaryFunc)GET_OPTIMIZED(cvt8s16s), (BinaryFunc)GET_OPTIMIZED(cvt16u16s),
+            (BinaryFunc)cvt16u, (BinaryFunc)GET_OPTIMIZED(cvt32s16s), (BinaryFunc)GET_OPTIMIZED(cvt32f16s),
+            (BinaryFunc)GET_OPTIMIZED(cvt64f16s), 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvt8u32s), (BinaryFunc)GET_OPTIMIZED(cvt8s32s), (BinaryFunc)GET_OPTIMIZED(cvt16u32s),
+            (BinaryFunc)GET_OPTIMIZED(cvt16s32s), (BinaryFunc)cvt32s, (BinaryFunc)GET_OPTIMIZED(cvt32f32s),
+            (BinaryFunc)GET_OPTIMIZED(cvt64f32s), 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvt8u32f), (BinaryFunc)GET_OPTIMIZED(cvt8s32f), (BinaryFunc)GET_OPTIMIZED(cvt16u32f),
+            (BinaryFunc)GET_OPTIMIZED(cvt16s32f), (BinaryFunc)GET_OPTIMIZED(cvt32s32f), (BinaryFunc)cvt32s,
+            (BinaryFunc)GET_OPTIMIZED(cvt64f32f), 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvt8u64f), (BinaryFunc)GET_OPTIMIZED(cvt8s64f), (BinaryFunc)GET_OPTIMIZED(cvt16u64f),
+            (BinaryFunc)GET_OPTIMIZED(cvt16s64f), (BinaryFunc)GET_OPTIMIZED(cvt32s64f), (BinaryFunc)GET_OPTIMIZED(cvt32f64f),
+            (BinaryFunc)(cvt64s), 0
+        },
+        {
+            0, 0, 0, 0, 0, 0, 0, 0
+        }
+    };
+
     return cvtTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
 }
 
 static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
 {
+    static BinaryFunc cvtScaleTab[][8] =
+    {
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvtScale8u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8u),
+            (BinaryFunc)GET_OPTIMIZED(cvtScale16s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8u),
+            (BinaryFunc)cvtScale64f8u, 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvtScale8u8s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8s),
+            (BinaryFunc)GET_OPTIMIZED(cvtScale16s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8s),
+            (BinaryFunc)cvtScale64f8s, 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvtScale8u16u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u),
+            (BinaryFunc)GET_OPTIMIZED(cvtScale16s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16u),
+            (BinaryFunc)cvtScale64f16u, 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvtScale8u16s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u16s),
+            (BinaryFunc)GET_OPTIMIZED(cvtScale16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16s),
+            (BinaryFunc)cvtScale64f16s, 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvtScale8u32s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32s),
+            (BinaryFunc)GET_OPTIMIZED(cvtScale16s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f32s),
+            (BinaryFunc)cvtScale64f32s, 0
+        },
+        {
+            (BinaryFunc)GET_OPTIMIZED(cvtScale8u32f), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32f),
+            (BinaryFunc)GET_OPTIMIZED(cvtScale16s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32f),
+            (BinaryFunc)cvtScale64f32f, 0
+        },
+        {
+            (BinaryFunc)cvtScale8u64f, (BinaryFunc)cvtScale8s64f, (BinaryFunc)cvtScale16u64f,
+            (BinaryFunc)cvtScale16s64f, (BinaryFunc)cvtScale32s64f, (BinaryFunc)cvtScale32f64f,
+            (BinaryFunc)cvtScale64f, 0
+        },
+        {
+            0, 0, 0, 0, 0, 0, 0, 0
+        }
+    };
+
     return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
 }
 
@@ -1057,7 +1077,7 @@ void cv::convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, doubl
     double scale[] = {alpha, beta};
     _dst.create( src.dims, src.size, CV_8UC(cn) );
     Mat dst = _dst.getMat();
-    BinaryFunc func = cvtScaleAbsTab[src.depth()];
+    BinaryFunc func = getCvtScaleAbsFunc(src.depth());
     CV_Assert( func != 0 );
 
     if( src.dims <= 2 )
@@ -1376,4 +1396,4 @@ CV_IMPL void cvNormalize( const CvArr* srcarr, CvArr* dstarr,
     cv::normalize( src, dst, a, b, norm_type, dst.type(), mask );
 }
 
-/* End of file. */
\ No newline at end of file
+/* End of file. */
diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu
new file mode 100644
index 000000000..0db158421
--- /dev/null
+++ b/modules/core/src/cuda/gpu_mat.cu
@@ -0,0 +1,486 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifndef HAVE_OPENCV_CUDEV
+
+#error "opencv_cudev is required"
+
+#else
+
+#include "opencv2/core/gpu.hpp"
+#include "opencv2/cudev.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+
+/////////////////////////////////////////////////////
+/// create
+
+void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
+{
+    CV_DbgAssert( _rows >= 0 && _cols >= 0 );
+
+    _type &= Mat::TYPE_MASK;
+
+    if (rows == _rows && cols == _cols && type() == _type && data)
+        return;
+
+    if (data)
+        release();
+
+    if (_rows > 0 && _cols > 0)
+    {
+        flags = Mat::MAGIC_VAL + _type;
+        rows = _rows;
+        cols = _cols;
+
+        size_t esz = elemSize();
+
+        void* devPtr;
+
+        if (rows > 1 && cols > 1)
+        {
+            CV_CUDEV_SAFE_CALL( cudaMallocPitch(&devPtr, &step, esz * cols, rows) );
+        }
+        else
+        {
+            // Single row or single column must be continuous
+            CV_CUDEV_SAFE_CALL( cudaMalloc(&devPtr, esz * cols * rows) );
+            step = esz * cols;
+        }
+
+        if (esz * cols == step)
+            flags |= Mat::CONTINUOUS_FLAG;
+
+        int64 _nettosize = static_cast<int64>(step) * rows;
+        size_t nettosize = static_cast<size_t>(_nettosize);
+
+        datastart = data = static_cast<uchar*>(devPtr);
+        dataend = data + nettosize;
+
+        refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
+        *refcount = 1;
+    }
+}
+
+/////////////////////////////////////////////////////
+/// release
+
+void cv::gpu::GpuMat::release()
+{
+    if (refcount && CV_XADD(refcount, -1) == 1)
+    {
+        cudaFree(datastart);
+        fastFree(refcount);
+    }
+
+    data = datastart = dataend = 0;
+    step = rows = cols = 0;
+    refcount = 0;
+}
+
+/////////////////////////////////////////////////////
+/// upload
+
+void cv::gpu::GpuMat::upload(InputArray arr)
+{
+    Mat mat = arr.getMat();
+
+    CV_DbgAssert( !mat.empty() );
+
+    create(mat.size(), mat.type());
+
+    CV_CUDEV_SAFE_CALL( cudaMemcpy2D(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
+}
+
+void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream)
+{
+    Mat mat = arr.getMat();
+
+    CV_DbgAssert( !mat.empty() );
+
+    create(mat.size(), mat.type());
+
+    cudaStream_t stream = StreamAccessor::getStream(_stream);
+    CV_CUDEV_SAFE_CALL( cudaMemcpy2DAsync(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice, stream) );
+}
+
+/////////////////////////////////////////////////////
+/// download
+
+void cv::gpu::GpuMat::download(OutputArray _dst) const
+{
+    CV_DbgAssert( !empty() );
+
+    _dst.create(size(), type());
+    Mat dst = _dst.getMat();
+
+    CV_CUDEV_SAFE_CALL( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
+}
+
+void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const
+{
+    CV_DbgAssert( !empty() );
+
+    _dst.create(size(), type());
+    Mat dst = _dst.getMat();
+
+    cudaStream_t stream = StreamAccessor::getStream(_stream);
+    CV_CUDEV_SAFE_CALL( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost, stream) );
+}
+
+/////////////////////////////////////////////////////
+/// copyTo
+
+void cv::gpu::GpuMat::copyTo(OutputArray _dst) const
+{
+    CV_DbgAssert( !empty() );
+
+    _dst.create(size(), type());
+    GpuMat dst = _dst.getGpuMat();
+
+    CV_CUDEV_SAFE_CALL( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) );
+}
+
+void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
+{
+    CV_DbgAssert( !empty() );
+
+    _dst.create(size(), type());
+    GpuMat dst = _dst.getGpuMat();
+
+    cudaStream_t stream = StreamAccessor::getStream(_stream);
+    CV_CUDEV_SAFE_CALL( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice, stream) );
+}
+
+namespace
+{
+    template <size_t size> struct CopyToPolicy : DefaultTransformPolicy
+    {
+    };
+    template <> struct CopyToPolicy<4> : DefaultTransformPolicy
+    {
+        enum {
+            shift = 2
+        };
+    };
+    template <> struct CopyToPolicy<8> : DefaultTransformPolicy
+    {
+        enum {
+            shift = 1
+        };
+    };
+
+    template <typename T>
+    void copyWithMask(const GpuMat& src, const GpuMat& dst, const GpuMat& mask, Stream& stream)
+    {
+        gridTransform_< CopyToPolicy<sizeof(typename VecTraits<T>::elem_type)> >(globPtr<T>(src), globPtr<T>(dst), identity<T>(), globPtr<uchar>(mask), stream);
+    }
+}
+
+void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& stream) const
+{
+    CV_DbgAssert( !empty() );
+    CV_DbgAssert( depth() <= CV_64F && channels() <= 4 );
+
+    GpuMat mask = _mask.getGpuMat();
+    CV_DbgAssert( size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == channels()) );
+
+    _dst.create(size(), type());
+    GpuMat dst = _dst.getGpuMat();
+
+    typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, const GpuMat& mask, Stream& stream);
+    static const func_t funcs[9][4] =
+    {
+        {0,0,0,0},
+        {copyWithMask<uchar>, copyWithMask<uchar2>, copyWithMask<uchar3>, copyWithMask<uchar4>},
+        {copyWithMask<ushort>, copyWithMask<ushort2>, copyWithMask<ushort3>, copyWithMask<ushort4>},
+        {0,0,0,0},
+        {copyWithMask<int>, copyWithMask<int2>, copyWithMask<int3>, copyWithMask<int4>},
+        {0,0,0,0},
+        {0,0,0,0},
+        {0,0,0,0},
+        {copyWithMask<double>, copyWithMask<double2>, copyWithMask<double3>, copyWithMask<double4>}
+    };
+
+    if (mask.channels() == channels())
+    {
+        const func_t func = funcs[elemSize1()][0];
+        CV_DbgAssert( func != 0 );
+        func(reshape(1), dst.reshape(1), mask.reshape(1), stream);
+    }
+    else
+    {
+        const func_t func = funcs[elemSize1()][channels() - 1];
+        CV_DbgAssert( func != 0 );
+        func(*this, dst, mask, stream);
+    }
+}
+
+/////////////////////////////////////////////////////
+/// setTo
+
+namespace
+{
+    template <typename T>
+    void setToWithOutMask(const GpuMat& mat, Scalar _scalar, Stream& stream)
+    {
+        Scalar_<typename VecTraits<T>::elem_type> scalar = _scalar;
+        gridTransform(constantPtr(VecTraits<T>::make(scalar.val), mat.rows, mat.cols), globPtr<T>(mat), identity<T>(), stream);
+    }
+
+    template <typename T>
+    void setToWithMask(const GpuMat& mat, const GpuMat& mask, Scalar _scalar, Stream& stream)
+    {
+        Scalar_<typename VecTraits<T>::elem_type> scalar = _scalar;
+        gridTransform(constantPtr(VecTraits<T>::make(scalar.val), mat.rows, mat.cols), globPtr<T>(mat), identity<T>(), globPtr<uchar>(mask), stream);
+    }
+}
+
+GpuMat& cv::gpu::GpuMat::setTo(Scalar value, Stream& stream)
+{
+    CV_DbgAssert( !empty() );
+    CV_DbgAssert( depth() <= CV_64F && channels() <= 4 );
+
+    if (value[0] == 0.0 && value[1] == 0.0 && value[2] == 0.0 && value[3] == 0.0)
+    {
+        // Zero fill
+
+        if (stream)
+            CV_CUDEV_SAFE_CALL( cudaMemset2DAsync(data, step, 0, cols * elemSize(), rows, StreamAccessor::getStream(stream)) );
+        else
+            CV_CUDEV_SAFE_CALL( cudaMemset2D(data, step, 0, cols * elemSize(), rows) );
+
+        return *this;
+    }
+
+    if (depth() == CV_8U)
+    {
+        const int cn = channels();
+
+        if (cn == 1
+                || (cn == 2 && value[0] == value[1])
+                || (cn == 3 && value[0] == value[1] && value[0] == value[2])
+                || (cn == 4 && value[0] == value[1] && value[0] == value[2] && value[0] == value[3]))
+        {
+            const int val = cv::saturate_cast<uchar>(value[0]);
+
+            if (stream)
+                CV_CUDEV_SAFE_CALL( cudaMemset2DAsync(data, step, val, cols * elemSize(), rows, StreamAccessor::getStream(stream)) );
+            else
+                CV_CUDEV_SAFE_CALL( cudaMemset2D(data, step, val, cols * elemSize(), rows) );
+
+            return *this;
+        }
+    }
+
+    typedef void (*func_t)(const GpuMat& mat, Scalar scalar, Stream& stream);
+    static const func_t funcs[7][4] =
+    {
+        {setToWithOutMask<uchar>,setToWithOutMask<uchar2>,setToWithOutMask<uchar3>,setToWithOutMask<uchar4>},
+        {setToWithOutMask<schar>,setToWithOutMask<char2>,setToWithOutMask<char3>,setToWithOutMask<char4>},
+        {setToWithOutMask<ushort>,setToWithOutMask<ushort2>,setToWithOutMask<ushort3>,setToWithOutMask<ushort4>},
+        {setToWithOutMask<short>,setToWithOutMask<short2>,setToWithOutMask<short3>,setToWithOutMask<short4>},
+        {setToWithOutMask<int>,setToWithOutMask<int2>,setToWithOutMask<int3>,setToWithOutMask<int4>},
+        {setToWithOutMask<float>,setToWithOutMask<float2>,setToWithOutMask<float3>,setToWithOutMask<float4>},
+        {setToWithOutMask<double>,setToWithOutMask<double2>,setToWithOutMask<double3>,setToWithOutMask<double4>}
+    };
+
+    funcs[depth()][channels() - 1](*this, value, stream);
+
+    return *this;
+}
+
+GpuMat& cv::gpu::GpuMat::setTo(Scalar value, InputArray _mask, Stream& stream)
+{
+    CV_DbgAssert( !empty() );
+    CV_DbgAssert( depth() <= CV_64F && channels() <= 4 );
+
+    GpuMat mask = _mask.getGpuMat();
+
+    CV_DbgAssert( size() == mask.size() && mask.type() == CV_8UC1 );
+
+    typedef void (*func_t)(const GpuMat& mat, const GpuMat& mask, Scalar scalar, Stream& stream);
+    static const func_t funcs[7][4] =
+    {
+        {setToWithMask<uchar>,setToWithMask<uchar2>,setToWithMask<uchar3>,setToWithMask<uchar4>},
+        {setToWithMask<schar>,setToWithMask<char2>,setToWithMask<char3>,setToWithMask<char4>},
+        {setToWithMask<ushort>,setToWithMask<ushort2>,setToWithMask<ushort3>,setToWithMask<ushort4>},
+        {setToWithMask<short>,setToWithMask<short2>,setToWithMask<short3>,setToWithMask<short4>},
+        {setToWithMask<int>,setToWithMask<int2>,setToWithMask<int3>,setToWithMask<int4>},
+        {setToWithMask<float>,setToWithMask<float2>,setToWithMask<float3>,setToWithMask<float4>},
+        {setToWithMask<double>,setToWithMask<double2>,setToWithMask<double3>,setToWithMask<double4>}
+    };
+
+    funcs[depth()][channels() - 1](*this, mask, value, stream);
+
+    return *this;
+}
+
+/////////////////////////////////////////////////////
+/// convertTo
+
+namespace
+{
+    template <typename T> struct ConvertToPolicy : DefaultTransformPolicy
+    {
+    };
+    template <> struct ConvertToPolicy<double> : DefaultTransformPolicy
+    {
+        enum {
+            shift = 1
+        };
+    };
+
+    template <typename T, typename D>
+    void convertToNoScale(const GpuMat& src, const GpuMat& dst, Stream& stream)
+    {
+        typedef typename VecTraits<T>::elem_type src_elem_type;
+        typedef typename VecTraits<D>::elem_type dst_elem_type;
+        typedef typename LargerType<src_elem_type, float>::type larger_elem_type;
+        typedef typename LargerType<float, dst_elem_type>::type scalar_type;
+
+        gridTransform_< ConvertToPolicy<scalar_type> >(globPtr<T>(src), globPtr<D>(dst), saturate_cast_func<T, D>(), stream);
+    }
+
+    template <typename T, typename D, typename S> struct Convertor : unary_function<T, D>
+    {
+        S alpha;
+        S beta;
+
+        __device__ __forceinline__ D operator ()(typename TypeTraits<T>::parameter_type src) const
+        {
+            return cudev::saturate_cast<D>(alpha * src + beta);
+        }
+    };
+
+    template <typename T, typename D>
+    void convertToScale(const GpuMat& src, const GpuMat& dst, double alpha, double beta, Stream& stream)
+    {
+        typedef typename VecTraits<T>::elem_type src_elem_type;
+        typedef typename VecTraits<D>::elem_type dst_elem_type;
+        typedef typename LargerType<src_elem_type, float>::type larger_elem_type;
+        typedef typename LargerType<float, dst_elem_type>::type scalar_type;
+
+        Convertor<T, D, scalar_type> op;
+        op.alpha = cv::saturate_cast<scalar_type>(alpha);
+        op.beta = cv::saturate_cast<scalar_type>(beta);
+
+        gridTransform_< ConvertToPolicy<scalar_type> >(globPtr<T>(src), globPtr<D>(dst), op, stream);
+    }
+}
+
+void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) const
+{
+    if (rtype < 0)
+        rtype = type();
+    else
+        rtype = CV_MAKE_TYPE(CV_MAT_DEPTH(rtype), channels());
+
+    const int sdepth = depth();
+    const int ddepth = CV_MAT_DEPTH(rtype);
+    if (sdepth == ddepth)
+    {
+        if (stream)
+            copyTo(_dst, stream);
+        else
+            copyTo(_dst);
+
+        return;
+    }
+
+    CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F );
+
+    GpuMat src = *this;
+
+    _dst.create(size(), rtype);
+    GpuMat dst = _dst.getGpuMat();
+
+    typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream);
+    static const func_t funcs[7][7] =
+    {
+        {0, convertToNoScale<uchar, schar>, convertToNoScale<uchar, ushort>, convertToNoScale<uchar, short>, convertToNoScale<uchar, int>, convertToNoScale<uchar, float>, convertToNoScale<uchar, double>},
+        {convertToNoScale<schar, uchar>, 0, convertToNoScale<schar, ushort>, convertToNoScale<schar, short>, convertToNoScale<schar, int>, convertToNoScale<schar, float>, convertToNoScale<schar, double>},
+        {convertToNoScale<ushort, uchar>, convertToNoScale<ushort, schar>, 0, convertToNoScale<ushort, short>, convertToNoScale<ushort, int>, convertToNoScale<ushort, float>, convertToNoScale<ushort, double>},
+        {convertToNoScale<short, uchar>, convertToNoScale<short, schar>, convertToNoScale<short, ushort>, 0, convertToNoScale<short, int>, convertToNoScale<short, float>, convertToNoScale<short, double>},
+        {convertToNoScale<int, uchar>, convertToNoScale<int, schar>, convertToNoScale<int, ushort>, convertToNoScale<int, short>, 0, convertToNoScale<int, float>, convertToNoScale<int, double>},
+        {convertToNoScale<float, uchar>, convertToNoScale<float, schar>, convertToNoScale<float, ushort>, convertToNoScale<float, short>, convertToNoScale<float, int>, 0, convertToNoScale<float, double>},
+        {convertToNoScale<double, uchar>, convertToNoScale<double, schar>, convertToNoScale<double, ushort>, convertToNoScale<double, short>, convertToNoScale<double, int>, convertToNoScale<double, float>, 0}
+    };
+
+    funcs[sdepth][ddepth](reshape(1), dst.reshape(1), stream);
+}
+
+void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& stream) const
+{
+    if (rtype < 0)
+        rtype = type();
+    else
+        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
+
+    const int sdepth = depth();
+    const int ddepth = CV_MAT_DEPTH(rtype);
+
+    GpuMat src = *this;
+
+    _dst.create(size(), rtype);
+    GpuMat dst = _dst.getGpuMat();
+
+    typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, double alpha, double beta, Stream& stream);
+    static const func_t funcs[7][7] =
+    {
+        {convertToScale<uchar, uchar>, convertToScale<uchar, schar>, convertToScale<uchar, ushort>, convertToScale<uchar, short>, convertToScale<uchar, int>, convertToScale<uchar, float>, convertToScale<uchar, double>},
+        {convertToScale<schar, uchar>, convertToScale<schar, schar>, convertToScale<schar, ushort>, convertToScale<schar, short>, convertToScale<schar, int>, convertToScale<schar, float>, convertToScale<schar, double>},
+        {convertToScale<ushort, uchar>, convertToScale<ushort, schar>, convertToScale<ushort, ushort>, convertToScale<ushort, short>, convertToScale<ushort, int>, convertToScale<ushort, float>, convertToScale<ushort, double>},
+        {convertToScale<short, uchar>, convertToScale<short, schar>, convertToScale<short, ushort>, convertToScale<short, short>, convertToScale<short, int>, convertToScale<short, float>, convertToScale<short, double>},
+        {convertToScale<int, uchar>, convertToScale<int, schar>, convertToScale<int, ushort>, convertToScale<int, short>, convertToScale<int, int>, convertToScale<int, float>, convertToScale<int, double>},
+        {convertToScale<float, uchar>, convertToScale<float, schar>, convertToScale<float, ushort>, convertToScale<float, short>, convertToScale<float, int>, convertToScale<float, float>, convertToScale<float, double>},
+        {convertToScale<double, uchar>, convertToScale<double, schar>, convertToScale<double, ushort>, convertToScale<double, short>, convertToScale<double, int>, convertToScale<double, float>, convertToScale<double, double>}
+    };
+
+    funcs[sdepth][ddepth](reshape(1), dst.reshape(1), alpha, beta, stream);
+}
+
+#endif
diff --git a/modules/core/src/cuda/matrix_operations.cu b/modules/core/src/cuda/matrix_operations.cu
deleted file mode 100644
index 7de5205ec..000000000
--- a/modules/core/src/cuda/matrix_operations.cu
+++ /dev/null
@@ -1,296 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "opencv2/core/cuda/saturate_cast.hpp"
-#include "opencv2/core/cuda/transform.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/type_traits.hpp"
-#include "opencv2/core/cuda/vec_traits.hpp"
-
-#include "matrix_operations.hpp"
-
-namespace cv { namespace gpu { namespace cudev
-{
-    ///////////////////////////////////////////////////////////////////////////
-    // copyWithMask
-
-    template <typename T>
-    void copyWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
-    {
-        if (multiChannelMask)
-            cv::gpu::cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, identity<T>(), SingleMask(mask), stream);
-        else
-            cv::gpu::cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
-    }
-
-    void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
-    {
-        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);
-
-        static const func_t tab[] =
-        {
-            0,
-            copyWithMask<uchar>,
-            copyWithMask<ushort>,
-            0,
-            copyWithMask<int>,
-            0,
-            0,
-            0,
-            copyWithMask<double>
-        };
-
-        const func_t func = tab[elemSize1];
-        CV_DbgAssert( func != 0 );
-
-        func(src, dst, cn, mask, multiChannelMask, stream);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // set
-
-    template<typename T, class Mask>
-    __global__ void set(PtrStepSz<T> mat, const Mask mask, const int channels, const typename TypeVec<T, 4>::vec_type value)
-    {
-        const int x = blockIdx.x * blockDim.x + threadIdx.x;
-        const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-        if (x >= mat.cols * channels || y >= mat.rows)
-            return;
-
-        const T scalar[4] = {value.x, value.y, value.z, value.w};
-
-        if (mask(y, x / channels))
-            mat(y, x) = scalar[x % channels];
-    }
-
-    template <typename T>
-    void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream)
-    {
-        typedef typename TypeVec<T, 4>::vec_type scalar_t;
-
-        dim3 block(32, 8);
-        dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y));
-
-        set<T><<<grid, block, 0, stream>>>(mat, WithOutMask(), channels, VecTraits<scalar_t>::make(scalar));
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall ( cudaDeviceSynchronize() );
-    }
-
-    template void set<uchar >(PtrStepSz<uchar > mat, const uchar*  scalar, int channels, cudaStream_t stream);
-    template void set<schar >(PtrStepSz<schar > mat, const schar*  scalar, int channels, cudaStream_t stream);
-    template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, int channels, cudaStream_t stream);
-    template void set<short >(PtrStepSz<short > mat, const short*  scalar, int channels, cudaStream_t stream);
-    template void set<int   >(PtrStepSz<int   > mat, const int*    scalar, int channels, cudaStream_t stream);
-    template void set<float >(PtrStepSz<float > mat, const float*  scalar, int channels, cudaStream_t stream);
-    template void set<double>(PtrStepSz<double> mat, const double* scalar, int channels, cudaStream_t stream);
-
-    template <typename T>
-    void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
-    {
-        typedef typename TypeVec<T, 4>::vec_type scalar_t;
-
-        dim3 block(32, 8);
-        dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y));
-
-        set<T><<<grid, block, 0, stream>>>(mat, SingleMask(mask), channels, VecTraits<scalar_t>::make(scalar));
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall ( cudaDeviceSynchronize() );
-    }
-
-    template void set<uchar >(PtrStepSz<uchar > mat, const uchar*  scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-    template void set<schar >(PtrStepSz<schar > mat, const schar*  scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-    template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-    template void set<short >(PtrStepSz<short > mat, const short*  scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-    template void set<int   >(PtrStepSz<int   > mat, const int*    scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-    template void set<float >(PtrStepSz<float > mat, const float*  scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-    template void set<double>(PtrStepSz<double> mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-
-    ///////////////////////////////////////////////////////////////////////////
-    // convert
-
-    template <typename T, typename D, typename S> struct Convertor : unary_function<T, D>
-    {
-        Convertor(S alpha_, S beta_) : alpha(alpha_), beta(beta_) {}
-
-        __device__ __forceinline__ D operator()(typename TypeTraits<T>::ParameterType src) const
-        {
-            return saturate_cast<D>(alpha * src + beta);
-        }
-
-        S alpha, beta;
-    };
-
-    namespace detail
-    {
-        template <size_t src_size, size_t dst_size, typename F> struct ConvertTraitsDispatcher : DefaultTransformFunctorTraits<F>
-        {
-        };
-        template <typename F> struct ConvertTraitsDispatcher<1, 1, F> : DefaultTransformFunctorTraits<F>
-        {
-            enum { smart_shift = 8 };
-        };
-        template <typename F> struct ConvertTraitsDispatcher<1, 2, F> : DefaultTransformFunctorTraits<F>
-        {
-            enum { smart_shift = 4 };
-        };
-        template <typename F> struct ConvertTraitsDispatcher<1, 4, F> : DefaultTransformFunctorTraits<F>
-        {
-            enum { smart_block_dim_y = 8 };
-            enum { smart_shift = 4 };
-        };
-
-        template <typename F> struct ConvertTraitsDispatcher<2, 2, F> : DefaultTransformFunctorTraits<F>
-        {
-            enum { smart_shift = 4 };
-        };
-        template <typename F> struct ConvertTraitsDispatcher<2, 4, F> : DefaultTransformFunctorTraits<F>
-        {
-            enum { smart_shift = 2 };
-        };
-
-        template <typename F> struct ConvertTraitsDispatcher<4, 2, F> : DefaultTransformFunctorTraits<F>
-        {
-            enum { smart_block_dim_y = 8 };
-            enum { smart_shift = 4 };
-        };
-        template <typename F> struct ConvertTraitsDispatcher<4, 4, F> : DefaultTransformFunctorTraits<F>
-        {
-            enum { smart_block_dim_y = 8 };
-            enum { smart_shift = 2 };
-        };
-
-        template <typename F> struct ConvertTraits : ConvertTraitsDispatcher<sizeof(typename F::argument_type), sizeof(typename F::result_type), F>
-        {
-        };
-    }
-
-    template <typename T, typename D, typename S> struct TransformFunctorTraits< Convertor<T, D, S> > : detail::ConvertTraits< Convertor<T, D, S> >
-    {
-    };
-
-    template<typename T, typename D, typename S>
-    void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
-    {
-        Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
-        cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
-    }
-
-    void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
-    {
-        typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream);
-
-        static const caller_t tab[7][7] =
-        {
-            {
-                cvt_<uchar, uchar, float>,
-                cvt_<uchar, schar, float>,
-                cvt_<uchar, ushort, float>,
-                cvt_<uchar, short, float>,
-                cvt_<uchar, int, float>,
-                cvt_<uchar, float, float>,
-                cvt_<uchar, double, double>
-            },
-            {
-                cvt_<schar, uchar, float>,
-                cvt_<schar, schar, float>,
-                cvt_<schar, ushort, float>,
-                cvt_<schar, short, float>,
-                cvt_<schar, int, float>,
-                cvt_<schar, float, float>,
-                cvt_<schar, double, double>
-            },
-            {
-                cvt_<ushort, uchar, float>,
-                cvt_<ushort, schar, float>,
-                cvt_<ushort, ushort, float>,
-                cvt_<ushort, short, float>,
-                cvt_<ushort, int, float>,
-                cvt_<ushort, float, float>,
-                cvt_<ushort, double, double>
-            },
-            {
-                cvt_<short, uchar, float>,
-                cvt_<short, schar, float>,
-                cvt_<short, ushort, float>,
-                cvt_<short, short, float>,
-                cvt_<short, int, float>,
-                cvt_<short, float, float>,
-                cvt_<short, double, double>
-            },
-            {
-                cvt_<int, uchar, float>,
-                cvt_<int, schar, float>,
-                cvt_<int, ushort, float>,
-                cvt_<int, short, float>,
-                cvt_<int, int, double>,
-                cvt_<int, float, double>,
-                cvt_<int, double, double>
-            },
-            {
-                cvt_<float, uchar, float>,
-                cvt_<float, schar, float>,
-                cvt_<float, ushort, float>,
-                cvt_<float, short, float>,
-                cvt_<float, int, float>,
-                cvt_<float, float, float>,
-                cvt_<float, double, double>
-            },
-            {
-                cvt_<double, uchar, double>,
-                cvt_<double, schar, double>,
-                cvt_<double, ushort, double>,
-                cvt_<double, short, double>,
-                cvt_<double, int, double>,
-                cvt_<double, float, double>,
-                cvt_<double, double, double>
-            }
-        };
-
-        const caller_t func = tab[sdepth][ddepth];
-        func(src, dst, alpha, beta, stream);
-    }
-}}} // namespace cv { namespace gpu { namespace cudev
diff --git a/modules/core/src/drawing.cpp b/modules/core/src/drawing.cpp
index 0e89143c3..5cc498256 100644
--- a/modules/core/src/drawing.cpp
+++ b/modules/core/src/drawing.cpp
@@ -886,12 +886,14 @@ void ellipse2Poly( Point center, Size axes, int angle,
         Point pt;
         pt.x = cvRound( cx + x * alpha - y * beta );
         pt.y = cvRound( cy + x * beta + y * alpha );
-        if( pt != prevPt )
+        if( pt != prevPt ){
             pts.push_back(pt);
+            prevPt = pt;
+        }
     }
 
     // If there are no points, it's a zero-size polygon
-    if( pts.size() < 2) {
+    if( pts.size() == 1) {
         pts.assign(2,center);
     }
 }
diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp
index b3c2b833b..a802868df 100644
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@@ -50,6 +50,13 @@ namespace cv
 # pragma warning(disable: 4748)
 #endif
 
+#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7
+#define USE_IPP_DFT 1
+#else
+#undef USE_IPP_DFT
+#endif
+
+
 /****************************************************************************************\
                                Discrete Fourier Transform
 \****************************************************************************************/
@@ -455,7 +462,7 @@ template<> struct DFT_VecR4<float>
 
 #endif
 
-#ifdef HAVE_IPP
+#ifdef USE_IPP_DFT
 static void ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
                              const void* spec, uchar* buf)
 {
@@ -517,7 +524,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
      int nf, const int* factors, const int* itab,
      const Complex<T>* wave, int tab_size,
      const void*
-#ifdef HAVE_IPP
+#ifdef USE_IPP_DFT
      spec
 #endif
      , Complex<T>* buf,
@@ -537,7 +544,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
     T scale = (T)_scale;
     int tab_step;
 
-#ifdef HAVE_IPP
+#ifdef USE_IPP_DFT
     if( spec )
     {
         if( !inv )
@@ -957,7 +964,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
 template<typename T> static void
 RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
          const Complex<T>* wave, int tab_size, const void*
-#ifdef HAVE_IPP
+#ifdef USE_IPP_DFT
          spec
 #endif
          ,
@@ -968,11 +975,18 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
     int j, n2 = n >> 1;
     dst += complex_output;
 
-#ifdef HAVE_IPP
+#ifdef USE_IPP_DFT
     if( spec )
     {
         ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf );
-        goto finalize;
+        if( complex_output )
+        {
+            dst[-1] = dst[0];
+            dst[0] = 0;
+            if( (n & 1) == 0 )
+                dst[n] = 0;
+        }
+        return;
     }
 #endif
     assert( tab_size == n );
@@ -1056,15 +1070,11 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
         }
     }
 
-#ifdef HAVE_IPP
-finalize:
-#endif
     if( complex_output && (n & 1) == 0 )
     {
         dst[-1] = dst[0];
         dst[0] = 0;
-        if( (n & 1) == 0 )
-            dst[n] = 0;
+        dst[n] = 0;
     }
 }
 
@@ -1076,7 +1086,7 @@ template<typename T> static void
 CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
          const Complex<T>* wave, int tab_size,
          const void*
-#ifdef HAVE_IPP
+#ifdef USE_IPP_DFT
          spec
 #endif
          , Complex<T>* buf,
@@ -1097,7 +1107,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
         ((T*)src)[1] = src[0];
         src++;
     }
-#ifdef HAVE_IPP
+#ifdef USE_IPP_DFT
     if( spec )
     {
         ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf );
@@ -1225,7 +1235,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
         }
     }
 
-#ifdef HAVE_IPP
+#ifdef USE_IPP_DFT
 finalize:
 #endif
     if( complex_input )
@@ -1458,6 +1468,10 @@ static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* fac
 
 }
 
+#ifdef USE_IPP_DFT
+typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
+typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
+#endif
 
 void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
 {
@@ -1482,8 +1496,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
     int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
     int factors[34];
     bool inplace_transform = false;
-#ifdef HAVE_IPP
-    void *spec_r = 0, *spec_c = 0;
+#ifdef USE_IPP_DFT
+    AutoBuffer<uchar> ippbuf;
     int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
 #endif
 
@@ -1542,53 +1556,48 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
         }
 
         spec = 0;
-#ifdef HAVE_IPP
+#ifdef USE_IPP_DFT
         if( len*count >= 64 ) // use IPP DFT if available
         {
-            int ipp_sz = 0;
+            int specsize=0, initsize=0, worksize=0;
+            IppDFTGetSizeFunc getSizeFunc = 0;
+            IppDFTInitFunc initFunc = 0;
 
             if( real_transform && stage == 0 )
             {
                 if( depth == CV_32F )
                 {
-                    if( spec_r )
-                        IPPI_CALL( ippsDFTFree_R_32f( (IppsDFTSpec_R_32f*)spec_r ));
-                    IPPI_CALL( ippsDFTInitAlloc_R_32f(
-                        (IppsDFTSpec_R_32f**)&spec_r, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_R_32f( (IppsDFTSpec_R_32f*)spec_r, &ipp_sz ));
+                    getSizeFunc = ippsDFTGetSize_R_32f;
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
                 }
                 else
                 {
-                    if( spec_r )
-                        IPPI_CALL( ippsDFTFree_R_64f( (IppsDFTSpec_R_64f*)spec_r ));
-                    IPPI_CALL( ippsDFTInitAlloc_R_64f(
-                        (IppsDFTSpec_R_64f**)&spec_r, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_R_64f( (IppsDFTSpec_R_64f*)spec_r, &ipp_sz ));
+                    getSizeFunc = ippsDFTGetSize_R_64f;
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
                 }
-                spec = spec_r;
             }
             else
             {
                 if( depth == CV_32F )
                 {
-                    if( spec_c )
-                        IPPI_CALL( ippsDFTFree_C_32fc( (IppsDFTSpec_C_32fc*)spec_c ));
-                    IPPI_CALL( ippsDFTInitAlloc_C_32fc(
-                        (IppsDFTSpec_C_32fc**)&spec_c, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_C_32fc( (IppsDFTSpec_C_32fc*)spec_c, &ipp_sz ));
+                    getSizeFunc = ippsDFTGetSize_C_32fc;
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
                 }
                 else
                 {
-                    if( spec_c )
-                        IPPI_CALL( ippsDFTFree_C_64fc( (IppsDFTSpec_C_64fc*)spec_c ));
-                    IPPI_CALL( ippsDFTInitAlloc_C_64fc(
-                        (IppsDFTSpec_C_64fc**)&spec_c, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_C_64fc( (IppsDFTSpec_C_64fc*)spec_c, &ipp_sz ));
+                    getSizeFunc = ippsDFTGetSize_C_64fc;
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
                 }
-                spec = spec_c;
             }
-
-            sz += ipp_sz;
+            if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
+            {
+                ippbuf.allocate(specsize + initsize + 64);
+                spec = alignPtr(&ippbuf[0], 32);
+                uchar* initbuf = alignPtr((uchar*)spec + specsize, 32);
+                if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 )
+                    spec = 0;
+                sz += worksize;
+            }
         }
         else
 #endif
@@ -1862,24 +1871,6 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
             src = dst;
         }
     }
-
-#ifdef HAVE_IPP
-    if( spec_c )
-    {
-        if( depth == CV_32F )
-            ippsDFTFree_C_32fc( (IppsDFTSpec_C_32fc*)spec_c );
-        else
-            ippsDFTFree_C_64fc( (IppsDFTSpec_C_64fc*)spec_c );
-    }
-
-    if( spec_r )
-    {
-        if( depth == CV_32F )
-            ippsDFTFree_R_32f( (IppsDFTSpec_R_32f*)spec_r );
-        else
-            ippsDFTFree_R_64f( (IppsDFTSpec_R_64f*)spec_r );
-    }
-#endif
 }
 
 
diff --git a/modules/core/src/glob.cpp b/modules/core/src/glob.cpp
index c655aa9c1..c75bd2e66 100644
--- a/modules/core/src/glob.cpp
+++ b/modules/core/src/glob.cpp
@@ -56,19 +56,42 @@ namespace
 
     struct DIR
     {
+#ifdef HAVE_WINRT
+        WIN32_FIND_DATAW data;
+#else
         WIN32_FIND_DATA data;
+#endif
         HANDLE handle;
         dirent ent;
+#ifdef HAVE_WINRT
+        DIR() {};
+        ~DIR()
+        {
+            if (ent.d_name)
+                delete[] ent.d_name;
+        }
+#endif
     };
 
     DIR* opendir(const char* path)
     {
         DIR* dir = new DIR;
         dir->ent.d_name = 0;
-        dir->handle = ::FindFirstFileA((cv::String(path) + "\\*").c_str(), &dir->data);
+#ifdef HAVE_WINRT
+        cv::String full_path = cv::String(path) + "\\*";
+        wchar_t wfull_path[MAX_PATH];
+        size_t copied = mbstowcs(wfull_path, full_path.c_str(), MAX_PATH);
+        CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+        dir->handle = ::FindFirstFileExW(wfull_path, FindExInfoStandard,
+                        &dir->data, FindExSearchNameMatch, NULL, 0);
+#else
+        dir->handle = ::FindFirstFileExA((cv::String(path) + "\\*").c_str(),
+            FindExInfoStandard, &dir->data, FindExSearchNameMatch, NULL, 0);
+#endif
         if(dir->handle == INVALID_HANDLE_VALUE)
         {
             /*closedir will do all cleanup*/
+            delete dir;
             return 0;
         }
         return dir;
@@ -76,12 +99,26 @@ namespace
 
     dirent* readdir(DIR* dir)
     {
+#ifdef HAVE_WINRT
         if (dir->ent.d_name != 0)
         {
-            if (::FindNextFile(dir->handle, &dir->data) != TRUE)
+            if (::FindNextFileW(dir->handle, &dir->data) != TRUE)
+                return 0;
+        }
+        size_t asize = wcstombs(NULL, dir->data.cFileName, 0);
+        CV_Assert((asize != 0) && (asize != (size_t)-1));
+        char* aname = new char[asize+1];
+        aname[asize] = 0;
+        wcstombs(aname, dir->data.cFileName, asize);
+        dir->ent.d_name = aname;
+#else
+        if (dir->ent.d_name != 0)
+        {
+            if (::FindNextFileA(dir->handle, &dir->data) != TRUE)
                 return 0;
         }
         dir->ent.d_name = dir->data.cFileName;
+#endif
         return &dir->ent;
     }
 
@@ -104,12 +141,24 @@ static bool isDir(const cv::String& path, DIR* dir)
 {
 #if defined WIN32 || defined _WIN32 || defined WINCE
     DWORD attributes;
+    BOOL status = TRUE;
     if (dir)
         attributes = dir->data.dwFileAttributes;
     else
-        attributes = ::GetFileAttributes(path.c_str());
+    {
+        WIN32_FILE_ATTRIBUTE_DATA all_attrs;
+#ifdef HAVE_WINRT
+        wchar_t wpath[MAX_PATH];
+        size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
+        CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+        status = ::GetFileAttributesExW(wpath, GetFileExInfoStandard, &all_attrs);
+#else
+        status = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &all_attrs);
+#endif
+        attributes = all_attrs.dwFileAttributes;
+    }
 
-    return (attributes != INVALID_FILE_ATTRIBUTES) && ((attributes & FILE_ATTRIBUTE_DIRECTORY) != 0);
+    return status && ((attributes & FILE_ATTRIBUTE_DIRECTORY) != 0);
 #else
     (void)dir;
     struct stat stat_buf;
@@ -241,4 +290,4 @@ void cv::glob(String pattern, std::vector<String>& result, bool recursive)
 
     glob_rec(path, wildchart, result, recursive);
     std::sort(result.begin(), result.end());
-}
\ No newline at end of file
+}
diff --git a/modules/core/src/gpu_info.cpp b/modules/core/src/gpu_info.cpp
index e3400a538..5a1e56746 100644
--- a/modules/core/src/gpu_info.cpp
+++ b/modules/core/src/gpu_info.cpp
@@ -119,7 +119,7 @@ bool cv::gpu::deviceSupports(FeatureSet feature_set)
     else
     {
         DeviceInfo dev(devId);
-        version = dev.major() * 10 + dev.minor();
+        version = dev.majorVersion() * 10 + dev.minorVersion();
         if (devId < cache_size)
             versions[devId] = version;
     }
@@ -455,7 +455,7 @@ size_t cv::gpu::DeviceInfo::totalConstMem() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::major() const
+int cv::gpu::DeviceInfo::majorVersion() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -465,7 +465,7 @@ int cv::gpu::DeviceInfo::major() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::minor() const
+int cv::gpu::DeviceInfo::minorVersion() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -908,12 +908,12 @@ bool cv::gpu::DeviceInfo::isCompatible() const
     return false;
 #else
     // Check PTX compatibility
-    if (TargetArchs::hasEqualOrLessPtx(major(), minor()))
+    if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
         return true;
 
     // Check BIN compatibility
-    for (int i = minor(); i >= 0; --i)
-        if (TargetArchs::hasBin(major(), i))
+    for (int i = minorVersion(); i >= 0; --i)
+        if (TargetArchs::hasBin(majorVersion(), i))
             return true;
 
     return false;
diff --git a/modules/core/src/gpu_mat.cpp b/modules/core/src/gpu_mat.cpp
index a2e8da65a..33a6046fe 100644
--- a/modules/core/src/gpu_mat.cpp
+++ b/modules/core/src/gpu_mat.cpp
@@ -46,504 +46,6 @@
 using namespace cv;
 using namespace cv::gpu;
 
-/////////////////////////// matrix operations /////////////////////////
-
-#ifdef HAVE_CUDA
-
-// CUDA implementation
-
-#include "cuda/matrix_operations.hpp"
-
-namespace
-{
-    template <typename T> void cudaSet_(GpuMat& src, Scalar s, cudaStream_t stream)
-    {
-        Scalar_<T> sf = s;
-        cudev::set<T>(PtrStepSz<T>(src), sf.val, src.channels(), stream);
-    }
-
-    void cudaSet(GpuMat& src, Scalar s, cudaStream_t stream)
-    {
-        typedef void (*func_t)(GpuMat& src, Scalar s, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            cudaSet_<uchar>,
-            cudaSet_<schar>,
-            cudaSet_<ushort>,
-            cudaSet_<short>,
-            cudaSet_<int>,
-            cudaSet_<float>,
-            cudaSet_<double>
-        };
-
-        funcs[src.depth()](src, s, stream);
-    }
-
-    template <typename T> void cudaSet_(GpuMat& src, Scalar s, PtrStepSzb mask, cudaStream_t stream)
-    {
-        Scalar_<T> sf = s;
-        cudev::set<T>(PtrStepSz<T>(src), sf.val, mask, src.channels(), stream);
-    }
-
-    void cudaSet(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
-    {
-        typedef void (*func_t)(GpuMat& src, Scalar s, PtrStepSzb mask, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            cudaSet_<uchar>,
-            cudaSet_<schar>,
-            cudaSet_<ushort>,
-            cudaSet_<short>,
-            cudaSet_<int>,
-            cudaSet_<float>,
-            cudaSet_<double>
-        };
-
-        funcs[src.depth()](src, s, mask, stream);
-    }
-
-    void cudaCopyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
-    {
-        cudev::copyWithMask(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream);
-    }
-
-    void cudaConvert(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
-    {
-        cudev::convert(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, stream);
-    }
-
-    void cudaConvert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream)
-    {
-        cudev::convert(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
-    }
-}
-
-// NPP implementation
-
-namespace
-{
-    //////////////////////////////////////////////////////////////////////////
-    // Convert
-
-    template<int SDEPTH, int DDEPTH> struct NppConvertFunc
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
-
-        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);
-    };
-    template<int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>
-    {
-        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
-
-        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);
-    };
-
-    template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
-
-        static void call(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
-        {
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-    template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
-    {
-        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
-
-        static void call(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
-        {
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-
-    //////////////////////////////////////////////////////////////////////////
-    // Set
-
-    template<int SDEPTH, int SCN> struct NppSetFunc
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
-    };
-    template<int SDEPTH> struct NppSetFunc<SDEPTH, 1>
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
-    };
-    template<int SCN> struct NppSetFunc<CV_8S, SCN>
-    {
-        typedef NppStatus (*func_ptr)(Npp8s values[], Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
-    };
-    template<> struct NppSetFunc<CV_8S, 1>
-    {
-        typedef NppStatus (*func_ptr)(Npp8s val, Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
-    };
-
-    template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        static void call(GpuMat& src, Scalar s, cudaStream_t stream)
-        {
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            Scalar_<src_t> nppS = s;
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-    template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        static void call(GpuMat& src, Scalar s, cudaStream_t stream)
-        {
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            Scalar_<src_t> nppS = s;
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-
-    template<int SDEPTH, int SCN> struct NppSetMaskFunc
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
-    };
-    template<int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
-    };
-
-    template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        static void call(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
-        {
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            Scalar_<src_t> nppS = s;
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-    template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        static void call(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
-        {
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            Scalar_<src_t> nppS = s;
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-
-    //////////////////////////////////////////////////////////////////////////
-    // CopyMasked
-
-    template<int SDEPTH> struct NppCopyWithMaskFunc
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, src_t* pDst, int nDstStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
-    };
-
-    template<int SDEPTH, typename NppCopyWithMaskFunc<SDEPTH>::func_ptr func> struct NppCopyWithMask
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        static void call(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
-        {
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-}
-
-// Dispatcher
-
-namespace
-{
-    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
-    {
-        CV_DbgAssert( src.size() == dst.size() && src.type() == dst.type() );
-
-        CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 );
-        CV_Assert( src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()) );
-
-        if (src.depth() == CV_64F)
-        {
-            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
-        }
-
-        typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
-        static const func_t funcs[7][4] =
-        {
-            /*  8U */ {NppCopyWithMask<CV_8U , nppiCopy_8u_C1MR >::call, cudaCopyWithMask, NppCopyWithMask<CV_8U , nppiCopy_8u_C3MR >::call, NppCopyWithMask<CV_8U , nppiCopy_8u_C4MR >::call},
-            /*  8S */ {cudaCopyWithMask                                , cudaCopyWithMask, cudaCopyWithMask                                , cudaCopyWithMask                                },
-            /* 16U */ {NppCopyWithMask<CV_16U, nppiCopy_16u_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_16U, nppiCopy_16u_C3MR>::call, NppCopyWithMask<CV_16U, nppiCopy_16u_C4MR>::call},
-            /* 16S */ {NppCopyWithMask<CV_16S, nppiCopy_16s_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_16S, nppiCopy_16s_C3MR>::call, NppCopyWithMask<CV_16S, nppiCopy_16s_C4MR>::call},
-            /* 32S */ {NppCopyWithMask<CV_32S, nppiCopy_32s_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_32S, nppiCopy_32s_C3MR>::call, NppCopyWithMask<CV_32S, nppiCopy_32s_C4MR>::call},
-            /* 32F */ {NppCopyWithMask<CV_32F, nppiCopy_32f_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_32F, nppiCopy_32f_C3MR>::call, NppCopyWithMask<CV_32F, nppiCopy_32f_C4MR>::call},
-            /* 64F */ {cudaCopyWithMask                                , cudaCopyWithMask, cudaCopyWithMask                                , cudaCopyWithMask                                }
-        };
-
-        const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cudaCopyWithMask;
-
-        func(src, dst, mask, stream);
-    }
-
-    void convert(const GpuMat& src, GpuMat& dst, cudaStream_t stream = 0)
-    {
-        CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() );
-
-        CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 );
-        CV_Assert( dst.depth() <= CV_64F );
-
-        if (src.depth() == CV_64F || dst.depth() == CV_64F)
-        {
-            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
-        }
-
-        typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
-        static const func_t funcs[7][7][4] =
-        {
-            {
-                /*  8U ->  8U */ {0, 0, 0, 0},
-                /*  8U ->  8S */ {cudaConvert                                       , cudaConvert, cudaConvert, cudaConvert                                       },
-                /*  8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::call},
-                /*  8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::call},
-                /*  8U -> 32S */ {cudaConvert                                       , cudaConvert, cudaConvert, cudaConvert                                       },
-                /*  8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::call, cudaConvert, cudaConvert, cudaConvert                                       },
-                /*  8U -> 64F */ {cudaConvert                                       , cudaConvert, cudaConvert, cudaConvert                                       }
-            },
-            {
-                /*  8S ->  8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /*  8S ->  8S */ {0,0,0,0},
-                /*  8S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /*  8S -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /*  8S -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /*  8S -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /*  8S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
-            },
-            {
-                /* 16U ->  8U */ {NppCvt<CV_16U, CV_8U , nppiConvert_16u8u_C1R >::call, cudaConvert, cudaConvert, NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::call},
-                /* 16U ->  8S */ {cudaConvert                                         , cudaConvert, cudaConvert, cudaConvert                                       },
-                /* 16U -> 16U */ {0,0,0,0},
-                /* 16U -> 16S */ {cudaConvert                                         , cudaConvert, cudaConvert, cudaConvert                                       },
-                /* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::call, cudaConvert, cudaConvert, cudaConvert                                       },
-                /* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::call, cudaConvert, cudaConvert, cudaConvert                                       },
-                /* 16U -> 64F */ {cudaConvert                                         , cudaConvert, cudaConvert, cudaConvert                                       }
-            },
-            {
-                /* 16S ->  8U */ {NppCvt<CV_16S, CV_8U , nppiConvert_16s8u_C1R >::call, cudaConvert, cudaConvert, NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::call},
-                /* 16S ->  8S */ {cudaConvert                                         , cudaConvert, cudaConvert, cudaConvert                                       },
-                /* 16S -> 16U */ {cudaConvert                                         , cudaConvert, cudaConvert, cudaConvert                                       },
-                /* 16S -> 16S */ {0,0,0,0},
-                /* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::call, cudaConvert, cudaConvert, cudaConvert                                       },
-                /* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::call, cudaConvert, cudaConvert, cudaConvert                                       },
-                /* 16S -> 64F */ {cudaConvert                                         , cudaConvert, cudaConvert, cudaConvert                                       }
-            },
-            {
-                /* 32S ->  8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 32S ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 32S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 32S -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 32S -> 32S */ {0,0,0,0},
-                /* 32S -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 32S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
-            },
-            {
-                /* 32F ->  8U */ {NppCvt<CV_32F, CV_8U , nppiConvert_32f8u_C1R >::call, cudaConvert, cudaConvert, cudaConvert},
-                /* 32F ->  8S */ {cudaConvert                                         , cudaConvert, cudaConvert, cudaConvert},
-                /* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
-                /* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
-                /* 32F -> 32S */ {cudaConvert                                         , cudaConvert, cudaConvert, cudaConvert},
-                /* 32F -> 32F */ {0,0,0,0},
-                /* 32F -> 64F */ {cudaConvert                                         , cudaConvert, cudaConvert, cudaConvert}
-            },
-            {
-                /* 64F ->  8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 64F ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 64F -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 64F -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 64F -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 64F -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
-                /* 64F -> 64F */ {0,0,0,0}
-            }
-        };
-
-        const bool aligned = isAligned(src.data, 16) && isAligned(dst.data, 16);
-        if (!aligned)
-        {
-            cudaConvert(src, dst, stream);
-            return;
-        }
-
-        const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1];
-        CV_DbgAssert( func != 0 );
-
-        func(src, dst, stream);
-    }
-
-    void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
-    {
-        CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() );
-
-        CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 );
-        CV_Assert( dst.depth() <= CV_64F );
-
-        if (src.depth() == CV_64F || dst.depth() == CV_64F)
-        {
-            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
-        }
-
-        cudaConvert(src, dst, alpha, beta, stream);
-    }
-
-    void set(GpuMat& m, Scalar s, cudaStream_t stream = 0)
-    {
-        if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
-        {
-            if (stream)
-                cudaSafeCall( cudaMemset2DAsync(m.data, m.step, 0, m.cols * m.elemSize(), m.rows, stream) );
-            else
-                cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
-            return;
-        }
-
-        if (m.depth() == CV_8U)
-        {
-            int cn = m.channels();
-
-            if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
-            {
-                int val = saturate_cast<uchar>(s[0]);
-                if (stream)
-                    cudaSafeCall( cudaMemset2DAsync(m.data, m.step, val, m.cols * m.elemSize(), m.rows, stream) );
-                else
-                    cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
-                return;
-            }
-        }
-
-        typedef void (*func_t)(GpuMat& src, Scalar s, cudaStream_t stream);
-        static const func_t funcs[7][4] =
-        {
-            {NppSet<CV_8U , 1, nppiSet_8u_C1R >::call, cudaSet                                 , cudaSet                               , NppSet<CV_8U , 4, nppiSet_8u_C4R >::call},
-            {NppSet<CV_8S , 1, nppiSet_8s_C1R >::call, NppSet<CV_8S , 2, nppiSet_8s_C2R >::call, NppSet<CV_8S, 3, nppiSet_8s_C3R>::call, NppSet<CV_8S , 4, nppiSet_8s_C4R >::call},
-            {NppSet<CV_16U, 1, nppiSet_16u_C1R>::call, NppSet<CV_16U, 2, nppiSet_16u_C2R>::call, cudaSet                               , NppSet<CV_16U, 4, nppiSet_16u_C4R>::call},
-            {NppSet<CV_16S, 1, nppiSet_16s_C1R>::call, NppSet<CV_16S, 2, nppiSet_16s_C2R>::call, cudaSet                               , NppSet<CV_16S, 4, nppiSet_16s_C4R>::call},
-            {NppSet<CV_32S, 1, nppiSet_32s_C1R>::call, cudaSet                                 , cudaSet                               , NppSet<CV_32S, 4, nppiSet_32s_C4R>::call},
-            {NppSet<CV_32F, 1, nppiSet_32f_C1R>::call, cudaSet                                 , cudaSet                               , NppSet<CV_32F, 4, nppiSet_32f_C4R>::call},
-            {cudaSet                                 , cudaSet                                 , cudaSet                               , cudaSet                                 }
-        };
-
-        CV_Assert( m.depth() <= CV_64F && m.channels() <= 4 );
-
-        if (m.depth() == CV_64F)
-        {
-            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
-        }
-
-        funcs[m.depth()][m.channels() - 1](m, s, stream);
-    }
-
-    void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream = 0)
-    {
-        CV_DbgAssert( !mask.empty() );
-
-        CV_Assert( m.depth() <= CV_64F && m.channels() <= 4 );
-
-        if (m.depth() == CV_64F)
-        {
-            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
-        }
-
-        typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
-        static const func_t funcs[7][4] =
-        {
-            {NppSetMask<CV_8U , 1, nppiSet_8u_C1MR >::call, cudaSet, cudaSet, NppSetMask<CV_8U , 4, nppiSet_8u_C4MR >::call},
-            {cudaSet                                      , cudaSet, cudaSet, cudaSet                                      },
-            {NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::call},
-            {NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::call},
-            {NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::call},
-            {NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::call},
-            {cudaSet                                      , cudaSet, cudaSet, cudaSet                                      }
-        };
-
-        funcs[m.depth()][m.channels() - 1](m, s, mask, stream);
-    }
-}
-
-#endif // HAVE_CUDA
-
 cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
     flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_),
     step(step_), data((uchar*)data_), refcount(0),
@@ -651,288 +153,6 @@ cv::gpu::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
         rows = cols = 0;
 }
 
-void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
-{
-#ifndef HAVE_CUDA
-    (void) _rows;
-    (void) _cols;
-    (void) _type;
-    throw_no_cuda();
-#else
-    _type &= Mat::TYPE_MASK;
-
-    if (rows == _rows && cols == _cols && type() == _type && data)
-        return;
-
-    if (data)
-        release();
-
-    CV_DbgAssert( _rows >= 0 && _cols >= 0 );
-
-    if (_rows > 0 && _cols > 0)
-    {
-        flags = Mat::MAGIC_VAL + _type;
-        rows = _rows;
-        cols = _cols;
-
-        size_t esz = elemSize();
-
-        void* devPtr;
-
-        if (rows > 1 && cols > 1)
-        {
-            cudaSafeCall( cudaMallocPitch(&devPtr, &step, esz * cols, rows) );
-        }
-        else
-        {
-            // Single row or single column must be continuous
-            cudaSafeCall( cudaMalloc(&devPtr, esz * cols * rows) );
-            step = esz * cols;
-        }
-
-        if (esz * cols == step)
-            flags |= Mat::CONTINUOUS_FLAG;
-
-        int64 _nettosize = static_cast<int64>(step) * rows;
-        size_t nettosize = static_cast<size_t>(_nettosize);
-
-        datastart = data = static_cast<uchar*>(devPtr);
-        dataend = data + nettosize;
-
-        refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
-        *refcount = 1;
-    }
-#endif
-}
-
-void cv::gpu::GpuMat::release()
-{
-#ifdef HAVE_CUDA
-    if (refcount && CV_XADD(refcount, -1) == 1)
-    {
-        cudaFree(datastart);
-        fastFree(refcount);
-    }
-
-    data = datastart = dataend = 0;
-    step = rows = cols = 0;
-    refcount = 0;
-#endif
-}
-
-void cv::gpu::GpuMat::upload(InputArray arr)
-{
-#ifndef HAVE_CUDA
-    (void) arr;
-    throw_no_cuda();
-#else
-    Mat mat = arr.getMat();
-
-    CV_DbgAssert( !mat.empty() );
-
-    create(mat.size(), mat.type());
-
-    cudaSafeCall( cudaMemcpy2D(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
-#endif
-}
-
-void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream)
-{
-#ifndef HAVE_CUDA
-    (void) arr;
-    (void) _stream;
-    throw_no_cuda();
-#else
-    Mat mat = arr.getMat();
-
-    CV_DbgAssert( !mat.empty() );
-
-    create(mat.size(), mat.type());
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);
-    cudaSafeCall( cudaMemcpy2DAsync(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice, stream) );
-#endif
-}
-
-void cv::gpu::GpuMat::download(OutputArray _dst) const
-{
-#ifndef HAVE_CUDA
-    (void) _dst;
-    throw_no_cuda();
-#else
-    CV_DbgAssert( !empty() );
-
-    _dst.create(size(), type());
-    Mat dst = _dst.getMat();
-
-    cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
-#endif
-}
-
-void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const
-{
-#ifndef HAVE_CUDA
-    (void) _dst;
-    (void) _stream;
-    throw_no_cuda();
-#else
-    CV_DbgAssert( !empty() );
-
-    _dst.create(size(), type());
-    Mat dst = _dst.getMat();
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);
-    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost, stream) );
-#endif
-}
-
-void cv::gpu::GpuMat::copyTo(OutputArray _dst) const
-{
-#ifndef HAVE_CUDA
-    (void) _dst;
-    throw_no_cuda();
-#else
-    CV_DbgAssert( !empty() );
-
-    _dst.create(size(), type());
-    GpuMat dst = _dst.getGpuMat();
-
-    cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) );
-#endif
-}
-
-void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
-{
-#ifndef HAVE_CUDA
-    (void) _dst;
-    (void) _stream;
-    throw_no_cuda();
-#else
-    CV_DbgAssert( !empty() );
-
-    _dst.create(size(), type());
-    GpuMat dst = _dst.getGpuMat();
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);
-    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice, stream) );
-#endif
-}
-
-void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream) const
-{
-#ifndef HAVE_CUDA
-    (void) _dst;
-    (void) _mask;
-    (void) _stream;
-    throw_no_cuda();
-#else
-    CV_DbgAssert( !empty() );
-
-    _dst.create(size(), type());
-    GpuMat dst = _dst.getGpuMat();
-
-    GpuMat mask = _mask.getGpuMat();
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);
-    ::copyWithMask(*this, dst, mask, stream);
-#endif
-}
-
-GpuMat& cv::gpu::GpuMat::setTo(Scalar s, Stream& _stream)
-{
-#ifndef HAVE_CUDA
-    (void) s;
-    (void) _stream;
-    throw_no_cuda();
-#else
-    CV_DbgAssert( !empty() );
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);
-    ::set(*this, s, stream);
-#endif
-
-    return *this;
-}
-
-GpuMat& cv::gpu::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream)
-{
-#ifndef HAVE_CUDA
-    (void) s;
-    (void) _mask;
-    (void) _stream;
-    throw_no_cuda();
-#else
-    CV_DbgAssert( !empty() );
-
-    GpuMat mask = _mask.getGpuMat();
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);
-    ::set(*this, s, mask, stream);
-#endif
-
-    return *this;
-}
-
-void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) const
-{
-#ifndef HAVE_CUDA
-    (void) _dst;
-    (void) rtype;
-    (void) _stream;
-    throw_no_cuda();
-#else
-    if (rtype < 0)
-        rtype = type();
-    else
-        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
-
-    const int sdepth = depth();
-    const int ddepth = CV_MAT_DEPTH(rtype);
-    if (sdepth == ddepth)
-    {
-        if (_stream)
-            copyTo(_dst, _stream);
-        else
-            copyTo(_dst);
-
-        return;
-    }
-
-    GpuMat src = *this;
-
-    _dst.create(size(), rtype);
-    GpuMat dst = _dst.getGpuMat();
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);
-    ::convert(src, dst, stream);
-#endif
-}
-
-void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& _stream) const
-{
-#ifndef HAVE_CUDA
-    (void) _dst;
-    (void) rtype;
-    (void) alpha;
-    (void) beta;
-    (void) _stream;
-    throw_no_cuda();
-#else
-    if (rtype < 0)
-        rtype = type();
-    else
-        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
-
-    GpuMat src = *this;
-
-    _dst.create(size(), rtype);
-    GpuMat dst = _dst.getGpuMat();
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);
-    ::convert(src, dst, alpha, beta, stream);
-#endif
-}
-
 GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
 {
     GpuMat hdr = *this;
@@ -1124,3 +344,101 @@ GpuMat cv::gpu::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
 
     return mat = GpuMat(rows, cols, type);
 }
+
+#ifndef HAVE_CUDA
+
+void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
+{
+    (void) _rows;
+    (void) _cols;
+    (void) _type;
+    throw_no_cuda();
+}
+
+void cv::gpu::GpuMat::release()
+{
+}
+
+void cv::gpu::GpuMat::upload(InputArray arr)
+{
+    (void) arr;
+    throw_no_cuda();
+}
+
+void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream)
+{
+    (void) arr;
+    (void) _stream;
+    throw_no_cuda();
+}
+
+void cv::gpu::GpuMat::download(OutputArray _dst) const
+{
+    (void) _dst;
+    throw_no_cuda();
+}
+
+void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const
+{
+    (void) _dst;
+    (void) _stream;
+    throw_no_cuda();
+}
+
+void cv::gpu::GpuMat::copyTo(OutputArray _dst) const
+{
+    (void) _dst;
+    throw_no_cuda();
+}
+
+void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
+{
+    (void) _dst;
+    (void) _stream;
+    throw_no_cuda();
+}
+
+void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream) const
+{
+    (void) _dst;
+    (void) _mask;
+    (void) _stream;
+    throw_no_cuda();
+}
+
+GpuMat& cv::gpu::GpuMat::setTo(Scalar s, Stream& _stream)
+{
+    (void) s;
+    (void) _stream;
+    throw_no_cuda();
+    return *this;
+}
+
+GpuMat& cv::gpu::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream)
+{
+    (void) s;
+    (void) _mask;
+    (void) _stream;
+    throw_no_cuda();
+    return *this;
+}
+
+void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) const
+{
+    (void) _dst;
+    (void) rtype;
+    (void) _stream;
+    throw_no_cuda();
+}
+
+void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& _stream) const
+{
+    (void) _dst;
+    (void) rtype;
+    (void) alpha;
+    (void) beta;
+    (void) _stream;
+    throw_no_cuda();
+}
+
+#endif
diff --git a/modules/core/src/gpu_stream.cpp b/modules/core/src/gpu_stream.cpp
index 879775355..fcdf480a9 100644
--- a/modules/core/src/gpu_stream.cpp
+++ b/modules/core/src/gpu_stream.cpp
@@ -100,7 +100,7 @@ cv::gpu::Stream::Stream()
 #ifndef HAVE_CUDA
     throw_no_cuda();
 #else
-    impl_ = new Impl;
+    impl_ = makePtr<Impl>();
 #endif
 }
 
@@ -182,7 +182,7 @@ void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userDat
 
 Stream& cv::gpu::Stream::Null()
 {
-    static Stream s(new Impl(0));
+    static Stream s(Ptr<Impl>(new Impl(0)));
     return s;
 }
 
@@ -195,10 +195,6 @@ cv::gpu::Stream::operator bool_type() const
 #endif
 }
 
-template <> void cv::Ptr<Stream::Impl>::delete_obj()
-{
-    if (obj) delete obj;
-}
 
 ////////////////////////////////////////////////////////////////
 // Stream
@@ -249,7 +245,7 @@ cv::gpu::Event::Event(CreateFlags flags)
     (void) flags;
     throw_no_cuda();
 #else
-    impl_ = new Impl(flags);
+    impl_ = makePtr<Impl>(flags);
 #endif
 }
 
@@ -301,8 +297,3 @@ float cv::gpu::Event::elapsedTime(const Event& start, const Event& end)
     return ms;
 #endif
 }
-
-template <> void cv::Ptr<Event::Impl>::delete_obj()
-{
-    if (obj) delete obj;
-}
diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp
index 9e64697c3..f6bc7c88c 100644
--- a/modules/core/src/lapack.cpp
+++ b/modules/core/src/lapack.cpp
@@ -1823,4 +1823,4 @@ cvSVBkSb( const CvArr* warr, const CvArr* uarr,
 
     cv::SVD::backSubst(w, u, v, rhs, dst);
     CV_Assert( dst.data == dst0.data );
-}
\ No newline at end of file
+}
diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp
index 404c5b434..7d832cb0f 100644
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@@ -1725,19 +1725,29 @@ diagtransform_64f(const double* src, double* dst, const double* m, int len, int
 
 typedef void (*TransformFunc)( const uchar* src, uchar* dst, const uchar* m, int, int, int );
 
-static TransformFunc transformTab[] =
+static TransformFunc getTransformFunc(int depth)
 {
-    (TransformFunc)transform_8u, (TransformFunc)transform_8s, (TransformFunc)transform_16u,
-    (TransformFunc)transform_16s, (TransformFunc)transform_32s, (TransformFunc)transform_32f,
-    (TransformFunc)transform_64f, 0
-};
+    static TransformFunc transformTab[] =
+    {
+        (TransformFunc)transform_8u, (TransformFunc)transform_8s, (TransformFunc)transform_16u,
+        (TransformFunc)transform_16s, (TransformFunc)transform_32s, (TransformFunc)transform_32f,
+        (TransformFunc)transform_64f, 0
+    };
 
-static TransformFunc diagTransformTab[] =
+    return transformTab[depth];
+}
+
+static TransformFunc getDiagTransformFunc(int depth)
 {
-    (TransformFunc)diagtransform_8u, (TransformFunc)diagtransform_8s, (TransformFunc)diagtransform_16u,
-    (TransformFunc)diagtransform_16s, (TransformFunc)diagtransform_32s, (TransformFunc)diagtransform_32f,
-    (TransformFunc)diagtransform_64f, 0
-};
+    static TransformFunc diagTransformTab[] =
+    {
+        (TransformFunc)diagtransform_8u, (TransformFunc)diagtransform_8s, (TransformFunc)diagtransform_16u,
+        (TransformFunc)diagtransform_16s, (TransformFunc)diagtransform_32s, (TransformFunc)diagtransform_32f,
+        (TransformFunc)diagtransform_64f, 0
+    };
+
+    return diagTransformTab[depth];
+}
 
 }
 
@@ -1800,7 +1810,7 @@ void cv::transform( InputArray _src, OutputArray _dst, InputArray _mtx )
         }
     }
 
-    TransformFunc func = isDiag ? diagTransformTab[depth] : transformTab[depth];
+    TransformFunc func = isDiag ? getDiagTransformFunc(depth): getTransformFunc(depth);
     CV_Assert( func != 0 );
 
     const Mat* arrays[] = {&src, &dst, 0};
@@ -2761,19 +2771,24 @@ static double dotProd_64f(const double* src1, const double* src2, int len)
 
 typedef double (*DotProdFunc)(const uchar* src1, const uchar* src2, int len);
 
-static DotProdFunc dotProdTab[] =
+static DotProdFunc getDotProdFunc(int depth)
 {
-    (DotProdFunc)GET_OPTIMIZED(dotProd_8u), (DotProdFunc)GET_OPTIMIZED(dotProd_8s),
-    (DotProdFunc)dotProd_16u, (DotProdFunc)dotProd_16s,
-    (DotProdFunc)dotProd_32s, (DotProdFunc)GET_OPTIMIZED(dotProd_32f),
-    (DotProdFunc)dotProd_64f, 0
-};
+    static DotProdFunc dotProdTab[] =
+    {
+        (DotProdFunc)GET_OPTIMIZED(dotProd_8u), (DotProdFunc)GET_OPTIMIZED(dotProd_8s),
+        (DotProdFunc)dotProd_16u, (DotProdFunc)dotProd_16s,
+        (DotProdFunc)dotProd_32s, (DotProdFunc)GET_OPTIMIZED(dotProd_32f),
+        (DotProdFunc)dotProd_64f, 0
+    };
+
+    return dotProdTab[depth];
+}
 
 double Mat::dot(InputArray _mat) const
 {
     Mat mat = _mat.getMat();
     int cn = channels();
-    DotProdFunc func = dotProdTab[depth()];
+    DotProdFunc func = getDotProdFunc(depth());
     CV_Assert( mat.type() == type() && mat.size == size && func != 0 );
 
     if( isContinuous() && mat.isContinuous() )
@@ -2896,6 +2911,27 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp
     return *this;
 }
 
+void PCA::write(FileStorage& fs ) const
+{
+    CV_Assert( fs.isOpened() );
+
+    fs << "name" << "PCA";
+    fs << "vectors" << eigenvectors;
+    fs << "values" << eigenvalues;
+    fs << "mean" << mean;
+}
+
+void PCA::read(const FileNode& fs)
+{
+    CV_Assert( !fs.empty() );
+    String name = (String)fs["name"];
+    CV_Assert( name == "PCA" );
+
+    cv::read(fs["vectors"], eigenvectors);
+    cv::read(fs["values"], eigenvalues);
+    cv::read(fs["mean"], mean);
+}
+
 template <typename T>
 int computeCumulativeEnergy(const Mat& eigenvalues, double retainedVariance)
 {
diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp
index 053dd1cef..d2032b2e5 100644
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -995,6 +995,11 @@ Mat _InputArray::getMat(int i) const
         return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
     }
 
+    if( k == OCL_MAT )
+    {
+        CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+    }
+
     if( k == STD_VECTOR_MAT )
     {
         const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
@@ -1100,6 +1105,11 @@ void _InputArray::getMatVector(std::vector<Mat>& mv) const
         return;
     }
 
+    if( k == OCL_MAT )
+    {
+        CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+    }
+
     CV_Assert( k == STD_VECTOR_MAT );
     //if( k == STD_VECTOR_MAT )
     {
@@ -1224,6 +1234,11 @@ Size _InputArray::size(int i) const
         return d_mat->size();
     }
 
+    if( k == OCL_MAT )
+    {
+        CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+    }
+
     CV_Assert( k == CUDA_MEM );
     //if( k == CUDA_MEM )
     {
@@ -1338,6 +1353,11 @@ bool _InputArray::empty() const
     if( k == OPENGL_BUFFER )
         return ((const ogl::Buffer*)obj)->empty();
 
+    if( k == OCL_MAT )
+    {
+        CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+    }
+
     if( k == GPU_MAT )
         return ((const gpu::GpuMat*)obj)->empty();
 
@@ -1573,6 +1593,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all
         return;
     }
 
+    if( k == OCL_MAT )
+    {
+        CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+    }
+
     if( k == NONE )
     {
         CV_Error(CV_StsNullPtr, "create() called for the missing output array" );
@@ -1684,6 +1709,11 @@ void _OutputArray::release() const
         return;
     }
 
+    if( k == OCL_MAT )
+    {
+        CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+    }
+
     CV_Assert( k == STD_VECTOR_MAT );
     //if( k == STD_VECTOR_MAT )
     {
diff --git a/modules/core/src/opengl.cpp b/modules/core/src/opengl.cpp
index f8a647e8e..36a437a2d 100644
--- a/modules/core/src/opengl.cpp
+++ b/modules/core/src/opengl.cpp
@@ -484,7 +484,7 @@ cv::ogl::Buffer::Buffer(int arows, int acols, int atype, unsigned int abufId, bo
     (void) autoRelease;
     throw_no_ogl();
 #else
-    impl_ = new Impl(abufId, autoRelease);
+    impl_.reset(new Impl(abufId, autoRelease));
     rows_ = arows;
     cols_ = acols;
     type_ = atype;
@@ -500,7 +500,7 @@ cv::ogl::Buffer::Buffer(Size asize, int atype, unsigned int abufId, bool autoRel
     (void) autoRelease;
     throw_no_ogl();
 #else
-    impl_ = new Impl(abufId, autoRelease);
+    impl_.reset(new Impl(abufId, autoRelease));
     rows_ = asize.height;
     cols_ = asize.width;
     type_ = atype;
@@ -529,7 +529,7 @@ cv::ogl::Buffer::Buffer(InputArray arr, Target target, bool autoRelease) : rows_
             Mat mat = arr.getMat();
             CV_Assert( mat.isContinuous() );
             const GLsizeiptr asize = mat.rows * mat.cols * mat.elemSize();
-            impl_ = new Impl(asize, mat.data, target, autoRelease);
+            impl_.reset(new Impl(asize, mat.data, target, autoRelease));
             rows_ = mat.rows;
             cols_ = mat.cols;
             type_ = mat.type();
@@ -552,7 +552,7 @@ void cv::ogl::Buffer::create(int arows, int acols, int atype, Target target, boo
     if (rows_ != arows || cols_ != acols || type_ != atype)
     {
         const GLsizeiptr asize = arows * acols * CV_ELEM_SIZE(atype);
-        impl_ = new Impl(asize, 0, target, autoRelease);
+        impl_.reset(new Impl(asize, 0, target, autoRelease));
         rows_ = arows;
         cols_ = acols;
         type_ = atype;
@@ -563,7 +563,7 @@ void cv::ogl::Buffer::create(int arows, int acols, int atype, Target target, boo
 void cv::ogl::Buffer::release()
 {
 #ifdef HAVE_OPENGL
-    if (*impl_.refcount == 1)
+    if (impl_)
         impl_->setAutoRelease(true);
     impl_ = Impl::empty();
     rows_ = 0;
@@ -836,10 +836,6 @@ unsigned int cv::ogl::Buffer::bufId() const
 #endif
 }
 
-template <> void cv::Ptr<cv::ogl::Buffer::Impl>::delete_obj()
-{
-    if (obj) delete obj;
-}
 
 //////////////////////////////////////////////////////////////////////////////////////////
 // ogl::Texture
@@ -972,7 +968,7 @@ cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, unsigned int
     (void) autoRelease;
     throw_no_ogl();
 #else
-    impl_ = new Impl(atexId, autoRelease);
+    impl_.reset(new Impl(atexId, autoRelease));
     rows_ = arows;
     cols_ = acols;
     format_ = aformat;
@@ -988,7 +984,7 @@ cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, unsigned int atexId, b
     (void) autoRelease;
     throw_no_ogl();
 #else
-    impl_ = new Impl(atexId, autoRelease);
+    impl_.reset(new Impl(atexId, autoRelease));
     rows_ = asize.height;
     cols_ = asize.width;
     format_ = aformat;
@@ -1028,7 +1024,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
         {
             ogl::Buffer buf = arr.getOGlBuffer();
             buf.bind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
-            impl_ = new Impl(internalFormats[cn], asize.width, asize.height, srcFormats[cn], gl_types[depth], 0, autoRelease);
+            impl_.reset(new Impl(internalFormats[cn], asize.width, asize.height, srcFormats[cn], gl_types[depth], 0, autoRelease));
             ogl::Buffer::unbind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
             break;
         }
@@ -1041,7 +1037,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
                 GpuMat dmat = arr.getGpuMat();
                 ogl::Buffer buf(dmat, ogl::Buffer::PIXEL_UNPACK_BUFFER);
                 buf.bind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
-                impl_ = new Impl(internalFormats[cn], asize.width, asize.height, srcFormats[cn], gl_types[depth], 0, autoRelease);
+                impl_.reset(new Impl(internalFormats[cn], asize.width, asize.height, srcFormats[cn], gl_types[depth], 0, autoRelease));
                 ogl::Buffer::unbind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
             #endif
 
@@ -1053,7 +1049,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
             Mat mat = arr.getMat();
             CV_Assert( mat.isContinuous() );
             ogl::Buffer::unbind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
-            impl_ = new Impl(internalFormats[cn], asize.width, asize.height, srcFormats[cn], gl_types[depth], mat.data, autoRelease);
+            impl_.reset(new Impl(internalFormats[cn], asize.width, asize.height, srcFormats[cn], gl_types[depth], mat.data, autoRelease));
             break;
         }
     }
@@ -1076,7 +1072,7 @@ void cv::ogl::Texture2D::create(int arows, int acols, Format aformat, bool autoR
     if (rows_ != arows || cols_ != acols || format_ != aformat)
     {
         ogl::Buffer::unbind(ogl::Buffer::PIXEL_UNPACK_BUFFER);
-        impl_ = new Impl(aformat, acols, arows, aformat, gl::FLOAT, 0, autoRelease);
+        impl_.reset(new Impl(aformat, acols, arows, aformat, gl::FLOAT, 0, autoRelease));
         rows_ = arows;
         cols_ = acols;
         format_ = aformat;
@@ -1087,7 +1083,7 @@ void cv::ogl::Texture2D::create(int arows, int acols, Format aformat, bool autoR
 void cv::ogl::Texture2D::release()
 {
 #ifdef HAVE_OPENGL
-    if (*impl_.refcount == 1)
+    if (impl_)
         impl_->setAutoRelease(true);
     impl_ = Impl::empty();
     rows_ = 0;
@@ -1243,10 +1239,6 @@ unsigned int cv::ogl::Texture2D::texId() const
 #endif
 }
 
-template <> void cv::Ptr<cv::ogl::Texture2D::Impl>::delete_obj()
-{
-    if (obj) delete obj;
-}
 
 ////////////////////////////////////////////////////////////////////////
 // ogl::Arrays
diff --git a/modules/core/src/out.cpp b/modules/core/src/out.cpp
index cc2294ded..3ed454ebf 100644
--- a/modules/core/src/out.cpp
+++ b/modules/core/src/out.cpp
@@ -256,7 +256,7 @@ namespace
         cv::Ptr<cv::Formatted> format(const cv::Mat& mtx) const
         {
             char braces[5] = {'\0', '\0', ';', '\0', '\0'};
-            return new FormattedImpl("[", "]", mtx, braces,
+            return cv::makePtr<FormattedImpl>("[", "]", mtx, &*braces,
                 mtx.cols == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
         }
     };
@@ -270,7 +270,7 @@ namespace
             char braces[5] = {'[', ']', '\0', '[', ']'};
             if (mtx.cols == 1)
                 braces[0] = braces[1] = '\0';
-            return new FormattedImpl("[", "]", mtx, braces,
+            return cv::makePtr<FormattedImpl>("[", "]", mtx, &*braces,
                 mtx.cols*mtx.channels() == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
         }
     };
@@ -288,7 +288,8 @@ namespace
             char braces[5] = {'[', ']', '\0', '[', ']'};
             if (mtx.cols == 1)
                 braces[0] = braces[1] = '\0';
-            return new FormattedImpl("array([", cv::format("], type='%s')", numpyTypes[mtx.depth()]), mtx, braces,
+            return cv::makePtr<FormattedImpl>("array([",
+                cv::format("], type='%s')", numpyTypes[mtx.depth()]), mtx, &*braces,
                 mtx.cols*mtx.channels() == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
         }
     };
@@ -300,7 +301,8 @@ namespace
         cv::Ptr<cv::Formatted> format(const cv::Mat& mtx) const
         {
             char braces[5] = {'\0', '\0', '\0', '\0', '\0'};
-            return new FormattedImpl(cv::String(), mtx.rows > 1 ? cv::String("\n") : cv::String(), mtx, braces,
+            return cv::makePtr<FormattedImpl>(cv::String(),
+                mtx.rows > 1 ? cv::String("\n") : cv::String(), mtx, &*braces,
                 mtx.cols*mtx.channels() == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
         }
     };
@@ -312,7 +314,7 @@ namespace
         cv::Ptr<cv::Formatted> format(const cv::Mat& mtx) const
         {
             char braces[5] = {'\0', '\0', ',', '\0', '\0'};
-            return new FormattedImpl("{", "}", mtx, braces,
+            return cv::makePtr<FormattedImpl>("{", "}", mtx, &*braces,
                 mtx.cols == 1 || !multiline, mtx.depth() == CV_64F ? prec64f : prec32f );
         }
     };
@@ -330,16 +332,16 @@ namespace cv
         switch(fmt)
         {
             case FMT_MATLAB:
-                return new MatlabFormatter();
+                return makePtr<MatlabFormatter>();
             case FMT_CSV:
-                return new CSVFormatter();
+                return makePtr<CSVFormatter>();
             case FMT_PYTHON:
-                return new PythonFormatter();
+                return makePtr<PythonFormatter>();
             case FMT_NUMPY:
-                return new NumpyFormatter();
+                return makePtr<NumpyFormatter>();
             case FMT_C:
-                return new CFormatter();
+                return makePtr<CFormatter>();
         }
-        return new MatlabFormatter();
+        return makePtr<MatlabFormatter>();
     }
 } // cv
diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp
index 0b2a845ac..27d7ecc03 100644
--- a/modules/core/src/parallel.cpp
+++ b/modules/core/src/parallel.cpp
@@ -110,8 +110,16 @@
     #endif
 #endif
 
-#if defined HAVE_TBB || defined HAVE_CSTRIPES || defined HAVE_OPENMP || defined HAVE_GCD || defined HAVE_CONCURRENCY
-   #define HAVE_PARALLEL_FRAMEWORK
+#if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
+#  define CV_PARALLEL_FRAMEWORK "tbb"
+#elif defined HAVE_CSTRIPES
+#  define CV_PARALLEL_FRAMEWORK "cstripes"
+#elif defined HAVE_OPENMP
+#  define CV_PARALLEL_FRAMEWORK "openmp"
+#elif defined HAVE_GCD
+#  define CV_PARALLEL_FRAMEWORK "gcd"
+#elif defined HAVE_CONCURRENCY
+#  define CV_PARALLEL_FRAMEWORK "ms-concurrency"
 #endif
 
 namespace cv
@@ -121,7 +129,7 @@ namespace cv
 
 namespace
 {
-#ifdef HAVE_PARALLEL_FRAMEWORK
+#ifdef CV_PARALLEL_FRAMEWORK
     class ParallelLoopBodyWrapper
     {
     public:
@@ -136,9 +144,9 @@ namespace
         {
             cv::Range r;
             r.start = (int)(wholeRange.start +
-                            ((size_t)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
+                            ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
             r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
-                            ((size_t)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
+                            ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
             (*body)(r);
         }
         cv::Range stripeRange() const { return cv::Range(0, nstripes); }
@@ -218,7 +226,7 @@ public:
 static SchedPtr pplScheduler;
 #endif
 
-#endif // HAVE_PARALLEL_FRAMEWORK
+#endif // CV_PARALLEL_FRAMEWORK
 
 } //namespace
 
@@ -226,7 +234,7 @@ static SchedPtr pplScheduler;
 
 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
 {
-#ifdef HAVE_PARALLEL_FRAMEWORK
+#ifdef CV_PARALLEL_FRAMEWORK
 
     if(numThreads != 0)
     {
@@ -281,7 +289,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
     }
     else
 
-#endif // HAVE_PARALLEL_FRAMEWORK
+#endif // CV_PARALLEL_FRAMEWORK
     {
         (void)nstripes;
         body(range);
@@ -290,7 +298,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
 
 int cv::getNumThreads(void)
 {
-#ifdef HAVE_PARALLEL_FRAMEWORK
+#ifdef CV_PARALLEL_FRAMEWORK
 
     if(numThreads == 0)
         return 1;
@@ -333,7 +341,7 @@ int cv::getNumThreads(void)
 void cv::setNumThreads( int threads )
 {
     (void)threads;
-#ifdef HAVE_PARALLEL_FRAMEWORK
+#ifdef CV_PARALLEL_FRAMEWORK
     numThreads = threads;
 #endif
 
@@ -445,7 +453,11 @@ int cv::getNumberOfCPUs(void)
 {
 #if defined WIN32 || defined _WIN32
     SYSTEM_INFO sysinfo;
+#if defined(_M_ARM) || defined(_M_X64) || defined(HAVE_WINRT)
+    GetNativeSystemInfo( &sysinfo );
+#else
     GetSystemInfo( &sysinfo );
+#endif
 
     return (int)sysinfo.dwNumberOfProcessors;
 #elif defined ANDROID
@@ -480,6 +492,14 @@ int cv::getNumberOfCPUs(void)
 #endif
 }
 
+const char* cv::currentParallelFramework() {
+#ifdef CV_PARALLEL_FRAMEWORK
+    return CV_PARALLEL_FRAMEWORK;
+#else
+    return NULL;
+#endif
+}
+
 CV_IMPL void cvSetNumThreads(int nt)
 {
     cv::setNumThreads(nt);
diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp
index 657d86a16..39ce63313 100644
--- a/modules/core/src/persistence.cpp
+++ b/modules/core/src/persistence.cpp
@@ -58,7 +58,6 @@
 #endif
 
 #if USE_ZLIB
-#  undef HAVE_UNISTD_H //to avoid redefinition
 #  ifndef _LFS64_LARGEFILE
 #    define _LFS64_LARGEFILE 0
 #  endif
@@ -5130,9 +5129,11 @@ FileStorage::FileStorage(const String& filename, int flags, const String& encodi
     open( filename, flags, encoding );
 }
 
-FileStorage::FileStorage(CvFileStorage* _fs)
+FileStorage::FileStorage(CvFileStorage* _fs, bool owning)
 {
-    fs = Ptr<CvFileStorage>(_fs);
+    if (owning) fs.reset(_fs);
+    else fs = Ptr<CvFileStorage>(Ptr<CvFileStorage>(), _fs);
+
     state = _fs ? NAME_EXPECTED + INSIDE_MAP : UNDEFINED;
 }
 
@@ -5148,8 +5149,8 @@ FileStorage::~FileStorage()
 bool FileStorage::open(const String& filename, int flags, const String& encoding)
 {
     release();
-    fs = Ptr<CvFileStorage>(cvOpenFileStorage( filename.c_str(), 0, flags,
-                                               !encoding.empty() ? encoding.c_str() : 0));
+    fs.reset(cvOpenFileStorage( filename.c_str(), 0, flags,
+                                !encoding.empty() ? encoding.c_str() : 0));
     bool ok = isOpened();
     state = ok ? NAME_EXPECTED + INSIDE_MAP : UNDEFINED;
     return ok;
@@ -5157,7 +5158,7 @@ bool FileStorage::open(const String& filename, int flags, const String& encoding
 
 bool FileStorage::isOpened() const
 {
-    return !fs.empty() && fs.obj->is_opened;
+    return fs && fs->is_opened;
 }
 
 void FileStorage::release()
@@ -5170,8 +5171,8 @@ void FileStorage::release()
 String FileStorage::releaseAndGetString()
 {
     String buf;
-    if( fs.obj && fs.obj->outbuf )
-        icvClose(fs.obj, &buf);
+    if( fs && fs->outbuf )
+        icvClose(fs, &buf);
 
     release();
     return buf;
@@ -5480,7 +5481,7 @@ void write( FileStorage& fs, const String& name, const Mat& value )
 // TODO: the 4 functions below need to be implemented more efficiently
 void write( FileStorage& fs, const String& name, const SparseMat& value )
 {
-    Ptr<CvSparseMat> mat = cvCreateSparseMat(value);
+    Ptr<CvSparseMat> mat(cvCreateSparseMat(value));
     cvWrite( *fs, name.size() ? name.c_str() : 0, mat );
 }
 
@@ -5530,8 +5531,8 @@ void read( const FileNode& node, SparseMat& mat, const SparseMat& default_mat )
         default_mat.copyTo(mat);
         return;
     }
-    Ptr<CvSparseMat> m = (CvSparseMat*)cvRead((CvFileStorage*)node.fs, (CvFileNode*)*node);
-    CV_Assert(CV_IS_SPARSE_MAT(m.obj));
+    Ptr<CvSparseMat> m((CvSparseMat*)cvRead((CvFileStorage*)node.fs, (CvFileNode*)*node));
+    CV_Assert(CV_IS_SPARSE_MAT(m));
     m->copyToSparseMat(mat);
 }
 
diff --git a/modules/core/src/rand.cpp b/modules/core/src/rand.cpp
index 079e1fb7b..ffce63484 100644
--- a/modules/core/src/rand.cpp
+++ b/modules/core/src/rand.cpp
@@ -728,33 +728,54 @@ void RNG::fill( InputOutputArray _mat, int disttype,
 }
 
 #ifdef WIN32
+
+
+#ifdef HAVE_WINRT
+// using C++11 thread attribute for local thread data
+__declspec( thread ) RNG* rng = NULL;
+
+ void deleteThreadRNGData()
+ {
+    if (rng)
+        delete rng;
+}
+
+RNG& theRNG()
+{
+    if (!rng)
+    {
+        rng =  new RNG;
+    }
+    return *rng;
+}
+#else
 #ifdef WINCE
 #	define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
 #endif
 static DWORD tlsRNGKey = TLS_OUT_OF_INDEXES;
 
-void deleteThreadRNGData()
-{
-    if( tlsRNGKey != TLS_OUT_OF_INDEXES )
-        delete (RNG*)TlsGetValue( tlsRNGKey );
+ void deleteThreadRNGData()
+ {
+     if( tlsRNGKey != TLS_OUT_OF_INDEXES )
+         delete (RNG*)TlsGetValue( tlsRNGKey );
 }
 
 RNG& theRNG()
 {
     if( tlsRNGKey == TLS_OUT_OF_INDEXES )
     {
-        tlsRNGKey = TlsAlloc();
-        CV_Assert(tlsRNGKey != TLS_OUT_OF_INDEXES);
+       tlsRNGKey = TlsAlloc();
+       CV_Assert(tlsRNGKey != TLS_OUT_OF_INDEXES);
     }
     RNG* rng = (RNG*)TlsGetValue( tlsRNGKey );
     if( !rng )
     {
-        rng = new RNG;
-        TlsSetValue( tlsRNGKey, rng );
+       rng = new RNG;
+       TlsSetValue( tlsRNGKey, rng );
     }
     return *rng;
 }
-
+#endif //HAVE_WINRT
 #else
 
 static pthread_key_t tlsRNGKey = 0;
diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp
index 3b8916bbf..86555fcc3 100644
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -200,14 +200,19 @@ static int sum64f( const double* src, const uchar* mask, double* dst, int len, i
 
 typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
 
-static SumFunc sumTab[] =
+static SumFunc getSumFunc(int depth)
 {
-    (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
-    (SumFunc)sum16u, (SumFunc)sum16s,
-    (SumFunc)sum32s,
-    (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
-    0
-};
+    static SumFunc sumTab[] =
+    {
+        (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
+        (SumFunc)sum16u, (SumFunc)sum16s,
+        (SumFunc)sum32s,
+        (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
+        0
+    };
+
+    return sumTab[depth];
+}
 
 template<typename T>
 static int countNonZero_(const T* src, int len )
@@ -272,14 +277,18 @@ static int countNonZero64f( const double* src, int len )
 
 typedef int (*CountNonZeroFunc)(const uchar*, int);
 
-static CountNonZeroFunc countNonZeroTab[] =
+static CountNonZeroFunc getCountNonZeroTab(int depth)
 {
-    (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
-    (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
-    (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
-    (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
-};
+    static CountNonZeroFunc countNonZeroTab[] =
+    {
+        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
+        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
+        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
+        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
+    };
 
+    return countNonZeroTab[depth];
+}
 
 template<typename T, typename ST, typename SQT>
 static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
@@ -428,11 +437,16 @@ static int sqsum64f( const double* src, const uchar* mask, double* sum, double*
 
 typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
 
-static SumSqrFunc sumSqrTab[] =
+static SumSqrFunc getSumSqrTab(int depth)
 {
-    (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
-    (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
-};
+    static SumSqrFunc sumSqrTab[] =
+    {
+        (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
+        (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
+    };
+
+    return sumSqrTab[depth];
+}
 
 }
 
@@ -440,7 +454,46 @@ cv::Scalar cv::sum( InputArray _src )
 {
     Mat src = _src.getMat();
     int k, cn = src.channels(), depth = src.depth();
-    SumFunc func = sumTab[depth];
+
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    size_t total_size = src.total();
+    int rows = src.size[0], cols = (int)(total_size/rows);
+    if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
+    {
+        IppiSize sz = { cols, rows };
+        int type = src.type();
+        typedef IppStatus (CV_STDCALL* ippiSumFunc)(const void*, int, IppiSize, double *, int);
+        ippiSumFunc ippFunc =
+            type == CV_8UC1 ? (ippiSumFunc)ippiSum_8u_C1R :
+            type == CV_8UC3 ? (ippiSumFunc)ippiSum_8u_C3R :
+            type == CV_8UC4 ? (ippiSumFunc)ippiSum_8u_C4R :
+            type == CV_16UC1 ? (ippiSumFunc)ippiSum_16u_C1R :
+            type == CV_16UC3 ? (ippiSumFunc)ippiSum_16u_C3R :
+            type == CV_16UC4 ? (ippiSumFunc)ippiSum_16u_C4R :
+            type == CV_16SC1 ? (ippiSumFunc)ippiSum_16s_C1R :
+            type == CV_16SC3 ? (ippiSumFunc)ippiSum_16s_C3R :
+            type == CV_16SC4 ? (ippiSumFunc)ippiSum_16s_C4R :
+            type == CV_32FC1 ? (ippiSumFunc)ippiSum_32f_C1R :
+            type == CV_32FC3 ? (ippiSumFunc)ippiSum_32f_C3R :
+            type == CV_32FC4 ? (ippiSumFunc)ippiSum_32f_C4R :
+            0;
+        if( ippFunc )
+        {
+            Ipp64f res[4];
+            if( ippFunc(src.data, src.step[0], sz, res, ippAlgHintAccurate) >= 0 )
+            {
+                Scalar sc;
+                for( int i = 0; i < cn; i++ )
+                {
+                    sc[i] = res[i];
+                }
+                return sc;
+            }
+        }
+    }
+#endif
+
+    SumFunc func = getSumFunc(depth);
 
     CV_Assert( cn <= 4 && func != 0 );
 
@@ -492,7 +545,7 @@ cv::Scalar cv::sum( InputArray _src )
 int cv::countNonZero( InputArray _src )
 {
     Mat src = _src.getMat();
-    CountNonZeroFunc func = countNonZeroTab[src.depth()];
+    CountNonZeroFunc func = getCountNonZeroTab(src.depth());
 
     CV_Assert( src.channels() == 1 && func != 0 );
 
@@ -513,7 +566,82 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
     CV_Assert( mask.empty() || mask.type() == CV_8U );
 
     int k, cn = src.channels(), depth = src.depth();
-    SumFunc func = sumTab[depth];
+
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    size_t total_size = src.total();
+    int rows = src.size[0], cols = (int)(total_size/rows);
+    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
+    {
+        IppiSize sz = { cols, rows };
+        int type = src.type();
+        if( !mask.empty() )
+        {
+            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *);
+            ippiMaskMeanFuncC1 ippFuncC1 =
+            type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
+            type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
+            type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
+            0;
+            if( ippFuncC1 )
+            {
+                Ipp64f res;
+                if( ippFuncC1(src.data, src.step[0], mask.data, mask.step[0], sz, &res) >= 0 )
+                {
+                    return Scalar(res);
+                }
+            }
+            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *);
+            ippiMaskMeanFuncC3 ippFuncC3 =
+            type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
+            type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
+            type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
+            0;
+            if( ippFuncC3 )
+            {
+                Ipp64f res1, res2, res3;
+                if( ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 1, &res1) >= 0 &&
+                    ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 2, &res2) >= 0 &&
+                    ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 3, &res3) >= 0 )
+                {
+                    return Scalar(res1, res2, res3);
+                }
+            }
+        }
+        else
+        {
+            typedef IppStatus (CV_STDCALL* ippiMeanFunc)(const void*, int, IppiSize, double *, int);
+            ippiMeanFunc ippFunc =
+                type == CV_8UC1 ? (ippiMeanFunc)ippiMean_8u_C1R :
+                type == CV_8UC3 ? (ippiMeanFunc)ippiMean_8u_C3R :
+                type == CV_8UC4 ? (ippiMeanFunc)ippiMean_8u_C4R :
+                type == CV_16UC1 ? (ippiMeanFunc)ippiMean_16u_C1R :
+                type == CV_16UC3 ? (ippiMeanFunc)ippiMean_16u_C3R :
+                type == CV_16UC4 ? (ippiMeanFunc)ippiMean_16u_C4R :
+                type == CV_16SC1 ? (ippiMeanFunc)ippiMean_16s_C1R :
+                type == CV_16SC3 ? (ippiMeanFunc)ippiMean_16s_C3R :
+                type == CV_16SC4 ? (ippiMeanFunc)ippiMean_16s_C4R :
+                type == CV_32FC1 ? (ippiMeanFunc)ippiMean_32f_C1R :
+                type == CV_32FC3 ? (ippiMeanFunc)ippiMean_32f_C3R :
+                type == CV_32FC4 ? (ippiMeanFunc)ippiMean_32f_C4R :
+                0;
+            if( ippFunc )
+            {
+                Ipp64f res[4];
+                if( ippFunc(src.data, src.step[0], sz, res, ippAlgHintAccurate) >= 0 )
+                {
+                    Scalar sc;
+                    for( int i = 0; i < cn; i++ )
+                    {
+                        sc[i] = res[i];
+                    }
+                    return sc;
+                }
+            }
+        }
+    }
+#endif
+
+    SumFunc func = getSumFunc(depth);
 
     CV_Assert( cn <= 4 && func != 0 );
 
@@ -572,7 +700,100 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
     CV_Assert( mask.empty() || mask.type() == CV_8U );
 
     int k, cn = src.channels(), depth = src.depth();
-    SumSqrFunc func = sumSqrTab[depth];
+
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    size_t total_size = src.total();
+    int rows = src.size[0], cols = (int)(total_size/rows);
+    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
+    {
+        Ipp64f mean_temp[3];
+        Ipp64f stddev_temp[3];
+        Ipp64f *pmean = &mean_temp[0];
+        Ipp64f *pstddev = &stddev_temp[0];
+        Mat mean, stddev;
+        int dcn_mean = -1;
+        if( _mean.needed() )
+        {
+            if( !_mean.fixedSize() )
+                _mean.create(cn, 1, CV_64F, -1, true);
+            mean = _mean.getMat();
+            dcn_mean = (int)mean.total();
+            pmean = (Ipp64f *)mean.data;
+        }
+        int dcn_stddev = -1;
+        if( _sdv.needed() )
+        {
+            if( !_sdv.fixedSize() )
+                _sdv.create(cn, 1, CV_64F, -1, true);
+            stddev = _sdv.getMat();
+            dcn_stddev = (int)stddev.total();
+            pstddev = (Ipp64f *)stddev.data;
+        }
+        for( int k = cn; k < dcn_mean; k++ )
+            pmean[k] = 0;
+        for( int k = cn; k < dcn_stddev; k++ )
+            pstddev[k] = 0;
+        IppiSize sz = { cols, rows };
+        int type = src.type();
+        if( !mask.empty() )
+        {
+            typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *, Ipp64f *);
+            ippiMaskMeanStdDevFuncC1 ippFuncC1 =
+            type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
+            type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
+            type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
+            0;
+            if( ippFuncC1 )
+            {
+                if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, pmean, pstddev) >= 0 )
+                    return;
+            }
+            typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
+            ippiMaskMeanStdDevFuncC3 ippFuncC3 =
+            type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
+            type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
+            type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
+            0;
+            if( ippFuncC3 )
+            {
+                if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
+                    ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
+                    ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
+                    return;
+            }
+        }
+        else
+        {
+            typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
+            ippiMeanStdDevFuncC1 ippFuncC1 =
+            type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
+            type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
+            //type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
+            0;
+            if( ippFuncC1 )
+            {
+                if( ippFuncC1(src.data, (int)src.step[0], sz, pmean, pstddev) >= 0 )
+                    return;
+            }
+            typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
+            ippiMeanStdDevFuncC3 ippFuncC3 =
+            type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
+            type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
+            type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
+            0;
+            if( ippFuncC3 )
+            {
+                if( ippFuncC3(src.data, (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
+                    ippFuncC3(src.data, (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
+                    ippFuncC3(src.data, (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
+                    return;
+            }
+        }
+    }
+#endif
+
+
+    SumSqrFunc func = getSumSqrTab(depth);
 
     CV_Assert( func != 0 );
 
@@ -746,14 +967,19 @@ static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval,
 
 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
 
-static MinMaxIdxFunc minmaxTab[] =
+static MinMaxIdxFunc getMinmaxTab(int depth)
 {
-    (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
-    (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
-    (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
-    (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
-    0
-};
+    static MinMaxIdxFunc minmaxTab[] =
+    {
+        (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
+        (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
+        (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
+        (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
+        0
+    };
+
+    return minmaxTab[depth];
+}
 
 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
 {
@@ -786,7 +1012,84 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
 
     CV_Assert( (cn == 1 && (mask.empty() || mask.type() == CV_8U)) ||
                (cn >= 1 && mask.empty() && !minIdx && !maxIdx) );
-    MinMaxIdxFunc func = minmaxTab[depth];
+
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    size_t total_size = src.total();
+    int rows = src.size[0], cols = (int)(total_size/rows);
+    if( cn == 1 && ( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) ) )
+    {
+        IppiSize sz = { cols, rows };
+        int type = src.type();
+        if( !mask.empty() )
+        {
+            typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
+            ippiMaskMinMaxIndxFuncC1 ippFuncC1 =
+            type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR :
+            type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR :
+            type == CV_32FC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR :
+            0;
+            if( ippFuncC1 )
+            {
+                Ipp32f min, max;
+                IppiPoint minp, maxp;
+                if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
+                {
+                    if( minVal )
+                        *minVal = (double)min;
+                    if( maxVal )
+                        *maxVal = (double)max;
+                    if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.data[0] )
+                        minp.x = maxp.x = -1;
+                    if( minIdx )
+                    {
+                        size_t minidx = minp.y * cols + minp.x + 1;
+                        ofs2idx(src, minidx, minIdx);
+                    }
+                    if( maxIdx )
+                    {
+                        size_t maxidx = maxp.y * cols + maxp.x + 1;
+                        ofs2idx(src, maxidx, maxIdx);
+                    }
+                    return;
+                }
+            }
+        }
+        else
+        {
+            typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
+            ippiMinMaxIndxFuncC1 ippFuncC1 =
+                type == CV_8UC1 ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R :
+                type == CV_16UC1 ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R :
+                type == CV_32FC1 ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R :
+                0;
+            if( ippFuncC1 )
+            {
+                Ipp32f min, max;
+                IppiPoint minp, maxp;
+                if( ippFuncC1(src.data, (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
+                {
+                    if( minVal )
+                        *minVal = (double)min;
+                    if( maxVal )
+                        *maxVal = (double)max;
+                    if( minIdx )
+                    {
+                        size_t minidx = minp.y * cols + minp.x + 1;
+                        ofs2idx(src, minidx, minIdx);
+                    }
+                    if( maxIdx )
+                    {
+                        size_t maxidx = maxp.y * cols + maxp.x + 1;
+                        ofs2idx(src, maxidx, maxIdx);
+                    }
+                    return;
+                }
+            }
+        }
+    }
+#endif
+
+    MinMaxIdxFunc func = getMinmaxTab(depth);
     CV_Assert( func != 0 );
 
     const Mat* arrays[] = {&src, &mask, 0};
@@ -1251,43 +1554,53 @@ CV_DEF_NORM_ALL(64f, double, double, double, double)
 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
 
-static NormFunc normTab[3][8] =
+static NormFunc getNormFunc(int normType, int depth)
 {
+    static NormFunc normTab[3][8] =
     {
-        (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
-        (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
-    },
-    {
-        (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
-        (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
-    },
-    {
-        (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
-        (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
-    }
-};
+        {
+            (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
+            (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
+        },
+        {
+            (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
+            (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
+        },
+        {
+            (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
+            (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
+        }
+    };
 
-static NormDiffFunc normDiffTab[3][8] =
+    return normTab[normType][depth];
+}
+
+static NormDiffFunc getNormDiffFunc(int normType, int depth)
 {
+    static NormDiffFunc normDiffTab[3][8] =
     {
-        (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
-        (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
-        (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
-        (NormDiffFunc)normDiffInf_64f, 0
-    },
-    {
-        (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
-        (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
-        (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
-        (NormDiffFunc)normDiffL1_64f, 0
-    },
-    {
-        (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
-        (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
-        (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
-        (NormDiffFunc)normDiffL2_64f, 0
-    }
-};
+        {
+            (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
+            (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
+            (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
+            (NormDiffFunc)normDiffInf_64f, 0
+        },
+        {
+            (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
+            (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
+            (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
+            (NormDiffFunc)normDiffL1_64f, 0
+        },
+        {
+            (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
+            (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
+            (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
+            (NormDiffFunc)normDiffL2_64f, 0
+        }
+    };
+
+    return normDiffTab[normType][depth];
+}
 
 }
 
@@ -1300,6 +1613,147 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
                ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) );
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    size_t total_size = src.total();
+    int rows = src.size[0], cols = (int)(total_size/rows);
+    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size)
+        && (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) )
+    {
+        IppiSize sz = { cols, rows };
+        int type = src.type();
+        if( !mask.empty() )
+        {
+            typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
+            ippiMaskNormFuncC1 ippFuncC1 =
+                normType == NORM_INF ?
+                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
+                type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR :
+                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
+                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
+                0) :
+            normType == NORM_L1 ?
+                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
+                type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR :
+                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
+                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
+                0) :
+            normType == NORM_L2 || normType == NORM_L2SQR ?
+                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
+                type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR :
+                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
+                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
+                0) : 0;
+            if( ippFuncC1 )
+            {
+                Ipp64f norm;
+                if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
+                {
+                    return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
+                }
+            }
+            typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
+            ippiMaskNormFuncC3 ippFuncC3 =
+                normType == NORM_INF ?
+                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
+                type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR :
+                type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
+                type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
+                0) :
+            normType == NORM_L1 ?
+                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
+                type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR :
+                type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
+                type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
+                0) :
+            normType == NORM_L2 || normType == NORM_L2SQR ?
+                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
+                type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR :
+                type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
+                type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
+                0) : 0;
+            if( ippFuncC3 )
+            {
+                Ipp64f norm1, norm2, norm3;
+                if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
+                    ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
+                    ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
+                {
+                    Ipp64f norm =
+                        normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
+                        normType == NORM_L1 ? norm1 + norm2 + norm3 :
+                        normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
+                        0;
+                    return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
+                }
+            }
+        }
+        else
+        {
+            typedef IppStatus (CV_STDCALL* ippiNormFunc)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
+            ippiNormFunc ippFunc =
+                normType == NORM_INF ?
+                (type == CV_8UC1 ? (ippiNormFunc)ippiNorm_Inf_8u_C1R :
+                type == CV_8UC3 ? (ippiNormFunc)ippiNorm_Inf_8u_C3R :
+                type == CV_8UC4 ? (ippiNormFunc)ippiNorm_Inf_8u_C4R :
+                type == CV_16UC1 ? (ippiNormFunc)ippiNorm_Inf_16u_C1R :
+                type == CV_16UC3 ? (ippiNormFunc)ippiNorm_Inf_16u_C3R :
+                type == CV_16UC4 ? (ippiNormFunc)ippiNorm_Inf_16u_C4R :
+                type == CV_16SC1 ? (ippiNormFunc)ippiNorm_Inf_16s_C1R :
+                //type == CV_16SC3 ? (ippiNormFunc)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+                //type == CV_16SC4 ? (ippiNormFunc)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+                type == CV_32FC1 ? (ippiNormFunc)ippiNorm_Inf_32f_C1R :
+                type == CV_32FC3 ? (ippiNormFunc)ippiNorm_Inf_32f_C3R :
+                type == CV_32FC4 ? (ippiNormFunc)ippiNorm_Inf_32f_C4R :
+                0) :
+                normType == NORM_L1 ?
+                (type == CV_8UC1 ? (ippiNormFunc)ippiNorm_L1_8u_C1R :
+                type == CV_8UC3 ? (ippiNormFunc)ippiNorm_L1_8u_C3R :
+                type == CV_8UC4 ? (ippiNormFunc)ippiNorm_L1_8u_C4R :
+                type == CV_16UC1 ? (ippiNormFunc)ippiNorm_L1_16u_C1R :
+                type == CV_16UC3 ? (ippiNormFunc)ippiNorm_L1_16u_C3R :
+                type == CV_16UC4 ? (ippiNormFunc)ippiNorm_L1_16u_C4R :
+                type == CV_16SC1 ? (ippiNormFunc)ippiNorm_L1_16s_C1R :
+                type == CV_16SC3 ? (ippiNormFunc)ippiNorm_L1_16s_C3R :
+                type == CV_16SC4 ? (ippiNormFunc)ippiNorm_L1_16s_C4R :
+                type == CV_32FC1 ? (ippiNormFunc)ippiNorm_L1_32f_C1R :
+                type == CV_32FC3 ? (ippiNormFunc)ippiNorm_L1_32f_C3R :
+                type == CV_32FC4 ? (ippiNormFunc)ippiNorm_L1_32f_C4R :
+                0) :
+                normType == NORM_L2 || normType == NORM_L2SQR ?
+                (type == CV_8UC1 ? (ippiNormFunc)ippiNorm_L2_8u_C1R :
+                type == CV_8UC3 ? (ippiNormFunc)ippiNorm_L2_8u_C3R :
+                type == CV_8UC4 ? (ippiNormFunc)ippiNorm_L2_8u_C4R :
+                type == CV_16UC1 ? (ippiNormFunc)ippiNorm_L2_16u_C1R :
+                type == CV_16UC3 ? (ippiNormFunc)ippiNorm_L2_16u_C3R :
+                type == CV_16UC4 ? (ippiNormFunc)ippiNorm_L2_16u_C4R :
+                type == CV_16SC1 ? (ippiNormFunc)ippiNorm_L2_16s_C1R :
+                type == CV_16SC3 ? (ippiNormFunc)ippiNorm_L2_16s_C3R :
+                type == CV_16SC4 ? (ippiNormFunc)ippiNorm_L2_16s_C4R :
+                type == CV_32FC1 ? (ippiNormFunc)ippiNorm_L2_32f_C1R :
+                type == CV_32FC3 ? (ippiNormFunc)ippiNorm_L2_32f_C3R :
+                type == CV_32FC4 ? (ippiNormFunc)ippiNorm_L2_32f_C4R :
+                0) : 0;
+            if( ippFunc )
+            {
+                Ipp64f norm_array[4];
+                if( ippFunc(src.data, (int)src.step[0], sz, norm_array, ippAlgHintAccurate) >= 0 )
+                {
+                    Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
+                    for( int i = 1; i < cn; i++ )
+                    {
+                        norm =
+                            normType == NORM_INF ? std::max(norm, norm_array[i]) :
+                            normType == NORM_L1 ? norm + norm_array[i] :
+                            normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
+                            0;
+                    }
+                    return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
+                }
+            }
+        }
+    }
+#endif
+
     if( src.isContinuous() && mask.empty() )
     {
         size_t len = src.total()*cn;
@@ -1371,7 +1825,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
         return result;
     }
 
-    NormFunc func = normTab[normType >> 1][depth];
+    NormFunc func = getNormFunc(normType >> 1, depth);
     CV_Assert( func != 0 );
 
     const Mat* arrays[] = {&src, &mask, 0};
@@ -1438,7 +1892,84 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
 {
     if( normType & CV_RELATIVE )
+    {
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+        Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
+
+        CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
+
+        normType &= 7;
+        CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
+                ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
+        size_t total_size = src1.total();
+        int rows = src1.size[0], cols = (int)(total_size/rows);
+        if( src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size)
+            && (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) )
+        {
+            IppiSize sz = { cols, rows };
+            int type = src1.type();
+            if( !mask.empty() )
+            {
+                typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
+                ippiMaskNormRelFuncC1 ippFuncC1 =
+                    normType == NORM_INF ?
+                    (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR :
+                    type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR :
+                    type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR :
+                    type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR :
+                    0) :
+                    normType == NORM_L1 ?
+                    (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR :
+                    type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR :
+                    type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR :
+                    type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR :
+                    0) :
+                    normType == NORM_L2 || normType == NORM_L2SQR ?
+                    (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR :
+                    type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR :
+                    type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR :
+                    type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR :
+                    0) : 0;
+                if( ippFuncC1 )
+                {
+                    Ipp64f norm;
+                    if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
+                        return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
+                }
+            }
+            else
+            {
+                typedef IppStatus (CV_STDCALL* ippiNormRelFunc)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
+                ippiNormRelFunc ippFunc =
+                    normType == NORM_INF ?
+                    (type == CV_8UC1 ? (ippiNormRelFunc)ippiNormRel_Inf_8u_C1R :
+                    type == CV_16UC1 ? (ippiNormRelFunc)ippiNormRel_Inf_16u_C1R :
+                    type == CV_16SC1 ? (ippiNormRelFunc)ippiNormRel_Inf_16s_C1R :
+                    type == CV_32FC1 ? (ippiNormRelFunc)ippiNormRel_Inf_32f_C1R :
+                    0) :
+                    normType == NORM_L1 ?
+                    (type == CV_8UC1 ? (ippiNormRelFunc)ippiNormRel_L1_8u_C1R :
+                    type == CV_16UC1 ? (ippiNormRelFunc)ippiNormRel_L1_16u_C1R :
+                    type == CV_16SC1 ? (ippiNormRelFunc)ippiNormRel_L1_16s_C1R :
+                    type == CV_32FC1 ? (ippiNormRelFunc)ippiNormRel_L1_32f_C1R :
+                    0) :
+                    normType == NORM_L2 || normType == NORM_L2SQR ?
+                    (type == CV_8UC1 ? (ippiNormRelFunc)ippiNormRel_L2_8u_C1R :
+                    type == CV_16UC1 ? (ippiNormRelFunc)ippiNormRel_L2_16u_C1R :
+                    type == CV_16SC1 ? (ippiNormRelFunc)ippiNormRel_L2_16s_C1R :
+                    type == CV_32FC1 ? (ippiNormRelFunc)ippiNormRel_L2_32f_C1R :
+                    0) : 0;
+                if( ippFunc )
+                {
+                    Ipp64f norm;
+                    if( ippFunc(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 )
+                        return (double)norm;
+                }
+            }
+        }
+#endif
         return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
+    }
 
     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
     int depth = src1.depth(), cn = src1.channels();
@@ -1449,6 +1980,145 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
               ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    size_t total_size = src1.total();
+    int rows = src1.size[0], cols = (int)(total_size/rows);
+    if( src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size)
+        && (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) )
+    {
+        IppiSize sz = { cols, rows };
+        int type = src1.type();
+        if( !mask.empty() )
+        {
+            typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
+            ippiMaskNormDiffFuncC1 ippFuncC1 =
+                normType == NORM_INF ?
+                (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
+                type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR :
+                type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
+                type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
+                0) :
+                normType == NORM_L1 ?
+                (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
+                type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR :
+                type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
+                type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
+                0) :
+                normType == NORM_L2 || normType == NORM_L2SQR ?
+                (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
+                type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR :
+                type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
+                type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
+                0) : 0;
+            if( ippFuncC1 )
+            {
+                Ipp64f norm;
+                if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
+                    return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
+            }
+            typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
+            ippiMaskNormDiffFuncC3 ippFuncC3 =
+                normType == NORM_INF ?
+                (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
+                type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR :
+                type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
+                type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
+                0) :
+                normType == NORM_L1 ?
+                (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
+                type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR :
+                type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
+                type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
+                0) :
+                normType == NORM_L2 || normType == NORM_L2SQR ?
+                (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
+                type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR :
+                type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
+                type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
+                0) : 0;
+            if( ippFuncC3 )
+            {
+                Ipp64f norm1, norm2, norm3;
+                if( ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
+                    ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
+                    ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
+                {
+                    Ipp64f norm =
+                        normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
+                        normType == NORM_L1 ? norm1 + norm2 + norm3 :
+                        normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
+                        0;
+                    return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
+                }
+            }
+        }
+        else
+        {
+            typedef IppStatus (CV_STDCALL* ippiNormDiffFunc)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
+            ippiNormDiffFunc ippFunc =
+                normType == NORM_INF ?
+                (type == CV_8UC1 ? (ippiNormDiffFunc)ippiNormDiff_Inf_8u_C1R :
+                type == CV_8UC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_8u_C3R :
+                type == CV_8UC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_8u_C4R :
+                type == CV_16UC1 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16u_C1R :
+                type == CV_16UC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16u_C3R :
+                type == CV_16UC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16u_C4R :
+                type == CV_16SC1 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C1R :
+                //type == CV_16SC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+                //type == CV_16SC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
+                type == CV_32FC1 ? (ippiNormDiffFunc)ippiNormDiff_Inf_32f_C1R :
+                type == CV_32FC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_32f_C3R :
+                type == CV_32FC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_32f_C4R :
+                0) :
+                normType == NORM_L1 ?
+                (type == CV_8UC1 ? (ippiNormDiffFunc)ippiNormDiff_L1_8u_C1R :
+                type == CV_8UC3 ? (ippiNormDiffFunc)ippiNormDiff_L1_8u_C3R :
+                type == CV_8UC4 ? (ippiNormDiffFunc)ippiNormDiff_L1_8u_C4R :
+                type == CV_16UC1 ? (ippiNormDiffFunc)ippiNormDiff_L1_16u_C1R :
+                type == CV_16UC3 ? (ippiNormDiffFunc)ippiNormDiff_L1_16u_C3R :
+                type == CV_16UC4 ? (ippiNormDiffFunc)ippiNormDiff_L1_16u_C4R :
+                type == CV_16SC1 ? (ippiNormDiffFunc)ippiNormDiff_L1_16s_C1R :
+                type == CV_16SC3 ? (ippiNormDiffFunc)ippiNormDiff_L1_16s_C3R :
+                type == CV_16SC4 ? (ippiNormDiffFunc)ippiNormDiff_L1_16s_C4R :
+                type == CV_32FC1 ? (ippiNormDiffFunc)ippiNormDiff_L1_32f_C1R :
+                type == CV_32FC3 ? (ippiNormDiffFunc)ippiNormDiff_L1_32f_C3R :
+                type == CV_32FC4 ? (ippiNormDiffFunc)ippiNormDiff_L1_32f_C4R :
+                0) :
+                normType == NORM_L2 || normType == NORM_L2SQR ?
+                (type == CV_8UC1 ? (ippiNormDiffFunc)ippiNormDiff_L2_8u_C1R :
+                type == CV_8UC3 ? (ippiNormDiffFunc)ippiNormDiff_L2_8u_C3R :
+                type == CV_8UC4 ? (ippiNormDiffFunc)ippiNormDiff_L2_8u_C4R :
+                type == CV_16UC1 ? (ippiNormDiffFunc)ippiNormDiff_L2_16u_C1R :
+                type == CV_16UC3 ? (ippiNormDiffFunc)ippiNormDiff_L2_16u_C3R :
+                type == CV_16UC4 ? (ippiNormDiffFunc)ippiNormDiff_L2_16u_C4R :
+                type == CV_16SC1 ? (ippiNormDiffFunc)ippiNormDiff_L2_16s_C1R :
+                type == CV_16SC3 ? (ippiNormDiffFunc)ippiNormDiff_L2_16s_C3R :
+                type == CV_16SC4 ? (ippiNormDiffFunc)ippiNormDiff_L2_16s_C4R :
+                type == CV_32FC1 ? (ippiNormDiffFunc)ippiNormDiff_L2_32f_C1R :
+                type == CV_32FC3 ? (ippiNormDiffFunc)ippiNormDiff_L2_32f_C3R :
+                type == CV_32FC4 ? (ippiNormDiffFunc)ippiNormDiff_L2_32f_C4R :
+                0) : 0;
+            if( ippFunc )
+            {
+                Ipp64f norm_array[4];
+                if( ippFunc(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) >= 0 )
+                {
+                    Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
+                    for( int i = 1; i < src1.channels(); i++ )
+                    {
+                        norm =
+                            normType == NORM_INF ? std::max(norm, norm_array[i]) :
+                            normType == NORM_L1 ? norm + norm_array[i] :
+                            normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
+                            0;
+                    }
+                    return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
+                }
+            }
+        }
+    }
+#endif
+
     if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
     {
         size_t len = src1.total()*src1.channels();
@@ -1512,7 +2182,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
         return result;
     }
 
-    NormDiffFunc func = normDiffTab[normType >> 1][depth];
+    NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
     CV_Assert( func != 0 );
 
     const Mat* arrays[] = {&src1, &src2, &mask, 0};
diff --git a/modules/core/src/stl.cpp b/modules/core/src/stl.cpp
index 09ba66c21..f03c1a2d7 100644
--- a/modules/core/src/stl.cpp
+++ b/modules/core/src/stl.cpp
@@ -66,4 +66,4 @@ void cv::String::deallocate()
     {
         cv::fastFree(data-1);
     }
-}
\ No newline at end of file
+}
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index b828435b7..738e863d7 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -42,11 +42,20 @@
 
 #include "precomp.hpp"
 
+#ifdef _MSC_VER
+# if _MSC_VER >= 1700
+#  pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
+# endif
+#endif
+
 #if defined WIN32 || defined _WIN32 || defined WINCE
 #ifndef _WIN32_WINNT           // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
   #define _WIN32_WINNT 0x0400  // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
 #endif
 #include <windows.h>
+#if (_WIN32_WINNT >= 0x0602)
+  #include <synchapi.h>
+#endif
 #undef small
 #undef min
 #undef max
@@ -75,6 +84,30 @@
     }
   #endif
 #endif
+
+#ifdef HAVE_WINRT
+#include <wrl/client.h>
+
+std::wstring GetTempPathWinRT()
+{
+    return std::wstring(Windows::Storage::ApplicationData::Current->TemporaryFolder->Path->Data());
+}
+
+std::wstring GetTempFileNameWinRT(std::wstring prefix)
+{
+    wchar_t guidStr[40];
+    GUID g;
+    CoCreateGuid(&g);
+    wchar_t* mask = L"%08x_%04x_%04x_%02x%02x_%02x%02x%02x%02x%02x%02x";
+    swprintf(&guidStr[0], sizeof(guidStr)/sizeof(wchar_t), mask,
+             g.Data1, g.Data2, g.Data3, UINT(g.Data4[0]), UINT(g.Data4[1]),
+             UINT(g.Data4[2]), UINT(g.Data4[3]), UINT(g.Data4[4]),
+             UINT(g.Data4[5]), UINT(g.Data4[6]), UINT(g.Data4[7]));
+
+    return prefix + std::wstring(guidStr);
+}
+
+#endif
 #else
 #include <pthread.h>
 #include <sys/time.h>
@@ -371,12 +404,39 @@ String format( const char* fmt, ... )
 
 String tempfile( const char* suffix )
 {
+#ifdef HAVE_WINRT
+    std::wstring temp_dir = L"";
+    const wchar_t* opencv_temp_dir = _wgetenv(L"OPENCV_TEMP_PATH");
+    if (opencv_temp_dir)
+        temp_dir = std::wstring(opencv_temp_dir);
+#else
     const char *temp_dir = getenv("OPENCV_TEMP_PATH");
     String fname;
+#endif
 
 #if defined WIN32 || defined _WIN32
-    char temp_dir2[MAX_PATH + 1] = { 0 };
-    char temp_file[MAX_PATH + 1] = { 0 };
+#ifdef HAVE_WINRT
+    RoInitialize(RO_INIT_MULTITHREADED);
+    std::wstring temp_dir2;
+    if (temp_dir.empty())
+        temp_dir = GetTempPathWinRT();
+
+    std::wstring temp_file;
+    temp_file = GetTempFileNameWinRT(L"ocv");
+    if (temp_file.empty())
+        return std::string();
+
+    temp_file = temp_dir + std::wstring(L"\\") + temp_file;
+    DeleteFileW(temp_file.c_str());
+
+    char aname[MAX_PATH];
+    size_t copied = wcstombs(aname, temp_file.c_str(), MAX_PATH);
+    CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+    fname = std::string(aname);
+    RoUninitialize();
+#else
+    char temp_dir2[MAX_PATH] = { 0 };
+    char temp_file[MAX_PATH] = { 0 };
 
     if (temp_dir == 0 || temp_dir[0] == 0)
     {
@@ -389,6 +449,7 @@ String tempfile( const char* suffix )
     DeleteFileA(temp_file);
 
     fname = temp_file;
+#endif
 # else
 #  ifdef ANDROID
     //char defaultTemplate[] = "/mnt/sdcard/__opencv_temp.XXXXXX";
@@ -486,40 +547,6 @@ redirectError( CvErrorCallback errCallback, void* userdata, void** prevUserdata)
 
 }
 
-/*CV_IMPL int
-cvGuiBoxReport( int code, const char *func_name, const char *err_msg,
-                const char *file, int line, void* )
-{
-#if (!defined WIN32 && !defined _WIN32) || defined WINCE
-    return cvStdErrReport( code, func_name, err_msg, file, line, 0 );
-#else
-    if( code != CV_StsBackTrace && code != CV_StsAutoTrace )
-    {
-        size_t msg_len = strlen(err_msg ? err_msg : "") + 1024;
-        char* message = (char*)alloca(msg_len);
-        char title[100];
-
-        wsprintf( message, "%s (%s)\nin function %s, %s(%d)\n\n"
-                  "Press \"Abort\" to terminate application.\n"
-                  "Press \"Retry\" to debug (if the app is running under debugger).\n"
-                  "Press \"Ignore\" to continue (this is not safe).\n",
-                  cvErrorStr(code), err_msg ? err_msg : "no description",
-                  func_name, file, line );
-
-        wsprintf( title, "OpenCV GUI Error Handler" );
-
-        int answer = MessageBox( NULL, message, title, MB_ICONERROR|MB_ABORTRETRYIGNORE|MB_SYSTEMMODAL );
-
-        if( answer == IDRETRY )
-        {
-            CV_DBG_BREAK();
-        }
-        return answer != IDIGNORE;
-    }
-    return 0;
-#endif
-}*/
-
 CV_IMPL int cvCheckHardwareSupport(int feature)
 {
     CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
@@ -677,7 +704,11 @@ cvErrorFromIppStatus( int status )
 }
 
 
-#if defined BUILD_SHARED_LIBS && defined CVAPI_EXPORTS && defined WIN32 && !defined WINCE
+#if defined CVAPI_EXPORTS && defined WIN32 && !defined WINCE
+#ifdef HAVE_WINRT
+    #pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
+#endif
+
 BOOL WINAPI DllMain( HINSTANCE, DWORD  fdwReason, LPVOID );
 
 BOOL WINAPI DllMain( HINSTANCE, DWORD  fdwReason, LPVOID )
@@ -698,7 +729,15 @@ namespace cv
 
 struct Mutex::Impl
 {
-    Impl() { InitializeCriticalSection(&cs); refcount = 1; }
+    Impl()
+    {
+#if (_WIN32_WINNT >= 0x0600)
+        ::InitializeCriticalSectionEx(&cs, 1000, 0);
+#else
+        ::InitializeCriticalSection(&cs);
+#endif
+        refcount = 1;
+    }
     ~Impl() { DeleteCriticalSection(&cs); }
 
     void lock() { EnterCriticalSection(&cs); }
@@ -791,4 +830,4 @@ bool Mutex::trylock() { return impl->trylock(); }
 
 }
 
-/* End of file. */
\ No newline at end of file
+/* End of file. */
diff --git a/modules/core/src/types.cpp b/modules/core/src/types.cpp
index 4dbb06f31..89e004265 100644
--- a/modules/core/src/types.cpp
+++ b/modules/core/src/types.cpp
@@ -136,4 +136,4 @@ float KeyPoint::overlap( const KeyPoint& kp1, const KeyPoint& kp2 )
     return ovrl;
 }
 
-} // cv
\ No newline at end of file
+} // cv
diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp
index 721fd0e6b..7486e134a 100644
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@@ -1123,7 +1123,7 @@ struct MeanOp : public BaseElemWiseOp
     }
     double getMaxErr(int)
     {
-        return 1e-6;
+        return 1e-5;
     }
 };
 
diff --git a/modules/core/test/test_ds.cpp b/modules/core/test/test_ds.cpp
index d79786054..c71deed06 100644
--- a/modules/core/test/test_ds.cpp
+++ b/modules/core/test/test_ds.cpp
@@ -358,8 +358,6 @@ Core_DynStructBaseTest::Core_DynStructBaseTest()
     iterations = max_struct_size*2;
     gen = struct_idx = iter = -1;
     test_progress = -1;
-
-    storage = 0;
 }
 
 
@@ -999,7 +997,7 @@ void Core_SeqBaseTest::run( int )
             {
                 t = cvtest::randReal(rng)*(max_log_storage_block_size - min_log_storage_block_size)
                 + min_log_storage_block_size;
-                storage = cvCreateMemStorage( cvRound( exp(t * CV_LOG2) ) );
+                storage.reset(cvCreateMemStorage( cvRound( exp(t * CV_LOG2) ) ));
             }
 
             iter = struct_idx = -1;
@@ -1083,11 +1081,11 @@ void Core_SeqSortInvTest::run( int )
         {
             struct_idx = iter = -1;
 
-            if( storage.empty() )
+            if( !storage )
             {
                 t = cvtest::randReal(rng)*(max_log_storage_block_size - min_log_storage_block_size)
                 + min_log_storage_block_size;
-                storage = cvCreateMemStorage( cvRound( exp(t * CV_LOG2) ) );
+                storage.reset(cvCreateMemStorage( cvRound( exp(t * CV_LOG2) ) ));
             }
 
             for( iter = 0; iter < iterations/10; iter++ )
@@ -1384,7 +1382,7 @@ void Core_SetTest::run( int )
         {
             struct_idx = iter = -1;
             t = cvtest::randReal(rng)*(max_log_storage_block_size - min_log_storage_block_size) + min_log_storage_block_size;
-            storage = cvCreateMemStorage( cvRound( exp(t * CV_LOG2) ) );
+            storage.reset(cvCreateMemStorage( cvRound( exp(t * CV_LOG2) ) ));
 
             for( int i = 0; i < struct_count; i++ )
             {
@@ -1398,7 +1396,7 @@ void Core_SetTest::run( int )
 
                 cvTsReleaseSimpleSet( (CvTsSimpleSet**)&simple_struct[i] );
                 simple_struct[i] = cvTsCreateSimpleSet( max_struct_size, pure_elem_size );
-                 cxcore_struct[i] = cvCreateSet( 0, sizeof(CvSet), elem_size, storage );
+                cxcore_struct[i] = cvCreateSet( 0, sizeof(CvSet), elem_size, storage );
             }
 
             if( test_set_ops( iterations*100 ) < 0 )
@@ -1811,7 +1809,7 @@ void Core_GraphTest::run( int )
             int block_size = cvRound( exp(t * CV_LOG2) );
             block_size = MAX(block_size, (int)(sizeof(CvGraph) + sizeof(CvMemBlock) + sizeof(CvSeqBlock)));
 
-            storage = cvCreateMemStorage(block_size);
+            storage.reset(cvCreateMemStorage(block_size));
 
             for( i = 0; i < struct_count; i++ )
             {
@@ -1929,7 +1927,7 @@ void Core_GraphScanTest::run( int )
             storage_blocksize = MAX(storage_blocksize, (int)(sizeof(CvGraph) + sizeof(CvMemBlock) + sizeof(CvSeqBlock)));
             storage_blocksize = MAX(storage_blocksize, (int)(sizeof(CvGraphEdge) + sizeof(CvMemBlock) + sizeof(CvSeqBlock)));
             storage_blocksize = MAX(storage_blocksize, (int)(sizeof(CvGraphVtx) + sizeof(CvMemBlock) + sizeof(CvSeqBlock)));
-            storage = cvCreateMemStorage(storage_blocksize);
+            storage.reset(cvCreateMemStorage(storage_blocksize));
 
             if( gen == 0 )
             {
@@ -2118,5 +2116,3 @@ TEST(Core_DS_Seq, sort_invert) { Core_SeqSortInvTest test; test.safe_run(); }
 TEST(Core_DS_Set, basic_operations) { Core_SetTest test; test.safe_run(); }
 TEST(Core_DS_Graph, basic_operations) { Core_GraphTest test; test.safe_run(); }
 TEST(Core_DS_Graph, scan) { Core_GraphScanTest test; test.safe_run(); }
-
-
diff --git a/modules/core/test/test_dxt.cpp b/modules/core/test/test_dxt.cpp
index 16025fa8f..1c0c7b00b 100644
--- a/modules/core/test/test_dxt.cpp
+++ b/modules/core/test/test_dxt.cpp
@@ -866,5 +866,3 @@ protected:
 };
 
 TEST(Core_DFT, complex_output) { Core_DFTComplexOutputTest test; test.safe_run(); }
-
-
diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp
index 602dcd1e1..ba6656761 100644
--- a/modules/core/test/test_io.cpp
+++ b/modules/core/test/test_io.cpp
@@ -270,16 +270,16 @@ protected:
 
             cvRelease((void**)&m_nd);
 
-            Ptr<CvSparseMat> m_s = (CvSparseMat*)fs["test_sparse_mat"].readObj();
-            Ptr<CvSparseMat> _test_sparse_ = cvCreateSparseMat(test_sparse_mat);
-            Ptr<CvSparseMat> _test_sparse = (CvSparseMat*)cvClone(_test_sparse_);
+            Ptr<CvSparseMat> m_s((CvSparseMat*)fs["test_sparse_mat"].readObj());
+            Ptr<CvSparseMat> _test_sparse_(cvCreateSparseMat(test_sparse_mat));
+            Ptr<CvSparseMat> _test_sparse((CvSparseMat*)cvClone(_test_sparse_));
             SparseMat m_s2;
             fs["test_sparse_mat"] >> m_s2;
-            Ptr<CvSparseMat> _m_s2 = cvCreateSparseMat(m_s2);
+            Ptr<CvSparseMat> _m_s2(cvCreateSparseMat(m_s2));
 
             if( !m_s || !CV_IS_SPARSE_MAT(m_s) ||
-               !cvTsCheckSparse(m_s, _test_sparse,0) ||
-               !cvTsCheckSparse(_m_s2, _test_sparse,0))
+               !cvTsCheckSparse(m_s, _test_sparse, 0) ||
+               !cvTsCheckSparse(_m_s2, _test_sparse, 0))
             {
                 ts->printf( cvtest::TS::LOG, "the read sparse matrix is not correct\n" );
                 ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
@@ -391,7 +391,6 @@ protected:
         try
         {
             string fname = cv::tempfile(".xml");
-            FileStorage fs(fname, FileStorage::WRITE);
             vector<int> mi, mi2, mi3, mi4;
             vector<Mat> mv, mv2, mv3, mv4;
             Mat m(10, 9, CV_32F);
@@ -399,24 +398,59 @@ protected:
             randu(m, 0, 1);
             mi3.push_back(5);
             mv3.push_back(m);
+            Point_<float> p1(1.1f, 2.2f), op1;
+            Point3i p2(3, 4, 5), op2;
+            Size s1(6, 7), os1;
+            Complex<int> c1(9, 10), oc1;
+            Rect r1(11, 12, 13, 14), or1;
+            Vec<int, 5> v1(15, 16, 17, 18, 19), ov1;
+            Scalar sc1(20.0, 21.1, 22.2, 23.3), osc1;
+            Range g1(7, 8), og1;
+
+            FileStorage fs(fname, FileStorage::WRITE);
             fs << "mi" << mi;
             fs << "mv" << mv;
             fs << "mi3" << mi3;
             fs << "mv3" << mv3;
             fs << "empty" << empty;
+            fs << "p1" << p1;
+            fs << "p2" << p2;
+            fs << "s1" << s1;
+            fs << "c1" << c1;
+            fs << "r1" << r1;
+            fs << "v1" << v1;
+            fs << "sc1" << sc1;
+            fs << "g1" << g1;
             fs.release();
+
             fs.open(fname, FileStorage::READ);
             fs["mi"] >> mi2;
             fs["mv"] >> mv2;
             fs["mi3"] >> mi4;
             fs["mv3"] >> mv4;
             fs["empty"] >> empty;
+            fs["p1"] >> op1;
+            fs["p2"] >> op2;
+            fs["s1"] >> os1;
+            fs["c1"] >> oc1;
+            fs["r1"] >> or1;
+            fs["v1"] >> ov1;
+            fs["sc1"] >> osc1;
+            fs["g1"] >> og1;
             CV_Assert( mi2.empty() );
             CV_Assert( mv2.empty() );
             CV_Assert( norm(mi3, mi4, CV_C) == 0 );
             CV_Assert( mv4.size() == 1 );
             double n = norm(mv3[0], mv4[0], CV_C);
             CV_Assert( n == 0 );
+            CV_Assert( op1 == p1 );
+            CV_Assert( op2 == p2 );
+            CV_Assert( os1 == s1 );
+            CV_Assert( oc1 == c1 );
+            CV_Assert( or1 == r1 );
+            CV_Assert( ov1 == v1 );
+            CV_Assert( osc1 == sc1 );
+            CV_Assert( og1 == g1 );
         }
         catch(...)
         {
diff --git a/modules/core/test/test_main.cpp b/modules/core/test/test_main.cpp
index 6b2499344..d5400e20f 100644
--- a/modules/core/test/test_main.cpp
+++ b/modules/core/test/test_main.cpp
@@ -1,3 +1,10 @@
+#ifdef _MSC_VER
+# if _MSC_VER >= 1700
+#  pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
+# endif
+#endif
+
+
 #include "test_precomp.hpp"
 
 CV_TEST_MAIN("cv")
diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp
index 245347b8b..3c8ae8bf9 100644
--- a/modules/core/test/test_mat.cpp
+++ b/modules/core/test/test_mat.cpp
@@ -510,6 +510,32 @@ protected:
             return;
         }
     #endif
+        // Test read and write
+        FileStorage fs( "PCA_store.yml", FileStorage::WRITE );
+        rPCA.write( fs );
+        fs.release();
+
+        PCA lPCA;
+        fs.open( "PCA_store.yml", FileStorage::READ );
+        lPCA.read( fs.root() );
+        err = norm( rPCA.eigenvectors, lPCA.eigenvectors, CV_RELATIVE_L2 );
+        if( err > 0 )
+        {
+            ts->printf( cvtest::TS::LOG, "bad accuracy of write/load functions (YML); err = %f\n", err );
+            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
+        }
+        err = norm( rPCA.eigenvalues, lPCA.eigenvalues, CV_RELATIVE_L2 );
+        if( err > 0 )
+        {
+            ts->printf( cvtest::TS::LOG, "bad accuracy of write/load functions (YML); err = %f\n", err );
+            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
+        }
+        err = norm( rPCA.mean, lPCA.mean, CV_RELATIVE_L2 );
+        if( err > 0 )
+        {
+            ts->printf( cvtest::TS::LOG, "bad accuracy of write/load functions (YML); err = %f\n", err );
+            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
+        }
     }
 };
 
@@ -643,7 +669,7 @@ void Core_ArrayOpTest::run( int /* start_from */)
         cvSetReal3D(&matA, idx1[0], idx1[1], idx1[2], -val0);
         cvSetND(&matB, idx0, val1);
         cvSet3D(&matB, idx1[0], idx1[1], idx1[2], -val1);
-        Ptr<CvMatND> matC = cvCloneMatND(&matB);
+        Ptr<CvMatND> matC(cvCloneMatND(&matB));
 
         if( A.at<float>(idx0[0], idx0[1], idx0[2]) != val0 ||
            A.at<float>(idx1[0], idx1[1], idx1[2]) != -val0 ||
@@ -736,7 +762,7 @@ void Core_ArrayOpTest::run( int /* start_from */)
             }
         }
 
-        Ptr<CvSparseMat> M2 = cvCreateSparseMat(M);
+        Ptr<CvSparseMat> M2(cvCreateSparseMat(M));
         MatND Md;
         M.copyTo(Md);
         SparseMat M3; SparseMat(Md).convertTo(M3, Md.type(), 2);
diff --git a/modules/core/test/test_math.cpp b/modules/core/test/test_math.cpp
index b9beeb7d3..377742386 100644
--- a/modules/core/test/test_math.cpp
+++ b/modules/core/test/test_math.cpp
@@ -2755,4 +2755,3 @@ TEST(CovariationMatrixVectorOfMatWithMean, accuracy)
 }
 
 /* End of file. */
-
diff --git a/modules/core/test/test_operations.cpp b/modules/core/test/test_operations.cpp
index 6b36883cf..8d3341e59 100644
--- a/modules/core/test/test_operations.cpp
+++ b/modules/core/test/test_operations.cpp
@@ -75,6 +75,7 @@ protected:
     bool TestSparseMat();
     bool TestVec();
     bool TestMatxMultiplication();
+    bool TestMatxElementwiseDivison();
     bool TestSubMatAccess();
     bool TestExp();
     bool TestSVD();
@@ -891,6 +892,28 @@ bool CV_OperationsTest::TestMatxMultiplication()
     return true;
 }
 
+bool CV_OperationsTest::TestMatxElementwiseDivison()
+{
+    try
+    {
+        Matx22f mat(2, 4, 6, 8);
+        Matx22f mat2(2, 2, 2, 2);
+
+        Matx22f res = mat.div(mat2);
+
+        if(res(0, 0) != 1.0) throw test_excep();
+        if(res(0, 1) != 2.0) throw test_excep();
+        if(res(1, 0) != 3.0) throw test_excep();
+        if(res(1, 1) != 4.0) throw test_excep();
+    }
+    catch(const test_excep&)
+    {
+        ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
+        return false;
+    }
+    return true;
+}
+
 
 bool CV_OperationsTest::TestVec()
 {
@@ -1109,6 +1132,9 @@ void CV_OperationsTest::run( int /* start_from */)
     if (!TestMatxMultiplication())
         return;
 
+    if (!TestMatxElementwiseDivison())
+        return;
+
     if (!TestSubMatAccess())
         return;
 
diff --git a/modules/core/test/test_precomp.cpp b/modules/core/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/core/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/core/test/test_ptr.cpp b/modules/core/test/test_ptr.cpp
new file mode 100644
index 000000000..c6f793ab1
--- /dev/null
+++ b/modules/core/test/test_ptr.cpp
@@ -0,0 +1,389 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+
+namespace {
+
+struct Reporter {
+    Reporter(bool* deleted) : deleted_(deleted)
+    { *deleted_ = false; }
+
+    // the destructor is virtual, so that we can test dynamic_cast later
+    virtual ~Reporter()
+    { *deleted_ = true; }
+
+private:
+    bool* deleted_;
+
+    Reporter(const Reporter&);
+    Reporter& operator = (const Reporter&);
+};
+
+struct ReportingDeleter {
+    ReportingDeleter(bool* deleted) : deleted_(deleted)
+    { *deleted_ = false; }
+
+    void operator()(void*)
+    { *deleted_ = true; }
+
+private:
+    bool* deleted_;
+};
+
+int dummyObject;
+
+}
+
+TEST(Core_Ptr, default_ctor)
+{
+    Ptr<int> p;
+    EXPECT_EQ(NULL, p.get());
+}
+
+TEST(Core_Ptr, owning_ctor)
+{
+    bool deleted = false;
+
+    {
+        Reporter* r = new Reporter(&deleted);
+        Ptr<void> p(r);
+        EXPECT_EQ(r, p.get());
+    }
+
+    EXPECT_TRUE(deleted);
+
+    {
+        Ptr<int> p(&dummyObject, ReportingDeleter(&deleted));
+        EXPECT_EQ(&dummyObject, p.get());
+    }
+
+    EXPECT_TRUE(deleted);
+
+    {
+        Ptr<void> p((void*)0, ReportingDeleter(&deleted));
+        EXPECT_EQ(NULL, p.get());
+    }
+
+    EXPECT_FALSE(deleted);
+}
+
+TEST(Core_Ptr, sharing_ctor)
+{
+    bool deleted = false;
+
+    {
+        Ptr<Reporter> p1(new Reporter(&deleted));
+        Ptr<Reporter> p2(p1);
+        EXPECT_EQ(p1.get(), p2.get());
+        p1.release();
+        EXPECT_FALSE(deleted);
+    }
+
+    EXPECT_TRUE(deleted);
+
+    {
+        Ptr<Reporter> p1(new Reporter(&deleted));
+        Ptr<void> p2(p1);
+        EXPECT_EQ(p1.get(), p2.get());
+        p1.release();
+        EXPECT_FALSE(deleted);
+    }
+
+    EXPECT_TRUE(deleted);
+
+    {
+        Ptr<Reporter> p1(new Reporter(&deleted));
+        Ptr<int> p2(p1, &dummyObject);
+        EXPECT_EQ(&dummyObject, p2.get());
+        p1.release();
+        EXPECT_FALSE(deleted);
+    }
+
+    EXPECT_TRUE(deleted);
+}
+
+TEST(Core_Ptr, assignment)
+{
+    bool deleted1 = false, deleted2 = false;
+
+    {
+        Ptr<Reporter> p1(new Reporter(&deleted1));
+        p1 = p1;
+        EXPECT_FALSE(deleted1);
+    }
+
+    EXPECT_TRUE(deleted1);
+
+    {
+        Ptr<Reporter> p1(new Reporter(&deleted1));
+        Ptr<Reporter> p2(new Reporter(&deleted2));
+        p2 = p1;
+        EXPECT_TRUE(deleted2);
+        EXPECT_EQ(p1.get(), p2.get());
+        p1.release();
+        EXPECT_FALSE(deleted1);
+    }
+
+    EXPECT_TRUE(deleted1);
+
+    {
+        Ptr<Reporter> p1(new Reporter(&deleted1));
+        Ptr<void> p2(new Reporter(&deleted2));
+        p2 = p1;
+        EXPECT_TRUE(deleted2);
+        EXPECT_EQ(p1.get(), p2.get());
+        p1.release();
+        EXPECT_FALSE(deleted1);
+    }
+
+    EXPECT_TRUE(deleted1);
+}
+
+TEST(Core_Ptr, release)
+{
+    bool deleted = false;
+
+    Ptr<Reporter> p1(new Reporter(&deleted));
+    p1.release();
+    EXPECT_TRUE(deleted);
+    EXPECT_EQ(NULL, p1.get());
+}
+
+TEST(Core_Ptr, reset)
+{
+    bool deleted_old = false, deleted_new = false;
+
+    {
+        Ptr<void> p(new Reporter(&deleted_old));
+        Reporter* r = new Reporter(&deleted_new);
+        p.reset(r);
+        EXPECT_TRUE(deleted_old);
+        EXPECT_EQ(r, p.get());
+    }
+
+    EXPECT_TRUE(deleted_new);
+
+    {
+        Ptr<void> p(new Reporter(&deleted_old));
+        p.reset(&dummyObject, ReportingDeleter(&deleted_new));
+        EXPECT_TRUE(deleted_old);
+        EXPECT_EQ(&dummyObject, p.get());
+    }
+
+    EXPECT_TRUE(deleted_new);
+}
+
+TEST(Core_Ptr, swap)
+{
+    bool deleted1 = false, deleted2 = false;
+
+    {
+        Reporter* r1 = new Reporter(&deleted1);
+        Reporter* r2 = new Reporter(&deleted2);
+        Ptr<Reporter> p1(r1), p2(r2);
+        p1.swap(p2);
+        EXPECT_EQ(r1, p2.get());
+        EXPECT_EQ(r2, p1.get());
+        EXPECT_FALSE(deleted1);
+        EXPECT_FALSE(deleted2);
+        p1.release();
+        EXPECT_TRUE(deleted2);
+    }
+
+    EXPECT_TRUE(deleted1);
+
+    {
+        Reporter* r1 = new Reporter(&deleted1);
+        Reporter* r2 = new Reporter(&deleted2);
+        Ptr<Reporter> p1(r1), p2(r2);
+        swap(p1, p2);
+        EXPECT_EQ(r1, p2.get());
+        EXPECT_EQ(r2, p1.get());
+        EXPECT_FALSE(deleted1);
+        EXPECT_FALSE(deleted2);
+        p1.release();
+        EXPECT_TRUE(deleted2);
+    }
+
+    EXPECT_TRUE(deleted1);
+}
+
+TEST(Core_Ptr, accessors)
+{
+    {
+        Ptr<int> p;
+        EXPECT_EQ(NULL, static_cast<int*>(p));
+        EXPECT_TRUE(p.empty());
+    }
+
+    {
+        Size* s = new Size();
+        Ptr<Size> p(s);
+        EXPECT_EQ(s, static_cast<Size*>(p));
+        EXPECT_EQ(s, &*p);
+        EXPECT_EQ(&s->width, &p->width);
+        EXPECT_FALSE(p.empty());
+    }
+}
+
+namespace {
+
+struct SubReporterBase {
+    virtual ~SubReporterBase() {}
+    int padding;
+};
+
+/* multiple inheritance, so that casts do something interesting */
+struct SubReporter : SubReporterBase, Reporter
+{
+    SubReporter(bool* deleted) : Reporter(deleted)
+    {}
+};
+
+}
+
+TEST(Core_Ptr, casts)
+{
+    bool deleted = false;
+
+    {
+        Ptr<const Reporter> p1(new Reporter(&deleted));
+        Ptr<Reporter> p2 = p1.constCast<Reporter>();
+        EXPECT_EQ(p1.get(), p2.get());
+        p1.release();
+        EXPECT_FALSE(deleted);
+    }
+
+    EXPECT_TRUE(deleted);
+
+    {
+        SubReporter* sr = new SubReporter(&deleted);
+        Ptr<Reporter> p1(sr);
+        // This next check isn't really for Ptr itself; it checks that Reporter
+        // is at a non-zero offset within SubReporter, so that the next
+        // check will give us more confidence that the cast actually did something.
+        EXPECT_NE(static_cast<void*>(sr), static_cast<void*>(p1.get()));
+        Ptr<SubReporter> p2 = p1.staticCast<SubReporter>();
+        EXPECT_EQ(sr, p2.get());
+        p1.release();
+        EXPECT_FALSE(deleted);
+    }
+
+    EXPECT_TRUE(deleted);
+
+    {
+        SubReporter* sr = new SubReporter(&deleted);
+        Ptr<Reporter> p1(sr);
+        EXPECT_NE(static_cast<void*>(sr), static_cast<void*>(p1.get()));
+        Ptr<void> p2 = p1.dynamicCast<void>();
+        EXPECT_EQ(sr, p2.get());
+        p1.release();
+        EXPECT_FALSE(deleted);
+    }
+
+    EXPECT_TRUE(deleted);
+
+    {
+        Ptr<Reporter> p1(new Reporter(&deleted));
+        Ptr<SubReporter> p2 = p1.dynamicCast<SubReporter>();
+        EXPECT_EQ(NULL, p2.get());
+        p1.release();
+        EXPECT_FALSE(deleted);
+    }
+
+    EXPECT_TRUE(deleted);
+}
+
+TEST(Core_Ptr, comparisons)
+{
+    Ptr<int> p1, p2(new int), p3(new int);
+    Ptr<int> p4(p2, p3.get());
+
+    // Not using EXPECT_EQ here, since none of them are really "expected" or "actual".
+    EXPECT_TRUE(p1 == p1);
+    EXPECT_TRUE(p2 == p2);
+    EXPECT_TRUE(p2 != p3);
+    EXPECT_TRUE(p2 != p4);
+    EXPECT_TRUE(p3 == p4);
+}
+
+TEST(Core_Ptr, make)
+{
+    bool deleted = true;
+
+    {
+        Ptr<void> p = makePtr<Reporter>(&deleted);
+        EXPECT_FALSE(deleted);
+    }
+
+    EXPECT_TRUE(deleted);
+}
+
+namespace {
+
+struct SpeciallyDeletable
+{
+    SpeciallyDeletable() : deleted(false)
+    {}
+    bool deleted;
+};
+
+}
+
+namespace cv {
+
+template<>
+void DefaultDeleter<SpeciallyDeletable>::operator()(SpeciallyDeletable * obj) const
+{ obj->deleted = true; }
+
+}
+
+TEST(Core_Ptr, specialized_deleter)
+{
+    SpeciallyDeletable sd;
+
+    { Ptr<void> p(&sd); }
+
+    ASSERT_TRUE(sd.deleted);
+}
diff --git a/modules/cudev/CMakeLists.txt b/modules/cudev/CMakeLists.txt
new file mode 100644
index 000000000..3c7b0592c
--- /dev/null
+++ b/modules/cudev/CMakeLists.txt
@@ -0,0 +1,22 @@
+if(NOT HAVE_CUDA)
+  ocv_module_disable(cudev)
+endif()
+
+set(the_description "CUDA device layer")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4189 /wd4505 -Wundef -Wmissing-declarations -Wunused-function -Wunused-variable)
+
+ocv_add_module(cudev)
+
+ocv_module_include_directories(opencv_core)
+
+file(GLOB_RECURSE lib_hdrs "include/opencv2/*.hpp")
+file(GLOB         lib_srcs "src/*.cpp")
+
+ocv_set_module_sources(HEADERS ${lib_hdrs} SOURCES ${lib_srcs})
+
+ocv_create_module()
+
+if(BUILD_TESTS)
+  add_subdirectory(test)
+endif()
diff --git a/modules/cudev/include/opencv2/cudev.hpp b/modules/cudev/include/opencv2/cudev.hpp
new file mode 100644
index 000000000..63bfc9569
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev.hpp
@@ -0,0 +1,112 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_HPP__
+#define __OPENCV_CUDEV_HPP__
+
+#include "cudev/common.hpp"
+
+#include "cudev/util/atomic.hpp"
+#include "cudev/util/limits.hpp"
+#include "cudev/util/saturate_cast.hpp"
+#include "cudev/util/simd_functions.hpp"
+#include "cudev/util/tuple.hpp"
+#include "cudev/util/type_traits.hpp"
+#include "cudev/util/vec_math.hpp"
+#include "cudev/util/vec_traits.hpp"
+
+#include "cudev/functional/color_cvt.hpp"
+#include "cudev/functional/functional.hpp"
+#include "cudev/functional/tuple_adapter.hpp"
+
+#include "cudev/warp/reduce.hpp"
+#include "cudev/warp/scan.hpp"
+#include "cudev/warp/shuffle.hpp"
+#include "cudev/warp/warp.hpp"
+
+#include "cudev/block/block.hpp"
+#include "cudev/block/dynamic_smem.hpp"
+#include "cudev/block/reduce.hpp"
+#include "cudev/block/scan.hpp"
+#include "cudev/block/vec_distance.hpp"
+
+#include "cudev/grid/copy.hpp"
+#include "cudev/grid/glob_reduce.hpp"
+#include "cudev/grid/histogram.hpp"
+#include "cudev/grid/integral.hpp"
+#include "cudev/grid/pyramids.hpp"
+#include "cudev/grid/reduce_to_vec.hpp"
+#include "cudev/grid/split_merge.hpp"
+#include "cudev/grid/transform.hpp"
+#include "cudev/grid/transpose.hpp"
+
+#include "cudev/ptr2d/constant.hpp"
+#include "cudev/ptr2d/deriv.hpp"
+#include "cudev/ptr2d/extrapolation.hpp"
+#include "cudev/ptr2d/glob.hpp"
+#include "cudev/ptr2d/gpumat.hpp"
+#include "cudev/ptr2d/interpolation.hpp"
+#include "cudev/ptr2d/lut.hpp"
+#include "cudev/ptr2d/mask.hpp"
+#include "cudev/ptr2d/remap.hpp"
+#include "cudev/ptr2d/resize.hpp"
+#include "cudev/ptr2d/texture.hpp"
+#include "cudev/ptr2d/traits.hpp"
+#include "cudev/ptr2d/transform.hpp"
+#include "cudev/ptr2d/warping.hpp"
+#include "cudev/ptr2d/zip.hpp"
+
+#include "cudev/expr/binary_func.hpp"
+#include "cudev/expr/binary_op.hpp"
+#include "cudev/expr/color.hpp"
+#include "cudev/expr/deriv.hpp"
+#include "cudev/expr/expr.hpp"
+#include "cudev/expr/per_element_func.hpp"
+#include "cudev/expr/reduction.hpp"
+#include "cudev/expr/unary_func.hpp"
+#include "cudev/expr/unary_op.hpp"
+#include "cudev/expr/warping.hpp"
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/block/block.hpp b/modules/cudev/include/opencv2/cudev/block/block.hpp
new file mode 100644
index 000000000..385e1713e
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/block/block.hpp
@@ -0,0 +1,127 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_BLOCK_BLOCK_HPP__
+#define __OPENCV_CUDEV_BLOCK_BLOCK_HPP__
+
+#include "../common.hpp"
+
+namespace cv { namespace cudev {
+
+struct Block
+{
+    __device__ __forceinline__ static uint blockId()
+    {
+        return (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
+    }
+
+    __device__ __forceinline__ static uint blockSize()
+    {
+        return blockDim.x * blockDim.y * blockDim.z;
+    }
+
+    __device__ __forceinline__ static uint threadLineId()
+    {
+        return (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
+    }
+};
+
+template <class It, typename T>
+__device__ __forceinline__ static void blockFill(It beg, It end, const T& value)
+{
+    uint STRIDE = Block::blockSize();
+    It t = beg + Block::threadLineId();
+
+    for(; t < end; t += STRIDE)
+        *t = value;
+}
+
+template <class OutIt, typename T>
+__device__ __forceinline__ static void blockYota(OutIt beg, OutIt end, T value)
+{
+    uint STRIDE = Block::blockSize();
+    uint tid = Block::threadLineId();
+    value += tid;
+
+    for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
+        *t = value;
+}
+
+template <class InIt, class OutIt>
+__device__ __forceinline__ static void blockCopy(InIt beg, InIt end, OutIt out)
+{
+    uint STRIDE = Block::blockSize();
+    InIt  t = beg + Block::threadLineId();
+    OutIt o = out + (t - beg);
+
+    for(; t < end; t += STRIDE, o += STRIDE)
+        *o = *t;
+}
+
+template <class InIt, class OutIt, class UnOp>
+__device__ __forceinline__ static void blockTransfrom(InIt beg, InIt end, OutIt out, const UnOp& op)
+{
+    uint STRIDE = Block::blockSize();
+    InIt  t = beg + Block::threadLineId();
+    OutIt o = out + (t - beg);
+
+    for(; t < end; t += STRIDE, o += STRIDE)
+        *o = op(*t);
+}
+
+template <class InIt1, class InIt2, class OutIt, class BinOp>
+__device__ __forceinline__ static void blockTransfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, const BinOp& op)
+{
+    uint STRIDE = Block::blockSize();
+    InIt1 t1 = beg1 + Block::threadLineId();
+    InIt2 t2 = beg2 + Block::threadLineId();
+    OutIt o  = out + (t1 - beg1);
+
+    for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
+        *o = op(*t1, *t2);
+}
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp b/modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp
new file mode 100644
index 000000000..02803b003
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp
@@ -0,0 +1,392 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_BLOCK_REDUCE_DETAIL_HPP__
+#define __OPENCV_CUDEV_BLOCK_REDUCE_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/tuple.hpp"
+#include "../../util/type_traits.hpp"
+#include "../../warp/warp.hpp"
+#include "../../warp/shuffle.hpp"
+
+namespace cv { namespace cudev {
+
+namespace block_reduce_detail
+{
+    // GetType
+
+    template <typename T> struct GetType;
+
+    template <typename T> struct GetType<T*>
+    {
+        typedef T type;
+    };
+
+    template <typename T> struct GetType<volatile T*>
+    {
+        typedef T type;
+    };
+
+    template <typename T> struct GetType<T&>
+    {
+        typedef T type;
+    };
+
+    // For
+
+    template <int I, int N> struct For
+    {
+        template <class PointerTuple, class ValTuple>
+        __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
+        {
+            get<I>(smem)[tid] = get<I>(val);
+
+            For<I + 1, N>::loadToSmem(smem, val, tid);
+        }
+
+        template <class PointerTuple, class ValTuple>
+        __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
+        {
+            get<I>(val) = get<I>(smem)[tid];
+
+            For<I + 1, N>::loadFromSmem(smem, val, tid);
+        }
+
+        template <class PointerTuple, class ValTuple, class OpTuple>
+        __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op)
+        {
+            typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta];
+            get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg);
+
+            For<I + 1, N>::merge(smem, val, tid, delta, op);
+        }
+
+#if CV_CUDEV_ARCH >= 300
+        template <class ValTuple, class OpTuple>
+        __device__ static void mergeShfl(const ValTuple& val, uint delta, uint width, const OpTuple& op)
+        {
+            typename GetType<typename tuple_element<I, ValTuple>::type>::type reg = shfl_down(get<I>(val), delta, width);
+            get<I>(val) = get<I>(op)(get<I>(val), reg);
+
+            For<I + 1, N>::mergeShfl(val, delta, width, op);
+        }
+#endif
+    };
+
+    template <int N> struct For<N, N>
+    {
+        template <class PointerTuple, class ValTuple>
+        __device__ __forceinline__ static void loadToSmem(const PointerTuple&, const ValTuple&, uint)
+        {
+        }
+        template <class PointerTuple, class ValTuple>
+        __device__ __forceinline__ static void loadFromSmem(const PointerTuple&, const ValTuple&, uint)
+        {
+        }
+
+        template <class PointerTuple, class ValTuple, class OpTuple>
+        __device__ __forceinline__ static void merge(const PointerTuple&, const ValTuple&, uint, uint, const OpTuple&)
+        {
+        }
+
+#if CV_CUDEV_ARCH >= 300
+        template <class ValTuple, class OpTuple>
+        __device__ __forceinline__ static void mergeShfl(const ValTuple&, uint, uint, const OpTuple&)
+        {
+        }
+#endif
+    };
+
+    // loadToSmem / loadFromSmem
+
+    template <typename T>
+    __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid)
+    {
+        smem[tid] = val;
+    }
+
+    template <typename T>
+    __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, uint tid)
+    {
+        val = smem[tid];
+    }
+
+    template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+    __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                               const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                               uint tid)
+    {
+        For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
+    }
+
+    template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+    __device__ __forceinline__ void loadFromSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                                     const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                     uint tid)
+    {
+        For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
+    }
+
+    // merge
+
+    template <typename T, class Op>
+    __device__ __forceinline__ void merge(volatile T* smem, T& val, uint tid, uint delta, const Op& op)
+    {
+        T reg = smem[tid + delta];
+        smem[tid] = val = op(val, reg);
+    }
+
+    template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+              class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+    __device__ __forceinline__ void merge(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                          const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                          uint tid,
+                                          uint delta,
+                                          const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+    {
+        For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
+    }
+
+    // mergeShfl
+
+#if CV_CUDEV_ARCH >= 300
+    template <typename T, class Op>
+    __device__ __forceinline__ void mergeShfl(T& val, uint delta, uint width, const Op& op)
+    {
+        T reg = shfl_down(val, delta, width);
+        val = op(val, reg);
+    }
+
+    template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+              class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+    __device__ __forceinline__ void mergeShfl(const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                              uint delta,
+                                              uint width,
+                                              const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+    {
+        For<0, tuple_size<tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
+    }
+#endif
+
+    // Generic
+
+    template <int N> struct Generic
+    {
+        template <typename Pointer, typename Reference, class Op>
+        __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op)
+        {
+            loadToSmem(smem, val, tid);
+            if (N >= 32)
+                __syncthreads();
+
+            if (N >= 2048)
+            {
+                if (tid < 1024)
+                    merge(smem, val, tid, 1024, op);
+
+                __syncthreads();
+            }
+            if (N >= 1024)
+            {
+                if (tid < 512)
+                    merge(smem, val, tid, 512, op);
+
+                __syncthreads();
+            }
+            if (N >= 512)
+            {
+                if (tid < 256)
+                    merge(smem, val, tid, 256, op);
+
+                __syncthreads();
+            }
+            if (N >= 256)
+            {
+                if (tid < 128)
+                    merge(smem, val, tid, 128, op);
+
+                __syncthreads();
+            }
+            if (N >= 128)
+            {
+                if (tid < 64)
+                    merge(smem, val, tid, 64, op);
+
+                __syncthreads();
+            }
+            if (N >= 64)
+            {
+                if (tid < 32)
+                    merge(smem, val, tid, 32, op);
+            }
+
+            if (tid < 16)
+            {
+                merge(smem, val, tid, 16, op);
+                merge(smem, val, tid, 8, op);
+                merge(smem, val, tid, 4, op);
+                merge(smem, val, tid, 2, op);
+                merge(smem, val, tid, 1, op);
+            }
+        }
+    };
+
+    // Unroll
+
+    template <int I, typename Pointer, typename Reference, class Op> struct Unroll
+    {
+        __device__ static void loop(Pointer smem, Reference val, uint tid, Op op)
+        {
+            merge(smem, val, tid, I, op);
+            Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+        }
+
+#if CV_CUDEV_ARCH >= 300
+        __device__ static void loopShfl(Reference val, Op op, uint N)
+        {
+            mergeShfl(val, I, N, op);
+            Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
+        }
+#endif
+    };
+
+    template <typename Pointer, typename Reference, class Op> struct Unroll<0, Pointer, Reference, Op>
+    {
+        __device__ __forceinline__ static void loop(Pointer, Reference, uint, Op)
+        {
+        }
+
+#if CV_CUDEV_ARCH >= 300
+        __device__ __forceinline__ static void loopShfl(Reference, Op, uint)
+        {
+        }
+#endif
+    };
+
+    // WarpOptimized
+
+    template <int N> struct WarpOptimized
+    {
+        template <typename Pointer, typename Reference, class Op>
+        __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op)
+        {
+        #if CV_CUDEV_ARCH >= 300
+            (void) smem;
+            (void) tid;
+
+            Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
+        #else
+            loadToSmem(smem, val, tid);
+
+            if (tid < N / 2)
+                Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+        #endif
+        }
+    };
+
+    // GenericOptimized32
+
+    template <int N> struct GenericOptimized32
+    {
+        enum { M = N / 32 };
+
+        template <typename Pointer, typename Reference, class Op>
+        __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op)
+        {
+            const uint laneId = Warp::laneId();
+
+        #if CV_CUDEV_ARCH >= 300
+            Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
+
+            if (laneId == 0)
+                loadToSmem(smem, val, tid / 32);
+        #else
+            loadToSmem(smem, val, tid);
+
+            if (laneId < 16)
+                Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
+
+            __syncthreads();
+
+            if (laneId == 0)
+                loadToSmem(smem, val, tid / 32);
+        #endif
+
+            __syncthreads();
+
+            loadFromSmem(smem, val, tid);
+
+            if (tid < 32)
+            {
+        #if CV_CUDEV_ARCH >= 300
+                Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
+        #else
+                Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+        #endif
+            }
+        }
+    };
+
+    template <int N> struct Dispatcher
+    {
+        typedef typename SelectIf<
+            (N <= 32) && IsPowerOf2<N>::value,
+            WarpOptimized<N>,
+            typename SelectIf<
+                (N <= 1024) && IsPowerOf2<N>::value,
+                GenericOptimized32<N>,
+                Generic<N>
+            >::type
+        >::type reductor;
+    };
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp b/modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp
new file mode 100644
index 000000000..edbe8a586
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp
@@ -0,0 +1,394 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_BLOCK_REDUCE_KEY_VAL_DETAIL_HPP__
+#define __OPENCV_CUDEV_BLOCK_REDUCE_KEY_VAL_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/tuple.hpp"
+#include "../../util/type_traits.hpp"
+#include "../../warp/warp.hpp"
+
+namespace cv { namespace cudev {
+
+namespace block_reduce_key_val_detail
+{
+    // GetType
+
+    template <typename T> struct GetType;
+
+    template <typename T> struct GetType<T*>
+    {
+        typedef T type;
+    };
+
+    template <typename T> struct GetType<volatile T*>
+    {
+        typedef T type;
+    };
+
+    template <typename T> struct GetType<T&>
+    {
+        typedef T type;
+    };
+
+    // For
+
+    template <int I, int N> struct For
+    {
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
+        {
+            get<I>(smem)[tid] = get<I>(data);
+
+            For<I + 1, N>::loadToSmem(smem, data, tid);
+        }
+
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ static void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
+        {
+            get<I>(data) = get<I>(smem)[tid];
+
+            For<I + 1, N>::loadFromSmem(smem, data, tid);
+        }
+
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ static void copy(const PointerTuple& svals, const ReferenceTuple& val, uint tid, uint delta)
+        {
+            get<I>(svals)[tid] = get<I>(val) = get<I>(svals)[tid + delta];
+
+            For<I + 1, N>::copy(svals, val, tid, delta);
+        }
+
+        template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+        __device__ static void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
+                                     const ValPointerTuple& svals, const ValReferenceTuple& val,
+                                     const CmpTuple& cmp,
+                                     uint tid, uint delta)
+        {
+            typename GetType<typename tuple_element<I, KeyPointerTuple>::type>::type reg = get<I>(skeys)[tid + delta];
+
+            if (get<I>(cmp)(reg, get<I>(key)))
+            {
+                get<I>(skeys)[tid] = get<I>(key) = reg;
+                get<I>(svals)[tid] = get<I>(val) = get<I>(svals)[tid + delta];
+            }
+
+            For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
+        }
+    };
+
+    template <int N> struct For<N, N>
+    {
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ static void loadToSmem(const PointerTuple&, const ReferenceTuple&, uint)
+        {
+        }
+
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ static void loadFromSmem(const PointerTuple&, const ReferenceTuple&, uint)
+        {
+        }
+
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ static void copy(const PointerTuple&, const ReferenceTuple&, uint, uint)
+        {
+        }
+
+        template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+        __device__ static void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
+                                     const ValPointerTuple&, const ValReferenceTuple&,
+                                     const CmpTuple&,
+                                     uint, uint)
+        {
+        }
+    };
+
+    // loadToSmem / loadFromSmem
+
+    template <typename T>
+    __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid)
+    {
+        smem[tid] = data;
+    }
+
+    template <typename T>
+    __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, uint tid)
+    {
+        data = smem[tid];
+    }
+
+    template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+    __device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
+                                               const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
+                                               uint tid)
+    {
+        For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
+    }
+
+    template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+    __device__ __forceinline__ void loadFromSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
+                                                 const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
+                                                 uint tid)
+    {
+        For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
+    }
+
+    // copyVals
+
+    template <typename V>
+    __device__ __forceinline__ void copyVals(volatile V* svals, V& val, uint tid, uint delta)
+    {
+        svals[tid] = val = svals[tid + delta];
+    }
+
+    template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+    __device__ __forceinline__ void copyVals(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                             const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                             uint tid, uint delta)
+    {
+        For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
+    }
+
+    // merge
+
+    template <typename K, typename V, class Cmp>
+    __device__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, uint tid, uint delta)
+    {
+        K reg = skeys[tid + delta];
+
+        if (cmp(reg, key))
+        {
+            skeys[tid] = key = reg;
+            copyVals(svals, val, tid, delta);
+        }
+    }
+
+    template <typename K,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp>
+    __device__ void merge(volatile K* skeys, K& key,
+                          const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                          const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                          const Cmp& cmp, uint tid, uint delta)
+    {
+        K reg = skeys[tid + delta];
+
+        if (cmp(reg, key))
+        {
+            skeys[tid] = key = reg;
+            copyVals(svals, val, tid, delta);
+        }
+    }
+
+    template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+              typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+    __device__ __forceinline__ void merge(const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                          const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                          const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                          const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                          const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
+                                          uint tid, uint delta)
+    {
+        For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
+    }
+
+    // Generic
+
+    template <int N> struct Generic
+    {
+        template <class KP, class KR, class VP, class VR, class Cmp>
+        __device__ static void reduce(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
+        {
+            loadToSmem(skeys, key, tid);
+            loadValsToSmem(svals, val, tid);
+            if (N >= 32)
+                __syncthreads();
+
+            if (N >= 2048)
+            {
+                if (tid < 1024)
+                    merge(skeys, key, svals, val, cmp, tid, 1024);
+
+                __syncthreads();
+            }
+            if (N >= 1024)
+            {
+                if (tid < 512)
+                    merge(skeys, key, svals, val, cmp, tid, 512);
+
+                __syncthreads();
+            }
+            if (N >= 512)
+            {
+                if (tid < 256)
+                    merge(skeys, key, svals, val, cmp, tid, 256);
+
+                __syncthreads();
+            }
+            if (N >= 256)
+            {
+                if (tid < 128)
+                    merge(skeys, key, svals, val, cmp, tid, 128);
+
+                __syncthreads();
+            }
+            if (N >= 128)
+            {
+                if (tid < 64)
+                    merge(skeys, key, svals, val, cmp, tid, 64);
+
+                __syncthreads();
+            }
+            if (N >= 64)
+            {
+                if (tid < 32)
+                    merge(skeys, key, svals, val, cmp, tid, 32);
+            }
+
+            if (tid < 16)
+            {
+                merge(skeys, key, svals, val, cmp, tid, 16);
+                merge(skeys, key, svals, val, cmp, tid, 8);
+                merge(skeys, key, svals, val, cmp, tid, 4);
+                merge(skeys, key, svals, val, cmp, tid, 2);
+                merge(skeys, key, svals, val, cmp, tid, 1);
+            }
+        }
+    };
+
+    // Unroll
+
+    template <int I, class KP, class KR, class VP, class VR, class Cmp> struct Unroll
+    {
+        __device__ static void loop(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
+        {
+            merge(skeys, key, svals, val, cmp, tid, I);
+            Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+        }
+    };
+
+    template <class KP, class KR, class VP, class VR, class Cmp> struct Unroll<0, KP, KR, VP, VR, Cmp>
+    {
+        __device__ __forceinline__ static void loop(KP, KR, VP, VR, uint, Cmp)
+        {
+        }
+    };
+
+    // WarpOptimized
+
+    template <int N> struct WarpOptimized
+    {
+        template <class KP, class KR, class VP, class VR, class Cmp>
+        __device__ static void reduce(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
+        {
+            loadToSmem(skeys, key, tid);
+            loadToSmem(svals, val, tid);
+
+            if (tid < N / 2)
+                Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+        }
+    };
+
+    // GenericOptimized32
+
+    template <uint N> struct GenericOptimized32
+    {
+        enum { M = N / 32 };
+
+        template <class KP, class KR, class VP, class VR, class Cmp>
+        __device__ static void reduce(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
+        {
+            const uint laneId = Warp::laneId();
+
+            loadToSmem(skeys, key, tid);
+            loadToSmem(svals, val, tid);
+
+            if (laneId < 16)
+                Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+
+            __syncthreads();
+
+            if (laneId == 0)
+            {
+                loadToSmem(skeys, key, tid / 32);
+                loadToSmem(svals, val, tid / 32);
+            }
+
+            __syncthreads();
+
+            loadFromSmem(skeys, key, tid);
+
+            if (tid < 32)
+            {
+                Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+            }
+        }
+    };
+
+    template <int N> struct Dispatcher
+    {
+        typedef typename SelectIf<
+            (N <= 32) && IsPowerOf2<N>::value,
+            WarpOptimized<N>,
+            typename SelectIf<
+                (N <= 1024) && IsPowerOf2<N>::value,
+                GenericOptimized32<N>,
+                Generic<N>
+            >::type
+        >::type reductor;
+    };
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/block/dynamic_smem.hpp b/modules/cudev/include/opencv2/cudev/block/dynamic_smem.hpp
new file mode 100644
index 000000000..9f9ba6000
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/block/dynamic_smem.hpp
@@ -0,0 +1,86 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_BLOCK_DYNAMIC_SMEM_HPP__
+#define __OPENCV_CUDEV_BLOCK_DYNAMIC_SMEM_HPP__
+
+#include "../common.hpp"
+
+namespace cv { namespace cudev {
+
+template <class T> struct DynamicSharedMem
+{
+    __device__ __forceinline__ operator T*()
+    {
+        extern __shared__ int __smem[];
+        return (T*) __smem;
+    }
+
+    __device__ __forceinline__ operator const T*() const
+    {
+        extern __shared__ int __smem[];
+        return (T*) __smem;
+    }
+};
+
+// specialize for double to avoid unaligned memory access compile errors
+template <> struct DynamicSharedMem<double>
+{
+    __device__ __forceinline__ operator double*()
+    {
+        extern __shared__ double __smem_d[];
+        return (double*) __smem_d;
+    }
+
+    __device__ __forceinline__ operator const double*() const
+    {
+        extern __shared__ double __smem_d[];
+        return (double*) __smem_d;
+    }
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/block/reduce.hpp b/modules/cudev/include/opencv2/cudev/block/reduce.hpp
new file mode 100644
index 000000000..4c9022631
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/block/reduce.hpp
@@ -0,0 +1,128 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_BLOCK_REDUCE_HPP__
+#define __OPENCV_CUDEV_BLOCK_REDUCE_HPP__
+
+#include "../common.hpp"
+#include "../util/tuple.hpp"
+#include "../warp/reduce.hpp"
+#include "detail/reduce.hpp"
+#include "detail/reduce_key_val.hpp"
+
+namespace cv { namespace cudev {
+
+// blockReduce
+
+template <int N, typename T, class Op>
+__device__ __forceinline__ void blockReduce(volatile T* smem, T& val, uint tid, const Op& op)
+{
+    block_reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
+}
+
+template <int N,
+          typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+          typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+          class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+__device__ __forceinline__ void blockReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                            const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                            uint tid,
+                                            const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+{
+    block_reduce_detail::Dispatcher<N>::reductor::template reduce<
+            const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
+            const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
+            const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
+}
+
+// blockReduceKeyVal
+
+template <int N, typename K, typename V, class Cmp>
+__device__ __forceinline__ void blockReduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, uint tid, const Cmp& cmp)
+{
+    block_reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
+}
+
+template <int N,
+          typename K,
+          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+          class Cmp>
+__device__ __forceinline__ void blockReduceKeyVal(volatile K* skeys, K& key,
+                                                  const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                  const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                  uint tid, const Cmp& cmp)
+{
+    block_reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
+            const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+            const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+            const Cmp&>(skeys, key, svals, val, tid, cmp);
+}
+
+template <int N,
+          typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+          typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+          class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+__device__ __forceinline__ void blockReduceKeyVal(const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                                  const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                                  const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                  const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                  uint tid,
+                                                  const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
+{
+    block_reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
+            const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
+            const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
+            const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+            const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+            const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
+            >(skeys, key, svals, val, tid, cmp);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/block/scan.hpp b/modules/cudev/include/opencv2/cudev/block/scan.hpp
new file mode 100644
index 000000000..c54dfef9f
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/block/scan.hpp
@@ -0,0 +1,101 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_BLOCK_SCAN_HPP__
+#define __OPENCV_CUDEV_BLOCK_SCAN_HPP__
+
+#include "../common.hpp"
+#include "../warp/scan.hpp"
+
+namespace cv { namespace cudev {
+
+template <int THREADS_NUM, typename T>
+__device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
+{
+    if (THREADS_NUM > WARP_SIZE)
+    {
+        // bottom-level inclusive warp scan
+        T warpResult = warpScanInclusive(data, smem, tid);
+
+        __syncthreads();
+
+        // save top elements of each warp for exclusive warp scan
+        // sync to wait for warp scans to complete (because s_Data is being overwritten)
+        if ((tid & (WARP_SIZE - 1)) == (WARP_SIZE - 1))
+        {
+            smem[tid >> LOG_WARP_SIZE] = warpResult;
+        }
+
+        __syncthreads();
+
+        if (tid < (THREADS_NUM / WARP_SIZE))
+        {
+            // grab top warp elements
+            T val = smem[tid];
+
+            // calculate exclusive scan and write back to shared memory
+            smem[tid] = warpScanExclusive(val, smem, tid);
+        }
+
+        __syncthreads();
+
+        // return updated warp scans with exclusive scan results
+        return warpResult + smem[tid >> LOG_WARP_SIZE];
+    }
+    else
+    {
+        return warpScanInclusive(data, smem, tid);
+    }
+}
+
+template <int THREADS_NUM, typename T>
+__device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid)
+{
+    return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data;
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/block/vec_distance.hpp b/modules/cudev/include/opencv2/cudev/block/vec_distance.hpp
new file mode 100644
index 000000000..c48e9146e
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/block/vec_distance.hpp
@@ -0,0 +1,184 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_BLOCK_VEC_DISTANCE_HPP__
+#define __OPENCV_CUDEV_BLOCK_VEC_DISTANCE_HPP__
+
+#include "../common.hpp"
+#include "../functional/functional.hpp"
+#include "../warp/reduce.hpp"
+#include "reduce.hpp"
+
+namespace cv { namespace cudev {
+
+// NormL1
+
+template <typename T> struct NormL1
+{
+    typedef int value_type;
+    typedef uint result_type;
+
+    result_type mySum;
+
+    __device__ __forceinline__ NormL1() : mySum(0) {}
+
+    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
+    {
+        mySum = __sad(val1, val2, mySum);
+    }
+
+    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
+    {
+        warpReduce(smem, mySum, tid, plus<result_type>());
+    }
+
+    template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
+    {
+        blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
+    }
+
+    __device__ __forceinline__ operator result_type() const
+    {
+        return mySum;
+    }
+};
+template <> struct NormL1<float>
+{
+    typedef float value_type;
+    typedef float result_type;
+
+    result_type mySum;
+
+    __device__ __forceinline__ NormL1() : mySum(0.0f) {}
+
+    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
+    {
+        mySum += ::fabsf(val1 - val2);
+    }
+
+    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
+    {
+        warpReduce(smem, mySum, tid, plus<result_type>());
+    }
+
+    template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
+    {
+        blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
+    }
+
+    __device__ __forceinline__ operator result_type() const
+    {
+        return mySum;
+    }
+};
+
+// NormL2
+
+struct NormL2
+{
+    typedef float value_type;
+    typedef float result_type;
+
+    result_type mySum;
+
+    __device__ __forceinline__ NormL2() : mySum(0.0f) {}
+
+    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
+    {
+        const float diff = val1 - val2;
+        mySum += diff * diff;
+    }
+
+    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
+    {
+        warpReduce(smem, mySum, tid, plus<result_type>());
+    }
+
+    template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
+    {
+        blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
+    }
+
+    __device__ __forceinline__ operator result_type() const
+    {
+        return ::sqrtf(mySum);
+    }
+};
+
+// NormHamming
+
+struct NormHamming
+{
+    typedef int value_type;
+    typedef int result_type;
+
+    result_type mySum;
+
+    __device__ __forceinline__ NormHamming() : mySum(0) {}
+
+    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
+    {
+        mySum += __popc(val1 ^ val2);
+    }
+
+    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
+    {
+        warpReduce(smem, mySum, tid, plus<result_type>());
+    }
+
+    template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
+    {
+        blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
+    }
+
+    __device__ __forceinline__ operator result_type() const
+    {
+        return mySum;
+    }
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/common.hpp b/modules/cudev/include/opencv2/cudev/common.hpp
new file mode 100644
index 000000000..be7990172
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/common.hpp
@@ -0,0 +1,93 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_COMMON_HPP__
+#define __OPENCV_CUDEV_COMMON_HPP__
+
+#include <cuda_runtime.h>
+#include "opencv2/core/gpu.hpp"
+#include "opencv2/core/gpu_stream_accessor.hpp"
+
+namespace cv { namespace cudev {
+
+using namespace cv::gpu;
+
+// CV_CUDEV_ARCH
+
+#ifndef __CUDA_ARCH__
+#   define CV_CUDEV_ARCH 0
+#else
+#   define CV_CUDEV_ARCH __CUDA_ARCH__
+#endif
+
+// CV_CUDEV_SAFE_CALL
+
+__host__ __forceinline__ void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
+{
+    if (cudaSuccess != err)
+        cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
+}
+
+#ifdef __GNUC__
+#   define CV_CUDEV_SAFE_CALL(expr) cv::cudev::checkCudaError((expr), __FILE__, __LINE__, __func__)
+#else
+#   define CV_CUDEV_SAFE_CALL(expr) cv::cudev::checkCudaError((expr), __FILE__, __LINE__, "")
+#endif
+
+// divUp
+
+__host__ __device__ __forceinline__ int divUp(int total, int grain)
+{
+    return (total + grain - 1) / grain;
+}
+
+// math constants
+
+#define CV_PI_F   ((float)CV_PI)
+#define CV_LOG2_F ((float)CV_LOG2)
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/binary_func.hpp b/modules/cudev/include/opencv2/cudev/expr/binary_func.hpp
new file mode 100644
index 000000000..f35ea2dc3
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/binary_func.hpp
@@ -0,0 +1,75 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_BINARY_FUNC_HPP__
+#define __OPENCV_CUDEV_EXPR_BINARY_FUNC_HPP__
+
+#include "../common.hpp"
+#include "../util/type_traits.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/transform.hpp"
+#include "../functional/functional.hpp"
+#include "expr.hpp"
+
+namespace cv { namespace cudev {
+
+#define CV_CUDEV_EXPR_BINARY_FUNC(name) \
+    template <class SrcPtr1, class SrcPtr2> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<SrcPtr1>::ptr_type, typename PtrTraits<SrcPtr2>::ptr_type, name ## _func<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type> > > \
+    name ## _(const SrcPtr1& src1, const SrcPtr2& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, name ## _func<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type>())); \
+    }
+
+CV_CUDEV_EXPR_BINARY_FUNC(hypot)
+CV_CUDEV_EXPR_BINARY_FUNC(magnitude)
+CV_CUDEV_EXPR_BINARY_FUNC(atan2)
+CV_CUDEV_EXPR_BINARY_FUNC(absdiff)
+
+#undef CV_CUDEV_EXPR_BINARY_FUNC
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/binary_op.hpp b/modules/cudev/include/opencv2/cudev/expr/binary_op.hpp
new file mode 100644
index 000000000..f7e965572
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/binary_op.hpp
@@ -0,0 +1,235 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_BINARY_OP_HPP__
+#define __OPENCV_CUDEV_EXPR_BINARY_OP_HPP__
+
+#include "../common.hpp"
+#include "../util/type_traits.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/transform.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "../ptr2d/texture.hpp"
+#include "../ptr2d/glob.hpp"
+#include "../functional/functional.hpp"
+#include "expr.hpp"
+
+namespace cv { namespace cudev {
+
+// Binary Operations
+
+#define CV_CUDEV_EXPR_BINOP_INST(op, functor) \
+    template <typename T> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, typename PtrTraits<GpuMat_<T> >::ptr_type, functor<T> > > \
+    operator op(const GpuMat_<T>& src1, const GpuMat_<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, functor<T>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, typename PtrTraits<GlobPtrSz<T> >::ptr_type, functor<T> > > \
+    operator op(const GpuMat_<T>& src1, const GlobPtrSz<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, functor<T>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, typename PtrTraits<GpuMat_<T> >::ptr_type, functor<T> > > \
+    operator op(const GlobPtrSz<T>& src1, const GpuMat_<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, functor<T>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, typename PtrTraits<Texture<T> >::ptr_type, functor<T> > > \
+    operator op(const GpuMat_<T>& src1, const Texture<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, functor<T>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, typename PtrTraits<GpuMat_<T> >::ptr_type, functor<T> > > \
+    operator op(const Texture<T>& src1, const GpuMat_<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, functor<T>())); \
+    } \
+    template <typename T, class Body> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, typename PtrTraits<Body>::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
+    operator op(const GpuMat_<T>& src1, const Expr<Body>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2.body, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
+    } \
+    template <typename T, class Body> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, typename PtrTraits<GpuMat_<T> >::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
+    operator op(const Expr<Body>& src1, const GpuMat_<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1.body, src2, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, Binder2nd< functor<T> > > > \
+    operator op(const GpuMat_<T>& src, T val) \
+    { \
+        return makeExpr(transformPtr(src, bind2nd(functor<T>(), val))); \
+    } \
+    template <typename T> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, Binder1st< functor<T> > > > \
+    operator op(T val, const GpuMat_<T>& src) \
+    { \
+        return makeExpr(transformPtr(src, bind1st(functor<T>(), val))); \
+    } \
+    template <typename T> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, typename PtrTraits<GlobPtrSz<T> >::ptr_type, functor<T> > > \
+    operator op(const GlobPtrSz<T>& src1, const GlobPtrSz<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, functor<T>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, typename PtrTraits<Texture<T> >::ptr_type, functor<T> > > \
+    operator op(const GlobPtrSz<T>& src1, const Texture<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, functor<T>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, typename PtrTraits<GlobPtrSz<T> >::ptr_type, functor<T> > > \
+    operator op(const Texture<T>& src1, const GlobPtrSz<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, functor<T>())); \
+    } \
+    template <typename T, class Body> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, typename PtrTraits<Body>::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
+    operator op(const GlobPtrSz<T>& src1, const Expr<Body>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2.body, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
+    } \
+    template <typename T, class Body> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, typename PtrTraits<GlobPtrSz<T> >::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
+    operator op(const Expr<Body>& src1, const GlobPtrSz<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1.body, src2, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, Binder2nd< functor<T> > > > \
+    operator op(const GlobPtrSz<T>& src, T val) \
+    { \
+        return makeExpr(transformPtr(src, bind2nd(functor<T>(), val))); \
+    } \
+    template <typename T> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, Binder1st< functor<T> > > > \
+    operator op(T val, const GlobPtrSz<T>& src) \
+    { \
+        return makeExpr(transformPtr(src, bind1st(functor<T>(), val))); \
+    } \
+    template <typename T> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, typename PtrTraits<Texture<T> >::ptr_type, functor<T> > > \
+    operator op(const Texture<T>& src1, const Texture<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2, functor<T>())); \
+    } \
+    template <typename T, class Body> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, typename PtrTraits<Body>::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
+    operator op(const Texture<T>& src1, const Expr<Body>& src2) \
+    { \
+        return makeExpr(transformPtr(src1, src2.body, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
+    } \
+    template <typename T, class Body> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, typename PtrTraits<Texture<T> >::ptr_type, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type> > > \
+    operator op(const Expr<Body>& src1, const Texture<T>& src2) \
+    { \
+        return makeExpr(transformPtr(src1.body, src2, functor<typename LargerType<T, typename PtrTraits<Body>::value_type>::type>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, Binder2nd< functor<T> > > > \
+    operator op(const Texture<T>& src, T val) \
+    { \
+        return makeExpr(transformPtr(src, bind2nd(functor<T>(), val))); \
+    } \
+    template <typename T> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, Binder1st< functor<T> > > > \
+    operator op(T val, const Texture<T>& src) \
+    { \
+        return makeExpr(transformPtr(src, bind1st(functor<T>(), val))); \
+    } \
+    template <class Body1, class Body2> \
+    __host__ Expr<BinaryTransformPtrSz<typename PtrTraits<Body1>::ptr_type, typename PtrTraits<Body2>::ptr_type, functor<typename LargerType<typename PtrTraits<Body1>::value_type, typename PtrTraits<Body2>::value_type>::type> > > \
+    operator op(const Expr<Body1>& a, const Expr<Body2>& b) \
+    { \
+        return makeExpr(transformPtr(a.body, b.body, functor<typename LargerType<typename PtrTraits<Body1>::value_type, typename PtrTraits<Body2>::value_type>::type>())); \
+    } \
+    template <class Body> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, Binder2nd< functor<typename Body::value_type> > > > \
+    operator op(const Expr<Body>& a, typename Body::value_type val) \
+    { \
+        return makeExpr(transformPtr(a.body, bind2nd(functor<typename Body::value_type>(), val))); \
+    } \
+    template <class Body> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, Binder1st< functor<typename Body::value_type> > > > \
+    operator op(typename Body::value_type val, const Expr<Body>& a) \
+    { \
+        return makeExpr(transformPtr(a.body, bind1st(functor<typename Body::value_type>(), val))); \
+    }
+
+CV_CUDEV_EXPR_BINOP_INST(+, plus)
+CV_CUDEV_EXPR_BINOP_INST(-, minus)
+CV_CUDEV_EXPR_BINOP_INST(*, multiplies)
+CV_CUDEV_EXPR_BINOP_INST(/, divides)
+CV_CUDEV_EXPR_BINOP_INST(%, modulus)
+
+CV_CUDEV_EXPR_BINOP_INST(==, equal_to)
+CV_CUDEV_EXPR_BINOP_INST(!=, not_equal_to)
+CV_CUDEV_EXPR_BINOP_INST(>, greater)
+CV_CUDEV_EXPR_BINOP_INST(<, less)
+CV_CUDEV_EXPR_BINOP_INST(>=, greater_equal)
+CV_CUDEV_EXPR_BINOP_INST(<=, less_equal)
+
+CV_CUDEV_EXPR_BINOP_INST(&&, logical_and)
+CV_CUDEV_EXPR_BINOP_INST(||, logical_or)
+
+CV_CUDEV_EXPR_BINOP_INST(&, bit_and)
+CV_CUDEV_EXPR_BINOP_INST(|, bit_or)
+CV_CUDEV_EXPR_BINOP_INST(^, bit_xor)
+CV_CUDEV_EXPR_BINOP_INST(<<, bit_lshift)
+CV_CUDEV_EXPR_BINOP_INST(>>, bit_rshift)
+
+#undef CV_CUDEV_EXPR_BINOP_INST
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/color.hpp b/modules/cudev/include/opencv2/cudev/expr/color.hpp
new file mode 100644
index 000000000..13f07c15a
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/color.hpp
@@ -0,0 +1,282 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_COLOR_HPP__
+#define __OPENCV_CUDEV_EXPR_COLOR_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/transform.hpp"
+#include "../functional/color_cvt.hpp"
+#include "expr.hpp"
+
+namespace cv { namespace cudev {
+
+#define CV_CUDEV_EXPR_CVTCOLOR_INST(name) \
+    template <class SrcPtr> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, name ## _func<typename VecTraits<typename PtrTraits<SrcPtr>::value_type>::elem_type> > > \
+    name ## _(const SrcPtr& src) \
+    { \
+        return makeExpr(transformPtr(src, name ## _func<typename VecTraits<typename PtrTraits<SrcPtr>::value_type>::elem_type>())); \
+    }
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_BGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_RGBA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_GRAY)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_GRAY)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_GRAY)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_GRAY)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(GRAY_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(GRAY_to_BGRA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_YUV)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_YUV)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_YUV4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_YUV4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_YUV)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_YUV)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_YUV4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_YUV4)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(YUV_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YUV_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YUV4_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YUV4_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YUV_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YUV_to_BGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YUV4_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YUV4_to_BGRA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_YCrCb)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_YCrCb)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_YCrCb4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_YCrCb4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_YCrCb)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_YCrCb)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_YCrCb4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_YCrCb4)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb4_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb4_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb_to_BGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb4_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(YCrCb4_to_BGRA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_XYZ)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_XYZ)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_XYZ4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_XYZ4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_XYZ)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_XYZ)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_XYZ4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_XYZ4)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ4_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ4_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ4_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ_to_BGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(XYZ4_to_BGRA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HSV)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HSV)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HSV4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HSV4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HSV)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HSV)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HSV4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HSV4)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HSV_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HSV_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HSV4_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HSV4_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HSV_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HSV_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HSV4_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HSV4_FULL)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_BGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_BGRA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_RGB_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_RGBA_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_RGB_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_RGBA_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_BGR_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV_to_BGRA_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_BGR_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HSV4_to_BGRA_FULL)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HLS)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HLS)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HLS4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HLS4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HLS)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HLS)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HLS4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HLS4)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HLS_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HLS_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_HLS4_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_HLS4_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HLS_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HLS_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_HLS4_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_HLS4_FULL)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_BGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_BGRA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_RGB_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_RGBA_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_RGB_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_RGBA_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_BGR_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS_to_BGRA_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_BGR_FULL)
+CV_CUDEV_EXPR_CVTCOLOR_INST(HLS4_to_BGRA_FULL)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_Lab)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_Lab)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_Lab4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_Lab4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_Lab)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_Lab)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_Lab4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_Lab4)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(LRGB_to_Lab)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LRGBA_to_Lab)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LRGB_to_Lab4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LRGBA_to_Lab4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LBGR_to_Lab)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LBGRA_to_Lab)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LBGR_to_Lab4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LBGRA_to_Lab4)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_BGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_BGRA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_LRGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_LRGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_LRGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_LRGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_LBGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_LBGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab_to_LBGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Lab4_to_LBGRA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_Luv)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_Luv)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGB_to_Luv4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(RGBA_to_Luv4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_Luv)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_Luv)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGR_to_Luv4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(BGRA_to_Luv4)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(LRGB_to_Luv)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LRGBA_to_Luv)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LRGB_to_Luv4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LRGBA_to_Luv4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LBGR_to_Luv)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LBGRA_to_Luv)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LBGR_to_Luv4)
+CV_CUDEV_EXPR_CVTCOLOR_INST(LBGRA_to_Luv4)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_RGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_RGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_BGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_BGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_BGRA)
+
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_LRGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LRGB)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_LRGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LRGBA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_LBGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LBGR)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv_to_LBGRA)
+CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LBGRA)
+
+#undef CV_CUDEV_EXPR_CVTCOLOR_INST
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/deriv.hpp b/modules/cudev/include/opencv2/cudev/expr/deriv.hpp
new file mode 100644
index 000000000..822a86b9a
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/deriv.hpp
@@ -0,0 +1,121 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_DERIV_HPP__
+#define __OPENCV_CUDEV_EXPR_DERIV_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/deriv.hpp"
+#include "expr.hpp"
+
+namespace cv { namespace cudev {
+
+// derivX
+
+template <class SrcPtr>
+__host__ Expr<DerivXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
+derivX_(const SrcPtr& src)
+{
+    return makeExpr(derivXPtr(src));
+}
+
+// derivY
+
+template <class SrcPtr>
+__host__ Expr<DerivYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
+derivY_(const SrcPtr& src)
+{
+    return makeExpr(derivYPtr(src));
+}
+
+// sobelX
+
+template <class SrcPtr>
+__host__ Expr<SobelXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
+sobelX_(const SrcPtr& src)
+{
+    return makeExpr(sobelXPtr(src));
+}
+
+// sobelY
+
+template <class SrcPtr>
+__host__ Expr<SobelYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
+sobelY_(const SrcPtr& src)
+{
+    return makeExpr(sobelYPtr(src));
+}
+
+// scharrX
+
+template <class SrcPtr>
+__host__ Expr<ScharrXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
+scharrX_(const SrcPtr& src)
+{
+    return makeExpr(scharrXPtr(src));
+}
+
+// scharrY
+
+template <class SrcPtr>
+__host__ Expr<ScharrYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
+scharrY_(const SrcPtr& src)
+{
+    return makeExpr(scharrYPtr(src));
+}
+
+// laplacian
+
+template <int ksize, class SrcPtr>
+__host__ Expr<LaplacianPtrSz<ksize, typename PtrTraits<SrcPtr>::ptr_type> >
+laplacian_(const SrcPtr& src)
+{
+    return makeExpr(laplacianPtr<ksize>(src));
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/expr.hpp b/modules/cudev/include/opencv2/cudev/expr/expr.hpp
new file mode 100644
index 000000000..46c780b4a
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/expr.hpp
@@ -0,0 +1,92 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_EXPR_HPP__
+#define __OPENCV_CUDEV_EXPR_EXPR_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+
+namespace cv { namespace cudev {
+
+template <class Body> struct Expr
+{
+    Body body;
+};
+
+template <class Body>
+__host__ Expr<Body> makeExpr(const Body& body)
+{
+    Expr<Body> e;
+    e.body = body;
+    return e;
+}
+
+template <class Body> struct PtrTraits< Expr<Body> >
+{
+    typedef Expr<Body>                         ptr_sz_type;
+    typedef typename PtrTraits<Body>::ptr_type ptr_type;
+
+    typedef typename ptr_type::value_type value_type;
+
+    __host__ static ptr_type shrinkPtr(const Expr<Body>& expr)
+    {
+        return PtrTraits<Body>::shrinkPtr(expr.body);
+    }
+
+    __host__ static int getRows(const Expr<Body>& expr)
+    {
+        return PtrTraits<Body>::getRows(expr.body);
+    }
+
+    __host__ static int getCols(const Expr<Body>& expr)
+    {
+        return PtrTraits<Body>::getCols(expr.body);
+    }
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/per_element_func.hpp b/modules/cudev/include/opencv2/cudev/expr/per_element_func.hpp
new file mode 100644
index 000000000..56a067de9
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/per_element_func.hpp
@@ -0,0 +1,132 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_PER_ELEMENT_FUNC_HPP__
+#define __OPENCV_CUDEV_EXPR_PER_ELEMENT_FUNC_HPP__
+
+#include "../common.hpp"
+#include "../util/type_traits.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/transform.hpp"
+#include "../ptr2d/lut.hpp"
+#include "../functional/functional.hpp"
+#include "expr.hpp"
+
+namespace cv { namespace cudev {
+
+// min/max
+
+template <class SrcPtr1, class SrcPtr2>
+__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<SrcPtr1>::ptr_type, typename PtrTraits<SrcPtr2>::ptr_type, minimum<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type> > >
+min_(const SrcPtr1& src1, const SrcPtr2& src2)
+{
+    return makeExpr(transformPtr(src1, src2, minimum<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type>()));
+}
+
+template <class SrcPtr1, class SrcPtr2>
+__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<SrcPtr1>::ptr_type, typename PtrTraits<SrcPtr2>::ptr_type, maximum<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type> > >
+max_(const SrcPtr1& src1, const SrcPtr2& src2)
+{
+    return makeExpr(transformPtr(src1, src2, maximum<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type>()));
+}
+
+// threshold
+
+template <class SrcPtr>
+__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshBinaryFunc<typename PtrTraits<SrcPtr>::value_type> > >
+threshBinary_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh, typename PtrTraits<SrcPtr>::value_type maxVal)
+{
+    return makeExpr(transformPtr(src, thresh_binary_func(thresh, maxVal)));
+}
+
+template <class SrcPtr>
+__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshBinaryInvFunc<typename PtrTraits<SrcPtr>::value_type> > >
+threshBinaryInv_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh, typename PtrTraits<SrcPtr>::value_type maxVal)
+{
+    return makeExpr(transformPtr(src, thresh_binary_inv_func(thresh, maxVal)));
+}
+
+template <class SrcPtr>
+__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshTruncFunc<typename PtrTraits<SrcPtr>::value_type> > >
+threshTrunc_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh)
+{
+    return makeExpr(transformPtr(src, thresh_trunc_func(thresh)));
+}
+
+template <class SrcPtr>
+__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshToZeroFunc<typename PtrTraits<SrcPtr>::value_type> > >
+threshToZero_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh)
+{
+    return makeExpr(transformPtr(src, thresh_to_zero_func(thresh)));
+}
+
+template <class SrcPtr>
+__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, ThreshToZeroInvFunc<typename PtrTraits<SrcPtr>::value_type> > >
+threshToZeroInv_(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type thresh)
+{
+    return makeExpr(transformPtr(src, thresh_to_zero_inv_func(thresh)));
+}
+
+// cvt
+
+template <typename D, class SrcPtr>
+__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, saturate_cast_func<typename PtrTraits<SrcPtr>::value_type, D> > >
+cvt_(const SrcPtr& src)
+{
+    return makeExpr(transformPtr(src, saturate_cast_func<typename PtrTraits<SrcPtr>::value_type, D>()));
+}
+
+// lut
+
+template <class SrcPtr, class TablePtr>
+__host__ Expr<LutPtrSz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<TablePtr>::ptr_type> >
+lut_(const SrcPtr& src, const TablePtr& tbl)
+{
+    return makeExpr(lutPtr(src, tbl));
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/reduction.hpp b/modules/cudev/include/opencv2/cudev/expr/reduction.hpp
new file mode 100644
index 000000000..0fdde02e5
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/reduction.hpp
@@ -0,0 +1,259 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_REDUCTION_HPP__
+#define __OPENCV_CUDEV_EXPR_REDUCTION_HPP__
+
+#include "../common.hpp"
+#include "../grid/glob_reduce.hpp"
+#include "../grid/histogram.hpp"
+#include "../grid/integral.hpp"
+#include "../grid/reduce_to_vec.hpp"
+#include "../ptr2d/traits.hpp"
+#include "expr.hpp"
+
+namespace cv { namespace cudev {
+
+// sum
+
+template <class SrcPtr> struct SumExprBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCalcSum(src, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ Expr<SumExprBody<SrcPtr> >
+sum_(const SrcPtr& src)
+{
+    SumExprBody<SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// minVal
+
+template <class SrcPtr> struct FindMinValExprBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridFindMinVal(src, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ Expr<FindMinValExprBody<SrcPtr> >
+minVal_(const SrcPtr& src)
+{
+    FindMinValExprBody<SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// maxVal
+
+template <class SrcPtr> struct FindMaxValExprBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridFindMaxVal(src, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ Expr<FindMaxValExprBody<SrcPtr> >
+maxVal_(const SrcPtr& src)
+{
+    FindMaxValExprBody<SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// minMaxVal
+
+template <class SrcPtr> struct FindMinMaxValExprBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridFindMinMaxVal(src, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ Expr<FindMinMaxValExprBody<SrcPtr> >
+minMaxVal_(const SrcPtr& src)
+{
+    FindMinMaxValExprBody<SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// countNonZero
+
+template <class SrcPtr> struct CountNonZeroExprBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCountNonZero(src, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ Expr<CountNonZeroExprBody<SrcPtr> >
+countNonZero_(const SrcPtr& src)
+{
+    CountNonZeroExprBody<SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// reduceToRow
+
+template <class Reductor, class SrcPtr> struct ReduceToRowBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridReduceToRow<Reductor>(src, dst, stream);
+    }
+};
+
+template <class Reductor, class SrcPtr>
+__host__ Expr<ReduceToRowBody<Reductor, SrcPtr> >
+reduceToRow_(const SrcPtr& src)
+{
+    ReduceToRowBody<Reductor, SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// reduceToColumn
+
+template <class Reductor, class SrcPtr> struct ReduceToColumnBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridReduceToColumn<Reductor>(src, dst, stream);
+    }
+};
+
+template <class Reductor, class SrcPtr>
+__host__ Expr<ReduceToColumnBody<Reductor, SrcPtr> >
+reduceToColumn_(const SrcPtr& src)
+{
+    ReduceToColumnBody<Reductor, SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// histogram
+
+template <int BIN_COUNT, class SrcPtr> struct HistogramBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridHistogram<BIN_COUNT>(src, dst, stream);
+    }
+};
+
+template <int BIN_COUNT, class SrcPtr>
+__host__ Expr<HistogramBody<BIN_COUNT, SrcPtr> >
+histogram_(const SrcPtr& src)
+{
+    HistogramBody<BIN_COUNT, SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// integral
+
+template <class SrcPtr> struct IntegralBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridIntegral(src, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ Expr<IntegralBody<SrcPtr> >
+integral_(const SrcPtr& src)
+{
+    IntegralBody<SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/unary_func.hpp b/modules/cudev/include/opencv2/cudev/expr/unary_func.hpp
new file mode 100644
index 000000000..a30f6a6f3
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/unary_func.hpp
@@ -0,0 +1,98 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_UNARY_FUNC_HPP__
+#define __OPENCV_CUDEV_EXPR_UNARY_FUNC_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/transform.hpp"
+#include "../functional/functional.hpp"
+#include "expr.hpp"
+
+namespace cv { namespace cudev {
+
+#define CV_CUDEV_EXPR_UNARY_FUNC(name) \
+    template <class SrcPtr> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, name ## _func<typename PtrTraits<SrcPtr>::value_type> > > \
+    name ## _(const SrcPtr& src) \
+    { \
+        return makeExpr(transformPtr(src, name ## _func<typename PtrTraits<SrcPtr>::value_type>())); \
+    }
+
+CV_CUDEV_EXPR_UNARY_FUNC(abs)
+CV_CUDEV_EXPR_UNARY_FUNC(sqr)
+CV_CUDEV_EXPR_UNARY_FUNC(sqrt)
+CV_CUDEV_EXPR_UNARY_FUNC(exp)
+CV_CUDEV_EXPR_UNARY_FUNC(exp2)
+CV_CUDEV_EXPR_UNARY_FUNC(exp10)
+CV_CUDEV_EXPR_UNARY_FUNC(log)
+CV_CUDEV_EXPR_UNARY_FUNC(log2)
+CV_CUDEV_EXPR_UNARY_FUNC(log10)
+CV_CUDEV_EXPR_UNARY_FUNC(sin)
+CV_CUDEV_EXPR_UNARY_FUNC(cos)
+CV_CUDEV_EXPR_UNARY_FUNC(tan)
+CV_CUDEV_EXPR_UNARY_FUNC(asin)
+CV_CUDEV_EXPR_UNARY_FUNC(acos)
+CV_CUDEV_EXPR_UNARY_FUNC(atan)
+CV_CUDEV_EXPR_UNARY_FUNC(sinh)
+CV_CUDEV_EXPR_UNARY_FUNC(cosh)
+CV_CUDEV_EXPR_UNARY_FUNC(tanh)
+CV_CUDEV_EXPR_UNARY_FUNC(asinh)
+CV_CUDEV_EXPR_UNARY_FUNC(acosh)
+CV_CUDEV_EXPR_UNARY_FUNC(atanh)
+
+#undef CV_CUDEV_EXPR_UNARY_FUNC
+
+template <class SrcPtr>
+__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, Binder2nd<pow_func<typename PtrTraits<SrcPtr>::value_type> > > >
+pow_(const SrcPtr& src, float power)
+{
+    return makeExpr(transformPtr(src, bind2nd(pow_func<typename PtrTraits<SrcPtr>::value_type>(), power)));
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/unary_op.hpp b/modules/cudev/include/opencv2/cudev/expr/unary_op.hpp
new file mode 100644
index 000000000..905013e42
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/unary_op.hpp
@@ -0,0 +1,94 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_UNARY_OP_HPP__
+#define __OPENCV_CUDEV_EXPR_UNARY_OP_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/transform.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "../ptr2d/texture.hpp"
+#include "../ptr2d/glob.hpp"
+#include "../functional/functional.hpp"
+#include "expr.hpp"
+
+namespace cv { namespace cudev {
+
+#define CV_CUDEV_EXPR_UNOP_INST(op, functor) \
+    template <typename T> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, functor<T> > > \
+    operator op(const GpuMat_<T>& src) \
+    { \
+        return makeExpr(transformPtr(src, functor<T>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GlobPtrSz<T> >::ptr_type, functor<T> > > \
+    operator op(const GlobPtrSz<T>& src) \
+    { \
+        return makeExpr(transformPtr(src, functor<T>())); \
+    } \
+    template <typename T> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Texture<T> >::ptr_type, functor<T> > > \
+    operator op(const Texture<T>& src) \
+    { \
+        return makeExpr(transformPtr(src, functor<T>())); \
+    } \
+    template <class Body> \
+    __host__ Expr<UnaryTransformPtrSz<typename PtrTraits<Body>::ptr_type, functor<typename Body::value_type> > > \
+    operator op(const Expr<Body>& src) \
+    { \
+        return makeExpr(transformPtr(src.body, functor<typename Body::value_type>())); \
+    }
+
+CV_CUDEV_EXPR_UNOP_INST(-, negate)
+CV_CUDEV_EXPR_UNOP_INST(!, logical_not)
+CV_CUDEV_EXPR_UNOP_INST(~, bit_not)
+
+#undef CV_CUDEV_EXPR_UNOP_INST
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/expr/warping.hpp b/modules/cudev/include/opencv2/cudev/expr/warping.hpp
new file mode 100644
index 000000000..f942a3fb6
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/expr/warping.hpp
@@ -0,0 +1,171 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_EXPR_WARPING_HPP__
+#define __OPENCV_CUDEV_EXPR_WARPING_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/resize.hpp"
+#include "../ptr2d/remap.hpp"
+#include "../ptr2d/warping.hpp"
+#include "../grid/pyramids.hpp"
+#include "../grid/transpose.hpp"
+#include "expr.hpp"
+
+namespace cv { namespace cudev {
+
+// resize
+
+template <class SrcPtr>
+__host__ Expr<ResizePtrSz<typename PtrTraits<SrcPtr>::ptr_type> >
+resize_(const SrcPtr& src, float fx, float fy)
+{
+    return makeExpr(resizePtr(src, fx, fy));
+}
+
+// remap
+
+template <class SrcPtr, class MapPtr>
+__host__ Expr<RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapPtr>::ptr_type> >
+remap_(const SrcPtr& src, const MapPtr& map)
+{
+    return makeExpr(remapPtr(src, map));
+}
+
+template <class SrcPtr, class MapXPtr, class MapYPtr>
+__host__ Expr<RemapPtr2Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapXPtr>::ptr_type, typename PtrTraits<MapYPtr>::ptr_type> >
+remap_(const SrcPtr& src, const MapXPtr& mapx, const MapYPtr& mapy)
+{
+    return makeExpr(remapPtr(src, mapx, mapy));
+}
+
+// warpAffine
+
+template <class SrcPtr>
+__host__ Expr<RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, AffineMapPtr> >
+warpAffine_(const SrcPtr& src, Size dstSize, const GpuMat_<float>& warpMat)
+{
+    return makeExpr(warpAffinePtr(src, dstSize, warpMat));
+}
+
+// warpPerspective
+
+template <class SrcPtr>
+__host__ Expr<RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, PerspectiveMapPtr> >
+warpPerspective_(const SrcPtr& src, Size dstSize, const GpuMat_<float>& warpMat)
+{
+    return makeExpr(warpPerspectivePtr(src, dstSize, warpMat));
+}
+
+// pyrDown
+
+template <class SrcPtr> struct PyrDownBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridPyrDown(src, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ Expr<PyrDownBody<SrcPtr> >
+pyrDown_(const SrcPtr& src)
+{
+    PyrDownBody<SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// pyrUp
+
+template <class SrcPtr> struct PyrUpBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridPyrUp(src, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ Expr<PyrUpBody<SrcPtr> >
+pyrUp_(const SrcPtr& src)
+{
+    PyrUpBody<SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+// transpose
+
+template <class SrcPtr> struct TransposeBody
+{
+    SrcPtr src;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridTranspose(src, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ Expr<TransposeBody<SrcPtr> >
+transpose_(const SrcPtr& src)
+{
+    TransposeBody<SrcPtr> body;
+    body.src = src;
+    return makeExpr(body);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/functional/color_cvt.hpp b/modules/cudev/include/opencv2/cudev/functional/color_cvt.hpp
new file mode 100644
index 000000000..8be854780
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/functional/color_cvt.hpp
@@ -0,0 +1,474 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_FUNCTIONAL_COLOR_CVT_HPP__
+#define __OPENCV_CUDEV_FUNCTIONAL_COLOR_CVT_HPP__
+
+#include "../common.hpp"
+#include "detail/color_cvt.hpp"
+
+namespace cv { namespace cudev {
+
+// Various 3/4-channel to 3/4-channel RGB transformations
+
+#define CV_CUDEV_RGB2RGB_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::RGB2RGB<SrcDepth, scn, dcn, bidx> \
+    { \
+    };
+
+CV_CUDEV_RGB2RGB_INST(BGR_to_RGB, 3, 3, 2)
+CV_CUDEV_RGB2RGB_INST(BGR_to_BGRA, 3, 4, 0)
+CV_CUDEV_RGB2RGB_INST(BGR_to_RGBA, 3, 4, 2)
+CV_CUDEV_RGB2RGB_INST(BGRA_to_BGR, 4, 3, 0)
+CV_CUDEV_RGB2RGB_INST(BGRA_to_RGB, 4, 3, 2)
+CV_CUDEV_RGB2RGB_INST(BGRA_to_RGBA, 4, 4, 2)
+
+#undef CV_CUDEV_RGB2RGB_INST
+
+// RGB to Grayscale
+
+#define CV_CUDEV_RGB2GRAY_INST(name, scn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::RGB2Gray<SrcDepth, scn, bidx> \
+    { \
+    };
+
+CV_CUDEV_RGB2GRAY_INST(RGB_to_GRAY, 3, 2)
+CV_CUDEV_RGB2GRAY_INST(BGR_to_GRAY, 3, 0)
+CV_CUDEV_RGB2GRAY_INST(RGBA_to_GRAY, 4, 2)
+CV_CUDEV_RGB2GRAY_INST(BGRA_to_GRAY, 4, 0)
+
+#undef CV_CUDEV_RGB2GRAY_INST
+
+// Grayscale to RGB
+
+#define CV_CUDEV_GRAY2RGB_INST(name, dcn) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::Gray2RGB<SrcDepth, dcn> \
+    { \
+    };
+
+CV_CUDEV_GRAY2RGB_INST(GRAY_to_BGR, 3)
+CV_CUDEV_GRAY2RGB_INST(GRAY_to_BGRA, 4)
+
+#undef CV_CUDEV_GRAY2RGB_INST
+
+// RGB to YUV
+
+#define CV_CUDEV_RGB2YUV_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::RGB2YUV<SrcDepth, scn, dcn, bidx> \
+    { \
+    };
+
+CV_CUDEV_RGB2YUV_INST(RGB_to_YUV, 3, 3, 2)
+CV_CUDEV_RGB2YUV_INST(RGBA_to_YUV, 4, 3, 2)
+CV_CUDEV_RGB2YUV_INST(RGB_to_YUV4, 3, 4, 2)
+CV_CUDEV_RGB2YUV_INST(RGBA_to_YUV4, 4, 4, 2)
+CV_CUDEV_RGB2YUV_INST(BGR_to_YUV, 3, 3, 0)
+CV_CUDEV_RGB2YUV_INST(BGRA_to_YUV, 4, 3, 0)
+CV_CUDEV_RGB2YUV_INST(BGR_to_YUV4, 3, 4, 0)
+CV_CUDEV_RGB2YUV_INST(BGRA_to_YUV4, 4, 4, 0)
+
+#undef CV_CUDEV_RGB2YUV_INST
+
+// YUV to RGB
+
+#define CV_CUDEV_YUV2RGB_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::YUV2RGB<SrcDepth, scn, dcn, bidx> \
+    { \
+    };
+
+CV_CUDEV_YUV2RGB_INST(YUV_to_RGB, 3, 3, 2)
+CV_CUDEV_YUV2RGB_INST(YUV_to_RGBA, 3, 4, 2)
+CV_CUDEV_YUV2RGB_INST(YUV4_to_RGB, 4, 3, 2)
+CV_CUDEV_YUV2RGB_INST(YUV4_to_RGBA, 4, 4, 2)
+CV_CUDEV_YUV2RGB_INST(YUV_to_BGR, 3, 3, 0)
+CV_CUDEV_YUV2RGB_INST(YUV_to_BGRA, 3, 4, 0)
+CV_CUDEV_YUV2RGB_INST(YUV4_to_BGR, 4, 3, 0)
+CV_CUDEV_YUV2RGB_INST(YUV4_to_BGRA, 4, 4, 0)
+
+#undef CV_CUDEV_YUV2RGB_INST
+
+// RGB to YCrCb
+
+#define CV_CUDEV_RGB2YCrCb_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::RGB2YCrCb<SrcDepth, scn, dcn, bidx> \
+    { \
+    };
+
+CV_CUDEV_RGB2YCrCb_INST(RGB_to_YCrCb, 3, 3, 2)
+CV_CUDEV_RGB2YCrCb_INST(RGBA_to_YCrCb, 4, 3, 2)
+CV_CUDEV_RGB2YCrCb_INST(RGB_to_YCrCb4, 3, 4, 2)
+CV_CUDEV_RGB2YCrCb_INST(RGBA_to_YCrCb4, 4, 4, 2)
+CV_CUDEV_RGB2YCrCb_INST(BGR_to_YCrCb, 3, 3, 0)
+CV_CUDEV_RGB2YCrCb_INST(BGRA_to_YCrCb, 4, 3, 0)
+CV_CUDEV_RGB2YCrCb_INST(BGR_to_YCrCb4, 3, 4, 0)
+CV_CUDEV_RGB2YCrCb_INST(BGRA_to_YCrCb4, 4, 4, 0)
+
+#undef CV_CUDEV_RGB2YCrCb_INST
+
+// YCrCb to RGB
+
+#define CV_CUDEV_YCrCb2RGB_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::YCrCb2RGB<SrcDepth, scn, dcn, bidx> \
+    { \
+    };
+
+CV_CUDEV_YCrCb2RGB_INST(YCrCb_to_RGB, 3, 3, 2)
+CV_CUDEV_YCrCb2RGB_INST(YCrCb_to_RGBA, 3, 4, 2)
+CV_CUDEV_YCrCb2RGB_INST(YCrCb4_to_RGB, 4, 3, 2)
+CV_CUDEV_YCrCb2RGB_INST(YCrCb4_to_RGBA, 4, 4, 2)
+CV_CUDEV_YCrCb2RGB_INST(YCrCb_to_BGR, 3, 3, 0)
+CV_CUDEV_YCrCb2RGB_INST(YCrCb_to_BGRA, 3, 4, 0)
+CV_CUDEV_YCrCb2RGB_INST(YCrCb4_to_BGR, 4, 3, 0)
+CV_CUDEV_YCrCb2RGB_INST(YCrCb4_to_BGRA, 4, 4, 0)
+
+#undef CV_CUDEV_YCrCb2RGB_INST
+
+// RGB to XYZ
+
+#define CV_CUDEV_RGB2XYZ_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::RGB2XYZ<SrcDepth, scn, dcn, bidx> \
+    { \
+    };
+
+CV_CUDEV_RGB2XYZ_INST(RGB_to_XYZ, 3, 3, 2)
+CV_CUDEV_RGB2XYZ_INST(RGBA_to_XYZ, 4, 3, 2)
+CV_CUDEV_RGB2XYZ_INST(RGB_to_XYZ4, 3, 4, 2)
+CV_CUDEV_RGB2XYZ_INST(RGBA_to_XYZ4, 4, 4, 2)
+CV_CUDEV_RGB2XYZ_INST(BGR_to_XYZ, 3, 3, 0)
+CV_CUDEV_RGB2XYZ_INST(BGRA_to_XYZ, 4, 3, 0)
+CV_CUDEV_RGB2XYZ_INST(BGR_to_XYZ4, 3, 4, 0)
+CV_CUDEV_RGB2XYZ_INST(BGRA_to_XYZ4, 4, 4, 0)
+
+#undef CV_CUDEV_RGB2XYZ_INST
+
+// XYZ to RGB
+
+#define CV_CUDEV_XYZ2RGB_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::XYZ2RGB<SrcDepth, scn, dcn, bidx> \
+    { \
+    };
+
+CV_CUDEV_XYZ2RGB_INST(XYZ_to_RGB, 3, 3, 2)
+CV_CUDEV_XYZ2RGB_INST(XYZ4_to_RGB, 4, 3, 2)
+CV_CUDEV_XYZ2RGB_INST(XYZ_to_RGBA, 3, 4, 2)
+CV_CUDEV_XYZ2RGB_INST(XYZ4_to_RGBA, 4, 4, 2)
+CV_CUDEV_XYZ2RGB_INST(XYZ_to_BGR, 3, 3, 0)
+CV_CUDEV_XYZ2RGB_INST(XYZ4_to_BGR, 4, 3, 0)
+CV_CUDEV_XYZ2RGB_INST(XYZ_to_BGRA, 3, 4, 0)
+CV_CUDEV_XYZ2RGB_INST(XYZ4_to_BGRA, 4, 4, 0)
+
+#undef CV_CUDEV_XYZ2RGB_INST
+
+// RGB to HSV
+
+#define CV_CUDEV_RGB2HSV_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::RGB2HSV<SrcDepth, scn, dcn, bidx, 180> \
+    { \
+    }; \
+    template <typename SrcDepth> struct name ## _FULL ## _func : cv::cudev::color_cvt_detail::RGB2HSV<SrcDepth, scn, dcn, bidx, 256> \
+    { \
+    }; \
+    template <> struct name ## _func<float> : cv::cudev::color_cvt_detail::RGB2HSV<float, scn, dcn, bidx, 360> \
+    { \
+    }; \
+    template <> struct name ## _FULL ## _func<float> : cv::cudev::color_cvt_detail::RGB2HSV<float, scn, dcn, bidx, 360> \
+    { \
+    };
+
+CV_CUDEV_RGB2HSV_INST(RGB_to_HSV, 3, 3, 2)
+CV_CUDEV_RGB2HSV_INST(RGBA_to_HSV, 4, 3, 2)
+CV_CUDEV_RGB2HSV_INST(RGB_to_HSV4, 3, 4, 2)
+CV_CUDEV_RGB2HSV_INST(RGBA_to_HSV4, 4, 4, 2)
+CV_CUDEV_RGB2HSV_INST(BGR_to_HSV, 3, 3, 0)
+CV_CUDEV_RGB2HSV_INST(BGRA_to_HSV, 4, 3, 0)
+CV_CUDEV_RGB2HSV_INST(BGR_to_HSV4, 3, 4, 0)
+CV_CUDEV_RGB2HSV_INST(BGRA_to_HSV4, 4, 4, 0)
+
+#undef CV_CUDEV_RGB2HSV_INST
+
+// HSV to RGB
+
+#define CV_CUDEV_HSV2RGB_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::HSV2RGB<SrcDepth, scn, dcn, bidx, 180> \
+    { \
+    }; \
+    template <typename SrcDepth> struct name ## _FULL ## _func : cv::cudev::color_cvt_detail::HSV2RGB<SrcDepth, scn, dcn, bidx, 255> \
+    { \
+    }; \
+    template <> struct name ## _func<float> : cv::cudev::color_cvt_detail::HSV2RGB<float, scn, dcn, bidx, 360> \
+    { \
+    }; \
+    template <> struct name ## _FULL ## _func<float> : cv::cudev::color_cvt_detail::HSV2RGB<float, scn, dcn, bidx, 360> \
+    { \
+    };
+
+CV_CUDEV_HSV2RGB_INST(HSV_to_RGB, 3, 3, 2)
+CV_CUDEV_HSV2RGB_INST(HSV_to_RGBA, 3, 4, 2)
+CV_CUDEV_HSV2RGB_INST(HSV4_to_RGB, 4, 3, 2)
+CV_CUDEV_HSV2RGB_INST(HSV4_to_RGBA, 4, 4, 2)
+CV_CUDEV_HSV2RGB_INST(HSV_to_BGR, 3, 3, 0)
+CV_CUDEV_HSV2RGB_INST(HSV_to_BGRA, 3, 4, 0)
+CV_CUDEV_HSV2RGB_INST(HSV4_to_BGR, 4, 3, 0)
+CV_CUDEV_HSV2RGB_INST(HSV4_to_BGRA, 4, 4, 0)
+
+#undef CV_CUDEV_HSV2RGB_INST
+
+// RGB to HLS
+
+#define CV_CUDEV_RGB2HLS_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::RGB2HLS<SrcDepth, scn, dcn, bidx, 180> \
+    { \
+    }; \
+    template <typename SrcDepth> struct name ## _FULL ## _func : cv::cudev::color_cvt_detail::RGB2HLS<SrcDepth, scn, dcn, bidx, 256> \
+    { \
+    }; \
+    template <> struct name ## _func<float> : cv::cudev::color_cvt_detail::RGB2HLS<float, scn, dcn, bidx, 360> \
+    { \
+    }; \
+    template <> struct name ## _FULL ## _func<float> : cv::cudev::color_cvt_detail::RGB2HLS<float, scn, dcn, bidx, 360> \
+    { \
+    };
+
+CV_CUDEV_RGB2HLS_INST(RGB_to_HLS, 3, 3, 2)
+CV_CUDEV_RGB2HLS_INST(RGBA_to_HLS, 4, 3, 2)
+CV_CUDEV_RGB2HLS_INST(RGB_to_HLS4, 3, 4, 2)
+CV_CUDEV_RGB2HLS_INST(RGBA_to_HLS4, 4, 4, 2)
+CV_CUDEV_RGB2HLS_INST(BGR_to_HLS, 3, 3, 0)
+CV_CUDEV_RGB2HLS_INST(BGRA_to_HLS, 4, 3, 0)
+CV_CUDEV_RGB2HLS_INST(BGR_to_HLS4, 3, 4, 0)
+CV_CUDEV_RGB2HLS_INST(BGRA_to_HLS4, 4, 4, 0)
+
+#undef CV_CUDEV_RGB2HLS_INST
+
+// HLS to RGB
+
+#define CV_CUDEV_HLS2RGB_INST(name, scn, dcn, bidx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::HLS2RGB<SrcDepth, scn, dcn, bidx, 180> \
+    { \
+    }; \
+    template <typename SrcDepth> struct name ## _FULL ## _func : cv::cudev::color_cvt_detail::HLS2RGB<SrcDepth, scn, dcn, bidx, 255> \
+    { \
+    }; \
+    template <> struct name ## _func<float> : cv::cudev::color_cvt_detail::HLS2RGB<float, scn, dcn, bidx, 360> \
+    { \
+    }; \
+    template <> struct name ## _FULL ## _func<float> : cv::cudev::color_cvt_detail::HLS2RGB<float, scn, dcn, bidx, 360> \
+    { \
+    };
+
+CV_CUDEV_HLS2RGB_INST(HLS_to_RGB, 3, 3, 2)
+CV_CUDEV_HLS2RGB_INST(HLS_to_RGBA, 3, 4, 2)
+CV_CUDEV_HLS2RGB_INST(HLS4_to_RGB, 4, 3, 2)
+CV_CUDEV_HLS2RGB_INST(HLS4_to_RGBA, 4, 4, 2)
+CV_CUDEV_HLS2RGB_INST(HLS_to_BGR, 3, 3, 0)
+CV_CUDEV_HLS2RGB_INST(HLS_to_BGRA, 3, 4, 0)
+CV_CUDEV_HLS2RGB_INST(HLS4_to_BGR, 4, 3, 0)
+CV_CUDEV_HLS2RGB_INST(HLS4_to_BGRA, 4, 4, 0)
+
+#undef CV_CUDEV_HLS2RGB_INST
+
+// RGB to Lab
+
+#define CV_CUDEV_RGB2Lab_INST(name, scn, dcn, sRGB, blueIdx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::RGB2Lab<SrcDepth, scn, dcn, sRGB, blueIdx> \
+    { \
+    };
+
+CV_CUDEV_RGB2Lab_INST(RGB_to_Lab, 3, 3, true, 2)
+CV_CUDEV_RGB2Lab_INST(RGBA_to_Lab, 4, 3, true, 2)
+CV_CUDEV_RGB2Lab_INST(RGB_to_Lab4, 3, 4, true, 2)
+CV_CUDEV_RGB2Lab_INST(RGBA_to_Lab4, 4, 4, true, 2)
+CV_CUDEV_RGB2Lab_INST(BGR_to_Lab, 3, 3, true, 0)
+CV_CUDEV_RGB2Lab_INST(BGRA_to_Lab, 4, 3, true, 0)
+CV_CUDEV_RGB2Lab_INST(BGR_to_Lab4, 3, 4, true, 0)
+CV_CUDEV_RGB2Lab_INST(BGRA_to_Lab4, 4, 4, true, 0)
+
+CV_CUDEV_RGB2Lab_INST(LRGB_to_Lab, 3, 3, false, 2)
+CV_CUDEV_RGB2Lab_INST(LRGBA_to_Lab, 4, 3, false, 2)
+CV_CUDEV_RGB2Lab_INST(LRGB_to_Lab4, 3, 4, false, 2)
+CV_CUDEV_RGB2Lab_INST(LRGBA_to_Lab4, 4, 4, false, 2)
+CV_CUDEV_RGB2Lab_INST(LBGR_to_Lab, 3, 3, false, 0)
+CV_CUDEV_RGB2Lab_INST(LBGRA_to_Lab, 4, 3, false, 0)
+CV_CUDEV_RGB2Lab_INST(LBGR_to_Lab4, 3, 4, false, 0)
+CV_CUDEV_RGB2Lab_INST(LBGRA_to_Lab4, 4, 4, false, 0)
+
+#undef CV_CUDEV_RGB2Lab_INST
+
+// Lab to RGB
+
+#define CV_CUDEV_Lab2RGB_INST(name, scn, dcn, sRGB, blueIdx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::Lab2RGB<SrcDepth, scn, dcn, sRGB, blueIdx> \
+    { \
+    };
+
+CV_CUDEV_Lab2RGB_INST(Lab_to_RGB, 3, 3, true, 2)
+CV_CUDEV_Lab2RGB_INST(Lab4_to_RGB, 4, 3, true, 2)
+CV_CUDEV_Lab2RGB_INST(Lab_to_RGBA, 3, 4, true, 2)
+CV_CUDEV_Lab2RGB_INST(Lab4_to_RGBA, 4, 4, true, 2)
+CV_CUDEV_Lab2RGB_INST(Lab_to_BGR, 3, 3, true, 0)
+CV_CUDEV_Lab2RGB_INST(Lab4_to_BGR, 4, 3, true, 0)
+CV_CUDEV_Lab2RGB_INST(Lab_to_BGRA, 3, 4, true, 0)
+CV_CUDEV_Lab2RGB_INST(Lab4_to_BGRA, 4, 4, true, 0)
+
+CV_CUDEV_Lab2RGB_INST(Lab_to_LRGB, 3, 3, false, 2)
+CV_CUDEV_Lab2RGB_INST(Lab4_to_LRGB, 4, 3, false, 2)
+CV_CUDEV_Lab2RGB_INST(Lab_to_LRGBA, 3, 4, false, 2)
+CV_CUDEV_Lab2RGB_INST(Lab4_to_LRGBA, 4, 4, false, 2)
+CV_CUDEV_Lab2RGB_INST(Lab_to_LBGR, 3, 3, false, 0)
+CV_CUDEV_Lab2RGB_INST(Lab4_to_LBGR, 4, 3, false, 0)
+CV_CUDEV_Lab2RGB_INST(Lab_to_LBGRA, 3, 4, false, 0)
+CV_CUDEV_Lab2RGB_INST(Lab4_to_LBGRA, 4, 4, false, 0)
+
+#undef CV_CUDEV_Lab2RGB_INST
+
+// RGB to Luv
+
+#define CV_CUDEV_RGB2Luv_INST(name, scn, dcn, sRGB, blueIdx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::RGB2Luv<SrcDepth, scn, dcn, sRGB, blueIdx> \
+    { \
+    };
+
+CV_CUDEV_RGB2Luv_INST(RGB_to_Luv, 3, 3, true, 2)
+CV_CUDEV_RGB2Luv_INST(RGBA_to_Luv, 4, 3, true, 2)
+CV_CUDEV_RGB2Luv_INST(RGB_to_Luv4, 3, 4, true, 2)
+CV_CUDEV_RGB2Luv_INST(RGBA_to_Luv4, 4, 4, true, 2)
+CV_CUDEV_RGB2Luv_INST(BGR_to_Luv, 3, 3, true, 0)
+CV_CUDEV_RGB2Luv_INST(BGRA_to_Luv, 4, 3, true, 0)
+CV_CUDEV_RGB2Luv_INST(BGR_to_Luv4, 3, 4, true, 0)
+CV_CUDEV_RGB2Luv_INST(BGRA_to_Luv4, 4, 4, true, 0)
+
+CV_CUDEV_RGB2Luv_INST(LRGB_to_Luv, 3, 3, false, 2)
+CV_CUDEV_RGB2Luv_INST(LRGBA_to_Luv, 4, 3, false, 2)
+CV_CUDEV_RGB2Luv_INST(LRGB_to_Luv4, 3, 4, false, 2)
+CV_CUDEV_RGB2Luv_INST(LRGBA_to_Luv4, 4, 4, false, 2)
+CV_CUDEV_RGB2Luv_INST(LBGR_to_Luv, 3, 3, false, 0)
+CV_CUDEV_RGB2Luv_INST(LBGRA_to_Luv, 4, 3, false, 0)
+CV_CUDEV_RGB2Luv_INST(LBGR_to_Luv4, 3, 4, false, 0)
+CV_CUDEV_RGB2Luv_INST(LBGRA_to_Luv4, 4, 4, false, 0)
+
+#undef CV_CUDEV_RGB2Luv_INST
+
+// Luv to RGB
+
+#define CV_CUDEV_Luv2RGB_INST(name, scn, dcn, sRGB, blueIdx) \
+    template <typename SrcDepth> struct name ## _func : cv::cudev::color_cvt_detail::Luv2RGB<SrcDepth, scn, dcn, sRGB, blueIdx> \
+    { \
+    };
+
+CV_CUDEV_Luv2RGB_INST(Luv_to_RGB, 3, 3, true, 2)
+CV_CUDEV_Luv2RGB_INST(Luv4_to_RGB, 4, 3, true, 2)
+CV_CUDEV_Luv2RGB_INST(Luv_to_RGBA, 3, 4, true, 2)
+CV_CUDEV_Luv2RGB_INST(Luv4_to_RGBA, 4, 4, true, 2)
+CV_CUDEV_Luv2RGB_INST(Luv_to_BGR, 3, 3, true, 0)
+CV_CUDEV_Luv2RGB_INST(Luv4_to_BGR, 4, 3, true, 0)
+CV_CUDEV_Luv2RGB_INST(Luv_to_BGRA, 3, 4, true, 0)
+CV_CUDEV_Luv2RGB_INST(Luv4_to_BGRA, 4, 4, true, 0)
+
+CV_CUDEV_Luv2RGB_INST(Luv_to_LRGB, 3, 3, false, 2)
+CV_CUDEV_Luv2RGB_INST(Luv4_to_LRGB, 4, 3, false, 2)
+CV_CUDEV_Luv2RGB_INST(Luv_to_LRGBA, 3, 4, false, 2)
+CV_CUDEV_Luv2RGB_INST(Luv4_to_LRGBA, 4, 4, false, 2)
+CV_CUDEV_Luv2RGB_INST(Luv_to_LBGR, 3, 3, false, 0)
+CV_CUDEV_Luv2RGB_INST(Luv4_to_LBGR, 4, 3, false, 0)
+CV_CUDEV_Luv2RGB_INST(Luv_to_LBGRA, 3, 4, false, 0)
+CV_CUDEV_Luv2RGB_INST(Luv4_to_LBGRA, 4, 4, false, 0)
+
+#undef CV_CUDEV_Luv2RGB_INST
+
+// 24/32-bit RGB to 16-bit (565 or 555) RGB
+
+#define CV_CUDEV_RGB2RGB5x5_INST(name, scn, bidx, green_bits) \
+    typedef cv::cudev::color_cvt_detail::RGB2RGB5x5<scn, bidx, green_bits> name ## _func;
+
+CV_CUDEV_RGB2RGB5x5_INST(BGR_to_BGR555, 3, 0, 5)
+CV_CUDEV_RGB2RGB5x5_INST(BGR_to_BGR565, 3, 0, 6)
+CV_CUDEV_RGB2RGB5x5_INST(RGB_to_BGR555, 3, 2, 5)
+CV_CUDEV_RGB2RGB5x5_INST(RGB_to_BGR565, 3, 2, 6)
+CV_CUDEV_RGB2RGB5x5_INST(BGRA_to_BGR555, 4, 0, 5)
+CV_CUDEV_RGB2RGB5x5_INST(BGRA_to_BGR565, 4, 0, 6)
+CV_CUDEV_RGB2RGB5x5_INST(RGBA_to_BGR555, 4, 2, 5)
+CV_CUDEV_RGB2RGB5x5_INST(RGBA_to_BGR565, 4, 2, 6)
+
+#undef CV_CUDEV_RGB2RGB5x5_INST
+
+// 16-bit (565 or 555) RGB to 24/32-bit RGB
+
+#define CV_CUDEV_RGB5x52RGB_INST(name, dcn, bidx, green_bits) \
+    typedef cv::cudev::color_cvt_detail::RGB5x52RGB<dcn, bidx, green_bits> name ## _func;
+
+CV_CUDEV_RGB5x52RGB_INST(BGR555_to_RGB, 3, 2, 5)
+CV_CUDEV_RGB5x52RGB_INST(BGR565_to_RGB, 3, 2, 6)
+CV_CUDEV_RGB5x52RGB_INST(BGR555_to_BGR, 3, 0, 5)
+CV_CUDEV_RGB5x52RGB_INST(BGR565_to_BGR, 3, 0, 6)
+CV_CUDEV_RGB5x52RGB_INST(BGR555_to_RGBA, 4, 2, 5)
+CV_CUDEV_RGB5x52RGB_INST(BGR565_to_RGBA, 4, 2, 6)
+CV_CUDEV_RGB5x52RGB_INST(BGR555_to_BGRA, 4, 0, 5)
+CV_CUDEV_RGB5x52RGB_INST(BGR565_to_BGRA, 4, 0, 6)
+
+#undef CV_CUDEV_RGB5x52RGB_INST
+
+// Grayscale to 16-bit (565 or 555) RGB
+
+#define CV_CUDEV_GRAY2RGB5x5_INST(name, green_bits) \
+    typedef cv::cudev::color_cvt_detail::Gray2RGB5x5<green_bits> name ## _func;
+
+CV_CUDEV_GRAY2RGB5x5_INST(GRAY_to_BGR555, 5)
+CV_CUDEV_GRAY2RGB5x5_INST(GRAY_to_BGR565, 6)
+
+#undef CV_CUDEV_GRAY2RGB5x5_INST
+
+// 16-bit (565 or 555) RGB to Grayscale
+
+#define CV_CUDEV_RGB5x52GRAY_INST(name, green_bits) \
+    typedef cv::cudev::color_cvt_detail::RGB5x52Gray<green_bits> name ## _func;
+
+CV_CUDEV_RGB5x52GRAY_INST(BGR555_to_GRAY, 5)
+CV_CUDEV_RGB5x52GRAY_INST(BGR565_to_GRAY, 6)
+
+#undef CV_CUDEV_RGB5x52GRAY_INST
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/functional/detail/color_cvt.hpp b/modules/cudev/include/opencv2/cudev/functional/detail/color_cvt.hpp
new file mode 100644
index 000000000..9b20fcc2c
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/functional/detail/color_cvt.hpp
@@ -0,0 +1,1279 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_FUNCTIONAL_COLOR_CVT_DETAIL_HPP__
+#define __OPENCV_CUDEV_FUNCTIONAL_COLOR_CVT_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/vec_traits.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../util/limits.hpp"
+#include "../functional.hpp"
+
+namespace cv { namespace cudev {
+
+namespace color_cvt_detail
+{
+    // utility
+
+    #define CV_CUDEV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
+
+    template <typename T> struct ColorChannel
+    {
+        __device__ __forceinline__ static T max() { return numeric_limits<T>::max(); }
+        __device__ __forceinline__ static T half() { return (T)(max()/2 + 1); }
+    };
+
+    template <> struct ColorChannel<float>
+    {
+        __device__ __forceinline__ static float max() { return 1.f; }
+        __device__ __forceinline__ static float half() { return 0.5f; }
+    };
+
+    template <typename T> __device__ __forceinline__ void setAlpha(typename MakeVec<T, 3>::type& vec, T val)
+    {
+    }
+
+    template <typename T> __device__ __forceinline__ void setAlpha(typename MakeVec<T, 4>::type& vec, T val)
+    {
+        vec.w = val;
+    }
+
+    template <typename T> __device__ __forceinline__ T getAlpha(const typename MakeVec<T, 3>::type& vec)
+    {
+        return ColorChannel<T>::max();
+    }
+
+    template <typename T> __device__ __forceinline__ T getAlpha(const typename MakeVec<T, 4>::type& vec)
+    {
+        return vec.w;
+    }
+
+    enum
+    {
+        yuv_shift  = 14,
+        xyz_shift  = 12,
+        R2Y        = 4899,
+        G2Y        = 9617,
+        B2Y        = 1868,
+        BLOCK_SIZE = 256
+    };
+
+    // Various 3/4-channel to 3/4-channel RGB transformations
+
+    template <typename T, int scn, int dcn, int bidx> struct RGB2RGB
+            : unary_function<typename MakeVec<T, scn>::type, typename MakeVec<T, dcn>::type>
+    {
+        __device__ typename MakeVec<T, dcn>::type operator ()(const typename MakeVec<T, scn>::type& src) const
+        {
+            typename MakeVec<T, dcn>::type dst;
+
+            dst.x = bidx == 0 ? src.x : src.z;
+            dst.y = src.y;
+            dst.z = bidx == 0 ? src.z : src.x;
+            setAlpha(dst, getAlpha<T>(src));
+
+            return dst;
+        }
+    };
+
+    // 24/32-bit RGB to 16-bit (565 or 555) RGB
+
+    template <int scn, int bidx, int green_bits> struct RGB2RGB5x5;
+
+    template <int scn, int bidx> struct RGB2RGB5x5<scn, bidx, 6>
+            : unary_function<typename MakeVec<uchar, scn>::type, ushort>
+    {
+        __device__ ushort operator ()(const typename MakeVec<uchar, scn>::type& src) const
+        {
+            const int b = bidx == 0 ? src.x : src.z;
+            const int g = src.y;
+            const int r = bidx == 0 ? src.z : src.x;
+            return (ushort) ((b >> 3) | ((g & ~3) << 3) | ((r & ~7) << 8));
+        }
+    };
+
+    template <int bidx> struct RGB2RGB5x5<3, bidx, 5>
+            : unary_function<uchar3, ushort>
+    {
+        __device__ ushort operator ()(const uchar3& src) const
+        {
+            const int b = bidx == 0 ? src.x : src.z;
+            const int g = src.y;
+            const int r = bidx == 0 ? src.z : src.x;
+            return (ushort) ((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7));
+        }
+    };
+
+    template <int bidx> struct RGB2RGB5x5<4, bidx, 5>
+            : unary_function<uchar4, ushort>
+    {
+        __device__ ushort operator ()(const uchar4& src) const
+        {
+            const int b = bidx == 0 ? src.x : src.z;
+            const int g = src.y;
+            const int r = bidx == 0 ? src.z : src.x;
+            const int a = src.w;
+            return (ushort) ((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7) | (a * 0x8000));
+        }
+    };
+
+    // 16-bit (565 or 555) RGB to 24/32-bit RGB
+
+    template <int dcn, int bidx, int green_bits> struct RGB5x52RGB;
+
+    template <int bidx> struct RGB5x52RGB<3, bidx, 5>
+            : unary_function<ushort, uchar3>
+    {
+        __device__ uchar3 operator ()(ushort src) const
+        {
+            const int b = src << 3;
+            const int r = (src >> 7) & ~7;
+
+            uchar3 dst;
+
+            dst.x = bidx == 0 ? b : r;
+            dst.y = (src >> 2) & ~7;
+            dst.z = bidx == 0 ? r : b;
+
+            return dst;
+        }
+    };
+
+    template <int bidx> struct RGB5x52RGB<4, bidx, 5>
+            : unary_function<ushort, uchar4>
+    {
+        __device__ uchar4 operator ()(ushort src) const
+        {
+            const int b = src << 3;
+            const int r = (src >> 7) & ~7;
+
+            uchar4 dst;
+
+            dst.x = bidx == 0 ? b : r;
+            dst.y = (src >> 2) & ~7;
+            dst.z = bidx == 0 ? r : b;
+            dst.w = (src & 0x8000) * 0xffu;
+
+            return dst;
+        }
+    };
+
+    template <int bidx> struct RGB5x52RGB<3, bidx, 6>
+            : unary_function<ushort, uchar3>
+    {
+        __device__ uchar3 operator ()(ushort src) const
+        {
+            const int b = src << 3;
+            const int r = (src >> 8) & ~7;
+
+            uchar3 dst;
+
+            dst.x = bidx == 0 ? b : r;
+            dst.y = (src >> 3) & ~3;
+            dst.z = bidx == 0 ? r : b;
+
+            return dst;
+        }
+    };
+
+    template <int bidx> struct RGB5x52RGB<4, bidx, 6>
+            : unary_function<ushort, uchar4>
+    {
+        __device__ uchar4 operator ()(ushort src) const
+        {
+            const int b = src << 3;
+            const int r = (src >> 8) & ~7;
+
+            uchar4 dst;
+
+            dst.x = bidx == 0 ? b : r;
+            dst.y = (src >> 3) & ~3;
+            dst.z = bidx == 0 ? r : b;
+            dst.w = 255;
+
+            return dst;
+        }
+    };
+
+    // Grayscale to RGB
+
+    template <typename T, int dcn> struct Gray2RGB
+            : unary_function<T, typename MakeVec<T, dcn>::type>
+    {
+        __device__ typename MakeVec<T, dcn>::type operator ()(T src) const
+        {
+            typename MakeVec<T, dcn>::type dst;
+
+            dst.z = dst.y = dst.x = src;
+            setAlpha(dst, ColorChannel<T>::max());
+
+            return dst;
+        }
+    };
+
+    // Grayscale to 16-bit (565 or 555) RGB
+
+    template <int green_bits> struct Gray2RGB5x5;
+
+    template <> struct Gray2RGB5x5<5>
+            : unary_function<uchar, ushort>
+    {
+        __device__ ushort operator ()(uchar src) const
+        {
+            return (ushort) (src | (src << 5) | (src << 10));
+        }
+    };
+
+    template <> struct Gray2RGB5x5<6>
+            : unary_function<uchar, ushort>
+    {
+        __device__ ushort operator ()(uchar src) const
+        {
+            return (ushort) ((src >> 3) | ((src & ~3) << 3) | ((src & ~7) << 8));
+        }
+    };
+
+    // 16-bit (565 or 555) RGB to Grayscale
+
+    template <int green_bits> struct RGB5x52Gray;
+
+    template <> struct RGB5x52Gray<5>
+            : unary_function<ushort, uchar>
+    {
+        __device__ uchar operator ()(ushort src) const
+        {
+            return (uchar) CV_CUDEV_DESCALE(((src << 3) & 0xf8) * B2Y + ((src >> 2) & 0xf8) * G2Y + ((src >> 7) & 0xf8) * R2Y, yuv_shift);
+        }
+    };
+
+    template <> struct RGB5x52Gray<6>
+            : unary_function<ushort, uchar>
+    {
+        __device__ uchar operator ()(ushort src) const
+        {
+            return (uchar) CV_CUDEV_DESCALE(((src << 3) & 0xf8) * B2Y + ((src >> 3) & 0xfc) * G2Y + ((src >> 8) & 0xf8) * R2Y, yuv_shift);
+        }
+    };
+
+    // RGB to Grayscale
+
+    template <typename T, int scn, int bidx> struct RGB2Gray
+            : unary_function<typename MakeVec<T, scn>::type, T>
+    {
+        __device__ T operator ()(const typename MakeVec<T, scn>::type& src) const
+        {
+            const int b = bidx == 0 ? src.x : src.z;
+            const int g = src.y;
+            const int r = bidx == 0 ? src.z : src.x;
+            return (T) CV_CUDEV_DESCALE(b * B2Y + g * G2Y + r * R2Y, yuv_shift);
+        }
+    };
+
+    template <int scn, int bidx> struct RGB2Gray<float, scn, bidx>
+            : unary_function<typename MakeVec<float, scn>::type, float>
+    {
+        __device__ float operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float b = bidx == 0 ? src.x : src.z;
+            const float g = src.y;
+            const float r = bidx == 0 ? src.z : src.x;
+            return b * 0.114f + g * 0.587f + r * 0.299f;
+        }
+    };
+
+    // RGB to YUV
+
+    __constant__ float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
+    __constant__ int   c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
+
+    template <typename T, int scn, int dcn, int bidx> struct RGB2YUV
+            : unary_function<typename MakeVec<T, scn>::type, typename MakeVec<T, dcn>::type>
+    {
+        __device__ typename MakeVec<T, dcn>::type operator ()(const typename MakeVec<T, scn>::type& src) const
+        {
+            const int b = bidx == 0 ? src.x : src.z;
+            const int g = src.y;
+            const int r = bidx == 0 ? src.z : src.x;
+
+            const int delta = ColorChannel<T>::half() * (1 << yuv_shift);
+
+            const int Y = CV_CUDEV_DESCALE(b * c_RGB2YUVCoeffs_i[2] + g * c_RGB2YUVCoeffs_i[1] + r * c_RGB2YUVCoeffs_i[0], yuv_shift);
+            const int Cr = CV_CUDEV_DESCALE((r - Y) * c_RGB2YUVCoeffs_i[3] + delta, yuv_shift);
+            const int Cb = CV_CUDEV_DESCALE((b - Y) * c_RGB2YUVCoeffs_i[4] + delta, yuv_shift);
+
+            typename MakeVec<T, dcn>::type dst;
+
+            dst.x = saturate_cast<T>(Y);
+            dst.y = saturate_cast<T>(Cr);
+            dst.z = saturate_cast<T>(Cb);
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx> struct RGB2YUV<float, scn, dcn, bidx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float b = bidx == 0 ? src.x : src.z;
+            const float g = src.y;
+            const float r = bidx == 0 ? src.z : src.x;
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = b * c_RGB2YUVCoeffs_f[2] + g * c_RGB2YUVCoeffs_f[1] + r * c_RGB2YUVCoeffs_f[0];
+            dst.y = (r - dst.x) * c_RGB2YUVCoeffs_f[3] + ColorChannel<float>::half();
+            dst.z = (b - dst.x) * c_RGB2YUVCoeffs_f[4] + ColorChannel<float>::half();
+
+            return dst;
+        }
+    };
+
+    // YUV to RGB
+
+    __constant__ float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
+    __constant__ int   c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
+
+    template <typename T, int scn, int dcn, int bidx> struct YUV2RGB
+            : unary_function<typename MakeVec<T, scn>::type, typename MakeVec<T, dcn>::type>
+    {
+        __device__ typename MakeVec<T, dcn>::type operator ()(const typename MakeVec<T, scn>::type& src) const
+        {
+            const int b = src.x + CV_CUDEV_DESCALE((src.z - ColorChannel<T>::half()) * c_YUV2RGBCoeffs_i[3], yuv_shift);
+            const int g = src.x + CV_CUDEV_DESCALE((src.z - ColorChannel<T>::half()) * c_YUV2RGBCoeffs_i[2] + (src.y - ColorChannel<T>::half()) * c_YUV2RGBCoeffs_i[1], yuv_shift);
+            const int r = src.x + CV_CUDEV_DESCALE((src.y - ColorChannel<T>::half()) * c_YUV2RGBCoeffs_i[0], yuv_shift);
+
+            typename MakeVec<T, dcn>::type dst;
+
+            dst.x = saturate_cast<T>(bidx == 0 ? b : r);
+            dst.y = saturate_cast<T>(g);
+            dst.z = saturate_cast<T>(bidx == 0 ? r : b);
+            setAlpha(dst, ColorChannel<T>::max());
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx> struct YUV2RGB<float, scn, dcn, bidx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float b = src.x + (src.z - ColorChannel<float>::half()) * c_YUV2RGBCoeffs_f[3];
+            const float g = src.x + (src.z - ColorChannel<float>::half()) * c_YUV2RGBCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * c_YUV2RGBCoeffs_f[1];
+            const float r = src.x + (src.y - ColorChannel<float>::half()) * c_YUV2RGBCoeffs_f[0];
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = bidx == 0 ? b : r;
+            dst.y = g;
+            dst.z = bidx == 0 ? r : b;
+            setAlpha(dst, ColorChannel<float>::max());
+
+            return dst;
+        }
+    };
+
+    // RGB to YCrCb
+
+    __constant__ float c_RGB2YCrCbCoeffs_f[5] = { 0.299f, 0.587f, 0.114f, 0.713f, 0.564f };
+    __constant__ int   c_RGB2YCrCbCoeffs_i[5] = { R2Y, G2Y, B2Y, 11682, 9241 };
+
+    template <typename T, int scn, int dcn, int bidx> struct RGB2YCrCb
+            : unary_function<typename MakeVec<T, scn>::type, typename MakeVec<T, dcn>::type>
+    {
+        __device__ typename MakeVec<T, dcn>::type operator ()(const typename MakeVec<T, scn>::type& src) const
+        {
+            const int b = bidx == 0 ? src.x : src.z;
+            const int g = src.y;
+            const int r = bidx == 0 ? src.z : src.x;
+
+            const int delta = ColorChannel<T>::half() * (1 << yuv_shift);
+
+            const int Y = CV_CUDEV_DESCALE(b * c_RGB2YCrCbCoeffs_i[2] + g * c_RGB2YCrCbCoeffs_i[1] + r * c_RGB2YCrCbCoeffs_i[0], yuv_shift);
+            const int Cr = CV_CUDEV_DESCALE((r - Y) * c_RGB2YCrCbCoeffs_i[3] + delta, yuv_shift);
+            const int Cb = CV_CUDEV_DESCALE((b - Y) * c_RGB2YCrCbCoeffs_i[4] + delta, yuv_shift);
+
+            typename MakeVec<T, dcn>::type dst;
+
+            dst.x = saturate_cast<T>(Y);
+            dst.y = saturate_cast<T>(Cr);
+            dst.z = saturate_cast<T>(Cb);
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx> struct RGB2YCrCb<float, scn, dcn, bidx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float b = bidx == 0 ? src.x : src.z;
+            const float g = src.y;
+            const float r = bidx == 0 ? src.z : src.x;
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = b * c_RGB2YCrCbCoeffs_f[2] + g * c_RGB2YCrCbCoeffs_f[1] + r * c_RGB2YCrCbCoeffs_f[0];
+            dst.y = (r - dst.x) * c_RGB2YCrCbCoeffs_f[3] + ColorChannel<float>::half();
+            dst.z = (b - dst.x) * c_RGB2YCrCbCoeffs_f[4] + ColorChannel<float>::half();
+
+            return dst;
+        }
+    };
+
+    // YCrCb to RGB
+
+    __constant__ float c_YCrCb2RGBCoeffs_f[5] = {1.403f, -0.714f, -0.344f, 1.773f};
+    __constant__ int   c_YCrCb2RGBCoeffs_i[5] = {22987, -11698, -5636, 29049};
+
+    template <typename T, int scn, int dcn, int bidx> struct YCrCb2RGB
+            : unary_function<typename MakeVec<T, scn>::type, typename MakeVec<T, dcn>::type>
+    {
+        __device__ typename MakeVec<T, dcn>::type operator ()(const typename MakeVec<T, scn>::type& src) const
+        {
+            const int b = src.x + CV_CUDEV_DESCALE((src.z - ColorChannel<T>::half()) * c_YCrCb2RGBCoeffs_i[3], yuv_shift);
+            const int g = src.x + CV_CUDEV_DESCALE((src.z - ColorChannel<T>::half()) * c_YCrCb2RGBCoeffs_i[2] + (src.y - ColorChannel<T>::half()) * c_YCrCb2RGBCoeffs_i[1], yuv_shift);
+            const int r = src.x + CV_CUDEV_DESCALE((src.y - ColorChannel<T>::half()) * c_YCrCb2RGBCoeffs_i[0], yuv_shift);
+
+            typename MakeVec<T, dcn>::type dst;
+
+            dst.x = saturate_cast<T>(bidx == 0 ? b : r);
+            dst.y = saturate_cast<T>(g);
+            dst.z = saturate_cast<T>(bidx == 0 ? r : b);
+            setAlpha(dst, ColorChannel<T>::max());
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx> struct YCrCb2RGB<float, scn, dcn, bidx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float b = src.x + (src.z - ColorChannel<float>::half()) * c_YCrCb2RGBCoeffs_f[3];
+            const float g = src.x + (src.z - ColorChannel<float>::half()) * c_YCrCb2RGBCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * c_YCrCb2RGBCoeffs_f[1];
+            const float r = src.x + (src.y - ColorChannel<float>::half()) * c_YCrCb2RGBCoeffs_f[0];
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = bidx == 0 ? b : r;
+            dst.y = g;
+            dst.z = bidx == 0 ? r : b;
+            setAlpha(dst, ColorChannel<float>::max());
+
+            return dst;
+        }
+    };
+
+    // RGB to XYZ
+
+    __constant__ float c_RGB2XYZ_D65f[9] = { 0.412453f, 0.357580f, 0.180423f, 0.212671f, 0.715160f, 0.072169f, 0.019334f, 0.119193f, 0.950227f };
+    __constant__ int   c_RGB2XYZ_D65i[9] = { 1689, 1465, 739, 871, 2929, 296, 79, 488, 3892 };
+
+    template <typename T, int scn, int dcn, int bidx> struct RGB2XYZ
+            : unary_function<typename MakeVec<T, scn>::type, typename MakeVec<T, dcn>::type>
+    {
+        __device__ typename MakeVec<T, dcn>::type operator ()(const typename MakeVec<T, scn>::type& src) const
+        {
+            const int b = bidx == 0 ? src.x : src.z;
+            const int g = src.y;
+            const int r = bidx == 0 ? src.z : src.x;
+
+            typename MakeVec<T, dcn>::type dst;
+
+            dst.z = saturate_cast<T>(CV_CUDEV_DESCALE(r * c_RGB2XYZ_D65i[6] + g * c_RGB2XYZ_D65i[7] + b * c_RGB2XYZ_D65i[8], xyz_shift));
+            dst.x = saturate_cast<T>(CV_CUDEV_DESCALE(r * c_RGB2XYZ_D65i[0] + g * c_RGB2XYZ_D65i[1] + b * c_RGB2XYZ_D65i[2], xyz_shift));
+            dst.y = saturate_cast<T>(CV_CUDEV_DESCALE(r * c_RGB2XYZ_D65i[3] + g * c_RGB2XYZ_D65i[4] + b * c_RGB2XYZ_D65i[5], xyz_shift));
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx> struct RGB2XYZ<float, scn, dcn, bidx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float b = bidx == 0 ? src.x : src.z;
+            const float g = src.y;
+            const float r = bidx == 0 ? src.z : src.x;
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = r * c_RGB2XYZ_D65f[0] + g * c_RGB2XYZ_D65f[1] + b * c_RGB2XYZ_D65f[2];
+            dst.y = r * c_RGB2XYZ_D65f[3] + g * c_RGB2XYZ_D65f[4] + b * c_RGB2XYZ_D65f[5];
+            dst.z = r * c_RGB2XYZ_D65f[6] + g * c_RGB2XYZ_D65f[7] + b * c_RGB2XYZ_D65f[8];
+
+            return dst;
+        }
+    };
+
+    // XYZ to RGB
+
+    __constant__ float c_XYZ2sRGB_D65f[9] = { 3.240479f, -1.53715f, -0.498535f, -0.969256f, 1.875991f, 0.041556f, 0.055648f, -0.204043f, 1.057311f };
+    __constant__ int   c_XYZ2sRGB_D65i[9] = { 13273, -6296, -2042, -3970, 7684, 170, 228, -836, 4331 };
+
+    template <typename T, int scn, int dcn, int bidx> struct XYZ2RGB
+            : unary_function<typename MakeVec<T, scn>::type, typename MakeVec<T, dcn>::type>
+    {
+        __device__ typename MakeVec<T, dcn>::type operator ()(const typename MakeVec<T, scn>::type& src) const
+        {
+            const int b = CV_CUDEV_DESCALE(src.x * c_XYZ2sRGB_D65i[6] + src.y * c_XYZ2sRGB_D65i[7] + src.z * c_XYZ2sRGB_D65i[8], xyz_shift);
+            const int g = CV_CUDEV_DESCALE(src.x * c_XYZ2sRGB_D65i[3] + src.y * c_XYZ2sRGB_D65i[4] + src.z * c_XYZ2sRGB_D65i[5], xyz_shift);
+            const int r = CV_CUDEV_DESCALE(src.x * c_XYZ2sRGB_D65i[0] + src.y * c_XYZ2sRGB_D65i[1] + src.z * c_XYZ2sRGB_D65i[2], xyz_shift);
+
+            typename MakeVec<T, dcn>::type dst;
+
+            dst.x = saturate_cast<T>(bidx == 0 ? b : r);
+            dst.y = saturate_cast<T>(g);
+            dst.z = saturate_cast<T>(bidx == 0 ? r : b);
+            setAlpha(dst, ColorChannel<T>::max());
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx> struct XYZ2RGB<float, scn, dcn, bidx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float b = src.x * c_XYZ2sRGB_D65f[6] + src.y * c_XYZ2sRGB_D65f[7] + src.z * c_XYZ2sRGB_D65f[8];
+            const float g = src.x * c_XYZ2sRGB_D65f[3] + src.y * c_XYZ2sRGB_D65f[4] + src.z * c_XYZ2sRGB_D65f[5];
+            const float r = src.x * c_XYZ2sRGB_D65f[0] + src.y * c_XYZ2sRGB_D65f[1] + src.z * c_XYZ2sRGB_D65f[2];
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = bidx == 0 ? b : r;
+            dst.y = g;
+            dst.z = bidx == 0 ? r : b;
+            setAlpha(dst, ColorChannel<float>::max());
+
+            return dst;
+        }
+    };
+
+    // RGB to HSV
+
+    __constant__ int c_HsvDivTable   [256] = {0, 1044480, 522240, 348160, 261120, 208896, 174080, 149211, 130560, 116053, 104448, 94953, 87040, 80345, 74606, 69632, 65280, 61440, 58027, 54973, 52224, 49737, 47476, 45412, 43520, 41779, 40172, 38684, 37303, 36017, 34816, 33693, 32640, 31651, 30720, 29842, 29013, 28229, 27486, 26782, 26112, 25475, 24869, 24290, 23738, 23211, 22706, 22223, 21760, 21316, 20890, 20480, 20086, 19707, 19342, 18991, 18651, 18324, 18008, 17703, 17408, 17123, 16846, 16579, 16320, 16069, 15825, 15589, 15360, 15137, 14921, 14711, 14507, 14308, 14115, 13926, 13743, 13565, 13391, 13221, 13056, 12895, 12738, 12584, 12434, 12288, 12145, 12006, 11869, 11736, 11605, 11478, 11353, 11231, 11111, 10995, 10880, 10768, 10658, 10550, 10445, 10341, 10240, 10141, 10043, 9947, 9854, 9761, 9671, 9582, 9495, 9410, 9326, 9243, 9162, 9082, 9004, 8927, 8852, 8777, 8704, 8632, 8561, 8492, 8423, 8356, 8290, 8224, 8160, 8097, 8034, 7973, 7913, 7853, 7795, 7737, 7680, 7624, 7569, 7514, 7461, 7408, 7355, 7304, 7253, 7203, 7154, 7105, 7057, 7010, 6963, 6917, 6872, 6827, 6782, 6739, 6695, 6653, 6611, 6569, 6528, 6487, 6447, 6408, 6369, 6330, 6292, 6254, 6217, 6180, 6144, 6108, 6073, 6037, 6003, 5968, 5935, 5901, 5868, 5835, 5803, 5771, 5739, 5708, 5677, 5646, 5615, 5585, 5556, 5526, 5497, 5468, 5440, 5412, 5384, 5356, 5329, 5302, 5275, 5249, 5222, 5196, 5171, 5145, 5120, 5095, 5070, 5046, 5022, 4998, 4974, 4950, 4927, 4904, 4881, 4858, 4836, 4813, 4791, 4769, 4748, 4726, 4705, 4684, 4663, 4642, 4622, 4601, 4581, 4561, 4541, 4522, 4502, 4483, 4464, 4445, 4426, 4407, 4389, 4370, 4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229, 4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096};
+    __constant__ int c_HsvDivTable180[256] = {0, 122880, 61440, 40960, 30720, 24576, 20480, 17554, 15360, 13653, 12288, 11171, 10240, 9452, 8777, 8192, 7680, 7228, 6827, 6467, 6144, 5851, 5585, 5343, 5120, 4915, 4726, 4551, 4389, 4237, 4096, 3964, 3840, 3724, 3614, 3511, 3413, 3321, 3234, 3151, 3072, 2997, 2926, 2858, 2793, 2731, 2671, 2614, 2560, 2508, 2458, 2409, 2363, 2318, 2276, 2234, 2194, 2156, 2119, 2083, 2048, 2014, 1982, 1950, 1920, 1890, 1862, 1834, 1807, 1781, 1755, 1731, 1707, 1683, 1661, 1638, 1617, 1596, 1575, 1555, 1536, 1517, 1499, 1480, 1463, 1446, 1429, 1412, 1396, 1381, 1365, 1350, 1336, 1321, 1307, 1293, 1280, 1267, 1254, 1241, 1229, 1217, 1205, 1193, 1182, 1170, 1159, 1148, 1138, 1127, 1117, 1107, 1097, 1087, 1078, 1069, 1059, 1050, 1041, 1033, 1024, 1016, 1007, 999, 991, 983, 975, 968, 960, 953, 945, 938, 931, 924, 917, 910, 904, 897, 890, 884, 878, 871, 865, 859, 853, 847, 842, 836, 830, 825, 819, 814, 808, 803, 798, 793, 788, 783, 778, 773, 768, 763, 759, 754, 749, 745, 740, 736, 731, 727, 723, 719, 714, 710, 706, 702, 698, 694, 690, 686, 683, 679, 675, 671, 668, 664, 661, 657, 654, 650, 647, 643, 640, 637, 633, 630, 627, 624, 621, 617, 614, 611, 608, 605, 602, 599, 597, 594, 591, 588, 585, 582, 580, 577, 574, 572, 569, 566, 564, 561, 559, 556, 554, 551, 549, 546, 544, 541, 539, 537, 534, 532, 530, 527, 525, 523, 521, 518, 516, 514, 512, 510, 508, 506, 504, 502, 500, 497, 495, 493, 492, 490, 488, 486, 484, 482};
+    __constant__ int c_HsvDivTable256[256] = {0, 174763, 87381, 58254, 43691, 34953, 29127, 24966, 21845, 19418, 17476, 15888, 14564, 13443, 12483, 11651, 10923, 10280, 9709, 9198, 8738, 8322, 7944, 7598, 7282, 6991, 6722, 6473, 6242, 6026, 5825, 5638, 5461, 5296, 5140, 4993, 4855, 4723, 4599, 4481, 4369, 4263, 4161, 4064, 3972, 3884, 3799, 3718, 3641, 3567, 3495, 3427, 3361, 3297, 3236, 3178, 3121, 3066, 3013, 2962, 2913, 2865, 2819, 2774, 2731, 2689, 2648, 2608, 2570, 2533, 2497, 2461, 2427, 2394, 2362, 2330, 2300, 2270, 2241, 2212, 2185, 2158, 2131, 2106, 2081, 2056, 2032, 2009, 1986, 1964, 1942, 1920, 1900, 1879, 1859, 1840, 1820, 1802, 1783, 1765, 1748, 1730, 1713, 1697, 1680, 1664, 1649, 1633, 1618, 1603, 1589, 1574, 1560, 1547, 1533, 1520, 1507, 1494, 1481, 1469, 1456, 1444, 1432, 1421, 1409, 1398, 1387, 1376, 1365, 1355, 1344, 1334, 1324, 1314, 1304, 1295, 1285, 1276, 1266, 1257, 1248, 1239, 1231, 1222, 1214, 1205, 1197, 1189, 1181, 1173, 1165, 1157, 1150, 1142, 1135, 1128, 1120, 1113, 1106, 1099, 1092, 1085, 1079, 1072, 1066, 1059, 1053, 1046, 1040, 1034, 1028, 1022, 1016, 1010, 1004, 999, 993, 987, 982, 976, 971, 966, 960, 955, 950, 945, 940, 935, 930, 925, 920, 915, 910, 906, 901, 896, 892, 887, 883, 878, 874, 869, 865, 861, 857, 853, 848, 844, 840, 836, 832, 828, 824, 820, 817, 813, 809, 805, 802, 798, 794, 791, 787, 784, 780, 777, 773, 770, 767, 763, 760, 757, 753, 750, 747, 744, 741, 737, 734, 731, 728, 725, 722, 719, 716, 713, 710, 708, 705, 702, 699, 696, 694, 691, 688, 685};
+
+    template <typename T, int scn, int dcn, int bidx, int hr> struct RGB2HSV;
+
+    template <int scn, int dcn, int bidx, int hr> struct RGB2HSV<uchar, scn, dcn, bidx, hr>
+            : unary_function<typename MakeVec<uchar, scn>::type, typename MakeVec<uchar, dcn>::type>
+    {
+        __device__ typename MakeVec<uchar, dcn>::type operator ()(const typename MakeVec<uchar, scn>::type& src) const
+        {
+            const int hsv_shift = 12;
+            const int* hdiv_table = hr == 180 ? c_HsvDivTable180 : c_HsvDivTable256;
+
+            const int b = bidx == 0 ? src.x : src.z;
+            const int g = src.y;
+            const int r = bidx == 0 ? src.z : src.x;
+
+            int h, s, v = b;
+            int vmin = b, diff;
+            int vr, vg;
+
+            v = ::max(v, g);
+            v = ::max(v, r);
+            vmin = ::min(vmin, g);
+            vmin = ::min(vmin, r);
+
+            diff = v - vmin;
+            vr = (v == r) * -1;
+            vg = (v == g) * -1;
+
+            s = (diff * c_HsvDivTable[v] + (1 << (hsv_shift-1))) >> hsv_shift;
+            h = (vr & (g - b)) + (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
+            h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
+            h += (h < 0) * hr;
+
+            typename MakeVec<uchar, dcn>::type dst;
+
+            dst.x = saturate_cast<uchar>(h);
+            dst.y = saturate_cast<uchar>(s);
+            dst.z = saturate_cast<uchar>(v);
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx, int hr> struct RGB2HSV<float, scn, dcn, bidx, hr>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float hscale = hr * (1.f / 360.f);
+
+            const float b = bidx == 0 ? src.x : src.z;
+            const float g = src.y;
+            const float r = bidx == 0 ? src.z : src.x;
+
+            float h, s, v;
+            float vmin, diff;
+
+            v = vmin = r;
+            v = ::fmax(v, g);
+            v = ::fmax(v, b);
+            vmin = ::fmin(vmin, g);
+            vmin = ::fmin(vmin, b);
+
+            diff = v - vmin;
+            s = diff / (float)(::fabs(v) + numeric_limits<float>::epsilon());
+            diff = (float)(60. / (diff + numeric_limits<float>::epsilon()));
+
+            h  = (v == r) * (g - b) * diff;
+            h += (v != r && v == g) * ((b - r) * diff + 120.f);
+            h += (v != r && v != g) * ((r - g) * diff + 240.f);
+            h += (h < 0) * 360.f;
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = h * hscale;
+            dst.y = s;
+            dst.z = v;
+
+            return dst;
+        }
+    };
+
+    // HSV to RGB
+
+    __constant__ int c_HsvSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} };
+
+    template <typename T, int scn, int dcn, int bidx, int hr> struct HSV2RGB;
+
+    template <int scn, int dcn, int bidx, int hr> struct HSV2RGB<float, scn, dcn, bidx, hr>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float hscale = 6.f / hr;
+
+            float h = src.x, s = src.y, v = src.z;
+            float b = v, g = v, r = v;
+
+            if (s != 0)
+            {
+                h *= hscale;
+
+                if( h < 0 )
+                    do h += 6; while( h < 0 );
+                else if( h >= 6 )
+                    do h -= 6; while( h >= 6 );
+
+                int sector = __float2int_rd(h);
+                h -= sector;
+
+                if ( (unsigned)sector >= 6u )
+                {
+                    sector = 0;
+                    h = 0.f;
+                }
+
+                float tab[4];
+                tab[0] = v;
+                tab[1] = v * (1.f - s);
+                tab[2] = v * (1.f - s * h);
+                tab[3] = v * (1.f - s * (1.f - h));
+
+                b = tab[c_HsvSectorData[sector][0]];
+                g = tab[c_HsvSectorData[sector][1]];
+                r = tab[c_HsvSectorData[sector][2]];
+            }
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = bidx == 0 ? b : r;
+            dst.y = g;
+            dst.z = bidx == 0 ? r : b;
+            setAlpha(dst, ColorChannel<float>::max());
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx, int hr> struct HSV2RGB<uchar, scn, dcn, bidx, hr>
+            : unary_function<typename MakeVec<uchar, scn>::type, typename MakeVec<uchar, dcn>::type>
+    {
+        __device__ typename MakeVec<uchar, dcn>::type operator ()(const typename MakeVec<uchar, scn>::type& src) const
+        {
+            float3 buf;
+
+            buf.x = src.x;
+            buf.y = src.y * (1.f / 255.f);
+            buf.z = src.z * (1.f / 255.f);
+
+            HSV2RGB<float, 3, 3, bidx, hr> cvtf;
+            buf = cvtf(buf);
+
+            typename MakeVec<uchar, dcn>::type dst;
+
+            dst.x = saturate_cast<uchar>(buf.x * 255.f);
+            dst.y = saturate_cast<uchar>(buf.y * 255.f);
+            dst.z = saturate_cast<uchar>(buf.z * 255.f);
+            setAlpha(dst, ColorChannel<uchar>::max());
+
+            return dst;
+        }
+    };
+
+    // RGB to HLS
+
+    template <typename T, int scn, int dcn, int bidx, int hr> struct RGB2HLS;
+
+    template <int scn, int dcn, int bidx, int hr> struct RGB2HLS<float, scn, dcn, bidx, hr>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float hscale = hr * (1.f / 360.f);
+
+            const float b = bidx == 0 ? src.x : src.z;
+            const float g = src.y;
+            const float r = bidx == 0 ? src.z : src.x;
+
+            float h = 0.f, s = 0.f, l;
+            float vmin, vmax, diff;
+
+            vmax = vmin = r;
+            vmax = ::fmax(vmax, g);
+            vmax = ::fmax(vmax, b);
+            vmin = ::fmin(vmin, g);
+            vmin = ::fmin(vmin, b);
+
+            diff = vmax - vmin;
+            l = (vmax + vmin) * 0.5f;
+
+            if (diff > numeric_limits<float>::epsilon())
+            {
+                s = (l < 0.5f) * diff / (vmax + vmin);
+                s += (l >= 0.5f) * diff / (2.0f - vmax - vmin);
+
+                diff = 60.f / diff;
+
+                h  = (vmax == r) * (g - b) * diff;
+                h += (vmax != r && vmax == g) * ((b - r) * diff + 120.f);
+                h += (vmax != r && vmax != g) * ((r - g) * diff + 240.f);
+                h += (h < 0.f) * 360.f;
+            }
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = h * hscale;
+            dst.y = l;
+            dst.z = s;
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx, int hr> struct RGB2HLS<uchar, scn, dcn, bidx, hr>
+            : unary_function<typename MakeVec<uchar, scn>::type, typename MakeVec<uchar, dcn>::type>
+    {
+        __device__ typename MakeVec<uchar, dcn>::type operator ()(const typename MakeVec<uchar, scn>::type& src) const
+        {
+            float3 buf;
+
+            buf.x = src.x * (1.f / 255.f);
+            buf.y = src.y * (1.f / 255.f);
+            buf.z = src.z * (1.f / 255.f);
+
+            RGB2HLS<float, 3, 3, bidx, hr> cvtf;
+            buf = cvtf(buf);
+
+            typename MakeVec<uchar, dcn>::type dst;
+
+            dst.x = saturate_cast<uchar>(buf.x);
+            dst.y = saturate_cast<uchar>(buf.y * 255.f);
+            dst.z = saturate_cast<uchar>(buf.z * 255.f);
+
+            return dst;
+        }
+    };
+
+    // HLS to RGB
+
+    __constant__ int c_HlsSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} };
+
+    template <typename T, int scn, int dcn, int bidx, int hr> struct HLS2RGB;
+
+    template <int scn, int dcn, int bidx, int hr> struct HLS2RGB<float, scn, dcn, bidx, hr>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float hscale = 6.0f / hr;
+
+            float h = src.x, l = src.y, s = src.z;
+            float b = l, g = l, r = l;
+
+            if (s != 0)
+            {
+                float p2  = (l <= 0.5f) * l * (1 + s);
+                      p2 += (l > 0.5f) * (l + s - l * s);
+                float p1 = 2 * l - p2;
+
+                h *= hscale;
+
+                if( h < 0 )
+                    do h += 6; while( h < 0 );
+                else if( h >= 6 )
+                    do h -= 6; while( h >= 6 );
+
+                int sector;
+                sector = __float2int_rd(h);
+
+                h -= sector;
+
+                float tab[4];
+                tab[0] = p2;
+                tab[1] = p1;
+                tab[2] = p1 + (p2 - p1) * (1 - h);
+                tab[3] = p1 + (p2 - p1) * h;
+
+                b = tab[c_HlsSectorData[sector][0]];
+                g = tab[c_HlsSectorData[sector][1]];
+                r = tab[c_HlsSectorData[sector][2]];
+            }
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = bidx == 0 ? b : r;
+            dst.y = g;
+            dst.z = bidx == 0 ? r : b;
+            setAlpha(dst, ColorChannel<float>::max());
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, int bidx, int hr> struct HLS2RGB<uchar, scn, dcn, bidx, hr>
+            : unary_function<typename MakeVec<uchar, scn>::type, typename MakeVec<uchar, dcn>::type>
+    {
+        __device__ typename MakeVec<uchar, dcn>::type operator ()(const typename MakeVec<uchar, scn>::type& src) const
+        {
+            float3 buf;
+
+            buf.x = src.x;
+            buf.y = src.y * (1.f / 255.f);
+            buf.z = src.z * (1.f / 255.f);
+
+            HLS2RGB<float, 3, 3, bidx, hr> cvtf;
+            buf = cvtf(buf);
+
+            typename MakeVec<uchar, dcn>::type dst;
+
+            dst.x = saturate_cast<uchar>(buf.x * 255.f);
+            dst.y = saturate_cast<uchar>(buf.y * 255.f);
+            dst.z = saturate_cast<uchar>(buf.z * 255.f);
+            setAlpha(dst, ColorChannel<uchar>::max());
+
+            return dst;
+        }
+    };
+
+    // RGB to Lab
+
+    enum
+    {
+        LAB_CBRT_TAB_SIZE = 1024,
+        GAMMA_TAB_SIZE = 1024,
+        lab_shift = xyz_shift,
+        gamma_shift = 3,
+        lab_shift2 = (lab_shift + gamma_shift),
+        LAB_CBRT_TAB_SIZE_B = (256 * 3 / 2 * (1 << gamma_shift))
+    };
+
+    __constant__ ushort c_sRGBGammaTab_b[] = {0,1,1,2,2,3,4,4,5,6,6,7,8,8,9,10,11,11,12,13,14,15,16,17,19,20,21,22,24,25,26,28,29,31,33,34,36,38,40,41,43,45,47,49,51,54,56,58,60,63,65,68,70,73,75,78,81,83,86,89,92,95,98,101,105,108,111,115,118,121,125,129,132,136,140,144,147,151,155,160,164,168,172,176,181,185,190,194,199,204,209,213,218,223,228,233,239,244,249,255,260,265,271,277,282,288,294,300,306,312,318,324,331,337,343,350,356,363,370,376,383,390,397,404,411,418,426,433,440,448,455,463,471,478,486,494,502,510,518,527,535,543,552,560,569,578,586,595,604,613,622,631,641,650,659,669,678,688,698,707,717,727,737,747,757,768,778,788,799,809,820,831,842,852,863,875,886,897,908,920,931,943,954,966,978,990,1002,1014,1026,1038,1050,1063,1075,1088,1101,1113,1126,1139,1152,1165,1178,1192,1205,1218,1232,1245,1259,1273,1287,1301,1315,1329,1343,1357,1372,1386,1401,1415,1430,1445,1460,1475,1490,1505,1521,1536,1551,1567,1583,1598,1614,1630,1646,1662,1678,1695,1711,1728,1744,1761,1778,1794,1811,1828,1846,1863,1880,1897,1915,1933,1950,1968,1986,2004,2022,2040};
+    __constant__ float c_sRGBGammaTab[] = {0,7.55853e-05,0.,-7.51331e-13,7.55853e-05,7.55853e-05,-2.25399e-12,3.75665e-12,0.000151171,7.55853e-05,9.01597e-12,-6.99932e-12,0.000226756,7.55853e-05,-1.1982e-11,2.41277e-12,0.000302341,7.55853e-05,-4.74369e-12,1.19001e-11,0.000377927,7.55853e-05,3.09568e-11,-2.09095e-11,0.000453512,7.55853e-05,-3.17718e-11,1.35303e-11,0.000529097,7.55853e-05,8.81905e-12,-4.10782e-12,0.000604683,7.55853e-05,-3.50439e-12,2.90097e-12,0.000680268,7.55853e-05,5.19852e-12,-7.49607e-12,0.000755853,7.55853e-05,-1.72897e-11,2.70833e-11,0.000831439,7.55854e-05,6.39602e-11,-4.26295e-11,0.000907024,7.55854e-05,-6.39282e-11,2.70193e-11,0.000982609,7.55853e-05,1.71298e-11,-7.24017e-12,0.00105819,7.55853e-05,-4.59077e-12,1.94137e-12,0.00113378,7.55853e-05,1.23333e-12,-5.25291e-13,0.00120937,7.55853e-05,-3.42545e-13,1.59799e-13,0.00128495,7.55853e-05,1.36852e-13,-1.13904e-13,0.00136054,7.55853e-05,-2.04861e-13,2.95818e-13,0.00143612,7.55853e-05,6.82594e-13,-1.06937e-12,0.00151171,7.55853e-05,-2.52551e-12,3.98166e-12,0.00158729,7.55853e-05,9.41946e-12,-1.48573e-11,0.00166288,7.55853e-05,-3.51523e-11,5.54474e-11,0.00173846,7.55854e-05,1.3119e-10,-9.0517e-11,0.00181405,7.55854e-05,-1.40361e-10,7.37899e-11,0.00188963,7.55853e-05,8.10085e-11,-8.82272e-11,0.00196522,7.55852e-05,-1.83673e-10,1.62704e-10,0.0020408,7.55853e-05,3.04438e-10,-2.13341e-10,0.00211639,7.55853e-05,-3.35586e-10,2.25e-10,0.00219197,7.55853e-05,3.39414e-10,-2.20997e-10,0.00226756,7.55853e-05,-3.23576e-10,1.93326e-10,0.00234315,7.55853e-05,2.564e-10,-8.66446e-11,0.00241873,7.55855e-05,-3.53328e-12,-7.9578e-11,0.00249432,7.55853e-05,-2.42267e-10,1.72126e-10,0.0025699,7.55853e-05,2.74111e-10,-1.43265e-10,0.00264549,7.55854e-05,-1.55683e-10,-6.47292e-11,0.00272107,7.55849e-05,-3.4987e-10,8.67842e-10,0.00279666,7.55868e-05,2.25366e-09,-3.8723e-09,0.00287224,7.55797e-05,-9.36325e-09,1.5087e-08,0.00294783,7.56063e-05,3.58978e-08,-5.69415e-08,0.00302341,7.55072e-05,-1.34927e-07,2.13144e-07,0.003099,7.58768e-05,5.04507e-07,1.38713e-07,0.00317552,7.7302e-05,9.20646e-07,-1.55186e-07,0.00325359,7.86777e-05,4.55087e-07,4.26813e-08,0.00333276,7.97159e-05,5.83131e-07,-1.06495e-08,0.00341305,8.08502e-05,5.51182e-07,3.87467e-09,0.00349446,8.19642e-05,5.62806e-07,-1.92586e-10,0.00357698,8.30892e-05,5.62228e-07,1.0866e-09,0.00366063,8.4217e-05,5.65488e-07,5.02818e-10,0.00374542,8.53494e-05,5.66997e-07,8.60211e-10,0.00383133,8.6486e-05,5.69577e-07,7.13044e-10,0.00391839,8.76273e-05,5.71716e-07,4.78527e-10,0.00400659,8.87722e-05,5.73152e-07,1.09818e-09,0.00409594,8.99218e-05,5.76447e-07,2.50964e-10,0.00418644,9.10754e-05,5.772e-07,1.15762e-09,0.00427809,9.22333e-05,5.80672e-07,2.40865e-10,0.0043709,9.33954e-05,5.81395e-07,1.13854e-09,0.00446488,9.45616e-05,5.84811e-07,3.27267e-10,0.00456003,9.57322e-05,5.85792e-07,8.1197e-10,0.00465635,9.69062e-05,5.88228e-07,6.15823e-10,0.00475384,9.80845e-05,5.90076e-07,9.15747e-10,0.00485252,9.92674e-05,5.92823e-07,3.778e-10,0.00495238,0.000100454,5.93956e-07,8.32623e-10,0.00505343,0.000101645,5.96454e-07,4.82695e-10,0.00515567,0.000102839,5.97902e-07,9.61904e-10,0.00525911,0.000104038,6.00788e-07,3.26281e-10,0.00536375,0.00010524,6.01767e-07,9.926e-10,0.00546959,0.000106447,6.04745e-07,3.59933e-10,0.00557664,0.000107657,6.05824e-07,8.2728e-10,0.0056849,0.000108871,6.08306e-07,5.21898e-10,0.00579438,0.00011009,6.09872e-07,8.10492e-10,0.00590508,0.000111312,6.12303e-07,4.27046e-10,0.00601701,0.000112538,6.13585e-07,7.40878e-10,0.00613016,0.000113767,6.15807e-07,8.00469e-10,0.00624454,0.000115001,6.18209e-07,2.48178e-10,0.00636016,0.000116238,6.18953e-07,1.00073e-09,0.00647702,0.000117479,6.21955e-07,4.05654e-10,0.00659512,0.000118724,6.23172e-07,6.36192e-10,0.00671447,0.000119973,6.25081e-07,7.74927e-10,0.00683507,0.000121225,6.27406e-07,4.54975e-10,0.00695692,0.000122481,6.28771e-07,6.64841e-10,0.00708003,0.000123741,6.30765e-07,6.10972e-10,0.00720441,0.000125004,6.32598e-07,6.16543e-10,0.00733004,0.000126271,6.34448e-07,6.48204e-10,0.00745695,0.000127542,6.36392e-07,5.15835e-10,0.00758513,0.000128816,6.3794e-07,5.48103e-10,0.00771458,0.000130094,6.39584e-07,1.01706e-09,0.00784532,0.000131376,6.42635e-07,4.0283e-11,0.00797734,0.000132661,6.42756e-07,6.84471e-10,0.00811064,0.000133949,6.4481e-07,9.47144e-10,0.00824524,0.000135241,6.47651e-07,1.83472e-10,0.00838112,0.000136537,6.48201e-07,1.11296e-09,0.00851831,0.000137837,6.5154e-07,2.13163e-11,0.0086568,0.00013914,6.51604e-07,6.64462e-10,0.00879659,0.000140445,6.53598e-07,1.04613e-09,0.00893769,0.000141756,6.56736e-07,-1.92377e-10,0.0090801,0.000143069,6.56159e-07,1.58601e-09,0.00922383,0.000144386,6.60917e-07,-5.63754e-10,0.00936888,0.000145706,6.59226e-07,1.60033e-09,0.00951524,0.000147029,6.64027e-07,-2.49543e-10,0.00966294,0.000148356,6.63278e-07,1.26043e-09,0.00981196,0.000149687,6.67059e-07,-1.35572e-10,0.00996231,0.00015102,6.66653e-07,1.14458e-09,0.010114,0.000152357,6.70086e-07,2.13864e-10,0.010267,0.000153698,6.70728e-07,7.93856e-10,0.0104214,0.000155042,6.73109e-07,3.36077e-10,0.0105771,0.000156389,6.74118e-07,6.55765e-10,0.0107342,0.000157739,6.76085e-07,7.66211e-10,0.0108926,0.000159094,6.78384e-07,4.66116e-12,0.0110524,0.000160451,6.78398e-07,1.07775e-09,0.0112135,0.000161811,6.81631e-07,3.41023e-10,0.011376,0.000163175,6.82654e-07,3.5205e-10,0.0115398,0.000164541,6.8371e-07,1.04473e-09,0.0117051,0.000165912,6.86844e-07,1.25757e-10,0.0118717,0.000167286,6.87222e-07,3.14818e-10,0.0120396,0.000168661,6.88166e-07,1.40886e-09,0.012209,0.000170042,6.92393e-07,-3.62244e-10,0.0123797,0.000171425,6.91306e-07,9.71397e-10,0.0125518,0.000172811,6.9422e-07,2.02003e-10,0.0127253,0.0001742,6.94826e-07,1.01448e-09,0.0129002,0.000175593,6.97869e-07,3.96653e-10,0.0130765,0.00017699,6.99059e-07,1.92927e-10,0.0132542,0.000178388,6.99638e-07,6.94305e-10,0.0134333,0.00017979,7.01721e-07,7.55108e-10,0.0136138,0.000181195,7.03986e-07,1.05918e-11,0.0137957,0.000182603,7.04018e-07,1.06513e-09,0.013979,0.000184015,7.07214e-07,3.85512e-10,0.0141637,0.00018543,7.0837e-07,1.86769e-10,0.0143499,0.000186848,7.0893e-07,7.30116e-10,0.0145374,0.000188268,7.11121e-07,6.17983e-10,0.0147264,0.000189692,7.12975e-07,5.23282e-10,0.0149168,0.000191119,7.14545e-07,8.28398e-11,0.0151087,0.000192549,7.14793e-07,1.0081e-09,0.0153019,0.000193981,7.17817e-07,5.41244e-10,0.0154966,0.000195418,7.19441e-07,-3.7907e-10,0.0156928,0.000196856,7.18304e-07,1.90641e-09,0.0158903,0.000198298,7.24023e-07,-7.27387e-10,0.0160893,0.000199744,7.21841e-07,1.00317e-09,0.0162898,0.000201191,7.24851e-07,4.39949e-10,0.0164917,0.000202642,7.2617e-07,9.6234e-10,0.0166951,0.000204097,7.29057e-07,-5.64019e-10,0.0168999,0.000205554,7.27365e-07,1.29374e-09,0.0171062,0.000207012,7.31247e-07,9.77025e-10,0.017314,0.000208478,7.34178e-07,-1.47651e-09,0.0175232,0.000209942,7.29748e-07,3.06636e-09,0.0177338,0.00021141,7.38947e-07,-1.47573e-09,0.017946,0.000212884,7.3452e-07,9.7386e-10,0.0181596,0.000214356,7.37442e-07,1.30562e-09,0.0183747,0.000215835,7.41358e-07,-6.08376e-10,0.0185913,0.000217315,7.39533e-07,1.12785e-09,0.0188093,0.000218798,7.42917e-07,-1.77711e-10,0.0190289,0.000220283,7.42384e-07,1.44562e-09,0.0192499,0.000221772,7.46721e-07,-1.68825e-11,0.0194724,0.000223266,7.4667e-07,4.84533e-10,0.0196964,0.000224761,7.48124e-07,-5.85298e-11,0.0199219,0.000226257,7.47948e-07,1.61217e-09,0.0201489,0.000227757,7.52785e-07,-8.02136e-10,0.0203775,0.00022926,7.50378e-07,1.59637e-09,0.0206075,0.000230766,7.55167e-07,4.47168e-12,0.020839,0.000232276,7.55181e-07,2.48387e-10,0.021072,0.000233787,7.55926e-07,8.6474e-10,0.0213066,0.000235302,7.5852e-07,1.78299e-11,0.0215426,0.000236819,7.58573e-07,9.26567e-10,0.0217802,0.000238339,7.61353e-07,1.34529e-12,0.0220193,0.000239862,7.61357e-07,9.30659e-10,0.0222599,0.000241387,7.64149e-07,1.34529e-12,0.0225021,0.000242915,7.64153e-07,9.26567e-10,0.0227458,0.000244447,7.66933e-07,1.76215e-11,0.022991,0.00024598,7.66986e-07,8.65536e-10,0.0232377,0.000247517,7.69582e-07,2.45677e-10,0.023486,0.000249057,7.70319e-07,1.44193e-11,0.0237358,0.000250598,7.70363e-07,1.55918e-09,0.0239872,0.000252143,7.7504e-07,-6.63173e-10,0.0242401,0.000253691,7.73051e-07,1.09357e-09,0.0244946,0.000255241,7.76331e-07,1.41919e-11,0.0247506,0.000256793,7.76374e-07,7.12248e-10,0.0250082,0.000258348,7.78511e-07,8.62049e-10,0.0252673,0.000259908,7.81097e-07,-4.35061e-10,0.025528,0.000261469,7.79792e-07,8.7825e-10,0.0257902,0.000263031,7.82426e-07,6.47181e-10,0.0260541,0.000264598,7.84368e-07,2.58448e-10,0.0263194,0.000266167,7.85143e-07,1.81558e-10,0.0265864,0.000267738,7.85688e-07,8.78041e-10,0.0268549,0.000269312,7.88322e-07,3.15102e-11,0.027125,0.000270889,7.88417e-07,8.58525e-10,0.0273967,0.000272468,7.90992e-07,2.59812e-10,0.02767,0.000274051,7.91772e-07,-3.5224e-11,0.0279448,0.000275634,7.91666e-07,1.74377e-09,0.0282212,0.000277223,7.96897e-07,-1.35196e-09,0.0284992,0.000278813,7.92841e-07,1.80141e-09,0.0287788,0.000280404,7.98246e-07,-2.65629e-10,0.0290601,0.000281999,7.97449e-07,1.12374e-09,0.0293428,0.000283598,8.0082e-07,-5.04106e-10,0.0296272,0.000285198,7.99308e-07,8.92764e-10,0.0299132,0.000286799,8.01986e-07,6.58379e-10,0.0302008,0.000288405,8.03961e-07,1.98971e-10,0.0304901,0.000290014,8.04558e-07,4.08382e-10,0.0307809,0.000291624,8.05783e-07,3.01839e-11,0.0310733,0.000293236,8.05874e-07,1.33343e-09,0.0313673,0.000294851,8.09874e-07,2.2419e-10,0.031663,0.000296472,8.10547e-07,-3.67606e-10,0.0319603,0.000298092,8.09444e-07,1.24624e-09,0.0322592,0.000299714,8.13182e-07,-8.92025e-10,0.0325597,0.000301338,8.10506e-07,2.32183e-09,0.0328619,0.000302966,8.17472e-07,-9.44719e-10,0.0331657,0.000304598,8.14638e-07,1.45703e-09,0.0334711,0.000306232,8.19009e-07,-1.15805e-09,0.0337781,0.000307866,8.15535e-07,3.17507e-09,0.0340868,0.000309507,8.2506e-07,-4.09161e-09,0.0343971,0.000311145,8.12785e-07,5.74079e-09,0.0347091,0.000312788,8.30007e-07,-3.97034e-09,0.0350227,0.000314436,8.18096e-07,2.68985e-09,0.035338,0.00031608,8.26166e-07,6.61676e-10,0.0356549,0.000317734,8.28151e-07,-1.61123e-09,0.0359734,0.000319386,8.23317e-07,2.05786e-09,0.0362936,0.000321038,8.29491e-07,8.30388e-10,0.0366155,0.0003227,8.31982e-07,-1.65424e-09,0.036939,0.000324359,8.27019e-07,2.06129e-09,0.0372642,0.000326019,8.33203e-07,8.59719e-10,0.0375911,0.000327688,8.35782e-07,-1.77488e-09,0.0379196,0.000329354,8.30458e-07,2.51464e-09,0.0382498,0.000331023,8.38002e-07,-8.33135e-10,0.0385817,0.000332696,8.35502e-07,8.17825e-10,0.0389152,0.00033437,8.37956e-07,1.28718e-09,0.0392504,0.00033605,8.41817e-07,-2.2413e-09,0.0395873,0.000337727,8.35093e-07,3.95265e-09,0.0399258,0.000339409,8.46951e-07,-2.39332e-09,0.0402661,0.000341095,8.39771e-07,1.89533e-09,0.040608,0.000342781,8.45457e-07,-1.46271e-09,0.0409517,0.000344467,8.41069e-07,3.95554e-09,0.041297,0.000346161,8.52936e-07,-3.18369e-09,0.041644,0.000347857,8.43385e-07,1.32873e-09,0.0419927,0.000349548,8.47371e-07,1.59402e-09,0.0423431,0.000351248,8.52153e-07,-2.54336e-10,0.0426952,0.000352951,8.5139e-07,-5.76676e-10,0.043049,0.000354652,8.4966e-07,2.56114e-09,0.0434045,0.000356359,8.57343e-07,-2.21744e-09,0.0437617,0.000358067,8.50691e-07,2.58344e-09,0.0441206,0.000359776,8.58441e-07,-6.65826e-10,0.0444813,0.000361491,8.56444e-07,7.99218e-11,0.0448436,0.000363204,8.56684e-07,3.46063e-10,0.0452077,0.000364919,8.57722e-07,2.26116e-09,0.0455734,0.000366641,8.64505e-07,-1.94005e-09,0.045941,0.000368364,8.58685e-07,1.77384e-09,0.0463102,0.000370087,8.64007e-07,-1.43005e-09,0.0466811,0.000371811,8.59717e-07,3.94634e-09,0.0470538,0.000373542,8.71556e-07,-3.17946e-09,0.0474282,0.000375276,8.62017e-07,1.32104e-09,0.0478043,0.000377003,8.6598e-07,1.62045e-09,0.0481822,0.00037874,8.70842e-07,-3.52297e-10,0.0485618,0.000380481,8.69785e-07,-2.11211e-10,0.0489432,0.00038222,8.69151e-07,1.19716e-09,0.0493263,0.000383962,8.72743e-07,-8.52026e-10,0.0497111,0.000385705,8.70187e-07,2.21092e-09,0.0500977,0.000387452,8.76819e-07,-5.41339e-10,0.050486,0.000389204,8.75195e-07,-4.5361e-11,0.0508761,0.000390954,8.75059e-07,7.22669e-10,0.0512679,0.000392706,8.77227e-07,8.79936e-10,0.0516615,0.000394463,8.79867e-07,-5.17048e-10,0.0520568,0.000396222,8.78316e-07,1.18833e-09,0.0524539,0.000397982,8.81881e-07,-5.11022e-10,0.0528528,0.000399744,8.80348e-07,8.55683e-10,0.0532534,0.000401507,8.82915e-07,8.13562e-10,0.0536558,0.000403276,8.85356e-07,-3.84603e-10,0.05406,0.000405045,8.84202e-07,7.24962e-10,0.0544659,0.000406816,8.86377e-07,1.20986e-09,0.0548736,0.000408592,8.90006e-07,-1.83896e-09,0.0552831,0.000410367,8.84489e-07,2.42071e-09,0.0556944,0.000412143,8.91751e-07,-3.93413e-10,0.0561074,0.000413925,8.90571e-07,-8.46967e-10,0.0565222,0.000415704,8.8803e-07,3.78122e-09,0.0569388,0.000417491,8.99374e-07,-3.1021e-09,0.0573572,0.000419281,8.90068e-07,1.17658e-09,0.0577774,0.000421064,8.93597e-07,2.12117e-09,0.0581993,0.000422858,8.99961e-07,-2.21068e-09,0.0586231,0.000424651,8.93329e-07,2.9961e-09,0.0590486,0.000426447,9.02317e-07,-2.32311e-09,0.059476,0.000428244,8.95348e-07,2.57122e-09,0.0599051,0.000430043,9.03062e-07,-5.11098e-10,0.0603361,0.000431847,9.01528e-07,-5.27166e-10,0.0607688,0.000433649,8.99947e-07,2.61984e-09,0.0612034,0.000435457,9.07806e-07,-2.50141e-09,0.0616397,0.000437265,9.00302e-07,3.66045e-09,0.0620779,0.000439076,9.11283e-07,-4.68977e-09,0.0625179,0.000440885,8.97214e-07,7.64783e-09,0.0629597,0.000442702,9.20158e-07,-7.27499e-09,0.0634033,0.000444521,8.98333e-07,6.55113e-09,0.0638487,0.000446337,9.17986e-07,-4.02844e-09,0.0642959,0.000448161,9.05901e-07,2.11196e-09,0.064745,0.000449979,9.12236e-07,3.03125e-09,0.0651959,0.000451813,9.2133e-07,-6.78648e-09,0.0656486,0.000453635,9.00971e-07,9.21375e-09,0.0661032,0.000455464,9.28612e-07,-7.71684e-09,0.0665596,0.000457299,9.05462e-07,6.7522e-09,0.0670178,0.00045913,9.25718e-07,-4.3907e-09,0.0674778,0.000460968,9.12546e-07,3.36e-09,0.0679397,0.000462803,9.22626e-07,-1.59876e-09,0.0684034,0.000464644,9.1783e-07,3.0351e-09,0.068869,0.000466488,9.26935e-07,-3.09101e-09,0.0693364,0.000468333,9.17662e-07,1.8785e-09,0.0698057,0.000470174,9.23298e-07,3.02733e-09,0.0702768,0.00047203,9.3238e-07,-6.53722e-09,0.0707497,0.000473875,9.12768e-07,8.22054e-09,0.0712245,0.000475725,9.37429e-07,-3.99325e-09,0.0717012,0.000477588,9.2545e-07,3.01839e-10,0.0721797,0.00047944,9.26355e-07,2.78597e-09,0.0726601,0.000481301,9.34713e-07,-3.99507e-09,0.0731423,0.000483158,9.22728e-07,5.7435e-09,0.0736264,0.000485021,9.39958e-07,-4.07776e-09,0.0741123,0.000486888,9.27725e-07,3.11695e-09,0.0746002,0.000488753,9.37076e-07,-9.39394e-10,0.0750898,0.000490625,9.34258e-07,6.4055e-10,0.0755814,0.000492495,9.3618e-07,-1.62265e-09,0.0760748,0.000494363,9.31312e-07,5.84995e-09,0.0765701,0.000496243,9.48861e-07,-6.87601e-09,0.0770673,0.00049812,9.28233e-07,6.75296e-09,0.0775664,0.000499997,9.48492e-07,-5.23467e-09,0.0780673,0.000501878,9.32788e-07,6.73523e-09,0.0785701,0.000503764,9.52994e-07,-6.80514e-09,0.0790748,0.000505649,9.32578e-07,5.5842e-09,0.0795814,0.000507531,9.49331e-07,-6.30583e-10,0.0800899,0.000509428,9.47439e-07,-3.0618e-09,0.0806003,0.000511314,9.38254e-07,5.4273e-09,0.0811125,0.000513206,9.54536e-07,-3.74627e-09,0.0816267,0.000515104,9.43297e-07,2.10713e-09,0.0821427,0.000516997,9.49618e-07,2.76839e-09,0.0826607,0.000518905,9.57924e-07,-5.73006e-09,0.0831805,0.000520803,9.40733e-07,5.25072e-09,0.0837023,0.0005227,9.56486e-07,-3.71718e-10,0.084226,0.000524612,9.5537e-07,-3.76404e-09,0.0847515,0.000526512,9.44078e-07,7.97735e-09,0.085279,0.000528424,9.6801e-07,-5.79367e-09,0.0858084,0.000530343,9.50629e-07,2.96268e-10,0.0863397,0.000532245,9.51518e-07,4.6086e-09,0.0868729,0.000534162,9.65344e-07,-3.82947e-09,0.087408,0.000536081,9.53856e-07,3.25861e-09,0.087945,0.000537998,9.63631e-07,-1.7543e-09,0.088484,0.00053992,9.58368e-07,3.75849e-09,0.0890249,0.000541848,9.69644e-07,-5.82891e-09,0.0895677,0.00054377,9.52157e-07,4.65593e-09,0.0901124,0.000545688,9.66125e-07,2.10643e-09,0.0906591,0.000547627,9.72444e-07,-5.63099e-09,0.0912077,0.000549555,9.55551e-07,5.51627e-09,0.0917582,0.000551483,9.721e-07,-1.53292e-09,0.0923106,0.000553422,9.67501e-07,6.15311e-10,0.092865,0.000555359,9.69347e-07,-9.28291e-10,0.0934213,0.000557295,9.66562e-07,3.09774e-09,0.0939796,0.000559237,9.75856e-07,-4.01186e-09,0.0945398,0.000561177,9.6382e-07,5.49892e-09,0.095102,0.000563121,9.80317e-07,-3.08258e-09,0.0956661,0.000565073,9.71069e-07,-6.19176e-10,0.0962321,0.000567013,9.69212e-07,5.55932e-09,0.0968001,0.000568968,9.8589e-07,-6.71704e-09,0.09737,0.00057092,9.65738e-07,6.40762e-09,0.0979419,0.00057287,9.84961e-07,-4.0122e-09,0.0985158,0.000574828,9.72925e-07,2.19059e-09,0.0990916,0.000576781,9.79496e-07,2.70048e-09,0.0996693,0.000578748,9.87598e-07,-5.54193e-09,0.100249,0.000580706,9.70972e-07,4.56597e-09,0.100831,0.000582662,9.8467e-07,2.17923e-09,0.101414,0.000584638,9.91208e-07,-5.83232e-09,0.102,0.000586603,9.73711e-07,6.24884e-09,0.102588,0.000588569,9.92457e-07,-4.26178e-09,0.103177,0.000590541,9.79672e-07,3.34781e-09,0.103769,0.00059251,9.89715e-07,-1.67904e-09,0.104362,0.000594485,9.84678e-07,3.36839e-09,0.104958,0.000596464,9.94783e-07,-4.34397e-09,0.105555,0.000598441,9.81751e-07,6.55696e-09,0.106155,0.000600424,1.00142e-06,-6.98272e-09,0.106756,0.000602406,9.80474e-07,6.4728e-09,0.107359,0.000604386,9.99893e-07,-4.00742e-09,0.107965,0.000606374,9.8787e-07,2.10654e-09,0.108572,0.000608356,9.9419e-07,3.0318e-09,0.109181,0.000610353,1.00329e-06,-6.7832e-09,0.109793,0.00061234,9.82936e-07,9.1998e-09,0.110406,0.000614333,1.01054e-06,-7.6642e-09,0.111021,0.000616331,9.87543e-07,6.55579e-09,0.111639,0.000618326,1.00721e-06,-3.65791e-09,0.112258,0.000620329,9.96236e-07,6.25467e-10,0.112879,0.000622324,9.98113e-07,1.15593e-09,0.113503,0.000624323,1.00158e-06,2.20158e-09,0.114128,0.000626333,1.00819e-06,-2.51191e-09,0.114755,0.000628342,1.00065e-06,3.95517e-10,0.115385,0.000630345,1.00184e-06,9.29807e-10,0.116016,0.000632351,1.00463e-06,3.33599e-09,0.116649,0.00063437,1.01463e-06,-6.82329e-09,0.117285,0.000636379,9.94163e-07,9.05595e-09,0.117922,0.000638395,1.02133e-06,-7.04862e-09,0.118562,0.000640416,1.00019e-06,4.23737e-09,0.119203,0.000642429,1.0129e-06,-2.45033e-09,0.119847,0.000644448,1.00555e-06,5.56395e-09,0.120492,0.000646475,1.02224e-06,-4.9043e-09,0.121139,0.000648505,1.00753e-06,-8.47952e-10,0.121789,0.000650518,1.00498e-06,8.29622e-09,0.122441,0.000652553,1.02987e-06,-9.98538e-09,0.123094,0.000654582,9.99914e-07,9.2936e-09,0.12375,0.00065661,1.02779e-06,-4.83707e-09,0.124407,0.000658651,1.01328e-06,2.60411e-09,0.125067,0.000660685,1.0211e-06,-5.57945e-09,0.125729,0.000662711,1.00436e-06,1.22631e-08,0.126392,0.000664756,1.04115e-06,-1.36704e-08,0.127058,0.000666798,1.00014e-06,1.26161e-08,0.127726,0.000668836,1.03798e-06,-6.99155e-09,0.128396,0.000670891,1.01701e-06,4.48836e-10,0.129068,0.000672926,1.01836e-06,5.19606e-09,0.129742,0.000674978,1.03394e-06,-6.3319e-09,0.130418,0.000677027,1.01495e-06,5.2305e-09,0.131096,0.000679073,1.03064e-06,3.11123e-10,0.131776,0.000681135,1.03157e-06,-6.47511e-09,0.132458,0.000683179,1.01215e-06,1.06882e-08,0.133142,0.000685235,1.04421e-06,-6.47519e-09,0.133829,0.000687304,1.02479e-06,3.11237e-10,0.134517,0.000689355,1.02572e-06,5.23035e-09,0.135207,0.000691422,1.04141e-06,-6.3316e-09,0.1359,0.000693486,1.02242e-06,5.19484e-09,0.136594,0.000695546,1.038e-06,4.53497e-10,0.137291,0.000697623,1.03936e-06,-7.00891e-09,0.137989,0.000699681,1.01834e-06,1.2681e-08,0.13869,0.000701756,1.05638e-06,-1.39128e-08,0.139393,0.000703827,1.01464e-06,1.31679e-08,0.140098,0.000705896,1.05414e-06,-8.95659e-09,0.140805,0.000707977,1.02727e-06,7.75742e-09,0.141514,0.000710055,1.05055e-06,-7.17182e-09,0.142225,0.000712135,1.02903e-06,6.02862e-09,0.142938,0.000714211,1.04712e-06,-2.04163e-09,0.143653,0.000716299,1.04099e-06,2.13792e-09,0.144371,0.000718387,1.04741e-06,-6.51009e-09,0.14509,0.000720462,1.02787e-06,9.00123e-09,0.145812,0.000722545,1.05488e-06,3.07523e-10,0.146535,0.000724656,1.0558e-06,-1.02312e-08,0.147261,0.000726737,1.02511e-06,1.0815e-08,0.147989,0.000728819,1.05755e-06,-3.22681e-09,0.148719,0.000730925,1.04787e-06,2.09244e-09,0.14945,0.000733027,1.05415e-06,-5.143e-09,0.150185,0.00073512,1.03872e-06,3.57844e-09,0.150921,0.000737208,1.04946e-06,5.73027e-09,0.151659,0.000739324,1.06665e-06,-1.15983e-08,0.152399,0.000741423,1.03185e-06,1.08605e-08,0.153142,0.000743519,1.06443e-06,-2.04106e-09,0.153886,0.000745642,1.05831e-06,-2.69642e-09,0.154633,0.00074775,1.05022e-06,-2.07425e-09,0.155382,0.000749844,1.044e-06,1.09934e-08,0.156133,0.000751965,1.07698e-06,-1.20972e-08,0.156886,0.000754083,1.04069e-06,7.59288e-09,0.157641,0.000756187,1.06347e-06,-3.37305e-09,0.158398,0.000758304,1.05335e-06,5.89921e-09,0.159158,0.000760428,1.07104e-06,-5.32248e-09,0.159919,0.000762554,1.05508e-06,4.8927e-10,0.160683,0.000764666,1.05654e-06,3.36547e-09,0.161448,0.000766789,1.06664e-06,9.50081e-10,0.162216,0.000768925,1.06949e-06,-7.16568e-09,0.162986,0.000771043,1.04799e-06,1.28114e-08,0.163758,0.000773177,1.08643e-06,-1.42774e-08,0.164533,0.000775307,1.0436e-06,1.44956e-08,0.165309,0.000777438,1.08708e-06,-1.39025e-08,0.166087,0.00077957,1.04538e-06,1.13118e-08,0.166868,0.000781695,1.07931e-06,-1.54224e-09,0.167651,0.000783849,1.07468e-06,-5.14312e-09,0.168436,0.000785983,1.05925e-06,7.21381e-09,0.169223,0.000788123,1.0809e-06,-8.81096e-09,0.170012,0.000790259,1.05446e-06,1.31289e-08,0.170803,0.000792407,1.09385e-06,-1.39022e-08,0.171597,0.000794553,1.05214e-06,1.26775e-08,0.172392,0.000796695,1.09018e-06,-7.00557e-09,0.17319,0.000798855,1.06916e-06,4.43796e-10,0.17399,0.000800994,1.07049e-06,5.23031e-09,0.174792,0.000803151,1.08618e-06,-6.46397e-09,0.175596,0.000805304,1.06679e-06,5.72444e-09,0.176403,0.000807455,1.08396e-06,-1.53254e-09,0.177211,0.000809618,1.07937e-06,4.05673e-10,0.178022,0.000811778,1.08058e-06,-9.01916e-11,0.178835,0.000813939,1.08031e-06,-4.49821e-11,0.17965,0.000816099,1.08018e-06,2.70234e-10,0.180467,0.00081826,1.08099e-06,-1.03603e-09,0.181286,0.000820419,1.07788e-06,3.87392e-09,0.182108,0.000822587,1.0895e-06,4.41522e-10,0.182932,0.000824767,1.09083e-06,-5.63997e-09,0.183758,0.000826932,1.07391e-06,7.21707e-09,0.184586,0.000829101,1.09556e-06,-8.32718e-09,0.185416,0.000831267,1.07058e-06,1.11907e-08,0.186248,0.000833442,1.10415e-06,-6.63336e-09,0.187083,0.00083563,1.08425e-06,4.41484e-10,0.187919,0.0008378,1.08557e-06,4.86754e-09,0.188758,0.000839986,1.10017e-06,-5.01041e-09,0.189599,0.000842171,1.08514e-06,2.72811e-10,0.190443,0.000844342,1.08596e-06,3.91916e-09,0.191288,0.000846526,1.09772e-06,-1.04819e-09,0.192136,0.000848718,1.09457e-06,2.73531e-10,0.192985,0.000850908,1.0954e-06,-4.58916e-11,0.193837,0.000853099,1.09526e-06,-9.01158e-11,0.194692,0.000855289,1.09499e-06,4.06506e-10,0.195548,0.00085748,1.09621e-06,-1.53595e-09,0.196407,0.000859668,1.0916e-06,5.73717e-09,0.197267,0.000861869,1.10881e-06,-6.51164e-09,0.19813,0.000864067,1.08928e-06,5.40831e-09,0.198995,0.000866261,1.1055e-06,-2.20401e-10,0.199863,0.000868472,1.10484e-06,-4.52652e-09,0.200732,0.000870668,1.09126e-06,3.42508e-09,0.201604,0.000872861,1.10153e-06,5.72762e-09,0.202478,0.000875081,1.11872e-06,-1.14344e-08,0.203354,0.000877284,1.08441e-06,1.02076e-08,0.204233,0.000879484,1.11504e-06,4.06355e-10,0.205113,0.000881715,1.11626e-06,-1.18329e-08,0.205996,0.000883912,1.08076e-06,1.71227e-08,0.206881,0.000886125,1.13213e-06,-1.19546e-08,0.207768,0.000888353,1.09626e-06,8.93465e-10,0.208658,0.000890548,1.09894e-06,8.38062e-09,0.209549,0.000892771,1.12408e-06,-4.61353e-09,0.210443,0.000895006,1.11024e-06,-4.82756e-09,0.211339,0.000897212,1.09576e-06,9.02245e-09,0.212238,0.00089943,1.12283e-06,-1.45997e-09,0.213138,0.000901672,1.11845e-06,-3.18255e-09,0.214041,0.000903899,1.1089e-06,-7.11073e-10,0.214946,0.000906115,1.10677e-06,6.02692e-09,0.215853,0.000908346,1.12485e-06,-8.49548e-09,0.216763,0.00091057,1.09936e-06,1.30537e-08,0.217675,0.000912808,1.13852e-06,-1.3917e-08,0.218588,0.000915044,1.09677e-06,1.28121e-08,0.219505,0.000917276,1.13521e-06,-7.5288e-09,0.220423,0.000919523,1.11262e-06,2.40205e-09,0.221344,0.000921756,1.11983e-06,-2.07941e-09,0.222267,0.000923989,1.11359e-06,5.91551e-09,0.223192,0.000926234,1.13134e-06,-6.68149e-09,0.224119,0.000928477,1.11129e-06,5.90929e-09,0.225049,0.000930717,1.12902e-06,-2.05436e-09,0.22598,0.000932969,1.12286e-06,2.30807e-09,0.226915,0.000935222,1.12978e-06,-7.17796e-09,0.227851,0.00093746,1.10825e-06,1.15028e-08,0.228789,0.000939711,1.14276e-06,-9.03083e-09,0.22973,0.000941969,1.11566e-06,9.71932e-09,0.230673,0.00094423,1.14482e-06,-1.49452e-08,0.231619,0.000946474,1.09998e-06,2.02591e-08,0.232566,0.000948735,1.16076e-06,-2.13879e-08,0.233516,0.000950993,1.0966e-06,2.05888e-08,0.234468,0.000953247,1.15837e-06,-1.62642e-08,0.235423,0.000955515,1.10957e-06,1.46658e-08,0.236379,0.000957779,1.15357e-06,-1.25966e-08,0.237338,0.000960048,1.11578e-06,5.91793e-09,0.238299,0.000962297,1.13353e-06,3.82602e-09,0.239263,0.000964576,1.14501e-06,-6.3208e-09,0.240229,0.000966847,1.12605e-06,6.55613e-09,0.241197,0.000969119,1.14572e-06,-5.00268e-09,0.242167,0.000971395,1.13071e-06,-1.44659e-09,0.243139,0.000973652,1.12637e-06,1.07891e-08,0.244114,0.000975937,1.15874e-06,-1.19073e-08,0.245091,0.000978219,1.12302e-06,7.03782e-09,0.246071,0.000980486,1.14413e-06,-1.34276e-09,0.247052,0.00098277,1.1401e-06,-1.66669e-09,0.248036,0.000985046,1.1351e-06,8.00935e-09,0.249022,0.00098734,1.15913e-06,-1.54694e-08,0.250011,0.000989612,1.11272e-06,2.4066e-08,0.251002,0.000991909,1.18492e-06,-2.11901e-08,0.251995,0.000994215,1.12135e-06,1.08973e-09,0.25299,0.000996461,1.12462e-06,1.68311e-08,0.253988,0.000998761,1.17511e-06,-8.8094e-09,0.254987,0.00100109,1.14868e-06,-1.13958e-08,0.25599,0.00100335,1.1145e-06,2.45902e-08,0.256994,0.00100565,1.18827e-06,-2.73603e-08,0.258001,0.00100795,1.10618e-06,2.52464e-08,0.25901,0.00101023,1.18192e-06,-1.40207e-08,0.260021,0.00101256,1.13986e-06,1.03387e-09,0.261035,0.00101484,1.14296e-06,9.8853e-09,0.262051,0.00101715,1.17262e-06,-1.07726e-08,0.263069,0.00101947,1.1403e-06,3.40272e-09,0.26409,0.00102176,1.15051e-06,-2.83827e-09,0.265113,0.00102405,1.142e-06,7.95039e-09,0.266138,0.00102636,1.16585e-06,8.39047e-10,0.267166,0.00102869,1.16836e-06,-1.13066e-08,0.268196,0.00103099,1.13444e-06,1.4585e-08,0.269228,0.00103331,1.1782e-06,-1.72314e-08,0.270262,0.00103561,1.1265e-06,2.45382e-08,0.271299,0.00103794,1.20012e-06,-2.13166e-08,0.272338,0.00104028,1.13617e-06,1.12364e-09,0.273379,0.00104255,1.13954e-06,1.68221e-08,0.274423,0.00104488,1.19001e-06,-8.80736e-09,0.275469,0.00104723,1.16358e-06,-1.13948e-08,0.276518,0.00104953,1.1294e-06,2.45839e-08,0.277568,0.00105186,1.20315e-06,-2.73361e-08,0.278621,0.00105418,1.12114e-06,2.51559e-08,0.279677,0.0010565,1.19661e-06,-1.36832e-08,0.280734,0.00105885,1.15556e-06,-2.25706e-10,0.281794,0.00106116,1.15488e-06,1.45862e-08,0.282857,0.00106352,1.19864e-06,-2.83167e-08,0.283921,0.00106583,1.11369e-06,3.90759e-08,0.284988,0.00106817,1.23092e-06,-3.85801e-08,0.286058,0.00107052,1.11518e-06,2.58375e-08,0.287129,0.00107283,1.19269e-06,-5.16498e-09,0.288203,0.0010752,1.1772e-06,-5.17768e-09,0.28928,0.00107754,1.16167e-06,-3.92671e-09,0.290358,0.00107985,1.14988e-06,2.08846e-08,0.29144,0.00108221,1.21254e-06,-2.00072e-08,0.292523,0.00108458,1.15252e-06,-4.60659e-10,0.293609,0.00108688,1.15114e-06,2.18499e-08,0.294697,0.00108925,1.21669e-06,-2.73343e-08,0.295787,0.0010916,1.13468e-06,2.78826e-08,0.29688,0.00109395,1.21833e-06,-2.45915e-08,0.297975,0.00109632,1.14456e-06,1.08787e-08,0.299073,0.00109864,1.17719e-06,1.08788e-08,0.300172,0.00110102,1.20983e-06,-2.45915e-08,0.301275,0.00110337,1.13605e-06,2.78828e-08,0.302379,0.00110573,1.2197e-06,-2.73348e-08,0.303486,0.00110808,1.1377e-06,2.18518e-08,0.304595,0.00111042,1.20325e-06,-4.67556e-10,0.305707,0.00111283,1.20185e-06,-1.99816e-08,0.306821,0.00111517,1.14191e-06,2.07891e-08,0.307937,0.00111752,1.20427e-06,-3.57026e-09,0.309056,0.00111992,1.19356e-06,-6.50797e-09,0.310177,0.00112228,1.17404e-06,-2.00165e-10,0.3113,0.00112463,1.17344e-06,7.30874e-09,0.312426,0.001127,1.19536e-06,7.67424e-10,0.313554,0.00112939,1.19767e-06,-1.03784e-08,0.314685,0.00113176,1.16653e-06,1.09437e-08,0.315818,0.00113412,1.19936e-06,-3.59406e-09,0.316953,0.00113651,1.18858e-06,3.43251e-09,0.318091,0.0011389,1.19888e-06,-1.0136e-08,0.319231,0.00114127,1.16847e-06,7.30915e-09,0.320374,0.00114363,1.1904e-06,1.07018e-08,0.321518,0.00114604,1.2225e-06,-2.03137e-08,0.322666,0.00114842,1.16156e-06,1.09484e-08,0.323815,0.00115078,1.19441e-06,6.32224e-09,0.324967,0.00115319,1.21337e-06,-6.43509e-09,0.326122,0.00115559,1.19407e-06,-1.03842e-08,0.327278,0.00115795,1.16291e-06,1.81697e-08,0.328438,0.00116033,1.21742e-06,-2.6901e-09,0.329599,0.00116276,1.20935e-06,-7.40939e-09,0.330763,0.00116515,1.18713e-06,2.52533e-09,0.331929,0.00116754,1.1947e-06,-2.69191e-09,0.333098,0.00116992,1.18663e-06,8.24218e-09,0.334269,0.00117232,1.21135e-06,-4.74377e-10,0.335443,0.00117474,1.20993e-06,-6.34471e-09,0.336619,0.00117714,1.1909e-06,-3.94922e-09,0.337797,0.00117951,1.17905e-06,2.21417e-08,0.338978,0.00118193,1.24547e-06,-2.50128e-08,0.340161,0.00118435,1.17043e-06,1.8305e-08,0.341346,0.00118674,1.22535e-06,-1.84048e-08,0.342534,0.00118914,1.17013e-06,2.55121e-08,0.343725,0.00119156,1.24667e-06,-2.40389e-08,0.344917,0.00119398,1.17455e-06,1.10389e-08,0.346113,0.00119636,1.20767e-06,9.68574e-09,0.34731,0.0011988,1.23673e-06,-1.99797e-08,0.34851,0.00120122,1.17679e-06,1.06284e-08,0.349713,0.0012036,1.20867e-06,7.26868e-09,0.350917,0.00120604,1.23048e-06,-9.90072e-09,0.352125,0.00120847,1.20078e-06,2.53177e-09,0.353334,0.00121088,1.20837e-06,-2.26199e-10,0.354546,0.0012133,1.20769e-06,-1.62705e-09,0.355761,0.00121571,1.20281e-06,6.73435e-09,0.356978,0.00121813,1.22302e-06,4.49207e-09,0.358197,0.00122059,1.23649e-06,-2.47027e-08,0.359419,0.00122299,1.16238e-06,3.47142e-08,0.360643,0.00122542,1.26653e-06,-2.47472e-08,0.36187,0.00122788,1.19229e-06,4.66965e-09,0.363099,0.00123028,1.20629e-06,6.06872e-09,0.36433,0.00123271,1.2245e-06,8.57729e-10,0.365564,0.00123516,1.22707e-06,-9.49952e-09,0.366801,0.00123759,1.19858e-06,7.33792e-09,0.36804,0.00124001,1.22059e-06,9.95025e-09,0.369281,0.00124248,1.25044e-06,-1.73366e-08,0.370525,0.00124493,1.19843e-06,-2.08464e-10,0.371771,0.00124732,1.1978e-06,1.81704e-08,0.373019,0.00124977,1.25232e-06,-1.28683e-08,0.37427,0.00125224,1.21371e-06,3.50042e-09,0.375524,0.00125468,1.22421e-06,-1.1335e-09,0.37678,0.00125712,1.22081e-06,1.03345e-09,0.378038,0.00125957,1.22391e-06,-3.00023e-09,0.379299,0.00126201,1.21491e-06,1.09676e-08,0.380562,0.00126447,1.24781e-06,-1.10676e-08,0.381828,0.00126693,1.21461e-06,3.50042e-09,0.383096,0.00126937,1.22511e-06,-2.93403e-09,0.384366,0.00127181,1.21631e-06,8.23574e-09,0.385639,0.00127427,1.24102e-06,-2.06607e-10,0.386915,0.00127675,1.2404e-06,-7.40935e-09,0.388193,0.00127921,1.21817e-06,4.1761e-11,0.389473,0.00128165,1.21829e-06,7.24223e-09,0.390756,0.0012841,1.24002e-06,7.91564e-10,0.392042,0.00128659,1.2424e-06,-1.04086e-08,0.393329,0.00128904,1.21117e-06,1.10405e-08,0.39462,0.0012915,1.24429e-06,-3.951e-09,0.395912,0.00129397,1.23244e-06,4.7634e-09,0.397208,0.00129645,1.24673e-06,-1.51025e-08,0.398505,0.0012989,1.20142e-06,2.58443e-08,0.399805,0.00130138,1.27895e-06,-2.86702e-08,0.401108,0.00130385,1.19294e-06,2.92318e-08,0.402413,0.00130632,1.28064e-06,-2.86524e-08,0.403721,0.0013088,1.19468e-06,2.57731e-08,0.405031,0.00131127,1.272e-06,-1.48355e-08,0.406343,0.00131377,1.2275e-06,3.76652e-09,0.407658,0.00131623,1.23879e-06,-2.30784e-10,0.408976,0.00131871,1.2381e-06,-2.84331e-09,0.410296,0.00132118,1.22957e-06,1.16041e-08,0.411618,0.00132367,1.26438e-06,-1.37708e-08,0.412943,0.00132616,1.22307e-06,1.36768e-08,0.41427,0.00132865,1.2641e-06,-1.1134e-08,0.4156,0.00133114,1.2307e-06,1.05714e-09,0.416933,0.00133361,1.23387e-06,6.90538e-09,0.418267,0.00133609,1.25459e-06,1.12372e-09,0.419605,0.00133861,1.25796e-06,-1.14002e-08,0.420945,0.00134109,1.22376e-06,1.46747e-08,0.422287,0.00134358,1.26778e-06,-1.7496e-08,0.423632,0.00134606,1.21529e-06,2.5507e-08,0.424979,0.00134857,1.29182e-06,-2.49272e-08,0.426329,0.00135108,1.21703e-06,1.45972e-08,0.427681,0.00135356,1.26083e-06,-3.65935e-09,0.429036,0.00135607,1.24985e-06,4.00178e-11,0.430393,0.00135857,1.24997e-06,3.49917e-09,0.431753,0.00136108,1.26047e-06,-1.40366e-08,0.433116,0.00136356,1.21836e-06,2.28448e-08,0.43448,0.00136606,1.28689e-06,-1.77378e-08,0.435848,0.00136858,1.23368e-06,1.83043e-08,0.437218,0.0013711,1.28859e-06,-2.56769e-08,0.43859,0.0013736,1.21156e-06,2.47987e-08,0.439965,0.0013761,1.28595e-06,-1.39133e-08,0.441342,0.00137863,1.24421e-06,1.05202e-09,0.442722,0.00138112,1.24737e-06,9.70507e-09,0.444104,0.00138365,1.27649e-06,-1.00698e-08,0.445489,0.00138617,1.24628e-06,7.72123e-10,0.446877,0.00138867,1.24859e-06,6.98132e-09,0.448267,0.00139118,1.26954e-06,1.10477e-09,0.449659,0.00139373,1.27285e-06,-1.14003e-08,0.451054,0.00139624,1.23865e-06,1.4694e-08,0.452452,0.00139876,1.28273e-06,-1.75734e-08,0.453852,0.00140127,1.23001e-06,2.5797e-08,0.455254,0.00140381,1.3074e-06,-2.60097e-08,0.456659,0.00140635,1.22937e-06,1.86371e-08,0.458067,0.00140886,1.28529e-06,-1.8736e-08,0.459477,0.00141137,1.22908e-06,2.65048e-08,0.46089,0.00141391,1.30859e-06,-2.76784e-08,0.462305,0.00141645,1.22556e-06,2.46043e-08,0.463722,0.00141897,1.29937e-06,-1.11341e-08,0.465143,0.00142154,1.26597e-06,-9.87033e-09,0.466565,0.00142404,1.23636e-06,2.08131e-08,0.467991,0.00142657,1.2988e-06,-1.37773e-08,0.469419,0.00142913,1.25746e-06,4.49378e-09,0.470849,0.00143166,1.27094e-06,-4.19781e-09,0.472282,0.00143419,1.25835e-06,1.22975e-08,0.473717,0.00143674,1.29524e-06,-1.51902e-08,0.475155,0.00143929,1.24967e-06,1.86608e-08,0.476596,0.00144184,1.30566e-06,-2.96506e-08,0.478039,0.00144436,1.2167e-06,4.03368e-08,0.479485,0.00144692,1.33771e-06,-4.22896e-08,0.480933,0.00144947,1.21085e-06,3.94148e-08,0.482384,0.00145201,1.32909e-06,-2.59626e-08,0.483837,0.00145459,1.2512e-06,4.83124e-09,0.485293,0.0014571,1.2657e-06,6.63757e-09,0.486751,0.00145966,1.28561e-06,-1.57911e-09,0.488212,0.00146222,1.28087e-06,-3.21468e-10,0.489676,0.00146478,1.27991e-06,2.86517e-09,0.491142,0.00146735,1.2885e-06,-1.11392e-08,0.49261,0.00146989,1.25508e-06,1.18893e-08,0.494081,0.00147244,1.29075e-06,-6.61574e-09,0.495555,0.001475,1.27091e-06,1.45736e-08,0.497031,0.00147759,1.31463e-06,-2.18759e-08,0.49851,0.00148015,1.249e-06,1.33252e-08,0.499992,0.00148269,1.28897e-06,-1.62277e-09,0.501476,0.00148526,1.28411e-06,-6.83421e-09,0.502962,0.00148781,1.2636e-06,2.89596e-08,0.504451,0.00149042,1.35048e-06,-4.93997e-08,0.505943,0.00149298,1.20228e-06,4.94299e-08,0.507437,0.00149553,1.35057e-06,-2.91107e-08,0.508934,0.00149814,1.26324e-06,7.40848e-09,0.510434,0.00150069,1.28547e-06,-5.23187e-10,0.511936,0.00150326,1.2839e-06,-5.31585e-09,0.51344,0.00150581,1.26795e-06,2.17866e-08,0.514947,0.00150841,1.33331e-06,-2.22257e-08,0.516457,0.00151101,1.26663e-06,7.51178e-09,0.517969,0.00151357,1.28917e-06,-7.82128e-09,0.519484,0.00151613,1.2657e-06,2.37733e-08,0.521002,0.00151873,1.33702e-06,-2.76674e-08,0.522522,0.00152132,1.25402e-06,2.72917e-08,0.524044,0.00152391,1.3359e-06,-2.18949e-08,0.525569,0.00152652,1.27021e-06,6.83372e-10,0.527097,0.00152906,1.27226e-06,1.91613e-08,0.528628,0.00153166,1.32974e-06,-1.77241e-08,0.53016,0.00153427,1.27657e-06,-7.86963e-09,0.531696,0.0015368,1.25296e-06,4.92027e-08,0.533234,0.00153945,1.40057e-06,-6.9732e-08,0.534775,0.00154204,1.19138e-06,5.09114e-08,0.536318,0.00154458,1.34411e-06,-1.4704e-08,0.537864,0.00154722,1.3e-06,7.9048e-09,0.539413,0.00154984,1.32371e-06,-1.69152e-08,0.540964,0.00155244,1.27297e-06,1.51355e-10,0.542517,0.00155499,1.27342e-06,1.63099e-08,0.544074,0.00155758,1.32235e-06,-5.78647e-09,0.545633,0.00156021,1.30499e-06,6.83599e-09,0.547194,0.00156284,1.3255e-06,-2.15575e-08,0.548758,0.00156543,1.26083e-06,1.97892e-08,0.550325,0.00156801,1.32019e-06,2.00525e-09,0.551894,0.00157065,1.32621e-06,-2.78103e-08,0.553466,0.00157322,1.24278e-06,4.96314e-08,0.555041,0.00157586,1.39167e-06,-5.1506e-08,0.556618,0.00157849,1.23716e-06,3.71835e-08,0.558198,0.00158107,1.34871e-06,-3.76233e-08,0.55978,0.00158366,1.23584e-06,5.37052e-08,0.561365,0.00158629,1.39695e-06,-5.79884e-08,0.562953,0.00158891,1.22299e-06,5.90392e-08,0.564543,0.00159153,1.4001e-06,-5.89592e-08,0.566136,0.00159416,1.22323e-06,5.7588e-08,0.567731,0.00159678,1.39599e-06,-5.21835e-08,0.569329,0.00159941,1.23944e-06,3.19369e-08,0.57093,0.00160199,1.33525e-06,-1.59594e-08,0.572533,0.00160461,1.28737e-06,3.19006e-08,0.574139,0.00160728,1.38307e-06,-5.20383e-08,0.575748,0.00160989,1.22696e-06,5.70431e-08,0.577359,0.00161251,1.39809e-06,-5.69247e-08,0.578973,0.00161514,1.22731e-06,5.14463e-08,0.580589,0.00161775,1.38165e-06,-2.9651e-08,0.582208,0.00162042,1.2927e-06,7.55339e-09,0.58383,0.00162303,1.31536e-06,-5.62636e-10,0.585455,0.00162566,1.31367e-06,-5.30281e-09,0.587081,0.00162827,1.29776e-06,2.17738e-08,0.588711,0.00163093,1.36309e-06,-2.21875e-08,0.590343,0.00163359,1.29652e-06,7.37164e-09,0.591978,0.00163621,1.31864e-06,-7.29907e-09,0.593616,0.00163882,1.29674e-06,2.18247e-08,0.595256,0.00164148,1.36221e-06,-2.03952e-08,0.596899,0.00164414,1.30103e-06,1.51241e-10,0.598544,0.00164675,1.30148e-06,1.97902e-08,0.600192,0.00164941,1.36085e-06,-1.97074e-08,0.601843,0.00165207,1.30173e-06,-5.65175e-10,0.603496,0.00165467,1.30004e-06,2.1968e-08,0.605152,0.00165734,1.36594e-06,-2.77024e-08,0.606811,0.00165999,1.28283e-06,2.92369e-08,0.608472,0.00166264,1.37054e-06,-2.96407e-08,0.610136,0.00166529,1.28162e-06,2.97215e-08,0.611803,0.00166795,1.37079e-06,-2.96408e-08,0.613472,0.0016706,1.28186e-06,2.92371e-08,0.615144,0.00167325,1.36957e-06,-2.77031e-08,0.616819,0.00167591,1.28647e-06,2.19708e-08,0.618496,0.00167855,1.35238e-06,-5.75407e-10,0.620176,0.00168125,1.35065e-06,-1.9669e-08,0.621858,0.00168389,1.29164e-06,1.96468e-08,0.623544,0.00168653,1.35058e-06,6.86403e-10,0.625232,0.00168924,1.35264e-06,-2.23924e-08,0.626922,0.00169187,1.28547e-06,2.92788e-08,0.628615,0.00169453,1.3733e-06,-3.51181e-08,0.630311,0.00169717,1.26795e-06,5.15889e-08,0.63201,0.00169987,1.42272e-06,-5.2028e-08,0.633711,0.00170255,1.26663e-06,3.73139e-08,0.635415,0.0017052,1.37857e-06,-3.76227e-08,0.637121,0.00170784,1.2657e-06,5.35722e-08,0.63883,0.00171054,1.42642e-06,-5.74567e-08,0.640542,0.00171322,1.25405e-06,5.70456e-08,0.642257,0.0017159,1.42519e-06,-5.15163e-08,0.643974,0.00171859,1.27064e-06,2.98103e-08,0.645694,0.00172122,1.36007e-06,-8.12016e-09,0.647417,0.00172392,1.33571e-06,2.67039e-09,0.649142,0.0017266,1.34372e-06,-2.56152e-09,0.65087,0.00172928,1.33604e-06,7.57571e-09,0.6526,0.00173197,1.35876e-06,-2.77413e-08,0.654334,0.00173461,1.27554e-06,4.3785e-08,0.65607,0.00173729,1.40689e-06,-2.81896e-08,0.657808,0.00174002,1.32233e-06,9.36893e-09,0.65955,0.00174269,1.35043e-06,-9.28617e-09,0.661294,0.00174536,1.32257e-06,2.77757e-08,0.66304,0.00174809,1.4059e-06,-4.2212e-08,0.66479,0.00175078,1.27926e-06,2.1863e-08,0.666542,0.0017534,1.34485e-06,1.43648e-08,0.668297,0.00175613,1.38795e-06,-1.97177e-08,0.670054,0.00175885,1.3288e-06,4.90115e-09,0.671814,0.00176152,1.3435e-06,1.13232e-10,0.673577,0.00176421,1.34384e-06,-5.3542e-09,0.675343,0.00176688,1.32778e-06,2.13035e-08,0.677111,0.0017696,1.39169e-06,-2.02553e-08,0.678882,0.00177232,1.33092e-06,1.13005e-10,0.680656,0.00177499,1.33126e-06,1.98031e-08,0.682432,0.00177771,1.39067e-06,-1.97211e-08,0.684211,0.00178043,1.33151e-06,-5.2349e-10,0.685993,0.00178309,1.32994e-06,2.18151e-08,0.687777,0.00178582,1.39538e-06,-2.71325e-08,0.689564,0.00178853,1.31398e-06,2.71101e-08,0.691354,0.00179124,1.39531e-06,-2.17035e-08,0.693147,0.00179396,1.3302e-06,9.92865e-11,0.694942,0.00179662,1.3305e-06,2.13063e-08,0.69674,0.00179935,1.39442e-06,-2.57198e-08,0.698541,0.00180206,1.31726e-06,2.19682e-08,0.700344,0.00180476,1.38317e-06,-2.54852e-09,0.70215,0.00180752,1.37552e-06,-1.17741e-08,0.703959,0.00181023,1.3402e-06,-9.95999e-09,0.705771,0.00181288,1.31032e-06,5.16141e-08,0.707585,0.00181566,1.46516e-06,-7.72869e-08,0.709402,0.00181836,1.2333e-06,7.87197e-08,0.711222,0.00182106,1.46946e-06,-5.87781e-08,0.713044,0.00182382,1.29312e-06,3.71834e-08,0.714869,0.00182652,1.40467e-06,-3.03511e-08,0.716697,0.00182924,1.31362e-06,2.46161e-08,0.718528,0.00183194,1.38747e-06,-8.5087e-09,0.720361,0.00183469,1.36194e-06,9.41892e-09,0.722197,0.00183744,1.3902e-06,-2.91671e-08,0.724036,0.00184014,1.3027e-06,4.76448e-08,0.725878,0.00184288,1.44563e-06,-4.22028e-08,0.727722,0.00184565,1.31902e-06,1.95682e-09,0.729569,0.00184829,1.3249e-06,3.43754e-08,0.731419,0.00185104,1.42802e-06,-2.0249e-08,0.733271,0.00185384,1.36727e-06,-1.29838e-08,0.735126,0.00185654,1.32832e-06,1.25794e-08,0.736984,0.00185923,1.36606e-06,2.22711e-08,0.738845,0.00186203,1.43287e-06,-4.20594e-08,0.740708,0.00186477,1.3067e-06,2.67571e-08,0.742574,0.00186746,1.38697e-06,-5.36424e-09,0.744443,0.00187022,1.37087e-06,-5.30023e-09,0.746315,0.00187295,1.35497e-06,2.65653e-08,0.748189,0.00187574,1.43467e-06,-4.13564e-08,0.750066,0.00187848,1.3106e-06,1.9651e-08,0.751946,0.00188116,1.36955e-06,2.23572e-08,0.753828,0.00188397,1.43663e-06,-4.9475e-08,0.755714,0.00188669,1.2882e-06,5.63335e-08,0.757602,0.00188944,1.4572e-06,-5.66499e-08,0.759493,0.00189218,1.28725e-06,5.10567e-08,0.761386,0.00189491,1.44042e-06,-2.83677e-08,0.763283,0.00189771,1.35532e-06,2.80962e-09,0.765182,0.00190042,1.36375e-06,1.71293e-08,0.767083,0.0019032,1.41513e-06,-1.17221e-08,0.768988,0.001906,1.37997e-06,-2.98453e-08,0.770895,0.00190867,1.29043e-06,7.14987e-08,0.772805,0.00191146,1.50493e-06,-7.73354e-08,0.774718,0.00191424,1.27292e-06,5.90292e-08,0.776634,0.00191697,1.45001e-06,-3.9572e-08,0.778552,0.00191975,1.33129e-06,3.9654e-08,0.780473,0.00192253,1.45026e-06,-5.94395e-08,0.782397,0.00192525,1.27194e-06,7.88945e-08,0.784324,0.00192803,1.50862e-06,-7.73249e-08,0.786253,0.00193082,1.27665e-06,5.15913e-08,0.788185,0.00193352,1.43142e-06,-9.83099e-09,0.79012,0.00193636,1.40193e-06,-1.22672e-08,0.792058,0.00193912,1.36513e-06,-7.05275e-10,0.793999,0.00194185,1.36301e-06,1.50883e-08,0.795942,0.00194462,1.40828e-06,-4.33147e-11,0.797888,0.00194744,1.40815e-06,-1.49151e-08,0.799837,0.00195021,1.3634e-06,9.93244e-11,0.801788,0.00195294,1.3637e-06,1.45179e-08,0.803743,0.00195571,1.40725e-06,1.43363e-09,0.8057,0.00195853,1.41155e-06,-2.02525e-08,0.80766,0.00196129,1.35079e-06,1.99718e-08,0.809622,0.00196405,1.41071e-06,-3.01649e-11,0.811588,0.00196687,1.41062e-06,-1.9851e-08,0.813556,0.00196964,1.35107e-06,1.98296e-08,0.815527,0.0019724,1.41056e-06,1.37485e-10,0.817501,0.00197522,1.41097e-06,-2.03796e-08,0.819477,0.00197798,1.34983e-06,2.17763e-08,0.821457,0.00198074,1.41516e-06,-7.12085e-09,0.823439,0.00198355,1.3938e-06,6.70707e-09,0.825424,0.00198636,1.41392e-06,-1.97074e-08,0.827412,0.00198913,1.35479e-06,1.25179e-08,0.829402,0.00199188,1.39235e-06,2.92405e-08,0.831396,0.00199475,1.48007e-06,-6.98755e-08,0.833392,0.0019975,1.27044e-06,7.14477e-08,0.835391,0.00200026,1.48479e-06,-3.71014e-08,0.837392,0.00200311,1.37348e-06,1.73533e-08,0.839397,0.00200591,1.42554e-06,-3.23118e-08,0.841404,0.00200867,1.32861e-06,5.2289e-08,0.843414,0.00201148,1.48547e-06,-5.76348e-08,0.845427,0.00201428,1.31257e-06,5.9041e-08,0.847443,0.00201708,1.48969e-06,-5.93197e-08,0.849461,0.00201988,1.31173e-06,5.90289e-08,0.851482,0.00202268,1.48882e-06,-5.75864e-08,0.853507,0.00202549,1.31606e-06,5.21075e-08,0.855533,0.00202828,1.47238e-06,-3.16344e-08,0.857563,0.00203113,1.37748e-06,1.48257e-08,0.859596,0.00203393,1.42196e-06,-2.76684e-08,0.861631,0.00203669,1.33895e-06,3.62433e-08,0.863669,0.00203947,1.44768e-06,1.90463e-09,0.86571,0.00204237,1.45339e-06,-4.38617e-08,0.867754,0.00204515,1.32181e-06,5.43328e-08,0.8698,0.00204796,1.48481e-06,-5.42603e-08,0.87185,0.00205076,1.32203e-06,4.34989e-08,0.873902,0.00205354,1.45252e-06,-5.26029e-10,0.875957,0.00205644,1.45095e-06,-4.13949e-08,0.878015,0.00205922,1.32676e-06,4.68962e-08,0.880075,0.00206201,1.46745e-06,-2.69807e-08,0.882139,0.00206487,1.38651e-06,1.42181e-09,0.884205,0.00206764,1.39077e-06,2.12935e-08,0.886274,0.00207049,1.45465e-06,-2.69912e-08,0.888346,0.00207332,1.37368e-06,2.70664e-08,0.890421,0.00207615,1.45488e-06,-2.16698e-08,0.892498,0.00207899,1.38987e-06,8.14756e-12,0.894579,0.00208177,1.38989e-06,2.16371e-08,0.896662,0.00208462,1.45481e-06,-2.6952e-08,0.898748,0.00208744,1.37395e-06,2.65663e-08,0.900837,0.00209027,1.45365e-06,-1.97084e-08,0.902928,0.00209312,1.39452e-06,-7.33731e-09,0.905023,0.00209589,1.37251e-06,4.90578e-08,0.90712,0.00209878,1.51968e-06,-6.96845e-08,0.90922,0.00210161,1.31063e-06,5.08664e-08,0.911323,0.00210438,1.46323e-06,-1.45717e-08,0.913429,0.00210727,1.41952e-06,7.42038e-09,0.915538,0.00211013,1.44178e-06,-1.51097e-08,0.917649,0.00211297,1.39645e-06,-6.58618e-09,0.919764,0.00211574,1.37669e-06,4.14545e-08,0.921881,0.00211862,1.50105e-06,-4.00222e-08,0.924001,0.0021215,1.38099e-06,-5.7518e-10,0.926124,0.00212426,1.37926e-06,4.23229e-08,0.92825,0.00212714,1.50623e-06,-4.9507e-08,0.930378,0.00213001,1.35771e-06,3.64958e-08,0.93251,0.00213283,1.4672e-06,-3.68713e-08,0.934644,0.00213566,1.35658e-06,5.13848e-08,0.936781,0.00213852,1.51074e-06,-4.94585e-08,0.938921,0.0021414,1.36236e-06,2.72399e-08,0.941064,0.0021442,1.44408e-06,1.0372e-10,0.943209,0.00214709,1.44439e-06,-2.76547e-08,0.945358,0.0021499,1.36143e-06,5.09106e-08,0.947509,0.00215277,1.51416e-06,-5.67784e-08,0.949663,0.00215563,1.34382e-06,5.69935e-08,0.95182,0.00215849,1.5148e-06,-5.19861e-08,0.95398,0.00216136,1.35885e-06,3.17417e-08,0.956143,0.00216418,1.45407e-06,-1.53758e-08,0.958309,0.00216704,1.40794e-06,2.97615e-08,0.960477,0.00216994,1.49723e-06,-4.40657e-08,0.962649,0.00217281,1.36503e-06,2.72919e-08,0.964823,0.00217562,1.44691e-06,-5.49729e-09,0.967,0.0021785,1.43041e-06,-5.30273e-09,0.96918,0.00218134,1.41451e-06,2.67084e-08,0.971363,0.00218425,1.49463e-06,-4.19265e-08,0.973548,0.00218711,1.36885e-06,2.17881e-08,0.975737,0.00218992,1.43422e-06,1.43789e-08,0.977928,0.00219283,1.47735e-06,-1.96989e-08,0.980122,0.00219572,1.41826e-06,4.81221e-09,0.98232,0.00219857,1.43269e-06,4.50048e-10,0.98452,0.00220144,1.43404e-06,-6.61237e-09,0.986722,0.00220429,1.41421e-06,2.59993e-08,0.988928,0.0022072,1.4922e-06,-3.77803e-08,0.991137,0.00221007,1.37886e-06,5.9127e-09,0.993348,0.00221284,1.3966e-06,1.33339e-07,0.995563,0.00221604,1.79662e-06,-5.98872e-07,0.99778,0.00222015,0.,0.};
+
+    __device__ static int LabCbrt_b(int i)
+    {
+        float x = i * (1.f / (255.f * (1 << gamma_shift)));
+        return (1 << lab_shift2) * (x < 0.008856f ? x * 7.787f + 0.13793103448275862f : ::cbrtf(x));
+    }
+
+    __device__ static float splineInterpolate(float x, const float* tab, int n)
+    {
+        int ix = ::min(::max(int(x), 0), n-1);
+        x -= ix;
+        tab += ix * 4;
+        return ((tab[3] * x + tab[2]) * x + tab[1]) * x + tab[0];
+    }
+
+    template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct RGB2Lab;
+
+    template <int scn, int dcn, bool srgb, int blueIdx> struct RGB2Lab<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename MakeVec<uchar, scn>::type, typename MakeVec<uchar, dcn>::type>
+    {
+        __device__ typename MakeVec<uchar, dcn>::type operator ()(const typename MakeVec<uchar, scn>::type& src) const
+        {
+            const int Lscale = (116 * 255 + 50) / 100;
+            const int Lshift = -((16 * 255 * (1 << lab_shift2) + 50) / 100);
+
+            int B = blueIdx == 0 ? src.x : src.z;
+            int G = src.y;
+            int R = blueIdx == 0 ? src.z : src.x;
+
+            if (srgb)
+            {
+                B = c_sRGBGammaTab_b[B];
+                G = c_sRGBGammaTab_b[G];
+                R = c_sRGBGammaTab_b[R];
+            }
+            else
+            {
+                B <<= 3;
+                G <<= 3;
+                R <<= 3;
+            }
+
+            int fX = LabCbrt_b(CV_CUDEV_DESCALE(B * 778 + G * 1541 + R * 1777, lab_shift));
+            int fY = LabCbrt_b(CV_CUDEV_DESCALE(B * 296 + G * 2929 + R * 871, lab_shift));
+            int fZ = LabCbrt_b(CV_CUDEV_DESCALE(B * 3575 + G * 448 + R * 73, lab_shift));
+
+            int L = CV_CUDEV_DESCALE(Lscale * fY + Lshift, lab_shift2);
+            int a = CV_CUDEV_DESCALE(500 * (fX - fY) + 128 * (1 << lab_shift2), lab_shift2);
+            int b = CV_CUDEV_DESCALE(200 * (fY - fZ) + 128 * (1 << lab_shift2), lab_shift2);
+
+            typename MakeVec<uchar, dcn>::type dst;
+
+            dst.x = saturate_cast<uchar>(L);
+            dst.y = saturate_cast<uchar>(a);
+            dst.z = saturate_cast<uchar>(b);
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, bool srgb, int blueIdx> struct RGB2Lab<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float _1_3 = 1.0f / 3.0f;
+            const float _a = 16.0f / 116.0f;
+
+            float B = blueIdx == 0 ? src.x : src.z;
+            float G = src.y;
+            float R = blueIdx == 0 ? src.z : src.x;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            float X = B * 0.189828f + G * 0.376219f + R * 0.433953f;
+            float Y = B * 0.072169f + G * 0.715160f + R * 0.212671f;
+            float Z = B * 0.872766f + G * 0.109477f + R * 0.017758f;
+
+            float FX = X > 0.008856f ? ::powf(X, _1_3) : (7.787f * X + _a);
+            float FY = Y > 0.008856f ? ::powf(Y, _1_3) : (7.787f * Y + _a);
+            float FZ = Z > 0.008856f ? ::powf(Z, _1_3) : (7.787f * Z + _a);
+
+            float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y);
+            float a = 500.f * (FX - FY);
+            float b = 200.f * (FY - FZ);
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = L;
+            dst.y = a;
+            dst.z = b;
+
+            return dst;
+        }
+    };
+
+    // Lab to RGB
+
+    __constant__ float c_sRGBInvGammaTab[] = {0,0.0126255,0.,-8.33961e-06,0.0126172,0.0126005,-2.50188e-05,4.1698e-05,0.0252344,0.0126756,0.000100075,-0.000158451,0.0378516,0.0124004,-0.000375277,-0.000207393,0.0496693,0.0110276,-0.000997456,0.00016837,0.0598678,0.00953783,-0.000492346,2.07235e-05,0.068934,0.00861531,-0.000430176,3.62876e-05,0.0771554,0.00786382,-0.000321313,1.87625e-05,0.0847167,0.00727748,-0.000265025,1.53594e-05,0.0917445,0.00679351,-0.000218947,1.10545e-05,0.0983301,0.00638877,-0.000185784,8.66984e-06,0.104542,0.00604322,-0.000159774,6.82996e-06,0.110432,0.00574416,-0.000139284,5.51008e-06,0.116042,0.00548212,-0.000122754,4.52322e-06,0.121406,0.00525018,-0.000109184,3.75557e-06,0.126551,0.00504308,-9.79177e-05,3.17134e-06,0.131499,0.00485676,-8.84037e-05,2.68469e-06,0.13627,0.004688,-8.03496e-05,2.31725e-06,0.14088,0.00453426,-7.33978e-05,2.00868e-06,0.145343,0.00439349,-6.73718e-05,1.74775e-06,0.149671,0.00426399,-6.21286e-05,1.53547e-06,0.153875,0.00414434,-5.75222e-05,1.364e-06,0.157963,0.00403338,-5.34301e-05,1.20416e-06,0.161944,0.00393014,-4.98177e-05,1.09114e-06,0.165825,0.00383377,-4.65443e-05,9.57987e-07,0.169613,0.00374356,-4.36703e-05,8.88359e-07,0.173314,0.00365888,-4.10052e-05,7.7849e-07,0.176933,0.00357921,-3.86697e-05,7.36254e-07,0.180474,0.00350408,-3.6461e-05,6.42534e-07,0.183942,0.00343308,-3.45334e-05,6.12614e-07,0.187342,0.00336586,-3.26955e-05,5.42894e-07,0.190675,0.00330209,-3.10669e-05,5.08967e-07,0.193947,0.00324149,-2.954e-05,4.75977e-07,0.197159,0.00318383,-2.8112e-05,4.18343e-07,0.200315,0.00312887,-2.6857e-05,4.13651e-07,0.203418,0.00307639,-2.5616e-05,3.70847e-07,0.206469,0.00302627,-2.45035e-05,3.3813e-07,0.209471,0.00297828,-2.34891e-05,3.32999e-07,0.212426,0.0029323,-2.24901e-05,2.96826e-07,0.215336,0.00288821,-2.15996e-05,2.82736e-07,0.218203,0.00284586,-2.07514e-05,2.70961e-07,0.221029,0.00280517,-1.99385e-05,2.42744e-07,0.223814,0.00276602,-1.92103e-05,2.33277e-07,0.226561,0.0027283,-1.85105e-05,2.2486e-07,0.229271,0.00269195,-1.78359e-05,2.08383e-07,0.231945,0.00265691,-1.72108e-05,1.93305e-07,0.234585,0.00262307,-1.66308e-05,1.80687e-07,0.237192,0.00259035,-1.60888e-05,1.86632e-07,0.239766,0.00255873,-1.55289e-05,1.60569e-07,0.24231,0.00252815,-1.50472e-05,1.54566e-07,0.244823,0.00249852,-1.45835e-05,1.59939e-07,0.247307,0.00246983,-1.41037e-05,1.29549e-07,0.249763,0.00244202,-1.3715e-05,1.41429e-07,0.252191,0.00241501,-1.32907e-05,1.39198e-07,0.254593,0.00238885,-1.28731e-05,1.06444e-07,0.256969,0.00236342,-1.25538e-05,1.2048e-07,0.25932,0.00233867,-1.21924e-05,1.26892e-07,0.261647,0.00231467,-1.18117e-05,8.72084e-08,0.26395,0.00229131,-1.15501e-05,1.20323e-07,0.26623,0.00226857,-1.11891e-05,8.71514e-08,0.268487,0.00224645,-1.09276e-05,9.73165e-08,0.270723,0.00222489,-1.06357e-05,8.98259e-08,0.272937,0.00220389,-1.03662e-05,7.98218e-08,0.275131,0.00218339,-1.01267e-05,9.75254e-08,0.277304,0.00216343,-9.83416e-06,6.65195e-08,0.279458,0.00214396,-9.63461e-06,8.34313e-08,0.281592,0.00212494,-9.38431e-06,7.65919e-08,0.283708,0.00210641,-9.15454e-06,5.7236e-08,0.285805,0.00208827,-8.98283e-06,8.18939e-08,0.287885,0.00207055,-8.73715e-06,6.2224e-08,0.289946,0.00205326,-8.55047e-06,5.66388e-08,0.291991,0.00203633,-8.38056e-06,6.88491e-08,0.294019,0.00201978,-8.17401e-06,5.53955e-08,0.296031,0.00200359,-8.00782e-06,6.71971e-08,0.298027,0.00198778,-7.80623e-06,3.34439e-08,0.300007,0.00197227,-7.7059e-06,6.7248e-08,0.301971,0.00195706,-7.50416e-06,5.51915e-08,0.303921,0.00194221,-7.33858e-06,3.98124e-08,0.305856,0.00192766,-7.21915e-06,5.37795e-08,0.307776,0.00191338,-7.05781e-06,4.30919e-08,0.309683,0.00189939,-6.92853e-06,4.20744e-08,0.311575,0.00188566,-6.80231e-06,5.68321e-08,0.313454,0.00187223,-6.63181e-06,2.86195e-08,0.31532,0.00185905,-6.54595e-06,3.73075e-08,0.317172,0.00184607,-6.43403e-06,6.05684e-08,0.319012,0.00183338,-6.25233e-06,1.84426e-08,0.320839,0.00182094,-6.197e-06,4.44757e-08,0.322654,0.00180867,-6.06357e-06,4.20729e-08,0.324456,0.00179667,-5.93735e-06,2.56511e-08,0.326247,0.00178488,-5.8604e-06,3.41368e-08,0.328026,0.00177326,-5.75799e-06,4.64177e-08,0.329794,0.00176188,-5.61874e-06,1.86107e-08,0.33155,0.0017507,-5.5629e-06,2.81511e-08,0.333295,0.00173966,-5.47845e-06,4.75987e-08,0.335029,0.00172884,-5.33565e-06,1.98726e-08,0.336753,0.00171823,-5.27604e-06,2.19226e-08,0.338466,0.00170775,-5.21027e-06,4.14483e-08,0.340169,0.00169745,-5.08592e-06,2.09017e-08,0.341861,0.00168734,-5.02322e-06,2.39561e-08,0.343543,0.00167737,-4.95135e-06,3.22852e-08,0.345216,0.00166756,-4.85449e-06,2.57173e-08,0.346878,0.00165793,-4.77734e-06,1.38569e-08,0.348532,0.00164841,-4.73577e-06,3.80634e-08,0.350175,0.00163906,-4.62158e-06,1.27043e-08,0.35181,0.00162985,-4.58347e-06,3.03279e-08,0.353435,0.00162078,-4.49249e-06,1.49961e-08,0.355051,0.00161184,-4.4475e-06,2.88977e-08,0.356659,0.00160303,-4.3608e-06,1.84241e-08,0.358257,0.00159436,-4.30553e-06,1.6616e-08,0.359848,0.0015858,-4.25568e-06,3.43218e-08,0.361429,0.00157739,-4.15272e-06,-4.89172e-09,0.363002,0.00156907,-4.16739e-06,4.48498e-08,0.364567,0.00156087,-4.03284e-06,4.30676e-09,0.366124,0.00155282,-4.01992e-06,2.73303e-08,0.367673,0.00154486,-3.93793e-06,5.58036e-09,0.369214,0.001537,-3.92119e-06,3.97554e-08,0.370747,0.00152928,-3.80193e-06,-1.55904e-08,0.372272,0.00152163,-3.8487e-06,5.24081e-08,0.37379,0.00151409,-3.69147e-06,-1.52272e-08,0.375301,0.00150666,-3.73715e-06,3.83028e-08,0.376804,0.0014993,-3.62225e-06,1.10278e-08,0.378299,0.00149209,-3.58916e-06,6.99326e-09,0.379788,0.00148493,-3.56818e-06,2.06038e-08,0.381269,0.00147786,-3.50637e-06,2.98009e-08,0.382744,0.00147093,-3.41697e-06,-2.05978e-08,0.384211,0.00146404,-3.47876e-06,5.25899e-08,0.385672,0.00145724,-3.32099e-06,-1.09471e-08,0.387126,0.00145056,-3.35383e-06,2.10009e-08,0.388573,0.00144392,-3.29083e-06,1.63501e-08,0.390014,0.00143739,-3.24178e-06,3.00641e-09,0.391448,0.00143091,-3.23276e-06,3.12282e-08,0.392875,0.00142454,-3.13908e-06,-8.70932e-09,0.394297,0.00141824,-3.16521e-06,3.34114e-08,0.395712,0.00141201,-3.06497e-06,-5.72754e-09,0.397121,0.00140586,-3.08215e-06,1.9301e-08,0.398524,0.00139975,-3.02425e-06,1.7931e-08,0.39992,0.00139376,-2.97046e-06,-1.61822e-09,0.401311,0.00138781,-2.97531e-06,1.83442e-08,0.402696,0.00138192,-2.92028e-06,1.76485e-08,0.404075,0.00137613,-2.86733e-06,4.68617e-10,0.405448,0.00137039,-2.86593e-06,1.02794e-08,0.406816,0.00136469,-2.83509e-06,1.80179e-08,0.408178,0.00135908,-2.78104e-06,7.05594e-09,0.409534,0.00135354,-2.75987e-06,1.33633e-08,0.410885,0.00134806,-2.71978e-06,-9.04568e-10,0.41223,0.00134261,-2.72249e-06,2.0057e-08,0.41357,0.00133723,-2.66232e-06,1.00841e-08,0.414905,0.00133194,-2.63207e-06,-7.88835e-10,0.416234,0.00132667,-2.63444e-06,2.28734e-08,0.417558,0.00132147,-2.56582e-06,-1.29785e-09,0.418877,0.00131633,-2.56971e-06,1.21205e-08,0.420191,0.00131123,-2.53335e-06,1.24202e-08,0.421499,0.0013062,-2.49609e-06,-2.19681e-09,0.422803,0.0013012,-2.50268e-06,2.61696e-08,0.424102,0.00129628,-2.42417e-06,-1.30747e-08,0.425396,0.00129139,-2.46339e-06,2.6129e-08,0.426685,0.00128654,-2.38501e-06,-2.03454e-09,0.427969,0.00128176,-2.39111e-06,1.18115e-08,0.429248,0.00127702,-2.35567e-06,1.43932e-08,0.430523,0.00127235,-2.31249e-06,-9.77965e-09,0.431793,0.00126769,-2.34183e-06,2.47253e-08,0.433058,0.00126308,-2.26766e-06,2.85278e-10,0.434319,0.00125855,-2.2668e-06,3.93614e-09,0.435575,0.00125403,-2.25499e-06,1.37722e-08,0.436827,0.00124956,-2.21368e-06,5.79803e-10,0.438074,0.00124513,-2.21194e-06,1.37112e-08,0.439317,0.00124075,-2.1708e-06,4.17973e-09,0.440556,0.00123642,-2.15826e-06,-6.27703e-10,0.44179,0.0012321,-2.16015e-06,2.81332e-08,0.44302,0.00122787,-2.07575e-06,-2.24985e-08,0.444246,0.00122365,-2.14324e-06,3.20586e-08,0.445467,0.00121946,-2.04707e-06,-1.6329e-08,0.446685,0.00121532,-2.09605e-06,3.32573e-08,0.447898,0.00121122,-1.99628e-06,-2.72927e-08,0.449107,0.00120715,-2.07816e-06,4.6111e-08,0.450312,0.00120313,-1.93983e-06,-3.79416e-08,0.451514,0.00119914,-2.05365e-06,4.60507e-08,0.452711,0.00119517,-1.9155e-06,-2.7052e-08,0.453904,0.00119126,-1.99666e-06,3.23551e-08,0.455093,0.00118736,-1.89959e-06,-1.29613e-08,0.456279,0.00118352,-1.93848e-06,1.94905e-08,0.45746,0.0011797,-1.88e-06,-5.39588e-09,0.458638,0.00117593,-1.89619e-06,2.09282e-09,0.459812,0.00117214,-1.88991e-06,2.68267e-08,0.460982,0.00116844,-1.80943e-06,-1.99925e-08,0.462149,0.00116476,-1.86941e-06,2.3341e-08,0.463312,0.00116109,-1.79939e-06,-1.37674e-08,0.464471,0.00115745,-1.84069e-06,3.17287e-08,0.465627,0.00115387,-1.7455e-06,-2.37407e-08,0.466779,0.00115031,-1.81673e-06,3.34315e-08,0.467927,0.00114677,-1.71643e-06,-2.05786e-08,0.469073,0.00114328,-1.77817e-06,1.90802e-08,0.470214,0.00113978,-1.72093e-06,3.86247e-09,0.471352,0.00113635,-1.70934e-06,-4.72759e-09,0.472487,0.00113292,-1.72352e-06,1.50478e-08,0.473618,0.00112951,-1.67838e-06,4.14108e-09,0.474746,0.00112617,-1.66595e-06,-1.80986e-09,0.47587,0.00112283,-1.67138e-06,3.09816e-09,0.476991,0.0011195,-1.66209e-06,1.92198e-08,0.478109,0.00111623,-1.60443e-06,-2.03726e-08,0.479224,0.00111296,-1.66555e-06,3.2468e-08,0.480335,0.00110973,-1.56814e-06,-2.00922e-08,0.481443,0.00110653,-1.62842e-06,1.80983e-08,0.482548,0.00110333,-1.57413e-06,7.30362e-09,0.48365,0.0011002,-1.55221e-06,-1.75107e-08,0.484749,0.00109705,-1.60475e-06,3.29373e-08,0.485844,0.00109393,-1.50594e-06,-2.48315e-08,0.486937,0.00109085,-1.58043e-06,3.65865e-08,0.488026,0.0010878,-1.47067e-06,-3.21078e-08,0.489112,0.00108476,-1.56699e-06,3.22397e-08,0.490195,0.00108172,-1.47027e-06,-7.44391e-09,0.491276,0.00107876,-1.49261e-06,-2.46428e-09,0.492353,0.00107577,-1.5e-06,1.73011e-08,0.493427,0.00107282,-1.4481e-06,-7.13552e-09,0.494499,0.0010699,-1.4695e-06,1.1241e-08,0.495567,0.001067,-1.43578e-06,-8.02637e-09,0.496633,0.0010641,-1.45986e-06,2.08645e-08,0.497695,0.00106124,-1.39726e-06,-1.58271e-08,0.498755,0.0010584,-1.44475e-06,1.26415e-08,0.499812,0.00105555,-1.40682e-06,2.48655e-08,0.500866,0.00105281,-1.33222e-06,-5.24988e-08,0.501918,0.00104999,-1.48972e-06,6.59206e-08,0.502966,0.00104721,-1.29196e-06,-3.237e-08,0.504012,0.00104453,-1.38907e-06,3.95479e-09,0.505055,0.00104176,-1.3772e-06,1.65509e-08,0.506096,0.00103905,-1.32755e-06,-1.05539e-08,0.507133,0.00103637,-1.35921e-06,2.56648e-08,0.508168,0.00103373,-1.28222e-06,-3.25007e-08,0.509201,0.00103106,-1.37972e-06,4.47336e-08,0.51023,0.00102844,-1.24552e-06,-2.72245e-08,0.511258,0.00102587,-1.32719e-06,4.55952e-09,0.512282,0.00102323,-1.31352e-06,8.98645e-09,0.513304,0.00102063,-1.28656e-06,1.90992e-08,0.514323,0.00101811,-1.22926e-06,-2.57786e-08,0.51534,0.00101557,-1.30659e-06,2.44104e-08,0.516355,0.00101303,-1.23336e-06,-1.22581e-08,0.517366,0.00101053,-1.27014e-06,2.4622e-08,0.518376,0.00100806,-1.19627e-06,-2.66253e-08,0.519383,0.00100559,-1.27615e-06,2.22744e-08,0.520387,0.00100311,-1.20932e-06,-2.8679e-09,0.521389,0.00100068,-1.21793e-06,-1.08029e-08,0.522388,0.000998211,-1.25034e-06,4.60795e-08,0.523385,0.000995849,-1.1121e-06,-5.4306e-08,0.52438,0.000993462,-1.27502e-06,5.19354e-08,0.525372,0.000991067,-1.11921e-06,-3.42262e-08,0.526362,0.000988726,-1.22189e-06,2.53646e-08,0.52735,0.000986359,-1.14579e-06,-7.62782e-09,0.528335,0.000984044,-1.16868e-06,5.14668e-09,0.529318,0.000981722,-1.15324e-06,-1.29589e-08,0.530298,0.000979377,-1.19211e-06,4.66888e-08,0.531276,0.000977133,-1.05205e-06,-5.45868e-08,0.532252,0.000974865,-1.21581e-06,5.24495e-08,0.533226,0.000972591,-1.05846e-06,-3.60019e-08,0.534198,0.000970366,-1.16647e-06,3.19537e-08,0.535167,0.000968129,-1.07061e-06,-3.2208e-08,0.536134,0.000965891,-1.16723e-06,3.72738e-08,0.537099,0.000963668,-1.05541e-06,2.32205e-09,0.538061,0.000961564,-1.04844e-06,-4.65618e-08,0.539022,0.000959328,-1.18813e-06,6.47159e-08,0.53998,0.000957146,-9.93979e-07,-3.3488e-08,0.540936,0.000955057,-1.09444e-06,9.63166e-09,0.54189,0.000952897,-1.06555e-06,-5.03871e-09,0.542842,0.000950751,-1.08066e-06,1.05232e-08,0.543792,0.000948621,-1.04909e-06,2.25503e-08,0.544739,0.000946591,-9.81444e-07,-4.11195e-08,0.545685,0.000944504,-1.1048e-06,2.27182e-08,0.546628,0.000942363,-1.03665e-06,9.85146e-09,0.54757,0.000940319,-1.00709e-06,-2.51938e-09,0.548509,0.000938297,-1.01465e-06,2.25858e-10,0.549446,0.000936269,-1.01397e-06,1.61598e-09,0.550381,0.000934246,-1.00913e-06,-6.68983e-09,0.551315,0.000932207,-1.0292e-06,2.51434e-08,0.552246,0.000930224,-9.53765e-07,-3.42793e-08,0.553175,0.000928214,-1.0566e-06,5.23688e-08,0.554102,0.000926258,-8.99497e-07,-5.59865e-08,0.555028,0.000924291,-1.06746e-06,5.23679e-08,0.555951,0.000922313,-9.10352e-07,-3.42763e-08,0.556872,0.00092039,-1.01318e-06,2.51326e-08,0.557792,0.000918439,-9.37783e-07,-6.64954e-09,0.558709,0.000916543,-9.57732e-07,1.46554e-09,0.559625,0.000914632,-9.53335e-07,7.87281e-10,0.560538,0.000912728,-9.50973e-07,-4.61466e-09,0.56145,0.000910812,-9.64817e-07,1.76713e-08,0.56236,0.000908935,-9.11804e-07,-6.46564e-09,0.563268,0.000907092,-9.312e-07,8.19121e-09,0.564174,0.000905255,-9.06627e-07,-2.62992e-08,0.565078,0.000903362,-9.85524e-07,3.74007e-08,0.565981,0.000901504,-8.73322e-07,-4.0942e-09,0.566882,0.000899745,-8.85605e-07,-2.1024e-08,0.56778,0.00089791,-9.48677e-07,2.85854e-08,0.568677,0.000896099,-8.62921e-07,-3.3713e-08,0.569573,0.000894272,-9.64059e-07,4.6662e-08,0.570466,0.000892484,-8.24073e-07,-3.37258e-08,0.571358,0.000890734,-9.25251e-07,2.86365e-08,0.572247,0.00088897,-8.39341e-07,-2.12155e-08,0.573135,0.000887227,-9.02988e-07,-3.37913e-09,0.574022,0.000885411,-9.13125e-07,3.47319e-08,0.574906,0.000883689,-8.08929e-07,-1.63394e-08,0.575789,0.000882022,-8.57947e-07,-2.8979e-08,0.57667,0.00088022,-9.44885e-07,7.26509e-08,0.57755,0.000878548,-7.26932e-07,-8.28106e-08,0.578427,0.000876845,-9.75364e-07,7.97774e-08,0.579303,0.000875134,-7.36032e-07,-5.74849e-08,0.580178,0.00087349,-9.08486e-07,3.09529e-08,0.58105,0.000871765,-8.15628e-07,-6.72206e-09,0.581921,0.000870114,-8.35794e-07,-4.06451e-09,0.582791,0.00086843,-8.47987e-07,2.29799e-08,0.583658,0.000866803,-7.79048e-07,-2.82503e-08,0.584524,0.00086516,-8.63799e-07,3.04167e-08,0.585388,0.000863524,-7.72548e-07,-3.38119e-08,0.586251,0.000861877,-8.73984e-07,4.52264e-08,0.587112,0.000860265,-7.38305e-07,-2.78842e-08,0.587972,0.000858705,-8.21958e-07,6.70567e-09,0.58883,0.000857081,-8.01841e-07,1.06161e-09,0.589686,0.000855481,-7.98656e-07,-1.09521e-08,0.590541,0.00085385,-8.31512e-07,4.27468e-08,0.591394,0.000852316,-7.03272e-07,-4.08257e-08,0.592245,0.000850787,-8.25749e-07,1.34677e-09,0.593095,0.000849139,-8.21709e-07,3.54387e-08,0.593944,0.000847602,-7.15393e-07,-2.38924e-08,0.59479,0.0008461,-7.8707e-07,5.26143e-10,0.595636,0.000844527,-7.85491e-07,2.17879e-08,0.596479,0.000843021,-7.20127e-07,-2.80733e-08,0.597322,0.000841497,-8.04347e-07,3.09005e-08,0.598162,0.000839981,-7.11646e-07,-3.5924e-08,0.599002,0.00083845,-8.19418e-07,5.3191e-08,0.599839,0.000836971,-6.59845e-07,-5.76307e-08,0.600676,0.000835478,-8.32737e-07,5.81227e-08,0.60151,0.000833987,-6.58369e-07,-5.56507e-08,0.602344,0.000832503,-8.25321e-07,4.52706e-08,0.603175,0.000830988,-6.89509e-07,-6.22236e-09,0.604006,0.000829591,-7.08176e-07,-2.03811e-08,0.604834,0.000828113,-7.6932e-07,2.8142e-08,0.605662,0.000826659,-6.84894e-07,-3.25822e-08,0.606488,0.000825191,-7.8264e-07,4.25823e-08,0.607312,0.000823754,-6.54893e-07,-1.85376e-08,0.608135,0.000822389,-7.10506e-07,-2.80365e-08,0.608957,0.000820883,-7.94616e-07,7.1079e-08,0.609777,0.000819507,-5.81379e-07,-7.74655e-08,0.610596,0.000818112,-8.13775e-07,5.9969e-08,0.611413,0.000816665,-6.33868e-07,-4.32013e-08,0.612229,0.000815267,-7.63472e-07,5.32313e-08,0.613044,0.0008139,-6.03778e-07,-5.05148e-08,0.613857,0.000812541,-7.55323e-07,2.96187e-08,0.614669,0.000811119,-6.66466e-07,-8.35545e-09,0.615479,0.000809761,-6.91533e-07,3.80301e-09,0.616288,0.00080839,-6.80124e-07,-6.85666e-09,0.617096,0.000807009,-7.00694e-07,2.36237e-08,0.617903,0.000805678,-6.29822e-07,-2.80336e-08,0.618708,0.000804334,-7.13923e-07,2.8906e-08,0.619511,0.000802993,-6.27205e-07,-2.79859e-08,0.620314,0.000801655,-7.11163e-07,2.34329e-08,0.621114,0.000800303,-6.40864e-07,-6.14108e-09,0.621914,0.000799003,-6.59287e-07,1.13151e-09,0.622712,0.000797688,-6.55893e-07,1.61507e-09,0.62351,0.000796381,-6.51048e-07,-7.59186e-09,0.624305,0.000795056,-6.73823e-07,2.87524e-08,0.6251,0.000793794,-5.87566e-07,-4.7813e-08,0.625893,0.000792476,-7.31005e-07,4.32901e-08,0.626685,0.000791144,-6.01135e-07,-6.13814e-09,0.627475,0.000789923,-6.19549e-07,-1.87376e-08,0.628264,0.000788628,-6.75762e-07,2.14837e-08,0.629052,0.000787341,-6.11311e-07,-7.59265e-09,0.629839,0.000786095,-6.34089e-07,8.88692e-09,0.630625,0.000784854,-6.07428e-07,-2.7955e-08,0.631409,0.000783555,-6.91293e-07,4.33285e-08,0.632192,0.000782302,-5.61307e-07,-2.61497e-08,0.632973,0.000781101,-6.39757e-07,1.6658e-09,0.633754,0.000779827,-6.34759e-07,1.94866e-08,0.634533,0.000778616,-5.76299e-07,-2.00076e-08,0.635311,0.000777403,-6.36322e-07,9.39091e-10,0.636088,0.000776133,-6.33505e-07,1.62512e-08,0.636863,0.000774915,-5.84751e-07,-6.33937e-09,0.637638,0.000773726,-6.03769e-07,9.10609e-09,0.638411,0.000772546,-5.76451e-07,-3.00849e-08,0.639183,0.000771303,-6.66706e-07,5.1629e-08,0.639953,0.000770125,-5.11819e-07,-5.7222e-08,0.640723,0.000768929,-6.83485e-07,5.80497e-08,0.641491,0.000767736,-5.09336e-07,-5.57674e-08,0.642259,0.000766551,-6.76638e-07,4.58105e-08,0.643024,0.000765335,-5.39206e-07,-8.26541e-09,0.643789,0.000764231,-5.64002e-07,-1.27488e-08,0.644553,0.000763065,-6.02249e-07,-3.44168e-10,0.645315,0.00076186,-6.03281e-07,1.41254e-08,0.646077,0.000760695,-5.60905e-07,3.44727e-09,0.646837,0.000759584,-5.50563e-07,-2.79144e-08,0.647596,0.000758399,-6.34307e-07,4.86057e-08,0.648354,0.000757276,-4.88489e-07,-4.72989e-08,0.64911,0.000756158,-6.30386e-07,2.13807e-08,0.649866,0.000754961,-5.66244e-07,2.13808e-08,0.65062,0.000753893,-5.02102e-07,-4.7299e-08,0.651374,0.000752746,-6.43999e-07,4.86059e-08,0.652126,0.000751604,-4.98181e-07,-2.79154e-08,0.652877,0.000750524,-5.81927e-07,3.45089e-09,0.653627,0.000749371,-5.71575e-07,1.41119e-08,0.654376,0.00074827,-5.29239e-07,-2.93748e-10,0.655123,0.00074721,-5.3012e-07,-1.29368e-08,0.65587,0.000746111,-5.68931e-07,-7.56355e-09,0.656616,0.000744951,-5.91621e-07,4.3191e-08,0.65736,0.000743897,-4.62048e-07,-4.59911e-08,0.658103,0.000742835,-6.00022e-07,2.15642e-08,0.658846,0.0007417,-5.35329e-07,1.93389e-08,0.659587,0.000740687,-4.77312e-07,-3.93152e-08,0.660327,0.000739615,-5.95258e-07,1.87126e-08,0.661066,0.00073848,-5.3912e-07,2.40695e-08,0.661804,0.000737474,-4.66912e-07,-5.53859e-08,0.662541,0.000736374,-6.33069e-07,7.82648e-08,0.663277,0.000735343,-3.98275e-07,-7.88593e-08,0.664012,0.00073431,-6.34853e-07,5.83585e-08,0.664745,0.000733215,-4.59777e-07,-3.53656e-08,0.665478,0.000732189,-5.65874e-07,2.34994e-08,0.66621,0.000731128,-4.95376e-07,9.72743e-10,0.66694,0.00073014,-4.92458e-07,-2.73903e-08,0.66767,0.000729073,-5.74629e-07,4.89839e-08,0.668398,0.000728071,-4.27677e-07,-4.93359e-08,0.669126,0.000727068,-5.75685e-07,2.91504e-08,0.669853,0.000726004,-4.88234e-07,-7.66109e-09,0.670578,0.000725004,-5.11217e-07,1.49392e-09,0.671303,0.000723986,-5.06735e-07,1.68533e-09,0.672026,0.000722978,-5.01679e-07,-8.23525e-09,0.672749,0.00072195,-5.26385e-07,3.12556e-08,0.67347,0.000720991,-4.32618e-07,-5.71825e-08,0.674191,0.000719954,-6.04166e-07,7.8265e-08,0.67491,0.00071898,-3.69371e-07,-7.70634e-08,0.675628,0.00071801,-6.00561e-07,5.11747e-08,0.676346,0.000716963,-4.47037e-07,-8.42615e-09,0.677062,0.000716044,-4.72315e-07,-1.747e-08,0.677778,0.000715046,-5.24725e-07,1.87015e-08,0.678493,0.000714053,-4.68621e-07,2.26856e-09,0.679206,0.000713123,-4.61815e-07,-2.77758e-08,0.679919,0.000712116,-5.45142e-07,4.92298e-08,0.68063,0.000711173,-3.97453e-07,-4.99339e-08,0.681341,0.000710228,-5.47255e-07,3.12967e-08,0.682051,0.000709228,-4.53365e-07,-1.56481e-08,0.68276,0.000708274,-5.00309e-07,3.12958e-08,0.683467,0.000707367,-4.06422e-07,-4.99303e-08,0.684174,0.000706405,-5.56213e-07,4.9216e-08,0.68488,0.00070544,-4.08565e-07,-2.77245e-08,0.685585,0.00070454,-4.91738e-07,2.07748e-09,0.686289,0.000703562,-4.85506e-07,1.94146e-08,0.686992,0.00070265,-4.27262e-07,-2.01314e-08,0.687695,0.000701735,-4.87656e-07,1.50616e-09,0.688396,0.000700764,-4.83137e-07,1.41067e-08,0.689096,0.00069984,-4.40817e-07,1.67168e-09,0.689795,0.000698963,-4.35802e-07,-2.07934e-08,0.690494,0.000698029,-4.98182e-07,2.18972e-08,0.691192,0.000697099,-4.32491e-07,-7.19092e-09,0.691888,0.000696212,-4.54064e-07,6.86642e-09,0.692584,0.000695325,-4.33464e-07,-2.02747e-08,0.693279,0.000694397,-4.94288e-07,1.46279e-08,0.693973,0.000693452,-4.50405e-07,2.13678e-08,0.694666,0.000692616,-3.86301e-07,-4.04945e-08,0.695358,0.000691721,-5.07785e-07,2.14009e-08,0.696049,0.00069077,-4.43582e-07,1.44955e-08,0.69674,0.000689926,-4.00096e-07,-1.97783e-08,0.697429,0.000689067,-4.5943e-07,5.01296e-09,0.698118,0.000688163,-4.44392e-07,-2.73521e-10,0.698805,0.000687273,-4.45212e-07,-3.91893e-09,0.699492,0.000686371,-4.56969e-07,1.59493e-08,0.700178,0.000685505,-4.09121e-07,-2.73351e-10,0.700863,0.000684686,-4.09941e-07,-1.4856e-08,0.701548,0.000683822,-4.54509e-07,9.25979e-11,0.702231,0.000682913,-4.54231e-07,1.44855e-08,0.702913,0.000682048,-4.10775e-07,1.56992e-09,0.703595,0.000681231,-4.06065e-07,-2.07652e-08,0.704276,0.000680357,-4.68361e-07,2.18864e-08,0.704956,0.000679486,-4.02701e-07,-7.17595e-09,0.705635,0.000678659,-4.24229e-07,6.81748e-09,0.706313,0.000677831,-4.03777e-07,-2.0094e-08,0.70699,0.000676963,-4.64059e-07,1.39538e-08,0.707667,0.000676077,-4.22197e-07,2.38835e-08,0.708343,0.000675304,-3.50547e-07,-4.98831e-08,0.709018,0.000674453,-5.00196e-07,5.64395e-08,0.709692,0.000673622,-3.30878e-07,-5.66657e-08,0.710365,0.00067279,-5.00875e-07,5.1014e-08,0.711037,0.000671942,-3.47833e-07,-2.81809e-08,0.711709,0.000671161,-4.32376e-07,2.10513e-09,0.712379,0.000670303,-4.2606e-07,1.97604e-08,0.713049,0.00066951,-3.66779e-07,-2.15422e-08,0.713718,0.000668712,-4.31406e-07,6.8038e-09,0.714387,0.000667869,-4.10994e-07,-5.67295e-09,0.715054,0.00066703,-4.28013e-07,1.5888e-08,0.715721,0.000666222,-3.80349e-07,1.72576e-09,0.716387,0.000665467,-3.75172e-07,-2.27911e-08,0.717052,0.000664648,-4.43545e-07,2.9834e-08,0.717716,0.00066385,-3.54043e-07,-3.69401e-08,0.718379,0.000663031,-4.64864e-07,5.83219e-08,0.719042,0.000662277,-2.89898e-07,-7.71382e-08,0.719704,0.000661465,-5.21313e-07,7.14171e-08,0.720365,0.000660637,-3.07061e-07,-2.97161e-08,0.721025,0.000659934,-3.96209e-07,-1.21575e-08,0.721685,0.000659105,-4.32682e-07,1.87412e-08,0.722343,0.000658296,-3.76458e-07,-3.2029e-09,0.723001,0.000657533,-3.86067e-07,-5.9296e-09,0.723659,0.000656743,-4.03856e-07,2.69213e-08,0.724315,0.000656016,-3.23092e-07,-4.21511e-08,0.724971,0.000655244,-4.49545e-07,2.24737e-08,0.725625,0.000654412,-3.82124e-07,1.18611e-08,0.726279,0.000653683,-3.46541e-07,-1.03132e-08,0.726933,0.000652959,-3.7748e-07,-3.02128e-08,0.727585,0.000652114,-4.68119e-07,7.15597e-08,0.728237,0.000651392,-2.5344e-07,-7.72119e-08,0.728888,0.000650654,-4.85075e-07,5.8474e-08,0.729538,0.000649859,-3.09654e-07,-3.74746e-08,0.730188,0.000649127,-4.22077e-07,3.18197e-08,0.730837,0.000648379,-3.26618e-07,-3.01997e-08,0.731485,0.000647635,-4.17217e-07,2.93747e-08,0.732132,0.000646888,-3.29093e-07,-2.76943e-08,0.732778,0.000646147,-4.12176e-07,2.17979e-08,0.733424,0.000645388,-3.46783e-07,1.07292e-10,0.734069,0.000644695,-3.46461e-07,-2.22271e-08,0.734713,0.000643935,-4.13142e-07,2.91963e-08,0.735357,0.000643197,-3.25553e-07,-3.49536e-08,0.736,0.000642441,-4.30414e-07,5.10133e-08,0.736642,0.000641733,-2.77374e-07,-4.98904e-08,0.737283,0.000641028,-4.27045e-07,2.93392e-08,0.737924,0.000640262,-3.39028e-07,-7.86156e-09,0.738564,0.000639561,-3.62612e-07,2.10703e-09,0.739203,0.000638842,-3.56291e-07,-5.6653e-10,0.739842,0.000638128,-3.57991e-07,1.59086e-10,0.740479,0.000637412,-3.57513e-07,-6.98321e-11,0.741116,0.000636697,-3.57723e-07,1.20214e-10,0.741753,0.000635982,-3.57362e-07,-4.10987e-10,0.742388,0.000635266,-3.58595e-07,1.5237e-09,0.743023,0.000634553,-3.54024e-07,-5.68376e-09,0.743657,0.000633828,-3.71075e-07,2.12113e-08,0.744291,0.00063315,-3.07441e-07,-1.95569e-08,0.744924,0.000632476,-3.66112e-07,-2.58816e-09,0.745556,0.000631736,-3.73877e-07,2.99096e-08,0.746187,0.000631078,-2.84148e-07,-5.74454e-08,0.746818,0.000630337,-4.56484e-07,8.06629e-08,0.747448,0.000629666,-2.14496e-07,-8.63922e-08,0.748077,0.000628978,-4.73672e-07,8.60918e-08,0.748706,0.000628289,-2.15397e-07,-7.91613e-08,0.749334,0.000627621,-4.5288e-07,5.17393e-08,0.749961,0.00062687,-2.97663e-07,-8.58662e-09,0.750588,0.000626249,-3.23422e-07,-1.73928e-08,0.751214,0.00062555,-3.75601e-07,1.85532e-08,0.751839,0.000624855,-3.19941e-07,2.78479e-09,0.752463,0.000624223,-3.11587e-07,-2.96923e-08,0.753087,0.000623511,-4.00664e-07,5.63799e-08,0.75371,0.000622879,-2.31524e-07,-7.66179e-08,0.754333,0.000622186,-4.61378e-07,7.12778e-08,0.754955,0.000621477,-2.47545e-07,-2.96794e-08,0.755576,0.000620893,-3.36583e-07,-1.21648e-08,0.756196,0.000620183,-3.73077e-07,1.87339e-08,0.756816,0.000619493,-3.16875e-07,-3.16622e-09,0.757435,0.00061885,-3.26374e-07,-6.0691e-09,0.758054,0.000618179,-3.44581e-07,2.74426e-08,0.758672,0.000617572,-2.62254e-07,-4.40968e-08,0.759289,0.000616915,-3.94544e-07,2.97352e-08,0.759906,0.000616215,-3.05338e-07,-1.52393e-08,0.760522,0.000615559,-3.51056e-07,3.12221e-08,0.761137,0.000614951,-2.5739e-07,-5.00443e-08,0.761751,0.000614286,-4.07523e-07,4.9746e-08,0.762365,0.00061362,-2.58285e-07,-2.97303e-08,0.762979,0.000613014,-3.47476e-07,9.57079e-09,0.763591,0.000612348,-3.18764e-07,-8.55287e-09,0.764203,0.000611685,-3.44422e-07,2.46407e-08,0.764815,0.00061107,-2.705e-07,-3.04053e-08,0.765426,0.000610437,-3.61716e-07,3.73759e-08,0.766036,0.000609826,-2.49589e-07,-5.94935e-08,0.766645,0.000609149,-4.28069e-07,8.13889e-08,0.767254,0.000608537,-1.83902e-07,-8.72483e-08,0.767862,0.000607907,-4.45647e-07,8.87901e-08,0.76847,0.000607282,-1.79277e-07,-8.90983e-08,0.769077,0.000606656,-4.46572e-07,8.87892e-08,0.769683,0.000606029,-1.80204e-07,-8.72446e-08,0.770289,0.000605407,-4.41938e-07,8.13752e-08,0.770894,0.000604768,-1.97812e-07,-5.94423e-08,0.771498,0.000604194,-3.76139e-07,3.71848e-08,0.772102,0.000603553,-2.64585e-07,-2.96922e-08,0.772705,0.000602935,-3.53661e-07,2.19793e-08,0.773308,0.000602293,-2.87723e-07,1.37955e-09,0.77391,0.000601722,-2.83585e-07,-2.74976e-08,0.774512,0.000601072,-3.66077e-07,4.9006e-08,0.775112,0.000600487,-2.19059e-07,-4.93171e-08,0.775712,0.000599901,-3.67011e-07,2.90531e-08,0.776312,0.000599254,-2.79851e-07,-7.29081e-09,0.776911,0.000598673,-3.01724e-07,1.10077e-10,0.777509,0.00059807,-3.01393e-07,6.85053e-09,0.778107,0.000597487,-2.80842e-07,-2.75123e-08,0.778704,0.000596843,-3.63379e-07,4.35939e-08,0.779301,0.000596247,-2.32597e-07,-2.7654e-08,0.779897,0.000595699,-3.15559e-07,7.41741e-09,0.780492,0.00059509,-2.93307e-07,-2.01562e-09,0.781087,0.000594497,-2.99354e-07,6.45059e-10,0.781681,0.000593901,-2.97418e-07,-5.64635e-10,0.782275,0.000593304,-2.99112e-07,1.61347e-09,0.782868,0.000592711,-2.94272e-07,-5.88926e-09,0.78346,0.000592105,-3.1194e-07,2.19436e-08,0.784052,0.000591546,-2.46109e-07,-2.22805e-08,0.784643,0.000590987,-3.1295e-07,7.57368e-09,0.785234,0.000590384,-2.90229e-07,-8.01428e-09,0.785824,0.00058978,-3.14272e-07,2.44834e-08,0.786414,0.000589225,-2.40822e-07,-3.03148e-08,0.787003,0.000588652,-3.31766e-07,3.7171e-08,0.787591,0.0005881,-2.20253e-07,-5.87646e-08,0.788179,0.000587483,-3.96547e-07,7.86782e-08,0.788766,0.000586926,-1.60512e-07,-7.71342e-08,0.789353,0.000586374,-3.91915e-07,5.10444e-08,0.789939,0.000585743,-2.38782e-07,-7.83422e-09,0.790524,0.000585242,-2.62284e-07,-1.97076e-08,0.791109,0.000584658,-3.21407e-07,2.70598e-08,0.791693,0.000584097,-2.40228e-07,-2.89269e-08,0.792277,0.000583529,-3.27008e-07,2.90431e-08,0.792861,0.000582963,-2.39879e-07,-2.76409e-08,0.793443,0.0005824,-3.22802e-07,2.1916e-08,0.794025,0.00058182,-2.57054e-07,-4.18368e-10,0.794607,0.000581305,-2.58309e-07,-2.02425e-08,0.795188,0.000580727,-3.19036e-07,2.17838e-08,0.795768,0.000580155,-2.53685e-07,-7.28814e-09,0.796348,0.000579625,-2.75549e-07,7.36871e-09,0.796928,0.000579096,-2.53443e-07,-2.21867e-08,0.797506,0.000578523,-3.20003e-07,2.17736e-08,0.798085,0.000577948,-2.54683e-07,-5.30296e-09,0.798662,0.000577423,-2.70592e-07,-5.61698e-10,0.799239,0.00057688,-2.72277e-07,7.54977e-09,0.799816,0.000576358,-2.49627e-07,-2.96374e-08,0.800392,0.00057577,-3.38539e-07,5.1395e-08,0.800968,0.000575247,-1.84354e-07,-5.67335e-08,0.801543,0.000574708,-3.54555e-07,5.63297e-08,0.802117,0.000574168,-1.85566e-07,-4.93759e-08,0.802691,0.000573649,-3.33693e-07,2.19646e-08,0.803264,0.000573047,-2.678e-07,2.1122e-08,0.803837,0.000572575,-2.04433e-07,-4.68482e-08,0.804409,0.000572026,-3.44978e-07,4.70613e-08,0.804981,0.000571477,-2.03794e-07,-2.21877e-08,0.805552,0.000571003,-2.70357e-07,-1.79153e-08,0.806123,0.000570408,-3.24103e-07,3.42443e-08,0.806693,0.000569863,-2.2137e-07,1.47556e-10,0.807263,0.000569421,-2.20928e-07,-3.48345e-08,0.807832,0.000568874,-3.25431e-07,1.99812e-08,0.808401,0.000568283,-2.65487e-07,1.45143e-08,0.808969,0.000567796,-2.21945e-07,-1.84338e-08,0.809536,0.000567297,-2.77246e-07,-3.83608e-10,0.810103,0.000566741,-2.78397e-07,1.99683e-08,0.81067,0.000566244,-2.18492e-07,-1.98848e-08,0.811236,0.000565747,-2.78146e-07,-3.38976e-11,0.811801,0.000565191,-2.78248e-07,2.00204e-08,0.812366,0.000564695,-2.18187e-07,-2.04429e-08,0.812931,0.000564197,-2.79516e-07,2.1467e-09,0.813495,0.000563644,-2.73076e-07,1.18561e-08,0.814058,0.000563134,-2.37507e-07,1.00334e-08,0.814621,0.000562689,-2.07407e-07,-5.19898e-08,0.815183,0.000562118,-3.63376e-07,7.87163e-08,0.815745,0.000561627,-1.27227e-07,-8.40616e-08,0.816306,0.000561121,-3.79412e-07,7.87163e-08,0.816867,0.000560598,-1.43263e-07,-5.19898e-08,0.817428,0.000560156,-2.99233e-07,1.00335e-08,0.817988,0.000559587,-2.69132e-07,1.18559e-08,0.818547,0.000559085,-2.33564e-07,2.14764e-09,0.819106,0.000558624,-2.27122e-07,-2.04464e-08,0.819664,0.000558108,-2.88461e-07,2.00334e-08,0.820222,0.000557591,-2.28361e-07,-8.24277e-11,0.820779,0.000557135,-2.28608e-07,-1.97037e-08,0.821336,0.000556618,-2.87719e-07,1.92925e-08,0.821893,0.000556101,-2.29841e-07,2.13831e-09,0.822448,0.000555647,-2.23427e-07,-2.78458e-08,0.823004,0.000555117,-3.06964e-07,4.96402e-08,0.823559,0.000554652,-1.58043e-07,-5.15058e-08,0.824113,0.000554181,-3.12561e-07,3.71737e-08,0.824667,0.000553668,-2.0104e-07,-3.75844e-08,0.82522,0.000553153,-3.13793e-07,5.35592e-08,0.825773,0.000552686,-1.53115e-07,-5.74431e-08,0.826326,0.000552207,-3.25444e-07,5.7004e-08,0.826878,0.000551728,-1.54433e-07,-5.13635e-08,0.827429,0.000551265,-3.08523e-07,2.92406e-08,0.82798,0.000550735,-2.20801e-07,-5.99424e-09,0.828531,0.000550276,-2.38784e-07,-5.26363e-09,0.829081,0.000549782,-2.54575e-07,2.70488e-08,0.82963,0.000549354,-1.73429e-07,-4.33268e-08,0.83018,0.000548878,-3.03409e-07,2.7049e-08,0.830728,0.000548352,-2.22262e-07,-5.26461e-09,0.831276,0.000547892,-2.38056e-07,-5.99057e-09,0.831824,0.000547397,-2.56027e-07,2.92269e-08,0.832371,0.000546973,-1.68347e-07,-5.13125e-08,0.832918,0.000546482,-3.22284e-07,5.68139e-08,0.833464,0.000546008,-1.51843e-07,-5.67336e-08,0.83401,0.000545534,-3.22043e-07,5.09113e-08,0.834555,0.000545043,-1.6931e-07,-2.77022e-08,0.8351,0.000544621,-2.52416e-07,2.92924e-10,0.835644,0.000544117,-2.51537e-07,2.65305e-08,0.836188,0.000543694,-1.71946e-07,-4.68105e-08,0.836732,0.00054321,-3.12377e-07,4.15021e-08,0.837275,0.000542709,-1.87871e-07,1.13355e-11,0.837817,0.000542334,-1.87837e-07,-4.15474e-08,0.838359,0.000541833,-3.12479e-07,4.69691e-08,0.838901,0.000541349,-1.71572e-07,-2.71196e-08,0.839442,0.000540925,-2.52931e-07,1.90462e-09,0.839983,0.000540425,-2.47217e-07,1.95011e-08,0.840523,0.000539989,-1.88713e-07,-2.03045e-08,0.841063,0.00053955,-2.49627e-07,2.11216e-09,0.841602,0.000539057,-2.4329e-07,1.18558e-08,0.842141,0.000538606,-2.07723e-07,1.00691e-08,0.842679,0.000538221,-1.77516e-07,-5.21324e-08,0.843217,0.00053771,-3.33913e-07,7.92513e-08,0.843755,0.00053728,-9.6159e-08,-8.60587e-08,0.844292,0.000536829,-3.54335e-07,8.61696e-08,0.844828,0.000536379,-9.58263e-08,-7.98057e-08,0.845364,0.000535948,-3.35243e-07,5.42394e-08,0.8459,0.00053544,-1.72525e-07,-1.79426e-08,0.846435,0.000535041,-2.26353e-07,1.75308e-08,0.84697,0.000534641,-1.73761e-07,-5.21806e-08,0.847505,0.000534137,-3.30302e-07,7.19824e-08,0.848038,0.000533692,-1.14355e-07,-5.69349e-08,0.848572,0.000533293,-2.8516e-07,3.65479e-08,0.849105,0.000532832,-1.75516e-07,-2.96519e-08,0.849638,0.000532392,-2.64472e-07,2.2455e-08,0.85017,0.000531931,-1.97107e-07,-5.63451e-10,0.850702,0.000531535,-1.98797e-07,-2.02011e-08,0.851233,0.000531077,-2.59401e-07,2.17634e-08,0.851764,0.000530623,-1.94111e-07,-7.24794e-09,0.852294,0.000530213,-2.15854e-07,7.22832e-09,0.852824,0.000529803,-1.94169e-07,-2.16653e-08,0.853354,0.00052935,-2.59165e-07,1.98283e-08,0.853883,0.000528891,-1.9968e-07,1.95678e-09,0.854412,0.000528497,-1.9381e-07,-2.76554e-08,0.85494,0.000528027,-2.76776e-07,4.90603e-08,0.855468,0.00052762,-1.29596e-07,-4.93764e-08,0.855995,0.000527213,-2.77725e-07,2.92361e-08,0.856522,0.000526745,-1.90016e-07,-7.96341e-09,0.857049,0.000526341,-2.13907e-07,2.61752e-09,0.857575,0.000525922,-2.06054e-07,-2.50665e-09,0.8581,0.000525502,-2.13574e-07,7.40906e-09,0.858626,0.000525097,-1.91347e-07,-2.71296e-08,0.859151,0.000524633,-2.72736e-07,4.15048e-08,0.859675,0.000524212,-1.48221e-07,-1.96802e-08,0.860199,0.000523856,-2.07262e-07,-2.23886e-08,0.860723,0.000523375,-2.74428e-07,4.96299e-08,0.861246,0.000522975,-1.25538e-07,-5.69216e-08,0.861769,0.000522553,-2.96303e-07,5.88473e-08,0.862291,0.000522137,-1.19761e-07,-5.92584e-08,0.862813,0.00052172,-2.97536e-07,5.8977e-08,0.863334,0.000521301,-1.20605e-07,-5.74403e-08,0.863855,0.000520888,-2.92926e-07,5.15751e-08,0.864376,0.000520457,-1.38201e-07,-2.96506e-08,0.864896,0.000520091,-2.27153e-07,7.42277e-09,0.865416,0.000519659,-2.04885e-07,-4.05057e-11,0.865936,0.00051925,-2.05006e-07,-7.26074e-09,0.866455,0.000518818,-2.26788e-07,2.90835e-08,0.866973,0.000518451,-1.39538e-07,-4.94686e-08,0.867492,0.000518024,-2.87944e-07,4.95814e-08,0.868009,0.000517597,-1.39199e-07,-2.96479e-08,0.868527,0.000517229,-2.28143e-07,9.40539e-09,0.869044,0.000516801,-1.99927e-07,-7.9737e-09,0.86956,0.000516378,-2.23848e-07,2.24894e-08,0.870077,0.000515997,-1.5638e-07,-2.23793e-08,0.870592,0.000515617,-2.23517e-07,7.42302e-09,0.871108,0.000515193,-2.01248e-07,-7.31283e-09,0.871623,0.000514768,-2.23187e-07,2.18283e-08,0.872137,0.000514387,-1.57702e-07,-2.03959e-08,0.872652,0.000514011,-2.1889e-07,1.50711e-10,0.873165,0.000513573,-2.18437e-07,1.97931e-08,0.873679,0.000513196,-1.59058e-07,-1.97183e-08,0.874192,0.000512819,-2.18213e-07,-5.24324e-10,0.874704,0.000512381,-2.19786e-07,2.18156e-08,0.875217,0.000512007,-1.54339e-07,-2.71336e-08,0.875728,0.000511616,-2.3574e-07,2.71141e-08,0.87624,0.000511226,-1.54398e-07,-2.17182e-08,0.876751,0.000510852,-2.19552e-07,1.54131e-10,0.877262,0.000510414,-2.1909e-07,2.11017e-08,0.877772,0.000510039,-1.55785e-07,-2.49562e-08,0.878282,0.000509652,-2.30654e-07,1.91183e-08,0.878791,0.000509248,-1.73299e-07,8.08751e-09,0.8793,0.000508926,-1.49036e-07,-5.14684e-08,0.879809,0.000508474,-3.03441e-07,7.85766e-08,0.880317,0.000508103,-6.77112e-08,-8.40242e-08,0.880825,0.000507715,-3.19784e-07,7.87063e-08,0.881333,0.000507312,-8.36649e-08,-5.19871e-08,0.88184,0.000506988,-2.39626e-07,1.00327e-08,0.882346,0.000506539,-2.09528e-07,1.18562e-08,0.882853,0.000506156,-1.73959e-07,2.14703e-09,0.883359,0.000505814,-1.67518e-07,-2.04444e-08,0.883864,0.000505418,-2.28851e-07,2.00258e-08,0.88437,0.00050502,-1.68774e-07,-5.42855e-11,0.884874,0.000504682,-1.68937e-07,-1.98087e-08,0.885379,0.000504285,-2.28363e-07,1.96842e-08,0.885883,0.000503887,-1.6931e-07,6.76342e-10,0.886387,0.000503551,-1.67281e-07,-2.23896e-08,0.88689,0.000503149,-2.3445e-07,2.92774e-08,0.887393,0.000502768,-1.46618e-07,-3.51152e-08,0.887896,0.00050237,-2.51963e-07,5.15787e-08,0.888398,0.00050202,-9.72271e-08,-5.19903e-08,0.8889,0.00050167,-2.53198e-07,3.71732e-08,0.889401,0.000501275,-1.41678e-07,-3.70978e-08,0.889902,0.00050088,-2.52972e-07,5.16132e-08,0.890403,0.000500529,-9.81321e-08,-5.01459e-08,0.890903,0.000500183,-2.4857e-07,2.9761e-08,0.891403,0.000499775,-1.59287e-07,-9.29351e-09,0.891903,0.000499428,-1.87167e-07,7.41301e-09,0.892402,0.000499076,-1.64928e-07,-2.03585e-08,0.892901,0.000498685,-2.26004e-07,1.44165e-08,0.893399,0.000498276,-1.82754e-07,2.22974e-08,0.893898,0.000497978,-1.15862e-07,-4.40013e-08,0.894395,0.000497614,-2.47866e-07,3.44985e-08,0.894893,0.000497222,-1.44371e-07,-3.43882e-08,0.89539,0.00049683,-2.47535e-07,4.34497e-08,0.895886,0.000496465,-1.17186e-07,-2.02012e-08,0.896383,0.00049617,-1.7779e-07,-2.22497e-08,0.896879,0.000495748,-2.44539e-07,4.95952e-08,0.897374,0.000495408,-9.57532e-08,-5.69217e-08,0.89787,0.000495045,-2.66518e-07,5.88823e-08,0.898364,0.000494689,-8.98713e-08,-5.93983e-08,0.898859,0.000494331,-2.68066e-07,5.95017e-08,0.899353,0.000493973,-8.95613e-08,-5.9399e-08,0.899847,0.000493616,-2.67758e-07,5.8885e-08,0.90034,0.000493257,-9.11033e-08,-5.69317e-08,0.900833,0.000492904,-2.61898e-07,4.96326e-08,0.901326,0.000492529,-1.13001e-07,-2.23893e-08,0.901819,0.000492236,-1.80169e-07,-1.968e-08,0.902311,0.000491817,-2.39209e-07,4.15047e-08,0.902802,0.000491463,-1.14694e-07,-2.71296e-08,0.903293,0.000491152,-1.96083e-07,7.409e-09,0.903784,0.000490782,-1.73856e-07,-2.50645e-09,0.904275,0.000490427,-1.81376e-07,2.61679e-09,0.904765,0.000490072,-1.73525e-07,-7.96072e-09,0.905255,0.000489701,-1.97407e-07,2.92261e-08,0.905745,0.000489394,-1.09729e-07,-4.93389e-08,0.906234,0.000489027,-2.57746e-07,4.89204e-08,0.906723,0.000488658,-1.10985e-07,-2.71333e-08,0.907211,0.000488354,-1.92385e-07,8.30861e-12,0.907699,0.00048797,-1.9236e-07,2.71001e-08,0.908187,0.000487666,-1.1106e-07,-4.88041e-08,0.908675,0.000487298,-2.57472e-07,4.89069e-08,0.909162,0.000486929,-1.10751e-07,-2.76143e-08,0.909649,0.000486625,-1.93594e-07,1.9457e-09,0.910135,0.000486244,-1.87757e-07,1.98315e-08,0.910621,0.000485928,-1.28262e-07,-2.16671e-08,0.911107,0.000485606,-1.93264e-07,7.23216e-09,0.911592,0.000485241,-1.71567e-07,-7.26152e-09,0.912077,0.000484877,-1.93352e-07,2.18139e-08,0.912562,0.000484555,-1.2791e-07,-2.03895e-08,0.913047,0.000484238,-1.89078e-07,1.39494e-10,0.913531,0.000483861,-1.8866e-07,1.98315e-08,0.914014,0.000483543,-1.29165e-07,-1.98609e-08,0.914498,0.000483225,-1.88748e-07,7.39912e-12,0.914981,0.000482847,-1.88726e-07,1.98313e-08,0.915463,0.000482529,-1.29232e-07,-1.9728e-08,0.915946,0.000482212,-1.88416e-07,-5.24035e-10,0.916428,0.000481833,-1.89988e-07,2.18241e-08,0.916909,0.000481519,-1.24516e-07,-2.71679e-08,0.917391,0.000481188,-2.06019e-07,2.72427e-08,0.917872,0.000480858,-1.24291e-07,-2.21985e-08,0.918353,0.000480543,-1.90886e-07,1.94644e-09,0.918833,0.000480167,-1.85047e-07,1.44127e-08,0.919313,0.00047984,-1.41809e-07,7.39438e-12,0.919793,0.000479556,-1.41787e-07,-1.44423e-08,0.920272,0.000479229,-1.85114e-07,-1.84291e-09,0.920751,0.000478854,-1.90642e-07,2.18139e-08,0.92123,0.000478538,-1.25201e-07,-2.58081e-08,0.921708,0.00047821,-2.02625e-07,2.18139e-08,0.922186,0.00047787,-1.37183e-07,-1.84291e-09,0.922664,0.00047759,-1.42712e-07,-1.44423e-08,0.923141,0.000477262,-1.86039e-07,7.34701e-12,0.923618,0.00047689,-1.86017e-07,1.44129e-08,0.924095,0.000476561,-1.42778e-07,1.94572e-09,0.924572,0.000476281,-1.36941e-07,-2.21958e-08,0.925048,0.000475941,-2.03528e-07,2.72327e-08,0.925523,0.000475615,-1.2183e-07,-2.71304e-08,0.925999,0.00047529,-2.03221e-07,2.16843e-08,0.926474,0.000474949,-1.38168e-07,-2.16005e-12,0.926949,0.000474672,-1.38175e-07,-2.16756e-08,0.927423,0.000474331,-2.03202e-07,2.71001e-08,0.927897,0.000474006,-1.21902e-07,-2.71201e-08,0.928371,0.000473681,-2.03262e-07,2.17757e-08,0.928845,0.00047334,-1.37935e-07,-3.78028e-10,0.929318,0.000473063,-1.39069e-07,-2.02636e-08,0.929791,0.000472724,-1.9986e-07,2.18276e-08,0.930263,0.000472389,-1.34377e-07,-7.44231e-09,0.930736,0.000472098,-1.56704e-07,7.94165e-09,0.931208,0.000471809,-1.32879e-07,-2.43243e-08,0.931679,0.00047147,-2.05851e-07,2.97508e-08,0.932151,0.000471148,-1.16599e-07,-3.50742e-08,0.932622,0.000470809,-2.21822e-07,5.09414e-08,0.933092,0.000470518,-6.89976e-08,-4.94821e-08,0.933563,0.000470232,-2.17444e-07,2.77775e-08,0.934033,0.00046988,-1.34111e-07,-2.02351e-09,0.934502,0.000469606,-1.40182e-07,-1.96835e-08,0.934972,0.000469267,-1.99232e-07,2.11529e-08,0.935441,0.000468932,-1.35774e-07,-5.32332e-09,0.93591,0.000468644,-1.51743e-07,1.40413e-10,0.936378,0.000468341,-1.51322e-07,4.76166e-09,0.936846,0.000468053,-1.37037e-07,-1.9187e-08,0.937314,0.000467721,-1.94598e-07,1.23819e-08,0.937782,0.000467369,-1.57453e-07,2.92642e-08,0.938249,0.000467142,-6.96601e-08,-6.98342e-08,0.938716,0.000466793,-2.79163e-07,7.12586e-08,0.939183,0.000466449,-6.53869e-08,-3.63863e-08,0.939649,0.000466209,-1.74546e-07,1.46818e-08,0.940115,0.000465904,-1.305e-07,-2.2341e-08,0.940581,0.000465576,-1.97523e-07,1.50774e-08,0.941046,0.000465226,-1.52291e-07,2.16359e-08,0.941511,0.000464986,-8.73832e-08,-4.20162e-08,0.941976,0.000464685,-2.13432e-07,2.72198e-08,0.942441,0.00046434,-1.31773e-07,-7.2581e-09,0.942905,0.000464055,-1.53547e-07,1.81263e-09,0.943369,0.000463753,-1.48109e-07,7.58386e-12,0.943832,0.000463457,-1.48086e-07,-1.84298e-09,0.944296,0.000463155,-1.53615e-07,7.36433e-09,0.944759,0.00046287,-1.31522e-07,-2.76143e-08,0.945221,0.000462524,-2.14365e-07,4.34883e-08,0.945684,0.000462226,-8.39003e-08,-2.71297e-08,0.946146,0.000461977,-1.65289e-07,5.42595e-09,0.946608,0.000461662,-1.49012e-07,5.42593e-09,0.947069,0.000461381,-1.32734e-07,-2.71297e-08,0.94753,0.000461034,-2.14123e-07,4.34881e-08,0.947991,0.000460736,-8.36585e-08,-2.76134e-08,0.948452,0.000460486,-1.66499e-07,7.36083e-09,0.948912,0.000460175,-1.44416e-07,-1.82993e-09,0.949372,0.000459881,-1.49906e-07,-4.11073e-11,0.949832,0.000459581,-1.50029e-07,1.99434e-09,0.950291,0.000459287,-1.44046e-07,-7.93627e-09,0.950751,0.000458975,-1.67855e-07,2.97507e-08,0.951209,0.000458728,-7.86029e-08,-5.1462e-08,0.951668,0.000458417,-2.32989e-07,5.6888e-08,0.952126,0.000458121,-6.2325e-08,-5.68806e-08,0.952584,0.000457826,-2.32967e-07,5.14251e-08,0.953042,0.000457514,-7.86914e-08,-2.96107e-08,0.953499,0.000457268,-1.67523e-07,7.41296e-09,0.953956,0.000456955,-1.45285e-07,-4.11262e-11,0.954413,0.000456665,-1.45408e-07,-7.24847e-09,0.95487,0.000456352,-1.67153e-07,2.9035e-08,0.955326,0.000456105,-8.00484e-08,-4.92869e-08,0.955782,0.000455797,-2.27909e-07,4.89032e-08,0.956238,0.000455488,-8.11994e-08,-2.71166e-08,0.956693,0.000455244,-1.62549e-07,-4.13678e-11,0.957148,0.000454919,-1.62673e-07,2.72821e-08,0.957603,0.000454675,-8.0827e-08,-4.94824e-08,0.958057,0.000454365,-2.29274e-07,5.14382e-08,0.958512,0.000454061,-7.49597e-08,-3.7061e-08,0.958965,0.0004538,-1.86143e-07,3.72013e-08,0.959419,0.000453539,-7.45389e-08,-5.21396e-08,0.959873,0.000453234,-2.30958e-07,5.21476e-08,0.960326,0.000452928,-7.45146e-08,-3.72416e-08,0.960778,0.000452667,-1.8624e-07,3.72143e-08,0.961231,0.000452407,-7.45967e-08,-5.20109e-08,0.961683,0.000452101,-2.30629e-07,5.16199e-08,0.962135,0.000451795,-7.57696e-08,-3.52595e-08,0.962587,0.000451538,-1.81548e-07,2.98133e-08,0.963038,0.000451264,-9.2108e-08,-2.43892e-08,0.963489,0.000451007,-1.65276e-07,8.13892e-09,0.96394,0.000450701,-1.40859e-07,-8.16647e-09,0.964391,0.000450394,-1.65358e-07,2.45269e-08,0.964841,0.000450137,-9.17775e-08,-3.03367e-08,0.965291,0.000449863,-1.82787e-07,3.7215e-08,0.965741,0.000449609,-7.11424e-08,-5.89188e-08,0.96619,0.00044929,-2.47899e-07,7.92509e-08,0.966639,0.000449032,-1.01462e-08,-7.92707e-08,0.967088,0.000448773,-2.47958e-07,5.90181e-08,0.967537,0.000448455,-7.0904e-08,-3.75925e-08,0.967985,0.0004482,-1.83681e-07,3.17471e-08,0.968433,0.000447928,-8.84401e-08,-2.97913e-08,0.968881,0.000447662,-1.77814e-07,2.78133e-08,0.969329,0.000447389,-9.4374e-08,-2.18572e-08,0.969776,0.000447135,-1.59946e-07,1.10134e-11,0.970223,0.000446815,-1.59913e-07,2.18132e-08,0.97067,0.000446561,-9.44732e-08,-2.76591e-08,0.971116,0.000446289,-1.7745e-07,2.92185e-08,0.971562,0.000446022,-8.97948e-08,-2.96104e-08,0.972008,0.000445753,-1.78626e-07,2.96185e-08,0.972454,0.000445485,-8.97706e-08,-2.92588e-08,0.972899,0.000445218,-1.77547e-07,2.78123e-08,0.973344,0.000444946,-9.41103e-08,-2.23856e-08,0.973789,0.000444691,-1.61267e-07,2.12559e-09,0.974233,0.000444374,-1.5489e-07,1.38833e-08,0.974678,0.000444106,-1.13241e-07,1.94591e-09,0.975122,0.000443886,-1.07403e-07,-2.16669e-08,0.975565,0.000443606,-1.72404e-07,2.5117e-08,0.976009,0.000443336,-9.70526e-08,-1.91963e-08,0.976452,0.000443085,-1.54642e-07,-7.93627e-09,0.976895,0.000442752,-1.7845e-07,5.09414e-08,0.977338,0.000442548,-2.56262e-08,-7.66201e-08,0.97778,0.000442266,-2.55486e-07,7.67249e-08,0.978222,0.000441986,-2.53118e-08,-5.14655e-08,0.978664,0.000441781,-1.79708e-07,9.92773e-09,0.979106,0.000441451,-1.49925e-07,1.17546e-08,0.979547,0.000441186,-1.14661e-07,2.65868e-09,0.979988,0.000440965,-1.06685e-07,-2.23893e-08,0.980429,0.000440684,-1.73853e-07,2.72939e-08,0.980869,0.000440419,-9.19716e-08,-2.71816e-08,0.98131,0.000440153,-1.73516e-07,2.18278e-08,0.98175,0.000439872,-1.08033e-07,-5.24833e-10,0.982189,0.000439654,-1.09607e-07,-1.97284e-08,0.982629,0.000439376,-1.68793e-07,1.98339e-08,0.983068,0.000439097,-1.09291e-07,-2.62901e-12,0.983507,0.000438879,-1.09299e-07,-1.98234e-08,0.983946,0.000438601,-1.68769e-07,1.96916e-08,0.984384,0.000438322,-1.09694e-07,6.6157e-10,0.984823,0.000438105,-1.0771e-07,-2.23379e-08,0.985261,0.000437823,-1.74723e-07,2.90855e-08,0.985698,0.00043756,-8.74669e-08,-3.43992e-08,0.986136,0.000437282,-1.90665e-07,4.89068e-08,0.986573,0.000437048,-4.39442e-08,-4.20188e-08,0.98701,0.000436834,-1.7e-07,-4.11073e-11,0.987446,0.000436494,-1.70124e-07,4.21832e-08,0.987883,0.00043628,-4.35742e-08,-4.94824e-08,0.988319,0.000436044,-1.92021e-07,3.6537e-08,0.988755,0.00043577,-8.24102e-08,-3.70611e-08,0.989191,0.000435494,-1.93593e-07,5.21026e-08,0.989626,0.000435263,-3.72855e-08,-5.21402e-08,0.990061,0.000435032,-1.93706e-07,3.7249e-08,0.990496,0.000434756,-8.19592e-08,-3.72512e-08,0.990931,0.000434481,-1.93713e-07,5.21511e-08,0.991365,0.00043425,-3.72595e-08,-5.21439e-08,0.991799,0.000434019,-1.93691e-07,3.72152e-08,0.992233,0.000433743,-8.20456e-08,-3.71123e-08,0.992667,0.000433468,-1.93382e-07,5.16292e-08,0.9931,0.000433236,-3.84947e-08,-5.01953e-08,0.993533,0.000433008,-1.89081e-07,2.99427e-08,0.993966,0.00043272,-9.92525e-08,-9.9708e-09,0.994399,0.000432491,-1.29165e-07,9.94051e-09,0.994831,0.000432263,-9.93434e-08,-2.97912e-08,0.995263,0.000431975,-1.88717e-07,4.96198e-08,0.995695,0.000431746,-3.98578e-08,-4.94785e-08,0.996127,0.000431518,-1.88293e-07,2.9085e-08,0.996558,0.000431229,-1.01038e-07,-7.25675e-09,0.996989,0.000431005,-1.22809e-07,-5.79945e-11,0.99742,0.000430759,-1.22983e-07,7.48873e-09,0.997851,0.000430536,-1.00516e-07,-2.98969e-08,0.998281,0.000430245,-1.90207e-07,5.24942e-08,0.998711,0.000430022,-3.27246e-08,-6.08706e-08,0.999141,0.000429774,-2.15336e-07,7.17788e-08,0.999571,0.000429392,0.,0.};
+
+    template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct Lab2RGB;
+
+    template <int scn, int dcn, bool srgb, int blueIdx> struct Lab2RGB<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float lThresh = 0.008856f * 903.3f;
+            const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
+
+            float Y, fy;
+
+            if (src.x <= lThresh)
+            {
+                Y = src.x / 903.3f;
+                fy = 7.787f * Y + 16.0f / 116.0f;
+            }
+            else
+            {
+                fy = (src.x + 16.0f) / 116.0f;
+                Y = fy * fy * fy;
+            }
+
+            float X = src.y / 500.0f + fy;
+            float Z = fy - src.z / 200.0f;
+
+            if (X <= fThresh)
+                X = (X - 16.0f / 116.0f) / 7.787f;
+            else
+                X = X * X * X;
+
+            if (Z <= fThresh)
+                Z = (Z - 16.0f / 116.0f) / 7.787f;
+            else
+                Z = Z * Z * Z;
+
+            float B = 0.052891f * X - 0.204043f * Y + 1.151152f * Z;
+            float G = -0.921235f * X + 1.875991f * Y + 0.045244f * Z;
+            float R = 3.079933f * X - 1.537150f * Y - 0.542782f * Z;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = blueIdx == 0 ? B : R;
+            dst.y = G;
+            dst.z = blueIdx == 0 ? R : B;
+            setAlpha(dst, ColorChannel<float>::max());
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, bool srgb, int blueIdx> struct Lab2RGB<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename MakeVec<uchar, scn>::type, typename MakeVec<uchar, dcn>::type>
+    {
+        __device__ typename MakeVec<uchar, dcn>::type operator ()(const typename MakeVec<uchar, scn>::type& src) const
+        {
+            float3 buf;
+
+            buf.x = src.x * (100.f / 255.f);
+            buf.y = src.y - 128;
+            buf.z = src.z - 128;
+
+            Lab2RGB<float, 3, 3, srgb, blueIdx> cvtf;
+            buf = cvtf(buf);
+
+            typename MakeVec<uchar, dcn>::type dst;
+
+            dst.x = saturate_cast<uchar>(buf.x * 255.f);
+            dst.y = saturate_cast<uchar>(buf.y * 255.f);
+            dst.z = saturate_cast<uchar>(buf.z * 255.f);
+            setAlpha(dst, ColorChannel<uchar>::max());
+
+            return dst;
+        }
+    };
+
+    // RGB to Luv
+
+    __constant__ float c_LabCbrtTab[] = {0.137931,0.0114066,0.,1.18859e-07,0.149338,0.011407,3.56578e-07,-5.79396e-07,0.160745,0.0114059,-1.38161e-06,2.16892e-06,0.172151,0.0114097,5.12516e-06,-8.0814e-06,0.183558,0.0113957,-1.9119e-05,3.01567e-05,0.194965,0.0114479,7.13509e-05,-0.000112545,0.206371,0.011253,-0.000266285,-0.000106493,0.217252,0.0104009,-0.000585765,7.32149e-05,0.22714,0.00944906,-0.00036612,1.21917e-05,0.236235,0.0087534,-0.000329545,2.01753e-05,0.244679,0.00815483,-0.000269019,1.24435e-05,0.252577,0.00765412,-0.000231689,1.05618e-05,0.26001,0.00722243,-0.000200003,8.26662e-06,0.267041,0.00684723,-0.000175203,6.76746e-06,0.27372,0.00651712,-0.000154901,5.61192e-06,0.280088,0.00622416,-0.000138065,4.67009e-06,0.286179,0.00596204,-0.000124055,3.99012e-06,0.292021,0.0057259,-0.000112085,3.36032e-06,0.297638,0.00551181,-0.000102004,2.95338e-06,0.30305,0.00531666,-9.31435e-05,2.52875e-06,0.308277,0.00513796,-8.55572e-05,2.22022e-06,0.313331,0.00497351,-7.88966e-05,1.97163e-06,0.318228,0.00482163,-7.29817e-05,1.7248e-06,0.322978,0.00468084,-6.78073e-05,1.55998e-06,0.327593,0.0045499,-6.31274e-05,1.36343e-06,0.332081,0.00442774,-5.90371e-05,1.27136e-06,0.336451,0.00431348,-5.5223e-05,1.09111e-06,0.34071,0.00420631,-5.19496e-05,1.0399e-06,0.344866,0.00410553,-4.88299e-05,9.18347e-07,0.348923,0.00401062,-4.60749e-05,8.29942e-07,0.352889,0.00392096,-4.35851e-05,7.98478e-07,0.356767,0.00383619,-4.11896e-05,6.84917e-07,0.360562,0.00375586,-3.91349e-05,6.63976e-07,0.36428,0.00367959,-3.7143e-05,5.93086e-07,0.367923,0.00360708,-3.53637e-05,5.6976e-07,0.371495,0.00353806,-3.36544e-05,4.95533e-07,0.375,0.00347224,-3.21678e-05,4.87951e-07,0.378441,0.00340937,-3.0704e-05,4.4349e-07,0.38182,0.00334929,-2.93735e-05,4.20297e-07,0.38514,0.0032918,-2.81126e-05,3.7872e-07,0.388404,0.00323671,-2.69764e-05,3.596e-07,0.391614,0.00318384,-2.58976e-05,3.5845e-07,0.394772,0.00313312,-2.48223e-05,2.92765e-07,0.397881,0.00308435,-2.3944e-05,3.18232e-07,0.400942,0.00303742,-2.29893e-05,2.82046e-07,0.403957,0.00299229,-2.21432e-05,2.52315e-07,0.406927,0.00294876,-2.13862e-05,2.58416e-07,0.409855,0.00290676,-2.0611e-05,2.33939e-07,0.412741,0.00286624,-1.99092e-05,2.36342e-07,0.415587,0.00282713,-1.92001e-05,1.916e-07,0.418396,0.00278931,-1.86253e-05,2.1915e-07,0.421167,0.00275271,-1.79679e-05,1.83498e-07,0.423901,0.00271733,-1.74174e-05,1.79343e-07,0.426602,0.00268303,-1.68794e-05,1.72013e-07,0.429268,0.00264979,-1.63633e-05,1.75686e-07,0.431901,0.00261759,-1.58363e-05,1.3852e-07,0.434503,0.00258633,-1.54207e-05,1.64304e-07,0.437074,0.00255598,-1.49278e-05,1.28136e-07,0.439616,0.00252651,-1.45434e-05,1.57618e-07,0.442128,0.0024979,-1.40705e-05,1.0566e-07,0.444612,0.00247007,-1.37535e-05,1.34998e-07,0.447068,0.00244297,-1.33485e-05,1.29207e-07,0.449498,0.00241666,-1.29609e-05,9.32347e-08,0.451902,0.00239102,-1.26812e-05,1.23703e-07,0.45428,0.00236603,-1.23101e-05,9.74072e-08,0.456634,0.0023417,-1.20179e-05,1.12518e-07,0.458964,0.002318,-1.16803e-05,7.83681e-08,0.46127,0.00229488,-1.14452e-05,1.10452e-07,0.463554,0.00227232,-1.11139e-05,7.58719e-08,0.465815,0.00225032,-1.08863e-05,9.2699e-08,0.468055,0.00222882,-1.06082e-05,8.97738e-08,0.470273,0.00220788,-1.03388e-05,5.4845e-08,0.47247,0.00218736,-1.01743e-05,1.0808e-07,0.474648,0.00216734,-9.85007e-06,4.9277e-08,0.476805,0.00214779,-9.70224e-06,8.22408e-08,0.478943,0.00212863,-9.45551e-06,6.87942e-08,0.481063,0.00210993,-9.24913e-06,5.98144e-08,0.483163,0.00209161,-9.06969e-06,7.93789e-08,0.485246,0.00207371,-8.83155e-06,3.99032e-08,0.487311,0.00205616,-8.71184e-06,8.88325e-08,0.489358,0.002039,-8.44534e-06,2.20004e-08,0.491389,0.00202218,-8.37934e-06,9.13872e-08,0.493403,0.0020057,-8.10518e-06,2.96829e-08,0.495401,0.00198957,-8.01613e-06,5.81028e-08,0.497382,0.00197372,-7.84183e-06,6.5731e-08,0.499348,0.00195823,-7.64463e-06,3.66019e-08,0.501299,0.00194305,-7.53483e-06,2.62811e-08,0.503234,0.00192806,-7.45598e-06,9.66907e-08,0.505155,0.00191344,-7.16591e-06,4.18928e-09,0.507061,0.00189912,-7.15334e-06,6.53665e-08,0.508953,0.00188501,-6.95724e-06,3.23686e-08,0.510831,0.00187119,-6.86014e-06,4.35774e-08,0.512696,0.0018576,-6.72941e-06,3.17406e-08,0.514547,0.00184424,-6.63418e-06,6.78785e-08,0.516384,0.00183117,-6.43055e-06,-5.23126e-09,0.518209,0.0018183,-6.44624e-06,7.22562e-08,0.520021,0.00180562,-6.22947e-06,1.42292e-08,0.52182,0.0017932,-6.18679e-06,4.9641e-08,0.523607,0.00178098,-6.03786e-06,2.56259e-08,0.525382,0.00176898,-5.96099e-06,2.66696e-08,0.527145,0.00175714,-5.88098e-06,4.65094e-08,0.528897,0.00174552,-5.74145e-06,2.57114e-08,0.530637,0.00173411,-5.66431e-06,2.94588e-08,0.532365,0.00172287,-5.57594e-06,3.52667e-08,0.534082,0.00171182,-5.47014e-06,8.28868e-09,0.535789,0.00170091,-5.44527e-06,5.07871e-08,0.537484,0.00169017,-5.29291e-06,2.69817e-08,0.539169,0.00167967,-5.21197e-06,2.01009e-08,0.540844,0.0016693,-5.15166e-06,1.18237e-08,0.542508,0.00165903,-5.11619e-06,5.18135e-08,0.544162,0.00164896,-4.96075e-06,1.9341e-08,0.545806,0.00163909,-4.90273e-06,-9.96867e-09,0.54744,0.00162926,-4.93263e-06,8.01382e-08,0.549064,0.00161963,-4.69222e-06,-1.25601e-08,0.550679,0.00161021,-4.7299e-06,2.97067e-08,0.552285,0.00160084,-4.64078e-06,1.29426e-08,0.553881,0.0015916,-4.60195e-06,3.77327e-08,0.555468,0.00158251,-4.48875e-06,1.49412e-08,0.557046,0.00157357,-4.44393e-06,2.17118e-08,0.558615,0.00156475,-4.3788e-06,1.74206e-08,0.560176,0.00155605,-4.32653e-06,2.78152e-08,0.561727,0.00154748,-4.24309e-06,-9.47239e-09,0.563271,0.00153896,-4.27151e-06,6.9679e-08,0.564805,0.00153063,-4.06247e-06,-3.08246e-08,0.566332,0.00152241,-4.15494e-06,5.36188e-08,0.56785,0.00151426,-3.99409e-06,-4.83594e-09,0.56936,0.00150626,-4.00859e-06,2.53293e-08,0.570863,0.00149832,-3.93261e-06,2.27286e-08,0.572357,0.00149052,-3.86442e-06,2.96541e-09,0.573844,0.0014828,-3.85552e-06,2.50147e-08,0.575323,0.00147516,-3.78048e-06,1.61842e-08,0.576794,0.00146765,-3.73193e-06,2.94582e-08,0.578258,0.00146028,-3.64355e-06,-1.48076e-08,0.579715,0.00145295,-3.68798e-06,2.97724e-08,0.581164,0.00144566,-3.59866e-06,1.49272e-08,0.582606,0.00143851,-3.55388e-06,2.97285e-08,0.584041,0.00143149,-3.46469e-06,-1.46323e-08,0.585469,0.00142451,-3.50859e-06,2.88004e-08,0.58689,0.00141758,-3.42219e-06,1.864e-08,0.588304,0.00141079,-3.36627e-06,1.58482e-08,0.589712,0.00140411,-3.31872e-06,-2.24279e-08,0.591112,0.00139741,-3.38601e-06,7.38639e-08,0.592507,0.00139085,-3.16441e-06,-3.46088e-08,0.593894,0.00138442,-3.26824e-06,4.96675e-09,0.595275,0.0013779,-3.25334e-06,7.4346e-08,0.59665,0.00137162,-3.0303e-06,-6.39319e-08,0.598019,0.00136536,-3.2221e-06,6.21725e-08,0.599381,0.00135911,-3.03558e-06,-5.94423e-09,0.600737,0.00135302,-3.05341e-06,2.12091e-08,0.602087,0.00134697,-2.98979e-06,-1.92876e-08,0.603431,0.00134094,-3.04765e-06,5.5941e-08,0.604769,0.00133501,-2.87983e-06,-2.56622e-08,0.606101,0.00132917,-2.95681e-06,4.67078e-08,0.607427,0.0013234,-2.81669e-06,-4.19592e-08,0.608748,0.00131764,-2.94257e-06,6.15243e-08,0.610062,0.00131194,-2.75799e-06,-2.53244e-08,0.611372,0.00130635,-2.83397e-06,3.97739e-08,0.612675,0.0013008,-2.71465e-06,-1.45618e-08,0.613973,0.00129533,-2.75833e-06,1.84733e-08,0.615266,0.00128986,-2.70291e-06,2.73606e-10,0.616553,0.00128446,-2.70209e-06,4.00367e-08,0.617835,0.00127918,-2.58198e-06,-4.12113e-08,0.619111,0.00127389,-2.70561e-06,6.52039e-08,0.620383,0.00126867,-2.51e-06,-4.07901e-08,0.621649,0.00126353,-2.63237e-06,3.83516e-08,0.62291,0.00125838,-2.51732e-06,6.59315e-09,0.624166,0.00125337,-2.49754e-06,-5.11939e-09,0.625416,0.00124836,-2.5129e-06,1.38846e-08,0.626662,0.00124337,-2.47124e-06,9.18514e-09,0.627903,0.00123846,-2.44369e-06,8.97952e-09,0.629139,0.0012336,-2.41675e-06,1.45012e-08,0.63037,0.00122881,-2.37325e-06,-7.37949e-09,0.631597,0.00122404,-2.39538e-06,1.50169e-08,0.632818,0.00121929,-2.35033e-06,6.91648e-09,0.634035,0.00121461,-2.32958e-06,1.69219e-08,0.635248,0.00121,-2.27882e-06,-1.49997e-08,0.636455,0.0012054,-2.32382e-06,4.30769e-08,0.637659,0.00120088,-2.19459e-06,-3.80986e-08,0.638857,0.00119638,-2.30888e-06,4.97134e-08,0.640051,0.00119191,-2.15974e-06,-4.15463e-08,0.641241,0.00118747,-2.28438e-06,5.68667e-08,0.642426,0.00118307,-2.11378e-06,-7.10641e-09,0.643607,0.00117882,-2.1351e-06,-2.8441e-08,0.644784,0.00117446,-2.22042e-06,6.12658e-08,0.645956,0.00117021,-2.03663e-06,-3.78083e-08,0.647124,0.00116602,-2.15005e-06,3.03627e-08,0.648288,0.00116181,-2.05896e-06,-2.40379e-08,0.649448,0.00115762,-2.13108e-06,6.57887e-08,0.650603,0.00115356,-1.93371e-06,-6.03028e-08,0.651755,0.00114951,-2.11462e-06,5.62134e-08,0.652902,0.00114545,-1.94598e-06,-4.53417e-08,0.654046,0.00114142,-2.082e-06,6.55489e-08,0.655185,0.00113745,-1.88536e-06,-3.80396e-08,0.656321,0.00113357,-1.99948e-06,2.70049e-08,0.657452,0.00112965,-1.91846e-06,-1.03755e-08,0.65858,0.00112578,-1.94959e-06,1.44973e-08,0.659704,0.00112192,-1.9061e-06,1.1991e-08,0.660824,0.00111815,-1.87012e-06,-2.85634e-09,0.66194,0.0011144,-1.87869e-06,-5.65782e-10,0.663053,0.00111064,-1.88039e-06,5.11947e-09,0.664162,0.0011069,-1.86503e-06,3.96924e-08,0.665267,0.00110328,-1.74595e-06,-4.46795e-08,0.666368,0.00109966,-1.87999e-06,1.98161e-08,0.667466,0.00109596,-1.82054e-06,2.502e-08,0.66856,0.00109239,-1.74548e-06,-6.86593e-10,0.669651,0.0010889,-1.74754e-06,-2.22739e-08,0.670738,0.00108534,-1.81437e-06,3.01776e-08,0.671821,0.0010818,-1.72383e-06,2.07732e-08,0.672902,0.00107841,-1.66151e-06,-5.36658e-08,0.673978,0.00107493,-1.82251e-06,7.46802e-08,0.675051,0.00107151,-1.59847e-06,-6.62411e-08,0.676121,0.00106811,-1.79719e-06,7.10748e-08,0.677188,0.00106473,-1.58397e-06,-3.92441e-08,0.678251,0.00106145,-1.7017e-06,2.62973e-08,0.679311,0.00105812,-1.62281e-06,-6.34035e-09,0.680367,0.00105486,-1.64183e-06,-9.36249e-10,0.68142,0.00105157,-1.64464e-06,1.00854e-08,0.68247,0.00104831,-1.61438e-06,2.01995e-08,0.683517,0.00104514,-1.55378e-06,-3.1279e-08,0.68456,0.00104194,-1.64762e-06,4.53114e-08,0.685601,0.00103878,-1.51169e-06,-3.07573e-08,0.686638,0.00103567,-1.60396e-06,1.81133e-08,0.687672,0.00103251,-1.54962e-06,1.79085e-08,0.688703,0.00102947,-1.49589e-06,-3.01428e-08,0.689731,0.00102639,-1.58632e-06,4.30583e-08,0.690756,0.00102334,-1.45715e-06,-2.28814e-08,0.691778,0.00102036,-1.52579e-06,-1.11373e-08,0.692797,0.00101727,-1.5592e-06,6.74305e-08,0.693812,0.00101436,-1.35691e-06,-7.97709e-08,0.694825,0.0010114,-1.59622e-06,7.28391e-08,0.695835,0.00100843,-1.37771e-06,-3.27715e-08,0.696842,0.00100558,-1.47602e-06,-1.35807e-09,0.697846,0.00100262,-1.48009e-06,3.82037e-08,0.698847,0.000999775,-1.36548e-06,-3.22474e-08,0.699846,0.000996948,-1.46223e-06,3.11809e-08,0.700841,0.000994117,-1.36868e-06,-3.28714e-08,0.701834,0.000991281,-1.4673e-06,4.07001e-08,0.702824,0.000988468,-1.3452e-06,-1.07197e-08,0.703811,0.000985746,-1.37736e-06,2.17866e-09,0.704795,0.000982998,-1.37082e-06,2.00521e-09,0.705777,0.000980262,-1.3648e-06,-1.01996e-08,0.706756,0.000977502,-1.3954e-06,3.87931e-08,0.707732,0.000974827,-1.27902e-06,-2.57632e-08,0.708706,0.000972192,-1.35631e-06,4.65513e-09,0.709676,0.000969493,-1.34235e-06,7.14257e-09,0.710645,0.00096683,-1.32092e-06,2.63791e-08,0.71161,0.000964267,-1.24178e-06,-5.30543e-08,0.712573,0.000961625,-1.40095e-06,6.66289e-08,0.713533,0.000959023,-1.20106e-06,-3.46474e-08,0.714491,0.000956517,-1.305e-06,1.23559e-08,0.715446,0.000953944,-1.26793e-06,-1.47763e-08,0.716399,0.000951364,-1.31226e-06,4.67494e-08,0.717349,0.000948879,-1.17201e-06,-5.3012e-08,0.718297,0.000946376,-1.33105e-06,4.60894e-08,0.719242,0.000943852,-1.19278e-06,-1.21366e-08,0.720185,0.00094143,-1.22919e-06,2.45673e-09,0.721125,0.000938979,-1.22182e-06,2.30966e-09,0.722063,0.000936543,-1.21489e-06,-1.16954e-08,0.722998,0.000934078,-1.24998e-06,4.44718e-08,0.723931,0.000931711,-1.11656e-06,-4.69823e-08,0.724861,0.000929337,-1.25751e-06,2.4248e-08,0.725789,0.000926895,-1.18477e-06,9.5949e-09,0.726715,0.000924554,-1.15598e-06,-3.02286e-09,0.727638,0.000922233,-1.16505e-06,2.49649e-09,0.72856,0.00091991,-1.15756e-06,-6.96321e-09,0.729478,0.000917575,-1.17845e-06,2.53564e-08,0.730395,0.000915294,-1.10238e-06,-3.48578e-08,0.731309,0.000912984,-1.20695e-06,5.44704e-08,0.732221,0.000910734,-1.04354e-06,-6.38144e-08,0.73313,0.000908455,-1.23499e-06,8.15781e-08,0.734038,0.00090623,-9.90253e-07,-8.3684e-08,0.734943,0.000903999,-1.2413e-06,7.43441e-08,0.735846,0.000901739,-1.01827e-06,-3.48787e-08,0.736746,0.000899598,-1.12291e-06,5.56596e-09,0.737645,0.000897369,-1.10621e-06,1.26148e-08,0.738541,0.000895194,-1.06837e-06,3.57935e-09,0.739435,0.000893068,-1.05763e-06,-2.69322e-08,0.740327,0.000890872,-1.13842e-06,4.45448e-08,0.741217,0.000888729,-1.00479e-06,-3.20376e-08,0.742105,0.000886623,-1.1009e-06,2.40011e-08,0.74299,0.000884493,-1.0289e-06,-4.36209e-09,0.743874,0.000882422,-1.04199e-06,-6.55268e-09,0.744755,0.000880319,-1.06164e-06,3.05728e-08,0.745634,0.000878287,-9.69926e-07,-5.61338e-08,0.746512,0.000876179,-1.13833e-06,7.4753e-08,0.747387,0.000874127,-9.14068e-07,-6.40644e-08,0.74826,0.000872106,-1.10626e-06,6.22955e-08,0.749131,0.000870081,-9.19375e-07,-6.59083e-08,0.75,0.000868044,-1.1171e-06,8.21284e-08,0.750867,0.000866056,-8.70714e-07,-8.37915e-08,0.751732,0.000864064,-1.12209e-06,7.42237e-08,0.752595,0.000862042,-8.99418e-07,-3.42894e-08,0.753456,0.00086014,-1.00229e-06,3.32955e-09,0.754315,0.000858146,-9.92297e-07,2.09712e-08,0.755173,0.000856224,-9.29384e-07,-2.76096e-08,0.756028,0.000854282,-1.01221e-06,2.98627e-08,0.756881,0.000852348,-9.22625e-07,-3.22365e-08,0.757733,0.000850406,-1.01933e-06,3.94786e-08,0.758582,0.000848485,-9.00898e-07,-6.46833e-09,0.75943,0.000846664,-9.20303e-07,-1.36052e-08,0.760275,0.000844783,-9.61119e-07,1.28447e-09,0.761119,0.000842864,-9.57266e-07,8.4674e-09,0.761961,0.000840975,-9.31864e-07,2.44506e-08,0.762801,0.000839185,-8.58512e-07,-4.6665e-08,0.763639,0.000837328,-9.98507e-07,4.30001e-08,0.764476,0.00083546,-8.69507e-07,-6.12609e-09,0.76531,0.000833703,-8.87885e-07,-1.84959e-08,0.766143,0.000831871,-9.43372e-07,2.05052e-08,0.766974,0.000830046,-8.81857e-07,-3.92026e-09,0.767803,0.000828271,-8.93618e-07,-4.82426e-09,0.768631,0.000826469,-9.0809e-07,2.32172e-08,0.769456,0.000824722,-8.38439e-07,-2.84401e-08,0.77028,0.00082296,-9.23759e-07,3.09386e-08,0.771102,0.000821205,-8.30943e-07,-3.57099e-08,0.771922,0.000819436,-9.38073e-07,5.22963e-08,0.772741,0.000817717,-7.81184e-07,-5.42658e-08,0.773558,0.000815992,-9.43981e-07,4.55579e-08,0.774373,0.000814241,-8.07308e-07,-8.75656e-09,0.775186,0.0008126,-8.33578e-07,-1.05315e-08,0.775998,0.000810901,-8.65172e-07,-8.72188e-09,0.776808,0.000809145,-8.91338e-07,4.54191e-08,0.777616,0.000807498,-7.5508e-07,-5.37454e-08,0.778423,0.000805827,-9.16317e-07,5.03532e-08,0.779228,0.000804145,-7.65257e-07,-2.84584e-08,0.780031,0.000802529,-8.50632e-07,3.87579e-09,0.780833,0.00080084,-8.39005e-07,1.29552e-08,0.781633,0.0007992,-8.00139e-07,3.90804e-09,0.782432,0.000797612,-7.88415e-07,-2.85874e-08,0.783228,0.000795949,-8.74177e-07,5.0837e-08,0.784023,0.000794353,-7.21666e-07,-5.55513e-08,0.784817,0.000792743,-8.8832e-07,5.21587e-08,0.785609,0.000791123,-7.31844e-07,-3.38744e-08,0.786399,0.000789558,-8.33467e-07,2.37342e-08,0.787188,0.000787962,-7.62264e-07,-1.45775e-09,0.787975,0.000786433,-7.66638e-07,-1.79034e-08,0.788761,0.000784846,-8.20348e-07,1.34665e-08,0.789545,0.000783246,-7.79948e-07,2.3642e-08,0.790327,0.000781757,-7.09022e-07,-4.84297e-08,0.791108,0.000780194,-8.54311e-07,5.08674e-08,0.791888,0.000778638,-7.01709e-07,-3.58303e-08,0.792666,0.000777127,-8.092e-07,3.28493e-08,0.793442,0.000775607,-7.10652e-07,-3.59624e-08,0.794217,0.000774078,-8.1854e-07,5.13959e-08,0.79499,0.000772595,-6.64352e-07,-5.04121e-08,0.795762,0.000771115,-8.15588e-07,3.10431e-08,0.796532,0.000769577,-7.22459e-07,-1.41557e-08,0.797301,0.00076809,-7.64926e-07,2.55795e-08,0.798069,0.000766636,-6.88187e-07,-2.85578e-08,0.798835,0.000765174,-7.73861e-07,2.90472e-08,0.799599,0.000763714,-6.86719e-07,-2.80262e-08,0.800362,0.000762256,-7.70798e-07,2.34531e-08,0.801123,0.000760785,-7.00438e-07,-6.18144e-09,0.801884,0.000759366,-7.18983e-07,1.27263e-09,0.802642,0.000757931,-7.15165e-07,1.09101e-09,0.803399,0.000756504,-7.11892e-07,-5.63675e-09,0.804155,0.000755064,-7.28802e-07,2.14559e-08,0.80491,0.00075367,-6.64434e-07,-2.05821e-08,0.805663,0.00075228,-7.26181e-07,1.26812e-09,0.806414,0.000750831,-7.22377e-07,1.55097e-08,0.807164,0.000749433,-6.75848e-07,-3.70216e-09,0.807913,0.00074807,-6.86954e-07,-7.0105e-10,0.80866,0.000746694,-6.89057e-07,6.5063e-09,0.809406,0.000745336,-6.69538e-07,-2.53242e-08,0.810151,0.000743921,-7.45511e-07,3.51858e-08,0.810894,0.000742535,-6.39953e-07,3.79034e-09,0.811636,0.000741267,-6.28582e-07,-5.03471e-08,0.812377,0.000739858,-7.79624e-07,7.83886e-08,0.813116,0.000738534,-5.44458e-07,-8.43935e-08,0.813854,0.000737192,-7.97638e-07,8.03714e-08,0.81459,0.000735838,-5.56524e-07,-5.82784e-08,0.815325,0.00073455,-7.31359e-07,3.35329e-08,0.816059,0.000733188,-6.3076e-07,-1.62486e-08,0.816792,0.000731878,-6.79506e-07,3.14614e-08,0.817523,0.000730613,-5.85122e-07,-4.99925e-08,0.818253,0.000729293,-7.35099e-07,4.92994e-08,0.818982,0.000727971,-5.87201e-07,-2.79959e-08,0.819709,0.000726712,-6.71189e-07,3.07959e-09,0.820435,0.000725379,-6.6195e-07,1.56777e-08,0.82116,0.000724102,-6.14917e-07,-6.18564e-09,0.821883,0.000722854,-6.33474e-07,9.06488e-09,0.822606,0.000721614,-6.06279e-07,-3.00739e-08,0.823327,0.000720311,-6.96501e-07,5.16262e-08,0.824046,0.000719073,-5.41623e-07,-5.72214e-08,0.824765,0.000717818,-7.13287e-07,5.80503e-08,0.825482,0.000716566,-5.39136e-07,-5.57703e-08,0.826198,0.00071532,-7.06447e-07,4.58215e-08,0.826912,0.000714045,-5.68983e-07,-8.30636e-09,0.827626,0.000712882,-5.93902e-07,-1.25961e-08,0.828338,0.000711656,-6.3169e-07,-9.13985e-10,0.829049,0.00071039,-6.34432e-07,1.62519e-08,0.829759,0.00070917,-5.85676e-07,-4.48904e-09,0.830468,0.000707985,-5.99143e-07,1.70418e-09,0.831175,0.000706792,-5.9403e-07,-2.32768e-09,0.831881,0.000705597,-6.01014e-07,7.60648e-09,0.832586,0.000704418,-5.78194e-07,-2.80982e-08,0.83329,0.000703177,-6.62489e-07,4.51817e-08,0.833993,0.000701988,-5.26944e-07,-3.34192e-08,0.834694,0.000700834,-6.27201e-07,2.88904e-08,0.835394,0.000699666,-5.4053e-07,-2.25378e-08,0.836093,0.000698517,-6.08143e-07,1.65589e-09,0.836791,0.000697306,-6.03176e-07,1.59142e-08,0.837488,0.000696147,-5.55433e-07,-5.70801e-09,0.838184,0.000695019,-5.72557e-07,6.91792e-09,0.838878,0.000693895,-5.51803e-07,-2.19637e-08,0.839571,0.000692725,-6.17694e-07,2.13321e-08,0.840263,0.000691554,-5.53698e-07,-3.75996e-09,0.840954,0.000690435,-5.64978e-07,-6.29219e-09,0.841644,0.000689287,-5.83855e-07,2.89287e-08,0.842333,0.000688206,-4.97068e-07,-4.98181e-08,0.843021,0.000687062,-6.46523e-07,5.11344e-08,0.843707,0.000685922,-4.9312e-07,-3.55102e-08,0.844393,0.00068483,-5.9965e-07,3.13019e-08,0.845077,0.000683724,-5.05745e-07,-3.00925e-08,0.84576,0.000682622,-5.96022e-07,2.94636e-08,0.846442,0.000681519,-5.07631e-07,-2.81572e-08,0.847123,0.000680419,-5.92103e-07,2.35606e-08,0.847803,0.000679306,-5.21421e-07,-6.48045e-09,0.848482,0.000678243,-5.40863e-07,2.36124e-09,0.849159,0.000677169,-5.33779e-07,-2.96461e-09,0.849836,0.000676092,-5.42673e-07,9.49728e-09,0.850512,0.000675035,-5.14181e-07,-3.50245e-08,0.851186,0.000673902,-6.19254e-07,7.09959e-08,0.851859,0.000672876,-4.06267e-07,-7.01453e-08,0.852532,0.000671853,-6.16703e-07,3.07714e-08,0.853203,0.000670712,-5.24388e-07,6.66423e-09,0.853873,0.000669684,-5.04396e-07,2.17629e-09,0.854542,0.000668681,-4.97867e-07,-1.53693e-08,0.855211,0.000667639,-5.43975e-07,-3.03752e-10,0.855878,0.000666551,-5.44886e-07,1.65844e-08,0.856544,0.000665511,-4.95133e-07,-6.42907e-09,0.857209,0.000664501,-5.1442e-07,9.13195e-09,0.857873,0.0006635,-4.87024e-07,-3.00987e-08,0.858536,0.000662435,-5.7732e-07,5.16584e-08,0.859198,0.000661436,-4.22345e-07,-5.73255e-08,0.859859,0.000660419,-5.94322e-07,5.84343e-08,0.860518,0.000659406,-4.19019e-07,-5.72022e-08,0.861177,0.000658396,-5.90626e-07,5.11653e-08,0.861835,0.000657368,-4.3713e-07,-2.82495e-08,0.862492,0.000656409,-5.21878e-07,2.22788e-09,0.863148,0.000655372,-5.15195e-07,1.9338e-08,0.863803,0.0006544,-4.5718e-07,-1.99754e-08,0.864457,0.000653425,-5.17107e-07,9.59024e-10,0.86511,0.000652394,-5.1423e-07,1.61393e-08,0.865762,0.000651414,-4.65812e-07,-5.91149e-09,0.866413,0.000650465,-4.83546e-07,7.50665e-09,0.867063,0.00064952,-4.61026e-07,-2.4115e-08,0.867712,0.000648526,-5.33371e-07,2.93486e-08,0.86836,0.000647547,-4.45325e-07,-3.36748e-08,0.869007,0.000646555,-5.4635e-07,4.57461e-08,0.869653,0.0006456,-4.09112e-07,-3.01002e-08,0.870298,0.000644691,-4.99412e-07,1.50501e-08,0.870942,0.000643738,-4.54262e-07,-3.01002e-08,0.871585,0.000642739,-5.44563e-07,4.57461e-08,0.872228,0.000641787,-4.07324e-07,-3.36748e-08,0.872869,0.000640871,-5.08349e-07,2.93486e-08,0.873509,0.000639943,-4.20303e-07,-2.4115e-08,0.874149,0.00063903,-4.92648e-07,7.50655e-09,0.874787,0.000638067,-4.70128e-07,-5.91126e-09,0.875425,0.000637109,-4.87862e-07,1.61385e-08,0.876062,0.000636182,-4.39447e-07,9.61961e-10,0.876697,0.000635306,-4.36561e-07,-1.99863e-08,0.877332,0.000634373,-4.9652e-07,1.93785e-08,0.877966,0.000633438,-4.38384e-07,2.07697e-09,0.878599,0.000632567,-4.32153e-07,-2.76864e-08,0.879231,0.00063162,-5.15212e-07,4.90641e-08,0.879862,0.000630737,-3.6802e-07,-4.93606e-08,0.880493,0.000629852,-5.16102e-07,2.9169e-08,0.881122,0.000628908,-4.28595e-07,-7.71083e-09,0.881751,0.000628027,-4.51727e-07,1.6744e-09,0.882378,0.000627129,-4.46704e-07,1.01317e-09,0.883005,0.000626239,-4.43665e-07,-5.72703e-09,0.883631,0.000625334,-4.60846e-07,2.1895e-08,0.884255,0.000624478,-3.95161e-07,-2.22481e-08,0.88488,0.000623621,-4.61905e-07,7.4928e-09,0.885503,0.00062272,-4.39427e-07,-7.72306e-09,0.886125,0.000621818,-4.62596e-07,2.33995e-08,0.886746,0.000620963,-3.92398e-07,-2.62704e-08,0.887367,0.000620099,-4.71209e-07,2.20775e-08,0.887987,0.000619223,-4.04976e-07,-2.43496e-09,0.888605,0.000618406,-4.12281e-07,-1.23377e-08,0.889223,0.000617544,-4.49294e-07,-7.81876e-09,0.88984,0.000616622,-4.72751e-07,4.36128e-08,0.890457,0.000615807,-3.41912e-07,-4.7423e-08,0.891072,0.000614981,-4.84181e-07,2.68698e-08,0.891687,0.000614093,-4.03572e-07,-4.51384e-10,0.8923,0.000613285,-4.04926e-07,-2.50643e-08,0.892913,0.0006124,-4.80119e-07,4.11038e-08,0.893525,0.000611563,-3.56808e-07,-2.01414e-08,0.894136,0.000610789,-4.17232e-07,-2.01426e-08,0.894747,0.000609894,-4.7766e-07,4.11073e-08,0.895356,0.000609062,-3.54338e-07,-2.50773e-08,0.895965,0.000608278,-4.2957e-07,-4.02954e-10,0.896573,0.000607418,-4.30779e-07,2.66891e-08,0.89718,0.000606636,-3.50711e-07,-4.67489e-08,0.897786,0.000605795,-4.90958e-07,4.10972e-08,0.898391,0.000604936,-3.67666e-07,1.56948e-09,0.898996,0.000604205,-3.62958e-07,-4.73751e-08,0.8996,0.000603337,-5.05083e-07,6.87214e-08,0.900202,0.000602533,-2.98919e-07,-4.86966e-08,0.900805,0.000601789,-4.45009e-07,6.85589e-09,0.901406,0.00060092,-4.24441e-07,2.1273e-08,0.902007,0.000600135,-3.60622e-07,-3.23434e-08,0.902606,0.000599317,-4.57652e-07,4.84959e-08,0.903205,0.000598547,-3.12164e-07,-4.24309e-08,0.903803,0.000597795,-4.39457e-07,2.01844e-09,0.904401,0.000596922,-4.33402e-07,3.43571e-08,0.904997,0.000596159,-3.30331e-07,-2.02374e-08,0.905593,0.000595437,-3.91043e-07,-1.30123e-08,0.906188,0.000594616,-4.3008e-07,1.26819e-08,0.906782,0.000593794,-3.92034e-07,2.18894e-08,0.907376,0.000593076,-3.26366e-07,-4.06349e-08,0.907968,0.000592301,-4.4827e-07,2.1441e-08,0.90856,0.000591469,-3.83947e-07,1.44754e-08,0.909151,0.000590744,-3.40521e-07,-1.97379e-08,0.909742,0.000590004,-3.99735e-07,4.87161e-09,0.910331,0.000589219,-3.8512e-07,2.51532e-10,0.91092,0.00058845,-3.84366e-07,-5.87776e-09,0.911508,0.000587663,-4.01999e-07,2.32595e-08,0.912096,0.000586929,-3.3222e-07,-2.75554e-08,0.912682,0.000586182,-4.14887e-07,2.73573e-08,0.913268,0.000585434,-3.32815e-07,-2.22692e-08,0.913853,0.000584702,-3.99622e-07,2.11486e-09,0.914437,0.000583909,-3.93278e-07,1.38098e-08,0.915021,0.000583164,-3.51848e-07,2.25042e-09,0.915604,0.000582467,-3.45097e-07,-2.28115e-08,0.916186,0.000581708,-4.13531e-07,2.93911e-08,0.916767,0.000580969,-3.25358e-07,-3.51481e-08,0.917348,0.000580213,-4.30803e-07,5.15967e-08,0.917928,0.000579506,-2.76012e-07,-5.20296e-08,0.918507,0.000578798,-4.32101e-07,3.73124e-08,0.919085,0.000578046,-3.20164e-07,-3.76154e-08,0.919663,0.000577293,-4.3301e-07,5.35447e-08,0.92024,0.000576587,-2.72376e-07,-5.7354e-08,0.920816,0.000575871,-4.44438e-07,5.66621e-08,0.921391,0.000575152,-2.74452e-07,-5.00851e-08,0.921966,0.000574453,-4.24707e-07,2.4469e-08,0.92254,0.000573677,-3.513e-07,1.18138e-08,0.923114,0.000573009,-3.15859e-07,-1.21195e-08,0.923686,0.000572341,-3.52217e-07,-2.29403e-08,0.924258,0.000571568,-4.21038e-07,4.4276e-08,0.924829,0.000570859,-2.8821e-07,-3.49546e-08,0.9254,0.000570178,-3.93074e-07,3.59377e-08,0.92597,0.000569499,-2.85261e-07,-4.91915e-08,0.926539,0.000568781,-4.32835e-07,4.16189e-08,0.927107,0.00056804,-3.07979e-07,1.92523e-09,0.927675,0.00056743,-3.02203e-07,-4.93198e-08,0.928242,0.000566678,-4.50162e-07,7.61447e-08,0.928809,0.000566006,-2.21728e-07,-7.6445e-08,0.929374,0.000565333,-4.51063e-07,5.08216e-08,0.929939,0.000564583,-2.98599e-07,-7.63212e-09,0.930503,0.000563963,-3.21495e-07,-2.02931e-08,0.931067,0.000563259,-3.82374e-07,2.92001e-08,0.93163,0.000562582,-2.94774e-07,-3.69025e-08,0.932192,0.000561882,-4.05482e-07,5.88053e-08,0.932754,0.000561247,-2.29066e-07,-7.91094e-08,0.933315,0.000560552,-4.66394e-07,7.88184e-08,0.933875,0.000559856,-2.29939e-07,-5.73501e-08,0.934434,0.000559224,-4.01989e-07,3.13727e-08,0.934993,0.000558514,-3.07871e-07,-8.53611e-09,0.935551,0.000557873,-3.33479e-07,2.77175e-09,0.936109,0.000557214,-3.25164e-07,-2.55091e-09,0.936666,0.000556556,-3.32817e-07,7.43188e-09,0.937222,0.000555913,-3.10521e-07,-2.71766e-08,0.937778,0.00055521,-3.92051e-07,4.167e-08,0.938333,0.000554551,-2.67041e-07,-2.02941e-08,0.938887,0.000553956,-3.27923e-07,-2.00984e-08,0.93944,0.00055324,-3.88218e-07,4.10828e-08,0.939993,0.000552587,-2.6497e-07,-2.50237e-08,0.940546,0.000551982,-3.40041e-07,-5.92583e-10,0.941097,0.0005513,-3.41819e-07,2.7394e-08,0.941648,0.000550698,-2.59637e-07,-4.93788e-08,0.942199,0.000550031,-4.07773e-07,5.09119e-08,0.942748,0.000549368,-2.55038e-07,-3.50595e-08,0.943297,0.000548753,-3.60216e-07,2.97214e-08,0.943846,0.000548122,-2.71052e-07,-2.42215e-08,0.944394,0.000547507,-3.43716e-07,7.55985e-09,0.944941,0.000546842,-3.21037e-07,-6.01796e-09,0.945487,0.000546182,-3.3909e-07,1.65119e-08,0.946033,0.000545553,-2.89555e-07,-4.2498e-10,0.946578,0.000544973,-2.9083e-07,-1.4812e-08,0.947123,0.000544347,-3.35266e-07,6.83068e-11,0.947667,0.000543676,-3.35061e-07,1.45388e-08,0.94821,0.00054305,-2.91444e-07,1.38123e-09,0.948753,0.000542471,-2.87301e-07,-2.00637e-08,0.949295,0.000541836,-3.47492e-07,1.92688e-08,0.949837,0.000541199,-2.89685e-07,2.59298e-09,0.950378,0.000540628,-2.81906e-07,-2.96407e-08,0.950918,0.000539975,-3.70829e-07,5.63652e-08,0.951458,0.000539402,-2.01733e-07,-7.66107e-08,0.951997,0.000538769,-4.31565e-07,7.12638e-08,0.952535,0.00053812,-2.17774e-07,-2.96305e-08,0.953073,0.000537595,-3.06665e-07,-1.23464e-08,0.95361,0.000536945,-3.43704e-07,1.94114e-08,0.954147,0.000536316,-2.8547e-07,-5.69451e-09,0.954683,0.000535728,-3.02554e-07,3.36666e-09,0.955219,0.000535133,-2.92454e-07,-7.77208e-09,0.955753,0.000534525,-3.1577e-07,2.77216e-08,0.956288,0.000533976,-2.32605e-07,-4.35097e-08,0.956821,0.00053338,-3.63134e-07,2.7108e-08,0.957354,0.000532735,-2.8181e-07,-5.31772e-09,0.957887,0.000532156,-2.97764e-07,-5.83718e-09,0.958419,0.000531543,-3.15275e-07,2.86664e-08,0.95895,0.000530998,-2.29276e-07,-4.9224e-08,0.959481,0.000530392,-3.76948e-07,4.90201e-08,0.960011,0.000529785,-2.29887e-07,-2.76471e-08,0.96054,0.000529243,-3.12829e-07,1.96385e-09,0.961069,0.000528623,-3.06937e-07,1.97917e-08,0.961598,0.000528068,-2.47562e-07,-2.15261e-08,0.962125,0.000527508,-3.1214e-07,6.70795e-09,0.962653,0.000526904,-2.92016e-07,-5.30573e-09,0.963179,0.000526304,-3.07934e-07,1.4515e-08,0.963705,0.000525732,-2.64389e-07,6.85048e-09,0.964231,0.000525224,-2.43837e-07,-4.19169e-08,0.964756,0.00052461,-3.69588e-07,4.1608e-08,0.96528,0.000523996,-2.44764e-07,-5.30598e-09,0.965804,0.000523491,-2.60682e-07,-2.03841e-08,0.966327,0.000522908,-3.21834e-07,2.72378e-08,0.966849,0.000522346,-2.40121e-07,-2.89625e-08,0.967371,0.000521779,-3.27008e-07,2.90075e-08,0.967893,0.000521212,-2.39986e-07,-2.74629e-08,0.968414,0.00052065,-3.22374e-07,2.12396e-08,0.968934,0.000520069,-2.58656e-07,2.10922e-09,0.969454,0.000519558,-2.52328e-07,-2.96765e-08,0.969973,0.000518964,-3.41357e-07,5.6992e-08,0.970492,0.000518452,-1.70382e-07,-7.90821e-08,0.97101,0.000517874,-4.07628e-07,8.05224e-08,0.971528,0.000517301,-1.66061e-07,-6.41937e-08,0.972045,0.000516776,-3.58642e-07,5.70429e-08,0.972561,0.00051623,-1.87513e-07,-4.47686e-08,0.973077,0.00051572,-3.21819e-07,2.82237e-09,0.973593,0.000515085,-3.13352e-07,3.34792e-08,0.974108,0.000514559,-2.12914e-07,-1.75298e-08,0.974622,0.000514081,-2.65503e-07,-2.29648e-08,0.975136,0.000513481,-3.34398e-07,4.97843e-08,0.975649,0.000512961,-1.85045e-07,-5.6963e-08,0.976162,0.00051242,-3.55934e-07,5.88585e-08,0.976674,0.000511885,-1.79359e-07,-5.92616e-08,0.977185,0.000511348,-3.57143e-07,5.89785e-08,0.977696,0.000510811,-1.80208e-07,-5.74433e-08,0.978207,0.000510278,-3.52538e-07,5.15854e-08,0.978717,0.000509728,-1.97781e-07,-2.9689e-08,0.979226,0.000509243,-2.86848e-07,7.56591e-09,0.979735,0.000508692,-2.64151e-07,-5.74649e-10,0.980244,0.000508162,-2.65875e-07,-5.26732e-09,0.980752,0.000507615,-2.81677e-07,2.16439e-08,0.981259,0.000507116,-2.16745e-07,-2.17037e-08,0.981766,0.000506618,-2.81856e-07,5.56636e-09,0.982272,0.000506071,-2.65157e-07,-5.61689e-10,0.982778,0.000505539,-2.66842e-07,-3.31963e-09,0.983283,0.000504995,-2.76801e-07,1.38402e-08,0.983788,0.000504483,-2.3528e-07,7.56339e-09,0.984292,0.000504035,-2.1259e-07,-4.40938e-08,0.984796,0.000503478,-3.44871e-07,4.96026e-08,0.985299,0.000502937,-1.96064e-07,-3.51071e-08,0.985802,0.000502439,-3.01385e-07,3.12212e-08,0.986304,0.00050193,-2.07721e-07,-3.0173e-08,0.986806,0.000501424,-2.9824e-07,2.9866e-08,0.987307,0.000500917,-2.08642e-07,-2.96865e-08,0.987808,0.000500411,-2.97702e-07,2.92753e-08,0.988308,0.000499903,-2.09876e-07,-2.78101e-08,0.988807,0.0004994,-2.93306e-07,2.23604e-08,0.989307,0.000498881,-2.26225e-07,-2.02681e-09,0.989805,0.000498422,-2.32305e-07,-1.42531e-08,0.990303,0.000497915,-2.75065e-07,-5.65232e-10,0.990801,0.000497363,-2.76761e-07,1.65141e-08,0.991298,0.000496859,-2.27218e-07,-5.88639e-09,0.991795,0.000496387,-2.44878e-07,7.0315e-09,0.992291,0.000495918,-2.23783e-07,-2.22396e-08,0.992787,0.000495404,-2.90502e-07,2.23224e-08,0.993282,0.00049489,-2.23535e-07,-7.44543e-09,0.993776,0.000494421,-2.45871e-07,7.45924e-09,0.994271,0.000493951,-2.23493e-07,-2.23915e-08,0.994764,0.000493437,-2.90668e-07,2.25021e-08,0.995257,0.000492923,-2.23161e-07,-8.01218e-09,0.99575,0.000492453,-2.47198e-07,9.54669e-09,0.996242,0.000491987,-2.18558e-07,-3.01746e-08,0.996734,0.000491459,-3.09082e-07,5.1547e-08,0.997225,0.000490996,-1.54441e-07,-5.68039e-08,0.997716,0.000490517,-3.24853e-07,5.64594e-08,0.998206,0.000490036,-1.55474e-07,-4.98245e-08,0.998696,0.000489576,-3.04948e-07,2.36292e-08,0.999186,0.000489037,-2.3406e-07,1.49121e-08,0.999674,0.000488613,-1.89324e-07,-2.3673e-08,1.00016,0.000488164,-2.60343e-07,2.01754e-08,1.00065,0.000487704,-1.99816e-07,-5.70288e-08,1.00114,0.000487133,-3.70903e-07,8.87303e-08,1.00162,0.000486657,-1.04712e-07,-5.94737e-08,1.00211,0.000486269,-2.83133e-07,2.99553e-08,1.0026,0.000485793,-1.93267e-07,-6.03474e-08,1.00308,0.000485225,-3.74309e-07,9.2225e-08,1.00357,0.000484754,-9.76345e-08,-7.0134e-08,1.00405,0.000484348,-3.08036e-07,6.91016e-08,1.00454,0.000483939,-1.00731e-07,-8.70633e-08,1.00502,0.000483476,-3.61921e-07,4.07328e-08,1.0055,0.000482875,-2.39723e-07,4.33413e-08,1.00599,0.000482525,-1.09699e-07,-9.48886e-08,1.00647,0.000482021,-3.94365e-07,9.77947e-08,1.00695,0.000481526,-1.00981e-07,-5.78713e-08,1.00743,0.00048115,-2.74595e-07,1.44814e-08,1.00791,0.000480645,-2.31151e-07,-5.42665e-11,1.00839,0.000480182,-2.31314e-07,-1.42643e-08,1.00887,0.000479677,-2.74106e-07,5.71115e-08,1.00935,0.0004793,-1.02772e-07,-9.49724e-08,1.00983,0.000478809,-3.87689e-07,8.43596e-08,1.01031,0.000478287,-1.3461e-07,-4.04755e-09,1.01079,0.000478006,-1.46753e-07,-6.81694e-08,1.01127,0.000477508,-3.51261e-07,3.83067e-08,1.01174,0.00047692,-2.36341e-07,3.41521e-08,1.01222,0.00047655,-1.33885e-07,-5.57058e-08,1.0127,0.000476115,-3.01002e-07,6.94616e-08,1.01317,0.000475721,-9.26174e-08,-1.02931e-07,1.01365,0.000475227,-4.01412e-07,1.03846e-07,1.01412,0.000474736,-8.98751e-08,-7.40321e-08,1.0146,0.000474334,-3.11971e-07,7.30735e-08,1.01507,0.00047393,-9.27508e-08,-9.90527e-08,1.01554,0.000473447,-3.89909e-07,8.47188e-08,1.01602,0.000472921,-1.35753e-07,-1.40381e-09,1.01649,0.000472645,-1.39964e-07,-7.91035e-08,1.01696,0.000472128,-3.77275e-07,7.93993e-08,1.01744,0.000471612,-1.39077e-07,-7.52607e-11,1.01791,0.000471334,-1.39302e-07,-7.90983e-08,1.01838,0.000470818,-3.76597e-07,7.80499e-08,1.01885,0.000470299,-1.42448e-07,5.31733e-09,1.01932,0.00047003,-1.26496e-07,-9.93193e-08,1.01979,0.000469479,-4.24453e-07,1.53541e-07,1.02026,0.00046909,3.617e-08,-1.57217e-07,1.02073,0.000468691,-4.35482e-07,1.177e-07,1.02119,0.000468173,-8.23808e-08,-7.51659e-08,1.02166,0.000467783,-3.07878e-07,6.37538e-08,1.02213,0.000467358,-1.16617e-07,-6.064e-08,1.0226,0.000466943,-2.98537e-07,5.9597e-08,1.02306,0.000466525,-1.19746e-07,-5.85386e-08,1.02353,0.00046611,-2.95362e-07,5.53482e-08,1.024,0.000465685,-1.29317e-07,-4.36449e-08,1.02446,0.000465296,-2.60252e-07,2.20268e-11,1.02493,0.000464775,-2.60186e-07,4.35568e-08,1.02539,0.000464386,-1.29516e-07,-5.50398e-08,1.02586,0.000463961,-2.94635e-07,5.73932e-08,1.02632,0.000463544,-1.22456e-07,-5.53236e-08,1.02678,0.000463133,-2.88426e-07,4.46921e-08,1.02725,0.000462691,-1.5435e-07,-4.23534e-09,1.02771,0.000462369,-1.67056e-07,-2.77507e-08,1.02817,0.000461952,-2.50308e-07,-3.97101e-09,1.02863,0.000461439,-2.62221e-07,4.36348e-08,1.02909,0.000461046,-1.31317e-07,-5.13589e-08,1.02955,0.000460629,-2.85394e-07,4.25913e-08,1.03001,0.000460186,-1.5762e-07,2.0285e-10,1.03047,0.000459871,-1.57011e-07,-4.34027e-08,1.03093,0.000459427,-2.87219e-07,5.41987e-08,1.03139,0.000459015,-1.24623e-07,-5.4183e-08,1.03185,0.000458604,-2.87172e-07,4.33239e-08,1.03231,0.000458159,-1.572e-07,9.65817e-11,1.03277,0.000457845,-1.56911e-07,-4.37103e-08,1.03323,0.0004574,-2.88041e-07,5.55351e-08,1.03368,0.000456991,-1.21436e-07,-5.9221e-08,1.03414,0.00045657,-2.99099e-07,6.21394e-08,1.0346,0.000456158,-1.1268e-07,-7.01275e-08,1.03505,0.000455723,-3.23063e-07,9.91614e-08,1.03551,0.000455374,-2.55788e-08,-8.80996e-08,1.03596,0.000455058,-2.89878e-07,1.48184e-08,1.03642,0.000454523,-2.45422e-07,2.88258e-08,1.03687,0.000454119,-1.58945e-07,-1.09125e-08,1.03733,0.000453768,-1.91682e-07,1.48241e-08,1.03778,0.000453429,-1.4721e-07,-4.83838e-08,1.03823,0.00045299,-2.92361e-07,5.95019e-08,1.03869,0.000452584,-1.13856e-07,-7.04146e-08,1.03914,0.000452145,-3.25099e-07,1.02947e-07,1.03959,0.000451803,-1.62583e-08,-1.02955e-07,1.04004,0.000451462,-3.25123e-07,7.04544e-08,1.04049,0.000451023,-1.1376e-07,-5.96534e-08,1.04094,0.000450616,-2.9272e-07,4.89499e-08,1.04139,0.000450178,-1.45871e-07,-1.69369e-08,1.04184,0.000449835,-1.96681e-07,1.87977e-08,1.04229,0.000449498,-1.40288e-07,-5.82539e-08,1.04274,0.000449043,-3.1505e-07,9.50087e-08,1.04319,0.000448698,-3.00238e-08,-8.33623e-08,1.04364,0.000448388,-2.80111e-07,2.20363e-11,1.04409,0.000447828,-2.80045e-07,8.32742e-08,1.04454,0.000447517,-3.02221e-08,-9.47002e-08,1.04498,0.000447173,-3.14323e-07,5.7108e-08,1.04543,0.000446716,-1.42999e-07,-1.45225e-08,1.04588,0.000446386,-1.86566e-07,9.82022e-10,1.04632,0.000446016,-1.8362e-07,1.05944e-08,1.04677,0.00044568,-1.51837e-07,-4.33597e-08,1.04721,0.000445247,-2.81916e-07,4.36352e-08,1.04766,0.000444814,-1.51011e-07,-1.19717e-08,1.0481,0.000444476,-1.86926e-07,4.25158e-09,1.04855,0.000444115,-1.74171e-07,-5.03461e-09,1.04899,0.000443751,-1.89275e-07,1.58868e-08,1.04944,0.00044342,-1.41614e-07,-5.85127e-08,1.04988,0.000442961,-3.17152e-07,9.89548e-08,1.05032,0.000442624,-2.0288e-08,-9.88878e-08,1.05076,0.000442287,-3.16951e-07,5.81779e-08,1.05121,0.000441827,-1.42418e-07,-1.46144e-08,1.05165,0.000441499,-1.86261e-07,2.79892e-10,1.05209,0.000441127,-1.85421e-07,1.34949e-08,1.05253,0.000440797,-1.44937e-07,-5.42594e-08,1.05297,0.000440344,-3.07715e-07,8.43335e-08,1.05341,0.000439982,-5.47146e-08,-4.46558e-08,1.05385,0.000439738,-1.88682e-07,-2.49193e-08,1.05429,0.000439286,-2.6344e-07,2.5124e-08,1.05473,0.000438835,-1.88068e-07,4.36328e-08,1.05517,0.000438589,-5.71699e-08,-8.04459e-08,1.05561,0.000438234,-2.98508e-07,3.97324e-08,1.05605,0.000437756,-1.79311e-07,4.07258e-08,1.05648,0.000437519,-5.71332e-08,-8.34263e-08,1.05692,0.000437155,-3.07412e-07,5.45608e-08,1.05736,0.000436704,-1.4373e-07,-1.56078e-08,1.05779,0.000436369,-1.90553e-07,7.87043e-09,1.05823,0.000436012,-1.66942e-07,-1.58739e-08,1.05867,0.00043563,-2.14563e-07,5.56251e-08,1.0591,0.000435368,-4.76881e-08,-8.74172e-08,1.05954,0.000435011,-3.0994e-07,5.56251e-08,1.05997,0.000434558,-1.43064e-07,-1.58739e-08,1.06041,0.000434224,-1.90686e-07,7.87042e-09,1.06084,0.000433866,-1.67075e-07,-1.56078e-08,1.06127,0.000433485,-2.13898e-07,5.45609e-08,1.06171,0.000433221,-5.02157e-08,-8.34263e-08,1.06214,0.00043287,-3.00495e-07,4.07258e-08,1.06257,0.000432391,-1.78317e-07,3.97325e-08,1.063,0.000432154,-5.91198e-08,-8.04464e-08,1.06344,0.000431794,-3.00459e-07,4.36347e-08,1.06387,0.000431324,-1.69555e-07,2.5117e-08,1.0643,0.000431061,-9.42041e-08,-2.48934e-08,1.06473,0.000430798,-1.68884e-07,-4.47527e-08,1.06516,0.000430326,-3.03142e-07,8.46951e-08,1.06559,0.000429973,-4.90573e-08,-5.56089e-08,1.06602,0.000429708,-2.15884e-07,1.85314e-08,1.06645,0.000429332,-1.6029e-07,-1.85166e-08,1.06688,0.000428956,-2.1584e-07,5.5535e-08,1.06731,0.000428691,-4.92347e-08,-8.44142e-08,1.06774,0.000428339,-3.02477e-07,4.37032e-08,1.06816,0.000427865,-1.71368e-07,2.88107e-08,1.06859,0.000427609,-8.49356e-08,-3.97367e-08,1.06902,0.00042732,-2.04146e-07,1.09267e-08,1.06945,0.000426945,-1.71365e-07,-3.97023e-09,1.06987,0.00042659,-1.83276e-07,4.9542e-09,1.0703,0.000426238,-1.68414e-07,-1.58466e-08,1.07073,0.000425854,-2.15953e-07,5.84321e-08,1.07115,0.000425597,-4.0657e-08,-9.86725e-08,1.07158,0.00042522,-3.36674e-07,9.78392e-08,1.072,0.00042484,-4.31568e-08,-5.42658e-08,1.07243,0.000424591,-2.05954e-07,1.45377e-11,1.07285,0.000424179,-2.0591e-07,5.42076e-08,1.07328,0.00042393,-4.32877e-08,-9.76357e-08,1.0737,0.00042355,-3.36195e-07,9.79165e-08,1.07412,0.000423172,-4.24451e-08,-5.56118e-08,1.07455,0.00042292,-2.09281e-07,5.32143e-09,1.07497,0.000422518,-1.93316e-07,3.43261e-08,1.07539,0.000422234,-9.0338e-08,-2.34165e-08,1.07581,0.000421983,-1.60588e-07,-5.98692e-08,1.07623,0.000421482,-3.40195e-07,1.43684e-07,1.07666,0.000421233,9.08574e-08,-1.5724e-07,1.07708,0.000420943,-3.80862e-07,1.27647e-07,1.0775,0.000420564,2.0791e-09,-1.1493e-07,1.07792,0.000420223,-3.4271e-07,9.36534e-08,1.07834,0.000419819,-6.17499e-08,-2.12653e-08,1.07876,0.000419632,-1.25546e-07,-8.59219e-09,1.07918,0.000419355,-1.51322e-07,-6.35752e-08,1.0796,0.000418861,-3.42048e-07,1.43684e-07,1.08002,0.000418608,8.90034e-08,-1.53532e-07,1.08043,0.000418326,-3.71593e-07,1.12817e-07,1.08085,0.000417921,-3.31414e-08,-5.93184e-08,1.08127,0.000417677,-2.11097e-07,5.24697e-09,1.08169,0.00041727,-1.95356e-07,3.83305e-08,1.0821,0.000416995,-8.03642e-08,-3.93597e-08,1.08252,0.000416716,-1.98443e-07,-1.0094e-10,1.08294,0.000416319,-1.98746e-07,3.97635e-08,1.08335,0.00041604,-7.94557e-08,-3.97437e-08,1.08377,0.000415762,-1.98687e-07,1.94215e-12,1.08419,0.000415365,-1.98681e-07,3.97359e-08,1.0846,0.000415087,-7.94732e-08,-3.97362e-08,1.08502,0.000414809,-1.98682e-07,-4.31063e-13,1.08543,0.000414411,-1.98683e-07,3.97379e-08,1.08584,0.000414133,-7.94694e-08,-3.97418e-08,1.08626,0.000413855,-1.98695e-07,2.00563e-11,1.08667,0.000413458,-1.98635e-07,3.96616e-08,1.08709,0.000413179,-7.965e-08,-3.9457e-08,1.0875,0.000412902,-1.98021e-07,-1.04281e-09,1.08791,0.000412502,-2.01149e-07,4.36282e-08,1.08832,0.000412231,-7.02648e-08,-5.42608e-08,1.08874,0.000411928,-2.33047e-07,5.42057e-08,1.08915,0.000411624,-7.04301e-08,-4.33527e-08,1.08956,0.000411353,-2.00488e-07,-4.07378e-12,1.08997,0.000410952,-2.005e-07,4.3369e-08,1.09038,0.000410681,-7.03934e-08,-5.42627e-08,1.09079,0.000410378,-2.33182e-07,5.44726e-08,1.0912,0.000410075,-6.97637e-08,-4.44186e-08,1.09161,0.000409802,-2.03019e-07,3.99235e-09,1.09202,0.000409408,-1.91042e-07,2.84491e-08,1.09243,0.000409111,-1.05695e-07,1.42043e-09,1.09284,0.000408904,-1.01434e-07,-3.41308e-08,1.09325,0.000408599,-2.03826e-07,1.58937e-08,1.09366,0.000408239,-1.56145e-07,-2.94438e-08,1.09406,0.000407838,-2.44476e-07,1.01881e-07,1.09447,0.000407655,6.11676e-08,-1.39663e-07,1.09488,0.000407358,-3.57822e-07,9.91432e-08,1.09529,0.00040694,-6.03921e-08,-1.84912e-08,1.09569,0.000406764,-1.15866e-07,-2.51785e-08,1.0961,0.000406457,-1.91401e-07,-4.03115e-12,1.09651,0.000406074,-1.91413e-07,2.51947e-08,1.09691,0.000405767,-1.15829e-07,1.84346e-08,1.09732,0.00040559,-6.05254e-08,-9.89332e-08,1.09772,0.000405172,-3.57325e-07,1.3888e-07,1.09813,0.000404874,5.93136e-08,-9.8957e-08,1.09853,0.000404696,-2.37557e-07,1.853e-08,1.09894,0.000404277,-1.81968e-07,2.48372e-08,1.09934,0.000403987,-1.07456e-07,1.33047e-09,1.09975,0.000403776,-1.03465e-07,-3.01591e-08,1.10015,0.000403479,-1.93942e-07,9.66054e-11,1.10055,0.000403091,-1.93652e-07,2.97727e-08,1.10096,0.000402793,-1.04334e-07,2.19273e-11,1.10136,0.000402585,-1.04268e-07,-2.98604e-08,1.10176,0.000402287,-1.93849e-07,2.10325e-10,1.10216,0.0004019,-1.93218e-07,2.90191e-08,1.10256,0.0004016,-1.06161e-07,2.92264e-09,1.10297,0.000401397,-9.73931e-08,-4.07096e-08,1.10337,0.00040108,-2.19522e-07,4.07067e-08,1.10377,0.000400763,-9.7402e-08,-2.90783e-09,1.10417,0.000400559,-1.06126e-07,-2.90754e-08,1.10457,0.00040026,-1.93352e-07,9.00021e-14,1.10497,0.000399873,-1.93351e-07,2.9075e-08,1.10537,0.000399574,-1.06126e-07,2.90902e-09,1.10577,0.00039937,-9.73992e-08,-4.07111e-08,1.10617,0.000399053,-2.19533e-07,4.07262e-08,1.10657,0.000398736,-9.73541e-08,-2.98424e-09,1.10697,0.000398533,-1.06307e-07,-2.87892e-08,1.10736,0.000398234,-1.92674e-07,-1.06824e-09,1.10776,0.000397845,-1.95879e-07,3.30622e-08,1.10816,0.000397552,-9.66926e-08,-1.19712e-08,1.10856,0.000397323,-1.32606e-07,1.48225e-08,1.10895,0.000397102,-8.81387e-08,-4.73187e-08,1.10935,0.000396784,-2.30095e-07,5.52429e-08,1.10975,0.00039649,-6.4366e-08,-5.44437e-08,1.11014,0.000396198,-2.27697e-07,4.33226e-08,1.11054,0.000395872,-9.77293e-08,3.62656e-10,1.11094,0.000395678,-9.66414e-08,-4.47732e-08,1.11133,0.00039535,-2.30961e-07,5.95208e-08,1.11173,0.000395067,-5.23985e-08,-7.41008e-08,1.11212,0.00039474,-2.74701e-07,1.17673e-07,1.11252,0.000394543,7.83181e-08,-1.58172e-07,1.11291,0.000394225,-3.96199e-07,1.57389e-07,1.1133,0.000393905,7.59679e-08,-1.13756e-07,1.1137,0.000393716,-2.653e-07,5.92165e-08,1.11409,0.000393363,-8.76507e-08,-3.90074e-09,1.11449,0.000393176,-9.93529e-08,-4.36136e-08,1.11488,0.000392846,-2.30194e-07,5.91457e-08,1.11527,0.000392563,-5.27564e-08,-7.376e-08,1.11566,0.000392237,-2.74037e-07,1.16685e-07,1.11606,0.000392039,7.60189e-08,-1.54562e-07,1.11645,0.000391727,-3.87667e-07,1.43935e-07,1.11684,0.000391384,4.4137e-08,-6.35487e-08,1.11723,0.000391281,-1.46509e-07,-8.94896e-09,1.11762,0.000390961,-1.73356e-07,-1.98647e-08,1.11801,0.000390555,-2.3295e-07,8.8408e-08,1.1184,0.000390354,3.22736e-08,-9.53486e-08,1.11879,0.000390133,-2.53772e-07,5.45677e-08,1.11918,0.000389789,-9.0069e-08,-3.71296e-09,1.11957,0.000389598,-1.01208e-07,-3.97159e-08,1.11996,0.000389276,-2.20355e-07,4.33671e-08,1.12035,0.000388966,-9.02542e-08,-1.45431e-08,1.12074,0.000388741,-1.33883e-07,1.48052e-08,1.12113,0.000388518,-8.94678e-08,-4.46778e-08,1.12152,0.000388205,-2.23501e-07,4.46966e-08,1.12191,0.000387892,-8.94114e-08,-1.48992e-08,1.12229,0.000387669,-1.34109e-07,1.49003e-08,1.12268,0.000387445,-8.94082e-08,-4.47019e-08,1.12307,0.000387132,-2.23514e-07,4.4698e-08,1.12345,0.000386819,-8.942e-08,-1.48806e-08,1.12384,0.000386596,-1.34062e-07,1.48245e-08,1.12423,0.000386372,-8.95885e-08,-4.44172e-08,1.12461,0.00038606,-2.2284e-07,4.36351e-08,1.125,0.000385745,-9.19348e-08,-1.09139e-08,1.12539,0.000385528,-1.24677e-07,2.05584e-11,1.12577,0.000385279,-1.24615e-07,1.08317e-08,1.12616,0.000385062,-9.21198e-08,-4.33473e-08,1.12654,0.000384748,-2.22162e-07,4.33481e-08,1.12693,0.000384434,-9.21174e-08,-1.08356e-08,1.12731,0.000384217,-1.24624e-07,-5.50907e-12,1.12769,0.000383968,-1.24641e-07,1.08577e-08,1.12808,0.000383751,-9.20679e-08,-4.34252e-08,1.12846,0.000383437,-2.22343e-07,4.36337e-08,1.12884,0.000383123,-9.14422e-08,-1.19005e-08,1.12923,0.000382904,-1.27144e-07,3.96813e-09,1.12961,0.000382662,-1.15239e-07,-3.97207e-09,1.12999,0.000382419,-1.27155e-07,1.19201e-08,1.13038,0.000382201,-9.1395e-08,-4.37085e-08,1.13076,0.000381887,-2.2252e-07,4.37046e-08,1.13114,0.000381573,-9.14068e-08,-1.19005e-08,1.13152,0.000381355,-1.27108e-07,3.89734e-09,1.1319,0.000381112,-1.15416e-07,-3.68887e-09,1.13228,0.00038087,-1.26483e-07,1.08582e-08,1.13266,0.00038065,-9.39083e-08,-3.97438e-08,1.13304,0.000380343,-2.1314e-07,2.89076e-08,1.13342,0.000380003,-1.26417e-07,4.33225e-08,1.1338,0.00037988,3.55072e-09,-8.29883e-08,1.13418,0.000379638,-2.45414e-07,5.0212e-08,1.13456,0.000379298,-9.47781e-08,1.34964e-09,1.13494,0.000379113,-9.07292e-08,-5.56105e-08,1.13532,0.000378764,-2.57561e-07,1.01883e-07,1.1357,0.000378555,4.80889e-08,-1.13504e-07,1.13608,0.000378311,-2.92423e-07,1.13713e-07,1.13646,0.000378067,4.87176e-08,-1.02931e-07,1.13683,0.000377856,-2.60076e-07,5.95923e-08,1.13721,0.000377514,-8.12988e-08,-1.62288e-08,1.13759,0.000377303,-1.29985e-07,5.32278e-09,1.13797,0.000377059,-1.14017e-07,-5.06237e-09,1.13834,0.000376816,-1.29204e-07,1.49267e-08,1.13872,0.000376602,-8.44237e-08,-5.46444e-08,1.1391,0.000376269,-2.48357e-07,8.44417e-08,1.13947,0.000376026,4.96815e-09,-4.47039e-08,1.13985,0.000375902,-1.29143e-07,-2.48355e-08,1.14023,0.000375569,-2.0365e-07,2.48368e-08,1.1406,0.000375236,-1.2914e-07,4.46977e-08,1.14098,0.000375112,4.95341e-09,-8.44184e-08,1.14135,0.000374869,-2.48302e-07,5.45572e-08,1.14173,0.000374536,-8.463e-08,-1.46013e-08,1.1421,0.000374323,-1.28434e-07,3.8478e-09,1.14247,0.000374077,-1.1689e-07,-7.89941e-10,1.14285,0.000373841,-1.1926e-07,-6.88042e-10,1.14322,0.0003736,-1.21324e-07,3.54213e-09,1.1436,0.000373368,-1.10698e-07,-1.34805e-08,1.14397,0.000373107,-1.51139e-07,5.03798e-08,1.14434,0.000372767,0.,0.};
+
+    template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct RGB2Luv;
+
+    template <int scn, int dcn, bool srgb, int blueIdx> struct RGB2Luv<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float _d = 1.f / (0.950456f + 15 + 1.088754f * 3);
+            const float _un = 13 * (4 * 0.950456f * _d);
+            const float _vn = 13 * (9 * _d);
+
+            float B = blueIdx == 0 ? src.x : src.z;
+            float G = src.y;
+            float R = blueIdx == 0 ? src.z : src.x;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            float X = R * 0.412453f + G * 0.357580f + B * 0.180423f;
+            float Y = R * 0.212671f + G * 0.715160f + B * 0.072169f;
+            float Z = R * 0.019334f + G * 0.119193f + B * 0.950227f;
+
+            float L = splineInterpolate(Y * (LAB_CBRT_TAB_SIZE / 1.5f), c_LabCbrtTab, LAB_CBRT_TAB_SIZE);
+            L = 116.f * L - 16.f;
+
+            const float d = (4 * 13) / ::fmaxf(X + 15 * Y + 3 * Z, numeric_limits<float>::epsilon());
+            float u = L * (X * d - _un);
+            float v = L * ((9 * 0.25f) * Y * d - _vn);
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = L;
+            dst.y = u;
+            dst.z = v;
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, bool srgb, int blueIdx> struct RGB2Luv<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename MakeVec<uchar, scn>::type, typename MakeVec<uchar, dcn>::type>
+    {
+        __device__ typename MakeVec<uchar, dcn>::type operator ()(const typename MakeVec<uchar, scn>::type& src) const
+        {
+            float3 buf;
+
+            buf.x = src.x * (1.f / 255.f);
+            buf.y = src.y * (1.f / 255.f);
+            buf.z = src.z * (1.f / 255.f);
+
+            RGB2Luv<float, 3, 3, srgb, blueIdx> cvtf;
+            buf = cvtf(buf);
+
+            typename MakeVec<uchar, dcn>::type dst;
+
+            dst.x = saturate_cast<uchar>(buf.x * 2.55f);
+            dst.y = saturate_cast<uchar>(buf.y * 0.72033898305084743f + 96.525423728813564f);
+            dst.z = saturate_cast<uchar>(buf.z * 0.99609375f + 139.453125f);
+
+            return dst;
+        }
+    };
+
+    // Luv to RGB
+
+    template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct Luv2RGB;
+
+    template <int scn, int dcn, bool srgb, int blueIdx> struct Luv2RGB<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename MakeVec<float, scn>::type, typename MakeVec<float, dcn>::type>
+    {
+        __device__ typename MakeVec<float, dcn>::type operator ()(const typename MakeVec<float, scn>::type& src) const
+        {
+            const float _d = 1.f / (0.950456f + 15 + 1.088754f * 3);
+            const float _un = 4 * 0.950456f * _d;
+            const float _vn = 9 * _d;
+
+            float L = src.x;
+            float u = src.y;
+            float v = src.z;
+
+            float Y = (L + 16.f) * (1.f / 116.f);
+            Y = Y * Y * Y;
+
+            float d = (1.f / 13.f) / L;
+            u = u * d + _un;
+            v = v * d + _vn;
+
+            float iv = 1.f / v;
+            float X = 2.25f * u * Y * iv;
+            float Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv;
+
+            float B = 0.055648f * X - 0.204043f * Y + 1.057311f * Z;
+            float G = -0.969256f * X + 1.875991f * Y + 0.041556f * Z;
+            float R = 3.240479f * X - 1.537150f * Y - 0.498535f * Z;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            typename MakeVec<float, dcn>::type dst;
+
+            dst.x = blueIdx == 0 ? B : R;
+            dst.y = G;
+            dst.z = blueIdx == 0 ? R : B;
+            setAlpha(dst, ColorChannel<float>::max());
+
+            return dst;
+        }
+    };
+
+    template <int scn, int dcn, bool srgb, int blueIdx> struct Luv2RGB<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename MakeVec<uchar, scn>::type, typename MakeVec<uchar, dcn>::type>
+    {
+        __device__ typename MakeVec<uchar, dcn>::type operator ()(const typename MakeVec<uchar, scn>::type& src) const
+        {
+            float3 buf;
+
+            buf.x = src.x * (100.f / 255.f);
+            buf.y = src.y * 1.388235294117647f - 134.f;
+            buf.z = src.z * 1.003921568627451f - 140.f;
+
+            Luv2RGB<float, 3, 3, srgb, blueIdx> cvtf;
+            buf = cvtf(buf);
+
+            typename MakeVec<uchar, dcn>::type dst;
+
+            dst.x = saturate_cast<uchar>(buf.x * 255.f);
+            dst.y = saturate_cast<uchar>(buf.y * 255.f);
+            dst.z = saturate_cast<uchar>(buf.z * 255.f);
+            setAlpha(dst, ColorChannel<uchar>::max());
+
+            return dst;
+        }
+    };
+
+    #undef CV_CUDEV_DESCALE
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/functional/functional.hpp b/modules/cudev/include/opencv2/cudev/functional/functional.hpp
new file mode 100644
index 000000000..3ac532819
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/functional/functional.hpp
@@ -0,0 +1,854 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_FUNCTIONAL_FUNCTIONAL_HPP__
+#define __OPENCV_CUDEV_FUNCTIONAL_FUNCTIONAL_HPP__
+
+#include "../common.hpp"
+#include "../util/saturate_cast.hpp"
+#include "../util/vec_traits.hpp"
+#include "../util/vec_math.hpp"
+#include "../util/type_traits.hpp"
+
+namespace cv { namespace cudev {
+
+// Function Objects
+
+template <typename _Arg, typename _Result> struct unary_function
+{
+    typedef _Arg    argument_type;
+    typedef _Result result_type;
+};
+
+template <typename _Arg1, typename _Arg2, typename _Result> struct binary_function
+{
+    typedef _Arg1   first_argument_type;
+    typedef _Arg2   second_argument_type;
+    typedef _Result result_type;
+};
+
+// Arithmetic Operations
+
+template <typename T> struct plus : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return saturate_cast<T>(a + b);
+    }
+};
+
+template <typename T> struct minus : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return saturate_cast<T>(a - b);
+    }
+};
+
+template <typename T> struct multiplies : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return saturate_cast<T>(a * b);
+    }
+};
+
+template <typename T> struct divides : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return saturate_cast<T>(a / b);
+    }
+};
+
+template <typename T> struct modulus : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return saturate_cast<T>(a % b);
+    }
+};
+
+template <typename T> struct negate : unary_function<T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a) const
+    {
+        return saturate_cast<T>(-a);
+    }
+};
+
+// Comparison Operations
+
+template <typename T> struct equal_to : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
+{
+    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
+                                    operator ()(typename TypeTraits<T>::parameter_type a,
+                                                typename TypeTraits<T>::parameter_type b) const
+    {
+        return a == b;
+    }
+};
+
+template <typename T> struct not_equal_to : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
+{
+    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
+                                    operator ()(typename TypeTraits<T>::parameter_type a,
+                                                typename TypeTraits<T>::parameter_type b) const
+    {
+        return a != b;
+    }
+};
+
+template <typename T> struct greater : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
+{
+    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
+                                    operator ()(typename TypeTraits<T>::parameter_type a,
+                                                typename TypeTraits<T>::parameter_type b) const
+    {
+        return a > b;
+    }
+};
+
+template <typename T> struct less : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
+{
+    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
+                                    operator ()(typename TypeTraits<T>::parameter_type a,
+                                                typename TypeTraits<T>::parameter_type b) const
+    {
+        return a < b;
+    }
+};
+
+template <typename T> struct greater_equal : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
+{
+    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
+                                    operator ()(typename TypeTraits<T>::parameter_type a,
+                                                typename TypeTraits<T>::parameter_type b) const
+    {
+        return a >= b;
+    }
+};
+
+template <typename T> struct less_equal : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
+{
+    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
+                                    operator ()(typename TypeTraits<T>::parameter_type a,
+                                                typename TypeTraits<T>::parameter_type b) const
+    {
+        return a <= b;
+    }
+};
+
+// Logical Operations
+
+template <typename T> struct logical_and : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
+{
+    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
+                                    operator ()(typename TypeTraits<T>::parameter_type a,
+                                                typename TypeTraits<T>::parameter_type b) const
+    {
+        return a && b;
+    }
+};
+
+template <typename T> struct logical_or : binary_function<T, T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
+{
+    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
+                                    operator ()(typename TypeTraits<T>::parameter_type a,
+                                                typename TypeTraits<T>::parameter_type b) const
+    {
+        return a || b;
+    }
+};
+
+template <typename T> struct logical_not : unary_function<T, typename MakeVec<uchar, VecTraits<T>::cn>::type>
+{
+    __device__ __forceinline__ typename MakeVec<uchar, VecTraits<T>::cn>::type
+                                    operator ()(typename TypeTraits<T>::parameter_type a) const
+    {
+        return !a;
+    }
+};
+
+// Bitwise Operations
+
+template <typename T> struct bit_and : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return a & b;
+    }
+};
+
+template <typename T> struct bit_or : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return a | b;
+    }
+};
+
+template <typename T> struct bit_xor : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return a ^ b;
+    }
+};
+
+template <typename T> struct bit_not : unary_function<T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type v) const
+    {
+        return ~v;
+    }
+};
+
+template <typename T> struct bit_lshift : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return a << b;
+    }
+};
+
+template <typename T> struct bit_rshift : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return a >> b;
+    }
+};
+
+// Generalized Identity Operations
+
+template <typename T> struct identity : unary_function<T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type x) const
+    {
+        return x;
+    }
+};
+
+template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
+{
+    __device__ __forceinline__ T1
+                    operator ()(typename TypeTraits<T1>::parameter_type lhs,
+                                typename TypeTraits<T2>::parameter_type) const
+    {
+        return lhs;
+    }
+};
+
+template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
+{
+    __device__ __forceinline__ T2
+                    operator ()(typename TypeTraits<T1>::parameter_type,
+                                typename TypeTraits<T2>::parameter_type rhs) const
+    {
+        return rhs;
+    }
+};
+
+// Min/Max Operations
+
+template <typename T> struct maximum : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return max(a, b);
+    }
+};
+
+template <typename T> struct minimum : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a,
+                                             typename TypeTraits<T>::parameter_type b) const
+    {
+        return min(a, b);
+    }
+};
+
+#define CV_CUDEV_MINMAX_INST(type, maxop, minop) \
+    template <> struct maximum<type> : binary_function<type, type, type> \
+    { \
+        __device__ __forceinline__ type operator ()(type a, type b) const {return maxop(a, b);} \
+    }; \
+    template <> struct minimum<type> : binary_function<type, type, type> \
+    { \
+        __device__ __forceinline__ type operator ()(type a, type b) const {return minop(a, b);} \
+    };
+
+
+CV_CUDEV_MINMAX_INST(uchar, ::max, ::min)
+CV_CUDEV_MINMAX_INST(schar, ::max, ::min)
+CV_CUDEV_MINMAX_INST(ushort, ::max, ::min)
+CV_CUDEV_MINMAX_INST(short, ::max, ::min)
+CV_CUDEV_MINMAX_INST(int, ::max, ::min)
+CV_CUDEV_MINMAX_INST(uint, ::max, ::min)
+CV_CUDEV_MINMAX_INST(float, ::fmaxf, ::fminf)
+CV_CUDEV_MINMAX_INST(double, ::fmax, ::fmin)
+
+#undef CV_CUDEV_MINMAX_INST
+
+// abs_func
+
+template <typename T> struct abs_func : unary_function<T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type x) const
+    {
+        return abs(x);
+    }
+};
+
+template <> struct abs_func<uchar> : unary_function<uchar, uchar>
+{
+    __device__ __forceinline__ uchar operator ()(uchar x) const
+    {
+        return x;
+    }
+};
+
+template <> struct abs_func<schar> : unary_function<schar, schar>
+{
+    __device__ __forceinline__ schar operator ()(schar x) const
+    {
+        return ::abs((int) x);
+    }
+};
+
+template <> struct abs_func<ushort> : unary_function<ushort, ushort>
+{
+    __device__ __forceinline__ ushort operator ()(ushort x) const
+    {
+        return x;
+    }
+};
+
+template <> struct abs_func<short> : unary_function<short, short>
+{
+    __device__ __forceinline__ short operator ()(short x) const
+    {
+        return ::abs((int) x);
+    }
+};
+
+template <> struct abs_func<uint> : unary_function<uint, uint>
+{
+    __device__ __forceinline__ uint operator ()(uint x) const
+    {
+        return x;
+    }
+};
+
+template <> struct abs_func<int> : unary_function<int, int>
+{
+    __device__ __forceinline__ int operator ()(int x) const
+    {
+        return ::abs(x);
+    }
+};
+
+template <> struct abs_func<float> : unary_function<float, float>
+{
+    __device__ __forceinline__ float operator ()(float x) const
+    {
+        return ::fabsf(x);
+    }
+};
+
+template <> struct abs_func<double> : unary_function<double, double>
+{
+    __device__ __forceinline__ double operator ()(double x) const
+    {
+        return ::fabs(x);
+    }
+};
+
+// absdiff_func
+
+template <typename T> struct absdiff_func : binary_function<T, T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type a, typename TypeTraits<T>::parameter_type b) const
+    {
+        abs_func<T> f;
+        return f(a - b);
+    }
+};
+
+// Math functions
+
+template <typename T> struct sqr_func : unary_function<T, T>
+{
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type x) const
+    {
+        return x * x;
+    }
+};
+
+namespace functional_detail
+{
+    template <typename T> struct FloatType
+    {
+        typedef typename MakeVec<
+            typename LargerType<float, typename VecTraits<T>::elem_type>::type,
+            VecTraits<T>::cn
+        >::type type;
+    };
+}
+
+#define CV_CUDEV_UNARY_FUNCTION_INST(name, func) \
+    template <typename T> struct name ## _func : unary_function<T, typename functional_detail::FloatType<T>::type> \
+    { \
+        __device__ __forceinline__ typename functional_detail::FloatType<T>::type operator ()(typename TypeTraits<T>::parameter_type a) const \
+        { \
+            return name(a); \
+        } \
+    }; \
+    template <> struct name ## _func<uchar> : unary_function<uchar, float> \
+    { \
+        __device__ __forceinline__ float operator ()(uchar a) const \
+        { \
+            return func ## f(a); \
+        } \
+    }; \
+    template <> struct name ## _func<schar> : unary_function<schar, float> \
+    { \
+        __device__ __forceinline__ float operator ()(schar a) const \
+        { \
+            return func ## f(a); \
+        } \
+    }; \
+    template <> struct name ## _func<ushort> : unary_function<ushort, float> \
+    { \
+        __device__ __forceinline__ float operator ()(ushort a) const \
+        { \
+            return func ## f(a); \
+        } \
+    }; \
+    template <> struct name ## _func<short> : unary_function<short, float> \
+    { \
+        __device__ __forceinline__ float operator ()(short a) const \
+        { \
+            return func ## f(a); \
+        } \
+    }; \
+    template <> struct name ## _func<uint> : unary_function<uint, float> \
+    { \
+        __device__ __forceinline__ float operator ()(uint a) const \
+        { \
+            return func ## f(a); \
+        } \
+    }; \
+    template <> struct name ## _func<int> : unary_function<int, float> \
+    { \
+        __device__ __forceinline__ float operator ()(int a) const \
+        { \
+            return func ## f(a); \
+        } \
+    }; \
+    template <> struct name ## _func<float> : unary_function<float, float> \
+    { \
+        __device__ __forceinline__ float operator ()(float a) const \
+        { \
+            return func ## f(a); \
+        } \
+    }; \
+    template <> struct name ## _func<double> : unary_function<double, double> \
+    { \
+        __device__ __forceinline__ double operator ()(double a) const \
+        { \
+            return func(a); \
+        } \
+    };
+
+CV_CUDEV_UNARY_FUNCTION_INST(sqrt, ::sqrt)
+CV_CUDEV_UNARY_FUNCTION_INST(exp, ::exp)
+CV_CUDEV_UNARY_FUNCTION_INST(exp2, ::exp2)
+CV_CUDEV_UNARY_FUNCTION_INST(exp10, ::exp10)
+CV_CUDEV_UNARY_FUNCTION_INST(log, ::log)
+CV_CUDEV_UNARY_FUNCTION_INST(log2, ::log2)
+CV_CUDEV_UNARY_FUNCTION_INST(log10, ::log10)
+CV_CUDEV_UNARY_FUNCTION_INST(sin, ::sin)
+CV_CUDEV_UNARY_FUNCTION_INST(cos, ::cos)
+CV_CUDEV_UNARY_FUNCTION_INST(tan, ::tan)
+CV_CUDEV_UNARY_FUNCTION_INST(asin, ::asin)
+CV_CUDEV_UNARY_FUNCTION_INST(acos, ::acos)
+CV_CUDEV_UNARY_FUNCTION_INST(atan, ::atan)
+CV_CUDEV_UNARY_FUNCTION_INST(sinh, ::sinh)
+CV_CUDEV_UNARY_FUNCTION_INST(cosh, ::cosh)
+CV_CUDEV_UNARY_FUNCTION_INST(tanh, ::tanh)
+CV_CUDEV_UNARY_FUNCTION_INST(asinh, ::asinh)
+CV_CUDEV_UNARY_FUNCTION_INST(acosh, ::acosh)
+CV_CUDEV_UNARY_FUNCTION_INST(atanh, ::atanh)
+
+#undef CV_CUDEV_UNARY_FUNCTION_INST
+
+#define CV_CUDEV_BINARY_FUNCTION_INST(name, func) \
+    template <typename T> struct name ## _func : binary_function<T, T, typename functional_detail::FloatType<T>::type> \
+    { \
+        __device__ __forceinline__ typename functional_detail::FloatType<T>::type operator ()(typename TypeTraits<T>::parameter_type a, typename TypeTraits<T>::parameter_type b) const \
+        { \
+            return name(a, b); \
+        } \
+    }; \
+    template <> struct name ## _func<uchar> : binary_function<uchar, uchar, float> \
+    { \
+        __device__ __forceinline__ float operator ()(uchar a, uchar b) const \
+        { \
+            return func ## f(a, b); \
+        } \
+    }; \
+    template <> struct name ## _func<schar> : binary_function<schar, schar, float> \
+    { \
+        __device__ __forceinline__ float operator ()(schar a, schar b) const \
+        { \
+            return func ## f(a, b); \
+        } \
+    }; \
+    template <> struct name ## _func<ushort> : binary_function<ushort, ushort, float> \
+    { \
+        __device__ __forceinline__ float operator ()(ushort a, ushort b) const \
+        { \
+            return func ## f(a, b); \
+        } \
+    }; \
+    template <> struct name ## _func<short> : binary_function<short, short, float> \
+    { \
+        __device__ __forceinline__ float operator ()(short a, short b) const \
+        { \
+            return func ## f(a, b); \
+        } \
+    }; \
+    template <> struct name ## _func<uint> : binary_function<uint, uint, float> \
+    { \
+        __device__ __forceinline__ float operator ()(uint a, uint b) const \
+        { \
+            return func ## f(a, b); \
+        } \
+    }; \
+    template <> struct name ## _func<int> : binary_function<int, int, float> \
+    { \
+        __device__ __forceinline__ float operator ()(int a, int b) const \
+        { \
+            return func ## f(a, b); \
+        } \
+    }; \
+    template <> struct name ## _func<float> : binary_function<float, float, float> \
+    { \
+        __device__ __forceinline__ float operator ()(float a, float b) const \
+        { \
+            return func ## f(a, b); \
+        } \
+    }; \
+    template <> struct name ## _func<double> : binary_function<double, double, double> \
+    { \
+        __device__ __forceinline__ double operator ()(double a, double b) const \
+        { \
+            return func(a, b); \
+        } \
+    };
+
+CV_CUDEV_BINARY_FUNCTION_INST(hypot, ::hypot)
+CV_CUDEV_BINARY_FUNCTION_INST(atan2, ::atan2)
+
+#undef CV_CUDEV_BINARY_FUNCTION_INST
+
+template <typename T> struct magnitude_func : binary_function<T, T, typename functional_detail::FloatType<T>::type>
+{
+    __device__ __forceinline__ typename functional_detail::FloatType<T>::type operator ()(typename TypeTraits<T>::parameter_type a, typename TypeTraits<T>::parameter_type b) const
+    {
+        sqrt_func<typename functional_detail::FloatType<T>::type> f;
+        return f(a * a + b * b);
+    }
+};
+
+template <typename T> struct pow_func : binary_function<T, float, float>
+{
+    __device__ __forceinline__ float operator ()(T val, float power) const
+    {
+        return ::powf(val, power);
+    }
+};
+template <> struct pow_func<double> : binary_function<double, double, double>
+{
+    __device__ __forceinline__ double operator ()(double val, double power) const
+    {
+        return ::pow(val, power);
+    }
+};
+
+// Saturate Cast Functor
+
+template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
+{
+    __device__ __forceinline__ D operator ()(typename TypeTraits<T>::parameter_type v) const
+    {
+        return saturate_cast<D>(v);
+    }
+};
+
+// Threshold Functors
+
+template <typename T> struct ThreshBinaryFunc : unary_function<T, T>
+{
+    T thresh;
+    T maxVal;
+
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
+    {
+        return saturate_cast<T>(src > thresh) * maxVal;
+    }
+};
+
+template <typename T>
+__host__ __device__ ThreshBinaryFunc<T> thresh_binary_func(T thresh, T maxVal)
+{
+    ThreshBinaryFunc<T> f;
+    f.thresh = thresh;
+    f.maxVal = maxVal;
+    return f;
+}
+
+template <typename T> struct ThreshBinaryInvFunc : unary_function<T, T>
+{
+    T thresh;
+    T maxVal;
+
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
+    {
+        return saturate_cast<T>(src <= thresh) * maxVal;
+    }
+};
+
+template <typename T>
+__host__ __device__ ThreshBinaryInvFunc<T> thresh_binary_inv_func(T thresh, T maxVal)
+{
+    ThreshBinaryInvFunc<T> f;
+    f.thresh = thresh;
+    f.maxVal = maxVal;
+    return f;
+}
+
+template <typename T> struct ThreshTruncFunc : unary_function<T, T>
+{
+    T thresh;
+
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
+    {
+        minimum<T> minOp;
+        return minOp(src, thresh);
+    }
+};
+
+template <typename T>
+__host__ __device__ ThreshTruncFunc<T> thresh_trunc_func(T thresh)
+{
+    ThreshTruncFunc<T> f;
+    f.thresh = thresh;
+    return f;
+}
+
+template <typename T> struct ThreshToZeroFunc : unary_function<T, T>
+{
+    T thresh;
+
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
+    {
+        return saturate_cast<T>(src > thresh) * src;
+    }
+};
+
+template <typename T>
+__host__ __device__ ThreshToZeroFunc<T> thresh_to_zero_func(T thresh)
+{
+    ThreshToZeroFunc<T> f;
+    f.thresh = thresh;
+    return f;
+}
+
+template <typename T> struct ThreshToZeroInvFunc : unary_function<T, T>
+{
+    T thresh;
+
+    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::parameter_type src) const
+    {
+        return saturate_cast<T>(src <= thresh) * src;
+    }
+};
+
+template <typename T>
+__host__ __device__ ThreshToZeroInvFunc<T> thresh_to_zero_inv_func(T thresh)
+{
+    ThreshToZeroInvFunc<T> f;
+    f.thresh = thresh;
+    return f;
+}
+
+// Function Object Adaptors
+
+template <class Predicate> struct UnaryNegate : unary_function<typename Predicate::argument_type, typename Predicate::result_type>
+{
+    Predicate pred;
+
+    __device__ __forceinline__ typename Predicate::result_type operator ()(
+            typename TypeTraits<typename Predicate::argument_type>::parameter_type x) const
+    {
+        return !pred(x);
+    }
+};
+
+template <class Predicate>
+__host__ __device__ UnaryNegate<Predicate> not1(const Predicate& pred)
+{
+    UnaryNegate<Predicate> n;
+    n.pred = pred;
+    return n;
+}
+
+template <class Predicate> struct BinaryNegate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, typename Predicate::result_type>
+{
+    Predicate pred;
+
+    __device__ __forceinline__ typename Predicate::result_type operator ()(
+            typename TypeTraits<typename Predicate::first_argument_type>::parameter_type x,
+            typename TypeTraits<typename Predicate::second_argument_type>::parameter_type y) const
+    {
+        return !pred(x, y);
+    }
+};
+
+template <class Predicate>
+__host__ __device__ BinaryNegate<Predicate> not2(const Predicate& pred)
+{
+    BinaryNegate<Predicate> n;
+    n.pred = pred;
+    return n;
+}
+
+template <class Op> struct Binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
+{
+    Op op;
+    typename Op::first_argument_type arg1;
+
+    __device__ __forceinline__ typename Op::result_type operator ()(
+            typename TypeTraits<typename Op::second_argument_type>::parameter_type a) const
+    {
+        return op(arg1, a);
+    }
+};
+
+template <class Op>
+__host__ __device__ Binder1st<Op> bind1st(const Op& op, const typename Op::first_argument_type& arg1)
+{
+    Binder1st<Op> b;
+    b.op = op;
+    b.arg1 = arg1;
+    return b;
+}
+
+template <class Op> struct Binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
+{
+    Op op;
+    typename Op::second_argument_type arg2;
+
+    __device__ __forceinline__ typename Op::result_type operator ()(
+            typename TypeTraits<typename Op::first_argument_type>::parameter_type a) const
+    {
+        return op(a, arg2);
+    }
+};
+
+template <class Op>
+__host__ __device__ Binder2nd<Op> bind2nd(const Op& op, const typename Op::second_argument_type& arg2)
+{
+    Binder2nd<Op> b;
+    b.op = op;
+    b.arg2 = arg2;
+    return b;
+}
+
+// Functor Traits
+
+template <typename F> struct IsUnaryFunction
+{
+    typedef char Yes;
+    struct No {Yes a[2];};
+
+    template <typename T, typename D> static Yes check(unary_function<T, D>);
+    static No check(...);
+
+    static F makeF();
+
+    enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
+};
+
+template <typename F> struct IsBinaryFunction
+{
+    typedef char Yes;
+    struct No {Yes a[2];};
+
+    template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
+    static No check(...);
+
+    static F makeF();
+
+    enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/functional/tuple_adapter.hpp b/modules/cudev/include/opencv2/cudev/functional/tuple_adapter.hpp
new file mode 100644
index 000000000..d3a40db0e
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/functional/tuple_adapter.hpp
@@ -0,0 +1,98 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_FUNCTIONAL_TUPLE_ADAPTER_HPP__
+#define __OPENCV_CUDEV_FUNCTIONAL_TUPLE_ADAPTER_HPP__
+
+#include "../common.hpp"
+#include "../util/tuple.hpp"
+
+namespace cv { namespace cudev {
+
+template <class Op, int n> struct UnaryTupleAdapter
+{
+    typedef typename Op::result_type result_type;
+
+    Op op;
+
+    template <class Tuple>
+    __device__ __forceinline__ typename Op::result_type operator ()(const Tuple& t) const
+    {
+        return op(get<n>(t));
+    }
+};
+
+template <int n, class Op>
+__host__ __device__ UnaryTupleAdapter<Op, n> unaryTupleAdapter(const Op& op)
+{
+    UnaryTupleAdapter<Op, n> a;
+    a.op = op;
+    return a;
+}
+
+template <class Op, int n0, int n1> struct BinaryTupleAdapter
+{
+    typedef typename Op::result_type result_type;
+
+    Op op;
+
+    template <class Tuple>
+    __device__ __forceinline__ typename Op::result_type operator ()(const Tuple& t) const
+    {
+        return op(get<n0>(t), get<n1>(t));
+    }
+};
+
+template <int n0, int n1, class Op>
+__host__ __device__ BinaryTupleAdapter<Op, n0, n1> binaryTupleAdapter(const Op& op)
+{
+    BinaryTupleAdapter<Op, n0, n1> a;
+    a.op = op;
+    return a;
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/copy.hpp b/modules/cudev/include/opencv2/cudev/grid/copy.hpp
new file mode 100644
index 000000000..d7d3ea834
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/copy.hpp
@@ -0,0 +1,452 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_COPY_HPP__
+#define __OPENCV_CUDEV_GRID_COPY_HPP__
+
+#include "../common.hpp"
+#include "../util/tuple.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "../ptr2d/glob.hpp"
+#include "../ptr2d/mask.hpp"
+#include "../ptr2d/zip.hpp"
+#include "detail/copy.hpp"
+
+namespace cv { namespace cudev {
+
+template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridCopy_(const SrcPtr& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    dst.create(rows, cols);
+
+    grid_copy_detail::copy<Policy>(shrinkPtr(src), shrinkPtr(dst), shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridCopy_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_copy_detail::copy<Policy>(shrinkPtr(src), shrinkPtr(dst), shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType>
+__host__ void gridCopy_(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst.create(rows, cols);
+
+    grid_copy_detail::copy<Policy>(shrinkPtr(src), shrinkPtr(dst), WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType>
+__host__ void gridCopy_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
+
+    grid_copy_detail::copy<Policy>(shrinkPtr(src), shrinkPtr(dst), WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, class MaskPtr>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
+                                         shrinkPtr(mask),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, class MaskPtr>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
+                                         shrinkPtr(mask),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
+                                         WithOutMask(),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
+                                         WithOutMask(),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2, class MaskPtr>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
+                                         shrinkPtr(mask),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2, class MaskPtr>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
+                                         shrinkPtr(mask),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
+                                         WithOutMask(),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols );
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
+                                         WithOutMask(),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3, class MaskPtr>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+    get<3>(dst).create(rows, cols);
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
+                                         shrinkPtr(mask),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3, class MaskPtr>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols );
+    CV_Assert( getRows(get<3>(dst)) == rows && getCols(get<3>(dst)) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
+                                         shrinkPtr(mask),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+    get<3>(dst).create(rows, cols);
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
+                                         WithOutMask(),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<SrcPtrTuple>::value == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols );
+    CV_Assert( getRows(get<3>(dst)) == rows && getCols(get<3>(dst)) == cols );
+
+    grid_copy_detail::copy_tuple<Policy>(shrinkPtr(src),
+                                         shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
+                                         WithOutMask(),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+// Default Policy
+
+struct DefaultCopyPolicy
+{
+    enum {
+        block_size_x = 32,
+        block_size_y = 8
+    };
+};
+
+template <class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridCopy(const SrcPtr& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridCopy(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename DstType>
+__host__ void gridCopy(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
+}
+
+template <class SrcPtr, typename DstType>
+__host__ void gridCopy(const SrcPtr& src, const GlobPtrSz<DstType>& dst, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, class MaskPtr>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, class MaskPtr>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, typename D2, class MaskPtr>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, typename D2, class MaskPtr>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, typename D2>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, typename D2>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3, class MaskPtr>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3, class MaskPtr>
+__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
+}
+
+template <class SrcPtrTuple, typename D0, typename D1, typename D2, typename D3>
+__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, Stream& stream = Stream::Null())
+{
+    gridCopy_<DefaultCopyPolicy>(src, dst, stream);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/copy.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/copy.hpp
new file mode 100644
index 000000000..707b842f9
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/copy.hpp
@@ -0,0 +1,132 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_COPY_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_COPY_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/tuple.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../ptr2d/glob.hpp"
+#include "../../ptr2d/traits.hpp"
+
+namespace cv { namespace cudev {
+
+namespace grid_copy_detail
+{
+    template <class SrcPtr, typename DstType, class MaskPtr>
+    __global__ void copy(const SrcPtr src, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        dst(y, x) = saturate_cast<DstType>(src(y, x));
+    }
+
+    template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+    __host__ void copy(const SrcPtr& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        copy<<<grid, block, 0, stream>>>(src, dst, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+
+    template <int count> struct Unroll
+    {
+        template <class SrcPtrTuple, class DstPtrTuple>
+        __device__ static void copy(const SrcPtrTuple& src, DstPtrTuple& dst, const int y, const int x)
+        {
+            typedef typename tuple_element<count - 1, DstPtrTuple>::type dst_ptr_type;
+            typedef typename PtrTraits<dst_ptr_type>::value_type dst_type;
+
+            get<count - 1>(dst)(y, x) = saturate_cast<dst_type>(get<count - 1>(src)(y, x));
+            Unroll<count - 1>::copy(src, dst, y, x);
+        }
+    };
+    template <> struct Unroll<0>
+    {
+        template <class SrcPtrTuple, class DstPtrTuple>
+        __device__ __forceinline__ static void copy(const SrcPtrTuple&, DstPtrTuple&, const int, const int)
+        {
+        }
+    };
+
+    template <class SrcPtrTuple, class DstPtrTuple, class MaskPtr>
+    __global__ void copy_tuple(const SrcPtrTuple src, DstPtrTuple dst, const MaskPtr mask, const int rows, const int cols)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        Unroll<tuple_size<SrcPtrTuple>::value>::copy(src, dst, y, x);
+    }
+
+    template <class Policy, class SrcPtrTuple, class DstPtrTuple, class MaskPtr>
+    __host__ void copy_tuple(const SrcPtrTuple& src, const DstPtrTuple& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        copy_tuple<<<grid, block, 0, stream>>>(src, dst, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/glob_reduce.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/glob_reduce.hpp
new file mode 100644
index 000000000..c9d715982
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/glob_reduce.hpp
@@ -0,0 +1,475 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_GLOB_REDUCE_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_GLOB_REDUCE_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/tuple.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../util/atomic.hpp"
+#include "../../util/vec_traits.hpp"
+#include "../../util/type_traits.hpp"
+#include "../../util/limits.hpp"
+#include "../../block/reduce.hpp"
+#include "../../functional/functional.hpp"
+#include "../../ptr2d/traits.hpp"
+
+namespace cv { namespace cudev {
+
+namespace grid_glob_reduce_detail
+{
+    // Unroll
+
+    template <int cn> struct Unroll;
+
+    template <> struct Unroll<1>
+    {
+        template <int BLOCK_SIZE, typename R>
+        __device__ __forceinline__ static volatile R* smem(R* ptr)
+        {
+            return ptr;
+        }
+
+        template <typename R>
+        __device__ __forceinline__ static R& res(R& val)
+        {
+            return val;
+        }
+
+        template <class Op>
+        __device__ __forceinline__ static const Op& op(const Op& aop)
+        {
+            return aop;
+        }
+    };
+
+    template <> struct Unroll<2>
+    {
+        template <int BLOCK_SIZE, typename R>
+        __device__ __forceinline__ static tuple<volatile R*, volatile R*> smem(R* ptr)
+        {
+            return smem_tuple(ptr, ptr + BLOCK_SIZE);
+        }
+
+        template <typename R>
+        __device__ __forceinline__ static tuple<typename VecTraits<R>::elem_type&, typename VecTraits<R>::elem_type&> res(R& val)
+        {
+            return tie(val.x, val.y);
+        }
+
+        template <class Op>
+        __device__ __forceinline__ static tuple<Op, Op> op(const Op& aop)
+        {
+            return make_tuple(aop, aop);
+        }
+    };
+
+    template <> struct Unroll<3>
+    {
+        template <int BLOCK_SIZE, typename R>
+        __device__ __forceinline__ static tuple<volatile R*, volatile R*, volatile R*> smem(R* ptr)
+        {
+            return smem_tuple(ptr, ptr + BLOCK_SIZE, ptr + 2 * BLOCK_SIZE);
+        }
+
+        template <typename R>
+        __device__ __forceinline__ static tuple<typename VecTraits<R>::elem_type&,
+                                                typename VecTraits<R>::elem_type&,
+                                                typename VecTraits<R>::elem_type&> res(R& val)
+        {
+            return tie(val.x, val.y, val.z);
+        }
+
+        template <class Op>
+        __device__ __forceinline__ static tuple<Op, Op, Op> op(const Op& aop)
+        {
+            return make_tuple(aop, aop, aop);
+        }
+    };
+
+    template <> struct Unroll<4>
+    {
+        template <int BLOCK_SIZE, typename R>
+        __device__ __forceinline__ static tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem(R* ptr)
+        {
+            return smem_tuple(ptr, ptr + BLOCK_SIZE, ptr + 2 * BLOCK_SIZE, ptr + 3 * BLOCK_SIZE);
+        }
+
+        template <typename R>
+        __device__ __forceinline__ static tuple<typename VecTraits<R>::elem_type&,
+                                                typename VecTraits<R>::elem_type&,
+                                                typename VecTraits<R>::elem_type&,
+                                                typename VecTraits<R>::elem_type&> res(R& val)
+        {
+            return tie(val.x, val.y, val.z, val.w);
+        }
+
+        template <class Op>
+        __device__ __forceinline__ static tuple<Op, Op, Op, Op> op(const Op& aop)
+        {
+            return make_tuple(aop, aop, aop, aop);
+        }
+    };
+
+    // AtomicUnroll
+
+    template <typename R, int cn> struct AtomicUnroll;
+
+    template <typename R> struct AtomicUnroll<R, 1>
+    {
+        __device__ __forceinline__ static void add(R* ptr, R val)
+        {
+            atomicAdd(ptr, val);
+        }
+
+        __device__ __forceinline__ static void min(R* ptr, R val)
+        {
+            atomicMin(ptr, val);
+        }
+
+        __device__ __forceinline__ static void max(R* ptr, R val)
+        {
+            atomicMax(ptr, val);
+        }
+    };
+
+    template <typename R> struct AtomicUnroll<R, 2>
+    {
+        typedef typename MakeVec<R, 2>::type val_type;
+
+        __device__ __forceinline__ static void add(R* ptr, val_type val)
+        {
+            atomicAdd(ptr, val.x);
+            atomicAdd(ptr + 1, val.y);
+        }
+
+        __device__ __forceinline__ static void min(R* ptr, val_type val)
+        {
+            atomicMin(ptr, val.x);
+            atomicMin(ptr + 1, val.y);
+        }
+
+        __device__ __forceinline__ static void max(R* ptr, val_type val)
+        {
+            atomicMax(ptr, val.x);
+            atomicMax(ptr + 1, val.y);
+        }
+    };
+
+    template <typename R> struct AtomicUnroll<R, 3>
+    {
+        typedef typename MakeVec<R, 3>::type val_type;
+
+        __device__ __forceinline__ static void add(R* ptr, val_type val)
+        {
+            atomicAdd(ptr, val.x);
+            atomicAdd(ptr + 1, val.y);
+            atomicAdd(ptr + 2, val.z);
+        }
+
+        __device__ __forceinline__ static void min(R* ptr, val_type val)
+        {
+            atomicMin(ptr, val.x);
+            atomicMin(ptr + 1, val.y);
+            atomicMin(ptr + 2, val.z);
+        }
+
+        __device__ __forceinline__ static void max(R* ptr, val_type val)
+        {
+            atomicMax(ptr, val.x);
+            atomicMax(ptr + 1, val.y);
+            atomicMax(ptr + 2, val.z);
+        }
+    };
+
+    template <typename R> struct AtomicUnroll<R, 4>
+    {
+        typedef typename MakeVec<R, 4>::type val_type;
+
+        __device__ __forceinline__ static void add(R* ptr, val_type val)
+        {
+            atomicAdd(ptr, val.x);
+            atomicAdd(ptr + 1, val.y);
+            atomicAdd(ptr + 2, val.z);
+            atomicAdd(ptr + 3, val.w);
+        }
+
+        __device__ __forceinline__ static void min(R* ptr, val_type val)
+        {
+            atomicMin(ptr, val.x);
+            atomicMin(ptr + 1, val.y);
+            atomicMin(ptr + 2, val.z);
+            atomicMin(ptr + 3, val.w);
+        }
+
+        __device__ __forceinline__ static void max(R* ptr, val_type val)
+        {
+            atomicMax(ptr, val.x);
+            atomicMax(ptr + 1, val.y);
+            atomicMax(ptr + 2, val.z);
+            atomicMax(ptr + 3, val.w);
+        }
+    };
+
+    // SumReductor
+
+    template <typename src_type, typename work_type> struct SumReductor
+    {
+        typedef typename VecTraits<work_type>::elem_type work_elem_type;
+        enum { cn = VecTraits<src_type>::cn };
+
+        work_type sum;
+
+        __device__ __forceinline__ SumReductor()
+        {
+            sum = VecTraits<work_type>::all(0);
+        }
+
+        __device__ __forceinline__ void reduceVal(typename TypeTraits<src_type>::parameter_type srcVal)
+        {
+            sum = sum + saturate_cast<work_type>(srcVal);
+        }
+
+        template <int BLOCK_SIZE>
+        __device__ void reduceGrid(work_elem_type* result, int tid)
+        {
+            __shared__ work_elem_type smem[BLOCK_SIZE * cn];
+
+            blockReduce<BLOCK_SIZE>(Unroll<cn>::template smem<BLOCK_SIZE>(smem), Unroll<cn>::res(sum), tid, Unroll<cn>::op(plus<work_elem_type>()));
+
+            if (tid == 0)
+                AtomicUnroll<work_elem_type, cn>::add(result, sum);
+        }
+    };
+
+    // MinMaxReductor
+
+    template <typename T> struct minop : minimum<T>
+    {
+        __device__ __forceinline__ static T initial()
+        {
+            return numeric_limits<T>::max();
+        }
+
+        __device__ __forceinline__ static void atomic(T* result, T myval)
+        {
+            atomicMin(result, myval);
+        }
+    };
+
+    template <typename T> struct maxop : maximum<T>
+    {
+        __device__ __forceinline__ static T initial()
+        {
+            return -numeric_limits<T>::max();
+        }
+
+        __device__ __forceinline__ static void atomic(T* result, T myval)
+        {
+            atomicMax(result, myval);
+        }
+    };
+
+    struct both
+    {
+    };
+
+    template <class Op, typename src_type, typename work_type> struct MinMaxReductor
+    {
+        work_type myval;
+
+        __device__ __forceinline__ MinMaxReductor()
+        {
+            myval = Op::initial();
+        }
+
+        __device__ __forceinline__ void reduceVal(typename TypeTraits<src_type>::parameter_type srcVal)
+        {
+            Op op;
+
+            myval = op(myval, srcVal);
+        }
+
+        template <int BLOCK_SIZE>
+        __device__ void reduceGrid(work_type* result, int tid)
+        {
+            __shared__ work_type smem[BLOCK_SIZE];
+
+            Op op;
+
+            blockReduce<BLOCK_SIZE>(smem, myval, tid, op);
+
+            if (tid == 0)
+                Op::atomic(result, myval);
+        }
+    };
+
+    template <typename src_type, typename work_type> struct MinMaxReductor<both, src_type, work_type>
+    {
+        work_type mymin;
+        work_type mymax;
+
+        __device__ __forceinline__ MinMaxReductor()
+        {
+            mymin = numeric_limits<work_type>::max();
+            mymax = -numeric_limits<work_type>::max();
+        }
+
+        __device__ __forceinline__ void reduceVal(typename TypeTraits<src_type>::parameter_type srcVal)
+        {
+            minimum<work_type> minOp;
+            maximum<work_type> maxOp;
+
+            mymin = minOp(mymin, srcVal);
+            mymax = maxOp(mymax, srcVal);
+        }
+
+        template <int BLOCK_SIZE>
+        __device__ void reduceGrid(work_type* result, int tid)
+        {
+            __shared__ work_type sminval[BLOCK_SIZE];
+            __shared__ work_type smaxval[BLOCK_SIZE];
+
+            minimum<work_type> minOp;
+            maximum<work_type> maxOp;
+
+            blockReduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), tie(mymin, mymax), tid, make_tuple(minOp, maxOp));
+
+            if (tid == 0)
+            {
+                atomicMin(result, mymin);
+                atomicMax(result + 1, mymax);
+            }
+        }
+    };
+
+    // glob_reduce
+
+    template <class Reductor, int BLOCK_SIZE, int PATCH_X, int PATCH_Y, class SrcPtr, typename ResType, class MaskPtr>
+    __global__ void glob_reduce(const SrcPtr src, ResType* result, const MaskPtr mask, const int rows, const int cols)
+    {
+        const int x0 = blockIdx.x * blockDim.x * PATCH_X + threadIdx.x;
+        const int y0 = blockIdx.y * blockDim.y * PATCH_Y + threadIdx.y;
+
+        Reductor reductor;
+
+        for (int i = 0, y = y0; i < PATCH_Y && y < rows; ++i, y += blockDim.y)
+        {
+            for (int j = 0, x = x0; j < PATCH_X && x < cols; ++j, x += blockDim.x)
+            {
+                if (mask(y, x))
+                {
+                    reductor.reduceVal(src(y, x));
+                }
+            }
+        }
+
+        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+
+        reductor.template reduceGrid<BLOCK_SIZE>(result, tid);
+    }
+
+    template <class Reductor, class Policy, class SrcPtr, typename ResType, class MaskPtr>
+    __host__ void glob_reduce(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x * Policy::patch_size_x), divUp(rows, block.y * Policy::patch_size_y));
+
+        const int BLOCK_SIZE = Policy::block_size_x * Policy::block_size_y;
+
+        glob_reduce<Reductor, BLOCK_SIZE, Policy::patch_size_x, Policy::patch_size_y><<<grid, block, 0, stream>>>(src, result, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+
+    // callers
+
+    template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
+    __host__ void sum(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        const int cn = VecTraits<src_type>::cn;
+        typedef typename MakeVec<ResType, cn>::type work_type;
+
+        glob_reduce<SumReductor<src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
+    }
+
+    template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
+    __host__ void minVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        const int cn = VecTraits<src_type>::cn;
+        typedef typename MakeVec<ResType, cn>::type work_type;
+
+        glob_reduce<MinMaxReductor<minop<work_type>, src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
+    }
+
+    template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
+    __host__ void maxVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        const int cn = VecTraits<src_type>::cn;
+        typedef typename MakeVec<ResType, cn>::type work_type;
+
+        glob_reduce<MinMaxReductor<maxop<work_type>, src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
+    }
+
+    template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
+    __host__ void minMaxVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        const int cn = VecTraits<src_type>::cn;
+        typedef typename MakeVec<ResType, cn>::type work_type;
+
+        glob_reduce<MinMaxReductor<both, src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp
new file mode 100644
index 000000000..a27955d5d
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp
@@ -0,0 +1,111 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_HISTOGRAM_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_HISTOGRAM_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/atomic.hpp"
+
+namespace cv { namespace cudev {
+
+namespace grid_histogram_detail
+{
+    template <int BIN_COUNT, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
+    __global__ void histogram(const SrcPtr src, ResType* hist, const MaskPtr mask, const int rows, const int cols)
+    {
+    #if CV_CUDEV_ARCH >= 120
+        __shared__ ResType smem[BIN_COUNT];
+
+        const int y = blockIdx.x * blockDim.y + threadIdx.y;
+        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+
+        for (int i = tid; i < BIN_COUNT; i += BLOCK_SIZE)
+            smem[i] = 0;
+
+        __syncthreads();
+
+        if (y < rows)
+        {
+            for (int x = threadIdx.x; x < cols; x += blockDim.x)
+            {
+                if (mask(y, x))
+                {
+                    const uint data = src(y, x);
+                    atomicAdd(&smem[data % BIN_COUNT], 1);
+                }
+            }
+        }
+
+        __syncthreads();
+
+        for (int i = tid; i < BIN_COUNT; i += BLOCK_SIZE)
+        {
+            const ResType histVal = smem[i];
+            if (histVal > 0)
+                atomicAdd(hist + i, histVal);
+        }
+    #endif
+    }
+
+    template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
+    __host__ void histogram(const SrcPtr& src, ResType* hist, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(rows, block.y));
+
+        const int BLOCK_SIZE = Policy::block_size_x * Policy::block_size_y;
+
+        histogram<BIN_COUNT, BLOCK_SIZE><<<grid, block, 0, stream>>>(src, hist, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp
new file mode 100644
index 000000000..b06d7ddbb
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp
@@ -0,0 +1,626 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_INTEGRAL_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_INTEGRAL_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../warp/shuffle.hpp"
+#include "../../block/scan.hpp"
+#include "../../ptr2d/glob.hpp"
+
+namespace cv { namespace cudev {
+
+namespace integral_detail
+{
+    // horizontal_pass
+
+    template <int NUM_SCAN_THREADS, class SrcPtr, typename D>
+    __global__ void horizontal_pass(const SrcPtr src, GlobPtr<D> dst, const int cols)
+    {
+        __shared__ D smem[NUM_SCAN_THREADS * 2];
+        __shared__ D carryElem;
+
+        carryElem = 0;
+
+        __syncthreads();
+
+        D* dst_row = dst.row(blockIdx.x);
+
+        int numBuckets = divUp(cols, NUM_SCAN_THREADS);
+        int offsetX = 0;
+
+        while (numBuckets--)
+        {
+            const int curElemOffs = offsetX + threadIdx.x;
+
+            D curElem = 0.0f;
+
+            if (curElemOffs < cols)
+                curElem = src(blockIdx.x, curElemOffs);
+
+            const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x);
+
+            if (curElemOffs < cols)
+                dst_row[curElemOffs] = carryElem + curScanElem;
+
+            offsetX += NUM_SCAN_THREADS;
+
+            __syncthreads();
+
+            if (threadIdx.x == NUM_SCAN_THREADS - 1)
+            {
+                carryElem += curScanElem;
+            }
+
+            __syncthreads();
+        }
+    }
+
+    template <int NUM_SCAN_THREADS, typename T, typename D>
+    __global__ void horizontal_pass(const GlobPtr<T> src, GlobPtr<D> dst, const int cols)
+    {
+        __shared__ D smem[NUM_SCAN_THREADS * 2];
+        __shared__ D carryElem;
+
+        carryElem = 0;
+
+        __syncthreads();
+
+        const T* src_row = src.row(blockIdx.x);
+        D* dst_row = dst.row(blockIdx.x);
+
+        int numBuckets = divUp(cols, NUM_SCAN_THREADS);
+        int offsetX = 0;
+
+        while (numBuckets--)
+        {
+            const int curElemOffs = offsetX + threadIdx.x;
+
+            D curElem = 0.0f;
+
+            if (curElemOffs < cols)
+                curElem = src_row[curElemOffs];
+
+            const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x);
+
+            if (curElemOffs < cols)
+                dst_row[curElemOffs] = carryElem + curScanElem;
+
+            offsetX += NUM_SCAN_THREADS;
+
+            __syncthreads();
+
+            if (threadIdx.x == NUM_SCAN_THREADS - 1)
+            {
+                carryElem += curScanElem;
+            }
+
+            __syncthreads();
+        }
+    }
+
+    template <class SrcPtr, typename D>
+    __host__ void horizontal_pass(const SrcPtr& src, const GlobPtr<D>& dst, int rows, int cols, cudaStream_t stream)
+    {
+        const int NUM_SCAN_THREADS = 256;
+
+        const dim3 block(NUM_SCAN_THREADS);
+        const dim3 grid(rows);
+
+        horizontal_pass<NUM_SCAN_THREADS><<<grid, block, 0, stream>>>(src, dst, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+    }
+
+    // horisontal_pass_8u_shfl
+
+    __device__ static uchar4 int_to_uchar4(unsigned int in)
+    {
+        uchar4 bytes;
+        bytes.x = (in & 0x000000ff) >>  0;
+        bytes.y = (in & 0x0000ff00) >>  8;
+        bytes.z = (in & 0x00ff0000) >> 16;
+        bytes.w = (in & 0xff000000) >> 24;
+        return bytes;
+    }
+
+    __global__ static void horisontal_pass_8u_shfl_kernel(const GlobPtr<uint4> img, GlobPtr<uint4> integral)
+    {
+    #if CV_CUDEV_ARCH >= 300
+        __shared__ int sums[128];
+
+        const int id = threadIdx.x;
+        const int lane_id = id % warpSize;
+        const int warp_id = id / warpSize;
+
+        const uint4 data = img(blockIdx.x, id);
+
+        const uchar4 a = int_to_uchar4(data.x);
+        const uchar4 b = int_to_uchar4(data.y);
+        const uchar4 c = int_to_uchar4(data.z);
+        const uchar4 d = int_to_uchar4(data.w);
+
+        int result[16];
+
+        result[0]  =              a.x;
+        result[1]  = result[0]  + a.y;
+        result[2]  = result[1]  + a.z;
+        result[3]  = result[2]  + a.w;
+
+        result[4]  = result[3]  + b.x;
+        result[5]  = result[4]  + b.y;
+        result[6]  = result[5]  + b.z;
+        result[7]  = result[6]  + b.w;
+
+        result[8]  = result[7]  + c.x;
+        result[9]  = result[8]  + c.y;
+        result[10] = result[9]  + c.z;
+        result[11] = result[10] + c.w;
+
+        result[12] = result[11] + d.x;
+        result[13] = result[12] + d.y;
+        result[14] = result[13] + d.z;
+        result[15] = result[14] + d.w;
+
+        int sum = result[15];
+
+        // the prefix sum for each thread's 16 value is computed,
+        // now the final sums (result[15]) need to be shared
+        // with the other threads and add.  To do this,
+        // the shfl_up() instruction is used and a shuffle scan
+        // operation is performed to distribute the sums to the correct
+        // threads
+        #pragma unroll
+        for (int i = 1; i < 32; i *= 2)
+        {
+            const int n = shfl_up(sum, i, 32);
+
+            if (lane_id >= i)
+            {
+                #pragma unroll
+                for (int k = 0; k < 16; ++k)
+                    result[k] += n;
+
+                sum += n;
+            }
+        }
+
+        // Now the final sum for the warp must be shared
+        // between warps.  This is done by each warp
+        // having a thread store to shared memory, then
+        // having some other warp load the values and
+        // compute a prefix sum, again by using shfl_up.
+        // The results are uniformly added back to the warps.
+        // last thread in the warp holding sum of the warp
+        // places that in shared
+        if (threadIdx.x % warpSize == warpSize - 1)
+            sums[warp_id] = result[15];
+
+        __syncthreads();
+
+        if (warp_id == 0)
+        {
+            int warp_sum = sums[lane_id];
+
+            #pragma unroll
+            for (int i = 1; i <= 32; i *= 2)
+            {
+                const int n = shfl_up(warp_sum, i, 32);
+
+                if (lane_id >= i)
+                    warp_sum += n;
+            }
+
+            sums[lane_id] = warp_sum;
+        }
+
+        __syncthreads();
+
+        int blockSum = 0;
+
+        // fold in unused warp
+        if (warp_id > 0)
+        {
+            blockSum = sums[warp_id - 1];
+
+            #pragma unroll
+            for (int k = 0; k < 16; ++k)
+                result[k] += blockSum;
+        }
+
+        // assemble result
+        // Each thread has 16 values to write, which are
+        // now integer data (to avoid overflow).  Instead of
+        // each thread writing consecutive uint4s, the
+        // approach shown here experiments using
+        // the shuffle command to reformat the data
+        // inside the registers so that each thread holds
+        // consecutive data to be written so larger contiguous
+        // segments can be assembled for writing.
+
+        /*
+            For example data that needs to be written as
+
+            GMEM[16] <- x0 x1 x2 x3 y0 y1 y2 y3 z0 z1 z2 z3 w0 w1 w2 w3
+            but is stored in registers (r0..r3), in four threads (0..3) as:
+
+            threadId   0  1  2  3
+              r0      x0 y0 z0 w0
+              r1      x1 y1 z1 w1
+              r2      x2 y2 z2 w2
+              r3      x3 y3 z3 w3
+
+              after apply shfl_xor operations to move data between registers r1..r3:
+
+            threadId  00 01 10 11
+                      x0 y0 z0 w0
+             xor(01)->y1 x1 w1 z1
+             xor(10)->z2 w2 x2 y2
+             xor(11)->w3 z3 y3 x3
+
+             and now x0..x3, and z0..z3 can be written out in order by all threads.
+
+             In the current code, each register above is actually representing
+             four integers to be written as uint4's to GMEM.
+        */
+
+        result[4]  = shfl_xor(result[4] , 1, 32);
+        result[5]  = shfl_xor(result[5] , 1, 32);
+        result[6]  = shfl_xor(result[6] , 1, 32);
+        result[7]  = shfl_xor(result[7] , 1, 32);
+
+        result[8]  = shfl_xor(result[8] , 2, 32);
+        result[9]  = shfl_xor(result[9] , 2, 32);
+        result[10] = shfl_xor(result[10], 2, 32);
+        result[11] = shfl_xor(result[11], 2, 32);
+
+        result[12] = shfl_xor(result[12], 3, 32);
+        result[13] = shfl_xor(result[13], 3, 32);
+        result[14] = shfl_xor(result[14], 3, 32);
+        result[15] = shfl_xor(result[15], 3, 32);
+
+        uint4* integral_row = integral.row(blockIdx.x);
+        uint4 output;
+
+        ///////
+
+        if (threadIdx.x % 4 == 0)
+            output = make_uint4(result[0], result[1], result[2], result[3]);
+
+        if (threadIdx.x % 4 == 1)
+            output = make_uint4(result[4], result[5], result[6], result[7]);
+
+        if (threadIdx.x % 4 == 2)
+            output = make_uint4(result[8], result[9], result[10], result[11]);
+
+        if (threadIdx.x % 4 == 3)
+            output = make_uint4(result[12], result[13], result[14], result[15]);
+
+        integral_row[threadIdx.x % 4 + (threadIdx.x / 4) * 16] = output;
+
+        ///////
+
+        if (threadIdx.x % 4 == 2)
+            output = make_uint4(result[0], result[1], result[2], result[3]);
+
+        if (threadIdx.x % 4 == 3)
+            output = make_uint4(result[4], result[5], result[6], result[7]);
+
+        if (threadIdx.x % 4 == 0)
+            output = make_uint4(result[8], result[9], result[10], result[11]);
+
+        if (threadIdx.x % 4 == 1)
+            output = make_uint4(result[12], result[13], result[14], result[15]);
+
+        integral_row[(threadIdx.x + 2) % 4 + (threadIdx.x / 4) * 16 + 8] = output;
+
+        // continuning from the above example,
+        // this use of shfl_xor() places the y0..y3 and w0..w3 data
+        // in order.
+
+        #pragma unroll
+        for (int i = 0; i < 16; ++i)
+            result[i] = shfl_xor(result[i], 1, 32);
+
+        if (threadIdx.x % 4 == 0)
+            output = make_uint4(result[0], result[1], result[2], result[3]);
+
+        if (threadIdx.x % 4 == 1)
+            output = make_uint4(result[4], result[5], result[6], result[7]);
+
+        if (threadIdx.x % 4 == 2)
+            output = make_uint4(result[8], result[9], result[10], result[11]);
+
+        if (threadIdx.x % 4 == 3)
+            output = make_uint4(result[12], result[13], result[14], result[15]);
+
+        integral_row[threadIdx.x % 4 + (threadIdx.x / 4) * 16 + 4] = output;
+
+        ///////
+
+        if (threadIdx.x % 4 == 2)
+            output = make_uint4(result[0], result[1], result[2], result[3]);
+
+        if (threadIdx.x % 4 == 3)
+            output = make_uint4(result[4], result[5], result[6], result[7]);
+
+        if (threadIdx.x % 4 == 0)
+            output = make_uint4(result[8], result[9], result[10], result[11]);
+
+        if (threadIdx.x % 4 == 1)
+            output = make_uint4(result[12], result[13], result[14], result[15]);
+
+        integral_row[(threadIdx.x + 2) % 4 + (threadIdx.x / 4) * 16 + 12] = output;
+    #endif
+    }
+
+    __host__ static void horisontal_pass_8u_shfl(const GlobPtr<uchar> src, GlobPtr<uint> integral, int rows, int cols, cudaStream_t stream)
+    {
+        // each thread handles 16 values, use 1 block/row
+        // save, because step is actually can't be less 512 bytes
+        const int block = cols / 16;
+
+        // launch 1 block / row
+        const int grid = rows;
+
+        CV_CUDEV_SAFE_CALL( cudaFuncSetCacheConfig(horisontal_pass_8u_shfl_kernel, cudaFuncCachePreferL1) );
+
+        GlobPtr<uint4> src4 = globPtr((uint4*) src.data, src.step);
+        GlobPtr<uint4> integral4 = globPtr((uint4*) integral.data, integral.step);
+
+        horisontal_pass_8u_shfl_kernel<<<grid, block, 0, stream>>>(src4, integral4);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+    }
+
+    // vertical
+
+    template <typename T>
+    __global__ void vertical_pass(GlobPtr<T> integral, const int rows, const int cols)
+    {
+    #if CV_CUDEV_ARCH >= 300
+        __shared__ T sums[32][9];
+
+        const int tidx = blockIdx.x * blockDim.x + threadIdx.x;
+        const int lane_id = tidx % 8;
+
+        sums[threadIdx.x][threadIdx.y] = 0;
+        __syncthreads();
+
+        T stepSum = 0;
+
+        int numBuckets = divUp(rows, blockDim.y);
+        int y = threadIdx.y;
+
+        while (numBuckets--)
+        {
+            T* p = integral.row(y) + tidx;
+
+            T sum = (tidx < cols) && (y < rows) ? *p : 0;
+
+            y += blockDim.y;
+
+            sums[threadIdx.x][threadIdx.y] = sum;
+            __syncthreads();
+
+            // place into SMEM
+            // shfl scan reduce the SMEM, reformating so the column
+            // sums are computed in a warp
+            // then read out properly
+            const int j = threadIdx.x % 8;
+            const int k = threadIdx.x / 8 + threadIdx.y * 4;
+
+            T partial_sum = sums[k][j];
+
+            for (int i = 1; i <= 8; i *= 2)
+            {
+                T n = shfl_up(partial_sum, i, 32);
+
+                if (lane_id >= i)
+                    partial_sum += n;
+            }
+
+            sums[k][j] = partial_sum;
+            __syncthreads();
+
+            if (threadIdx.y > 0)
+                sum += sums[threadIdx.x][threadIdx.y - 1];
+
+            if (tidx < cols)
+            {
+                sum += stepSum;
+                stepSum += sums[threadIdx.x][blockDim.y - 1];
+                *p = sum;
+            }
+
+            __syncthreads();
+        }
+    #else
+        __shared__ T smem[32][32];
+        __shared__ T prevVals[32];
+
+        volatile T* smem_row = &smem[0][0] + 64 * threadIdx.y;
+
+        if (threadIdx.y == 0)
+            prevVals[threadIdx.x] = 0;
+
+        __syncthreads();
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+        int numBuckets = divUp(rows, 8 * 4);
+        int offsetY = 0;
+
+        while (numBuckets--)
+        {
+            const int curRowOffs = offsetY + threadIdx.y;
+
+            T curElems[4];
+            T temp[4];
+
+            // load patch
+
+            smem[threadIdx.y +  0][threadIdx.x] = 0.0f;
+            smem[threadIdx.y +  8][threadIdx.x] = 0.0f;
+            smem[threadIdx.y + 16][threadIdx.x] = 0.0f;
+            smem[threadIdx.y + 24][threadIdx.x] = 0.0f;
+
+            if (x < cols)
+            {
+                for (int i = 0; i < 4; ++i)
+                {
+                    if (curRowOffs + i * 8 < rows)
+                        smem[threadIdx.y + i * 8][threadIdx.x] = integral(curRowOffs + i * 8, x);
+                }
+            }
+
+            __syncthreads();
+
+            // reduce
+
+            curElems[0] = smem[threadIdx.x][threadIdx.y     ];
+            curElems[1] = smem[threadIdx.x][threadIdx.y +  8];
+            curElems[2] = smem[threadIdx.x][threadIdx.y + 16];
+            curElems[3] = smem[threadIdx.x][threadIdx.y + 24];
+
+            __syncthreads();
+
+            temp[0] = curElems[0] = warpScanInclusive(curElems[0], smem_row, threadIdx.x);
+            temp[1] = curElems[1] = warpScanInclusive(curElems[1], smem_row, threadIdx.x);
+            temp[2] = curElems[2] = warpScanInclusive(curElems[2], smem_row, threadIdx.x);
+            temp[3] = curElems[3] = warpScanInclusive(curElems[3], smem_row, threadIdx.x);
+
+            curElems[0] += prevVals[threadIdx.y     ];
+            curElems[1] += prevVals[threadIdx.y +  8];
+            curElems[2] += prevVals[threadIdx.y + 16];
+            curElems[3] += prevVals[threadIdx.y + 24];
+
+            __syncthreads();
+
+            if (threadIdx.x == 31)
+            {
+                prevVals[threadIdx.y     ] += temp[0];
+                prevVals[threadIdx.y +  8] += temp[1];
+                prevVals[threadIdx.y + 16] += temp[2];
+                prevVals[threadIdx.y + 24] += temp[3];
+            }
+
+            smem[threadIdx.y     ][threadIdx.x] = curElems[0];
+            smem[threadIdx.y +  8][threadIdx.x] = curElems[1];
+            smem[threadIdx.y + 16][threadIdx.x] = curElems[2];
+            smem[threadIdx.y + 24][threadIdx.x] = curElems[3];
+
+            __syncthreads();
+
+            // store patch
+
+            if (x < cols)
+            {
+                // read 4 value from source
+                for (int i = 0; i < 4; ++i)
+                {
+                    if (curRowOffs + i * 8 < rows)
+                        integral(curRowOffs + i * 8, x) = smem[threadIdx.x][threadIdx.y + i * 8];
+                }
+            }
+
+            __syncthreads();
+
+            offsetY += 8 * 4;
+        }
+    #endif
+    }
+
+    template <typename T>
+    __host__ void vertical_pass(const GlobPtr<T>& integral, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(cols, block.x));
+
+        vertical_pass<<<grid, block, 0, stream>>>(integral, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+    }
+
+    // integral
+
+    template <class SrcPtr, typename D>
+    __host__ void integral(const SrcPtr& src, const GlobPtr<D>& dst, int rows, int cols, cudaStream_t stream)
+    {
+        horizontal_pass(src, dst, rows, cols, stream);
+        vertical_pass(dst, rows, cols, stream);
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+
+    __host__ static void integral(const GlobPtr<uchar> src, GlobPtr<uint> dst, int rows, int cols, cudaStream_t stream)
+    {
+        if (deviceSupports(FEATURE_SET_COMPUTE_30)
+            && (cols % 16 == 0)
+            && reinterpret_cast<intptr_t>(src.data) % 32 == 0
+            && reinterpret_cast<intptr_t>(dst.data) % 32 == 0)
+        {
+            horisontal_pass_8u_shfl(src, dst, rows, cols, stream);
+        }
+        else
+        {
+            horizontal_pass(src, dst, rows, cols, stream);
+        }
+
+        vertical_pass(dst, rows, cols, stream);
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+
+    __host__ static void integral(const GlobPtr<uchar> src, GlobPtr<int> dst, int rows, int cols, cudaStream_t stream)
+    {
+        GlobPtr<uint> dstui = globPtr((uint*) dst.data, dst.step);
+        integral(src, dstui, rows, cols, stream);
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp
new file mode 100644
index 000000000..b14792cf4
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp
@@ -0,0 +1,201 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_PYR_DOWN_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_PYR_DOWN_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/vec_traits.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../util/type_traits.hpp"
+#include "../../ptr2d/glob.hpp"
+#include "../../ptr2d/traits.hpp"
+
+namespace cv { namespace cudev {
+
+namespace pyramids_detail
+{
+    template <class Brd, class SrcPtr, typename DstType>
+    __global__ void pyrDown(const SrcPtr src, GlobPtr<DstType> dst, const int src_rows, const int src_cols, const int dst_cols)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        typedef typename VecTraits<src_type>::elem_type src_elem_type;
+        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
+        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;
+
+        __shared__ work_type smem[256 + 4];
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y;
+
+        const int src_y = 2 * y;
+
+        if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2)
+        {
+            {
+                work_type sum;
+
+                sum =       0.0625f * src(src_y - 2, x);
+                sum = sum + 0.25f   * src(src_y - 1, x);
+                sum = sum + 0.375f  * src(src_y    , x);
+                sum = sum + 0.25f   * src(src_y + 1, x);
+                sum = sum + 0.0625f * src(src_y + 2, x);
+
+                smem[2 + threadIdx.x] = sum;
+            }
+
+            if (threadIdx.x < 2)
+            {
+                const int left_x = x - 2;
+
+                work_type sum;
+
+                sum =       0.0625f * src(src_y - 2, left_x);
+                sum = sum + 0.25f   * src(src_y - 1, left_x);
+                sum = sum + 0.375f  * src(src_y    , left_x);
+                sum = sum + 0.25f   * src(src_y + 1, left_x);
+                sum = sum + 0.0625f * src(src_y + 2, left_x);
+
+                smem[threadIdx.x] = sum;
+            }
+
+            if (threadIdx.x > 253)
+            {
+                const int right_x = x + 2;
+
+                work_type sum;
+
+                sum =       0.0625f * src(src_y - 2, right_x);
+                sum = sum + 0.25f   * src(src_y - 1, right_x);
+                sum = sum + 0.375f  * src(src_y    , right_x);
+                sum = sum + 0.25f   * src(src_y + 1, right_x);
+                sum = sum + 0.0625f * src(src_y + 2, right_x);
+
+                smem[4 + threadIdx.x] = sum;
+            }
+        }
+        else
+        {
+            {
+                work_type sum;
+
+                sum =       0.0625f * src(Brd::idx_low(src_y - 2, src_rows) , Brd::idx_high(x, src_cols));
+                sum = sum + 0.25f   * src(Brd::idx_low(src_y - 1, src_rows) , Brd::idx_high(x, src_cols));
+                sum = sum + 0.375f  * src(src_y                             , Brd::idx_high(x, src_cols));
+                sum = sum + 0.25f   * src(Brd::idx_high(src_y + 1, src_rows), Brd::idx_high(x, src_cols));
+                sum = sum + 0.0625f * src(Brd::idx_high(src_y + 2, src_rows), Brd::idx_high(x, src_cols));
+
+                smem[2 + threadIdx.x] = sum;
+            }
+
+            if (threadIdx.x < 2)
+            {
+                const int left_x = x - 2;
+
+                work_type sum;
+
+                sum =       0.0625f * src(Brd::idx_low(src_y - 2, src_rows) , Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
+                sum = sum + 0.25f   * src(Brd::idx_low(src_y - 1, src_rows) , Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
+                sum = sum + 0.375f  * src(src_y                             , Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
+                sum = sum + 0.25f   * src(Brd::idx_high(src_y + 1, src_rows), Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
+                sum = sum + 0.0625f * src(Brd::idx_high(src_y + 2, src_rows), Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
+
+                smem[threadIdx.x] = sum;
+            }
+
+            if (threadIdx.x > 253)
+            {
+                const int right_x = x + 2;
+
+                work_type sum;
+
+                sum =       0.0625f * src(Brd::idx_low(src_y - 2, src_rows) , Brd::idx_high(right_x, src_cols));
+                sum = sum + 0.25f   * src(Brd::idx_low(src_y - 1, src_rows) , Brd::idx_high(right_x, src_cols));
+                sum = sum + 0.375f  * src(src_y                             , Brd::idx_high(right_x, src_cols));
+                sum = sum + 0.25f   * src(Brd::idx_high(src_y + 1, src_rows), Brd::idx_high(right_x, src_cols));
+                sum = sum + 0.0625f * src(Brd::idx_high(src_y + 2, src_rows), Brd::idx_high(right_x, src_cols));
+
+                smem[4 + threadIdx.x] = sum;
+            }
+        }
+
+        __syncthreads();
+
+        if (threadIdx.x < 128)
+        {
+            const int tid2 = threadIdx.x * 2;
+
+            work_type sum;
+
+            sum =       0.0625f * smem[2 + tid2 - 2];
+            sum = sum + 0.25f   * smem[2 + tid2 - 1];
+            sum = sum + 0.375f  * smem[2 + tid2    ];
+            sum = sum + 0.25f   * smem[2 + tid2 + 1];
+            sum = sum + 0.0625f * smem[2 + tid2 + 2];
+
+            const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;
+
+            if (dst_x < dst_cols)
+                dst(y, dst_x) = saturate_cast<DstType>(sum);
+        }
+    }
+
+    template <class Brd, class SrcPtr, typename DstType>
+    __host__ void pyrDown(const SrcPtr& src, const GlobPtr<DstType>& dst, int src_rows, int src_cols, int dst_rows, int dst_cols, cudaStream_t stream)
+    {
+        const dim3 block(256);
+        const dim3 grid(divUp(src_cols, block.x), dst_rows);
+
+        pyrDown<Brd><<<grid, block, 0, stream>>>(src, dst, src_rows, src_cols, dst_cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/pyr_up.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/pyr_up.hpp
new file mode 100644
index 000000000..16c8b2091
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/pyr_up.hpp
@@ -0,0 +1,172 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_PYR_UP_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_PYR_UP_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/vec_traits.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../util/type_traits.hpp"
+#include "../../ptr2d/glob.hpp"
+#include "../../ptr2d/traits.hpp"
+
+namespace cv { namespace cudev {
+
+namespace pyramids_detail
+{
+    template <class SrcPtr, typename DstType>
+    __global__ void pyrUp(const SrcPtr src, GlobPtr<DstType> dst, const int src_rows, const int src_cols, const int dst_rows, const int dst_cols)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        typedef typename VecTraits<src_type>::elem_type src_elem_type;
+        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
+        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        __shared__ work_type s_srcPatch[10][10];
+        __shared__ work_type s_dstPatch[20][16];
+
+        if (threadIdx.x < 10 && threadIdx.y < 10)
+        {
+            int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
+            int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;
+
+            srcx = ::abs(srcx);
+            srcx = ::min(src_cols - 1, srcx);
+
+            srcy = ::abs(srcy);
+            srcy = ::min(src_rows - 1, srcy);
+
+            s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<work_type>(src(srcy, srcx));
+        }
+
+        __syncthreads();
+
+        work_type sum = VecTraits<work_type>::all(0);
+
+        const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
+        const int oddFlag  = static_cast<int>((threadIdx.x & 1) != 0);
+        const bool eveny = ((threadIdx.y & 1) == 0);
+        const int tidx = threadIdx.x;
+
+        if (eveny)
+        {
+            sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)];
+            sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)];
+            sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx    ) >> 1)];
+            sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)];
+            sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)];
+        }
+
+        s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum;
+
+        if (threadIdx.y < 2)
+        {
+            sum = VecTraits<work_type>::all(0);
+
+            if (eveny)
+            {
+                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
+                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
+                sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx    ) >> 1)];
+                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
+                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
+            }
+
+            s_dstPatch[threadIdx.y][threadIdx.x] = sum;
+        }
+
+        if (threadIdx.y > 13)
+        {
+            sum = VecTraits<work_type>::all(0);
+
+            if (eveny)
+            {
+                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
+                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
+                sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx    ) >> 1)];
+                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
+                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
+            }
+
+            s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum;
+        }
+
+        __syncthreads();
+
+        sum = VecTraits<work_type>::all(0);
+
+        const int tidy = threadIdx.y;
+
+        sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x];
+        sum = sum + 0.25f   * s_dstPatch[2 + tidy - 1][threadIdx.x];
+        sum = sum + 0.375f  * s_dstPatch[2 + tidy    ][threadIdx.x];
+        sum = sum + 0.25f   * s_dstPatch[2 + tidy + 1][threadIdx.x];
+        sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x];
+
+        if (x < dst_cols && y < dst_rows)
+            dst(y, x) = saturate_cast<DstType>(4.0f * sum);
+    }
+
+    template <class SrcPtr, typename DstType>
+    __host__ void pyrUp(const SrcPtr& src, const GlobPtr<DstType>& dst, int src_rows, int src_cols, int dst_rows, int dst_cols, cudaStream_t stream)
+    {
+        const dim3 block(16, 16);
+        const dim3 grid(divUp(dst_cols, block.x), divUp(dst_rows, block.y));
+
+        pyrUp<<<grid, block, 0, stream>>>(src, dst, src_rows, src_cols, dst_rows, dst_cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp
new file mode 100644
index 000000000..b257e7503
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp
@@ -0,0 +1,106 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_REDUCE_TO_COLUMN_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_REDUCE_TO_COLUMN_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../block/reduce.hpp"
+
+namespace cv { namespace cudev {
+
+namespace grid_reduce_to_vec_detail
+{
+    template <class Reductor, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
+    __global__ void reduceToColumn(const SrcPtr src, ResType* dst, const MaskPtr mask, const int cols)
+    {
+        typedef typename Reductor::work_type work_type;
+
+        __shared__ work_type smem[BLOCK_SIZE];
+
+        const int y = blockIdx.x;
+
+        work_type myVal = Reductor::initialValue();
+
+        Reductor op;
+
+        for (int x = threadIdx.x; x < cols; x += BLOCK_SIZE)
+        {
+            if (mask(y, x))
+            {
+                myVal = op(myVal, saturate_cast<work_type>(src(y, x)));
+            }
+        }
+
+        blockReduce<BLOCK_SIZE>(smem, myVal, threadIdx.x, op);
+
+        if (threadIdx.x == 0)
+            dst[y] = saturate_cast<ResType>(Reductor::result(myVal, cols));
+    }
+
+    template <class Reductor, class Policy, class SrcPtr, typename ResType, class MaskPtr>
+    __host__ void reduceToColumn(const SrcPtr& src, ResType* dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const int BLOCK_SIZE_X = Policy::block_size_x;
+        const int BLOCK_SIZE_Y = Policy::block_size_y;
+
+        const int BLOCK_SIZE = BLOCK_SIZE_X * BLOCK_SIZE_Y;
+
+        const dim3 block(BLOCK_SIZE);
+        const dim3 grid(rows);
+
+        reduceToColumn<Reductor, BLOCK_SIZE><<<grid, block, 0, stream>>>(src, dst, mask, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp
new file mode 100644
index 000000000..154004f6e
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp
@@ -0,0 +1,118 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_REDUCE_TO_ROW_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_REDUCE_TO_ROW_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../block/reduce.hpp"
+
+namespace cv { namespace cudev {
+
+namespace grid_reduce_to_vec_detail
+{
+    template <class Reductor, int BLOCK_SIZE_X, int BLOCK_SIZE_Y, class SrcPtr, typename ResType, class MaskPtr>
+    __global__ void reduceToRow(const SrcPtr src, ResType* dst, const MaskPtr mask, const int rows, const int cols)
+    {
+        typedef typename Reductor::work_type work_type;
+
+        __shared__ work_type smem[BLOCK_SIZE_X * BLOCK_SIZE_Y];
+
+        const int x = blockIdx.x * BLOCK_SIZE_X + threadIdx.x;
+
+        work_type myVal = Reductor::initialValue();
+
+        Reductor op;
+
+        if (x < cols)
+        {
+            for (int y = threadIdx.y; y < rows; y += BLOCK_SIZE_Y)
+            {
+                if (mask(y, x))
+                {
+                    myVal = op(myVal, saturate_cast<work_type>(src(y, x)));
+                }
+            }
+        }
+
+        smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal;
+
+        __syncthreads();
+
+        volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X;
+
+        myVal = srow[threadIdx.x];
+        blockReduce<BLOCK_SIZE_X>(srow, myVal, threadIdx.x, op);
+
+        if (threadIdx.x == 0)
+            srow[0] = myVal;
+
+        __syncthreads();
+
+        if (threadIdx.y == 0 && x < cols)
+            dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows));
+    }
+
+    template <class Reductor, class SrcPtr, typename ResType, class MaskPtr>
+    __host__ void reduceToRow(const SrcPtr& src, ResType* dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const int BLOCK_SIZE_X = 16;
+        const int BLOCK_SIZE_Y = 16;
+
+        const dim3 block(BLOCK_SIZE_X, BLOCK_SIZE_Y);
+        const dim3 grid(divUp(cols, block.x));
+
+        reduceToRow<Reductor, BLOCK_SIZE_X, BLOCK_SIZE_Y><<<grid, block, 0, stream>>>(src, dst, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/split_merge.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/split_merge.hpp
new file mode 100644
index 000000000..0bd76ac46
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/split_merge.hpp
@@ -0,0 +1,282 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_SPLIT_MERGE_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_SPLIT_MERGE_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../util/tuple.hpp"
+#include "../../util/vec_traits.hpp"
+#include "../../ptr2d/glob.hpp"
+#include "../../ptr2d/traits.hpp"
+
+namespace cv { namespace cudev {
+
+namespace grid_split_merge_detail
+{
+    // merge
+
+    template <class Src1Ptr, class Src2Ptr, typename DstType, class MaskPtr>
+    __global__ void mergeC2(const Src1Ptr src1, const Src2Ptr src2, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
+    {
+        typedef typename VecTraits<DstType>::elem_type dst_elem_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        dst(y, x) = VecTraits<DstType>::make(
+                    saturate_cast<dst_elem_type>(src1(y, x)),
+                    saturate_cast<dst_elem_type>(src2(y, x))
+                    );
+    }
+
+    template <class Policy, class Src1Ptr, class Src2Ptr, typename DstType, class MaskPtr>
+    __host__ void mergeC2(const Src1Ptr& src1, const Src2Ptr& src2, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        mergeC2<<<grid, block, 0, stream>>>(src1, src2, dst, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
+    }
+
+    template <class Src1Ptr, class Src2Ptr, class Src3Ptr, typename DstType, class MaskPtr>
+    __global__ void mergeC3(const Src1Ptr src1, const Src2Ptr src2, const Src3Ptr src3, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
+    {
+        typedef typename VecTraits<DstType>::elem_type dst_elem_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        dst(y, x) = VecTraits<DstType>::make(
+                    saturate_cast<dst_elem_type>(src1(y, x)),
+                    saturate_cast<dst_elem_type>(src2(y, x)),
+                    saturate_cast<dst_elem_type>(src3(y, x))
+                    );
+    }
+
+    template <class Policy, class Src1Ptr, class Src2Ptr, class Src3Ptr, typename DstType, class MaskPtr>
+    __host__ void mergeC3(const Src1Ptr& src1, const Src2Ptr& src2, const Src3Ptr& src3, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        mergeC3<<<grid, block, 0, stream>>>(src1, src2, src3, dst, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
+    }
+
+    template <class Src1Ptr, class Src2Ptr, class Src3Ptr, class Src4Ptr, typename DstType, class MaskPtr>
+    __global__ void mergeC4(const Src1Ptr src1, const Src2Ptr src2, const Src3Ptr src3, const Src4Ptr src4, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
+    {
+        typedef typename VecTraits<DstType>::elem_type dst_elem_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        dst(y, x) = VecTraits<DstType>::make(
+                    saturate_cast<dst_elem_type>(src1(y, x)),
+                    saturate_cast<dst_elem_type>(src2(y, x)),
+                    saturate_cast<dst_elem_type>(src3(y, x)),
+                    saturate_cast<dst_elem_type>(src4(y, x))
+                    );
+    }
+
+    template <class Policy, class Src1Ptr, class Src2Ptr, class Src3Ptr, class Src4Ptr, typename DstType, class MaskPtr>
+    __host__ void mergeC4(const Src1Ptr& src1, const Src2Ptr& src2, const Src3Ptr& src3, const Src4Ptr& src4, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        mergeC4<<<grid, block, 0, stream>>>(src1, src2, src3, src4, dst, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
+    }
+
+    template <int cn, class Policy> struct MergeImpl;
+
+    template <class Policy> struct MergeImpl<2, Policy>
+    {
+        template <class SrcPtrTuple, typename DstType, class MaskPtr>
+        __host__ static void merge(const SrcPtrTuple& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+        {
+            mergeC2<Policy>(get<0>(src), get<1>(src), dst, mask, rows, cols, stream);
+        }
+    };
+
+    template <class Policy> struct MergeImpl<3, Policy>
+    {
+        template <class SrcPtrTuple, typename DstType, class MaskPtr>
+        __host__ static void merge(const SrcPtrTuple& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+        {
+            mergeC3<Policy>(get<0>(src), get<1>(src), get<2>(src), dst, mask, rows, cols, stream);
+        }
+    };
+
+    template <class Policy> struct MergeImpl<4, Policy>
+    {
+        template <class SrcPtrTuple, typename DstType, class MaskPtr>
+        __host__ static void merge(const SrcPtrTuple& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+        {
+            mergeC4<Policy>(get<0>(src), get<1>(src), get<2>(src), get<3>(src), dst, mask, rows, cols, stream);
+        }
+    };
+
+    // split
+
+    template <class SrcPtr, typename DstType, class MaskPtr>
+    __global__ void split(const SrcPtr src, GlobPtr<DstType> dst1, GlobPtr<DstType> dst2, const MaskPtr mask, const int rows, const int cols)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        const src_type src_value = src(y, x);
+
+        dst1(y, x) = src_value.x;
+        dst2(y, x) = src_value.y;
+    }
+
+    template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+    __host__ void split(const SrcPtr& src, const GlobPtr<DstType>& dst1, const GlobPtr<DstType>& dst2, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        split<<<grid, block, 0, stream>>>(src, dst1, dst2, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
+    }
+
+    template <class SrcPtr, typename DstType, class MaskPtr>
+    __global__ void split(const SrcPtr src, GlobPtr<DstType> dst1, GlobPtr<DstType> dst2, GlobPtr<DstType> dst3, const MaskPtr mask, const int rows, const int cols)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        const src_type src_value = src(y, x);
+
+        dst1(y, x) = src_value.x;
+        dst2(y, x) = src_value.y;
+        dst3(y, x) = src_value.z;
+    }
+
+    template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+    __host__ void split(const SrcPtr& src, const GlobPtr<DstType>& dst1, const GlobPtr<DstType>& dst2, const GlobPtr<DstType>& dst3, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        split<<<grid, block, 0, stream>>>(src, dst1, dst2, dst3, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
+    }
+
+    template <class SrcPtr, typename DstType, class MaskPtr>
+    __global__ void split(const SrcPtr src, GlobPtr<DstType> dst1, GlobPtr<DstType> dst2, GlobPtr<DstType> dst3, GlobPtr<DstType> dst4, const MaskPtr mask, const int rows, const int cols)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        const src_type src_value = src(y, x);
+
+        dst1(y, x) = src_value.x;
+        dst2(y, x) = src_value.y;
+        dst3(y, x) = src_value.z;
+        dst4(y, x) = src_value.w;
+    }
+
+    template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+    __host__ void split(const SrcPtr& src, const GlobPtr<DstType>& dst1, const GlobPtr<DstType>& dst2, const GlobPtr<DstType>& dst3, const GlobPtr<DstType>& dst4, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        split<<<grid, block, 0, stream>>>(src, dst1, dst2, dst3, dst4, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/transform.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/transform.hpp
new file mode 100644
index 000000000..0e0df0794
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/transform.hpp
@@ -0,0 +1,423 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_TRANSFORM_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_TRANSFORM_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/tuple.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../util/vec_traits.hpp"
+#include "../../ptr2d/glob.hpp"
+#include "../../ptr2d/traits.hpp"
+
+namespace cv { namespace cudev {
+
+namespace grid_transform_detail
+{
+    // OpUnroller
+
+    template <int cn> struct OpUnroller;
+
+    template <> struct OpUnroller<1>
+    {
+        template <typename T, typename D, class UnOp, class MaskPtr>
+        __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
+        {
+            if (mask(y, x_shifted))
+                dst.x = op(src.x);
+        }
+
+        template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
+        __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
+        {
+            if (mask(y, x_shifted))
+                dst.x = op(src1.x, src2.x);
+        }
+    };
+
+    template <> struct OpUnroller<2>
+    {
+        template <typename T, typename D, class UnOp, class MaskPtr>
+        __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
+        {
+            if (mask(y, x_shifted))
+                dst.x = op(src.x);
+            if (mask(y, x_shifted + 1))
+                dst.y = op(src.y);
+        }
+
+        template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
+        __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
+        {
+            if (mask(y, x_shifted))
+                dst.x = op(src1.x, src2.x);
+            if (mask(y, x_shifted + 1))
+                dst.y = op(src1.y, src2.y);
+        }
+    };
+
+    template <> struct OpUnroller<3>
+    {
+        template <typename T, typename D, class UnOp, class MaskPtr>
+        __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
+        {
+            if (mask(y, x_shifted))
+                dst.x = op(src.x);
+            if (mask(y, x_shifted + 1))
+                dst.y = op(src.y);
+            if (mask(y, x_shifted + 2))
+                dst.z = op(src.z);
+        }
+
+        template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
+        __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
+        {
+            if (mask(y, x_shifted))
+                dst.x = op(src1.x, src2.x);
+            if (mask(y, x_shifted + 1))
+                dst.y = op(src1.y, src2.y);
+            if (mask(y, x_shifted + 2))
+                dst.z = op(src1.z, src2.z);
+        }
+    };
+
+    template <> struct OpUnroller<4>
+    {
+        template <typename T, typename D, class UnOp, class MaskPtr>
+        __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
+        {
+            if (mask(y, x_shifted))
+                dst.x = op(src.x);
+            if (mask(y, x_shifted + 1))
+                dst.y = op(src.y);
+            if (mask(y, x_shifted + 2))
+                dst.z = op(src.z);
+            if (mask(y, x_shifted + 3))
+                dst.w = op(src.w);
+        }
+
+        template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
+        __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
+        {
+            if (mask(y, x_shifted))
+                dst.x = op(src1.x, src2.x);
+            if (mask(y, x_shifted + 1))
+                dst.y = op(src1.y, src2.y);
+            if (mask(y, x_shifted + 2))
+                dst.z = op(src1.z, src2.z);
+            if (mask(y, x_shifted + 3))
+                dst.w = op(src1.w, src2.w);
+        }
+    };
+
+    // transformSimple
+
+    template <class SrcPtr, typename DstType, class UnOp, class MaskPtr>
+    __global__ void transformSimple(const SrcPtr src, GlobPtr<DstType> dst, const UnOp op, const MaskPtr mask, const int rows, const int cols)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        dst(y, x) = saturate_cast<DstType>(op(src(y, x)));
+    }
+
+    template <class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
+    __global__ void transformSimple(const SrcPtr1 src1, const SrcPtr2 src2, GlobPtr<DstType> dst, const BinOp op, const MaskPtr mask, const int rows, const int cols)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        dst(y, x) = saturate_cast<DstType>(op(src1(y, x), src2(y, x)));
+    }
+
+    // transformSmart
+
+    template <int SHIFT, typename SrcType, typename DstType, class UnOp, class MaskPtr>
+    __global__ void transformSmart(const GlobPtr<SrcType> src_, GlobPtr<DstType> dst_, const UnOp op, const MaskPtr mask, const int rows, const int cols)
+    {
+        typedef typename MakeVec<SrcType, SHIFT>::type read_type;
+        typedef typename MakeVec<DstType, SHIFT>::type write_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+        const int x_shifted = x * SHIFT;
+
+        if (y < rows)
+        {
+            const SrcType* src = src_.row(y);
+            DstType* dst = dst_.row(y);
+
+            if (x_shifted + SHIFT - 1 < cols)
+            {
+                const read_type src_n_el = ((const read_type*)src)[x];
+                write_type dst_n_el = ((const write_type*)dst)[x];
+
+                OpUnroller<SHIFT>::unroll(src_n_el, dst_n_el, op, mask, x_shifted, y);
+
+                ((write_type*)dst)[x] = dst_n_el;
+            }
+            else
+            {
+                for (int real_x = x_shifted; real_x < cols; ++real_x)
+                {
+                    if (mask(y, real_x))
+                        dst[real_x] = op(src[real_x]);
+                }
+            }
+        }
+    }
+
+    template <int SHIFT, typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
+    __global__ void transformSmart(const GlobPtr<SrcType1> src1_, const GlobPtr<SrcType2> src2_, PtrStep<DstType> dst_, const BinOp op, const MaskPtr mask, const int rows, const int cols)
+    {
+        typedef typename MakeVec<SrcType1, SHIFT>::type read_type1;
+        typedef typename MakeVec<SrcType2, SHIFT>::type read_type2;
+        typedef typename MakeVec<DstType, SHIFT>::type write_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+        const int x_shifted = x * SHIFT;
+
+        if (y < rows)
+        {
+            const SrcType1* src1 = src1_.row(y);
+            const SrcType2* src2 = src2_.row(y);
+            DstType* dst = dst_.row(y);
+
+            if (x_shifted + SHIFT - 1 < cols)
+            {
+                const read_type1 src1_n_el = ((const read_type1*)src1)[x];
+                const read_type2 src2_n_el = ((const read_type2*)src2)[x];
+                write_type dst_n_el = ((const write_type*)dst)[x];
+
+                OpUnroller<SHIFT>::unroll(src1_n_el, src2_n_el, dst_n_el, op, mask, x_shifted, y);
+
+                ((write_type*)dst)[x] = dst_n_el;
+            }
+            else
+            {
+                for (int real_x = x_shifted; real_x < cols; ++real_x)
+                {
+                    if (mask(y, real_x))
+                        dst[real_x] = op(src1[real_x], src2[real_x]);
+                }
+            }
+        }
+    }
+
+    // TransformDispatcher
+
+    template <bool UseSmart, class Policy> struct TransformDispatcher;
+
+    template <class Policy> struct TransformDispatcher<false, Policy>
+    {
+        template <class SrcPtr, typename DstType, class UnOp, class MaskPtr>
+        __host__ static void call(const SrcPtr& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+        {
+            const dim3 block(Policy::block_size_x, Policy::block_size_y);
+            const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+            transformSimple<<<grid, block, 0, stream>>>(src, dst, op, mask, rows, cols);
+            CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+            if (stream == 0)
+                CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+        }
+
+        template <class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
+        __host__ static void call(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+        {
+            const dim3 block(Policy::block_size_x, Policy::block_size_y);
+            const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+            transformSimple<<<grid, block, 0, stream>>>(src1, src2, dst, op, mask, rows, cols);
+            CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+            if (stream == 0)
+                CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+        }
+    };
+
+    template <class Policy> struct TransformDispatcher<true, Policy>
+    {
+        template <typename T>
+        __host__ static bool isAligned(const T* ptr, size_t size)
+        {
+            return reinterpret_cast<size_t>(ptr) % size == 0;
+        }
+
+        __host__ static bool isAligned(size_t step, size_t size)
+        {
+            return step % size == 0;
+        }
+
+        template <typename SrcType, typename DstType, class UnOp, class MaskPtr>
+        __host__ static void call(const GlobPtr<SrcType>& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+        {
+            if (Policy::shift == 1 ||
+                !isAligned(src.data, Policy::shift * sizeof(SrcType)) || !isAligned(src.step, Policy::shift * sizeof(SrcType)) ||
+                !isAligned(dst.data, Policy::shift * sizeof(DstType)) || !isAligned(dst.step, Policy::shift * sizeof(DstType)))
+            {
+                TransformDispatcher<false, Policy>::call(src, dst, op, mask, rows, cols, stream);
+                return;
+            }
+
+            const dim3 block(Policy::block_size_x, Policy::block_size_y);
+            const dim3 grid(divUp(cols, block.x * Policy::shift), divUp(rows, block.y));
+
+            transformSmart<Policy::shift><<<grid, block, 0, stream>>>(src, dst, op, mask, rows, cols);
+            CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+            if (stream == 0)
+                CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+        }
+
+        template <typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
+        __host__ static void call(const GlobPtr<SrcType1>& src1, const GlobPtr<SrcType2>& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+        {
+            if (Policy::shift == 1 ||
+                !isAligned(src1.data, Policy::shift * sizeof(SrcType1)) || !isAligned(src1.step, Policy::shift * sizeof(SrcType1)) ||
+                !isAligned(src2.data, Policy::shift * sizeof(SrcType2)) || !isAligned(src2.step, Policy::shift * sizeof(SrcType2)) ||
+                !isAligned(dst.data,  Policy::shift * sizeof(DstType))  || !isAligned(dst.step,  Policy::shift * sizeof(DstType)))
+            {
+                TransformDispatcher<false, Policy>::call(src1, src2, dst, op, mask, rows, cols, stream);
+                return;
+            }
+
+            const dim3 block(Policy::block_size_x, Policy::block_size_y);
+            const dim3 grid(divUp(cols, block.x * Policy::shift), divUp(rows, block.y));
+
+            transformSmart<Policy::shift><<<grid, block, 0, stream>>>(src1, src2, dst, op, mask, rows, cols);
+            CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+            if (stream == 0)
+                CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+        }
+    };
+
+    template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
+    __host__ void transform(const SrcPtr& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        TransformDispatcher<false, Policy>::call(src, dst, op, mask, rows, cols, stream);
+    }
+
+    template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
+    __host__ void transform(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        TransformDispatcher<false, Policy>::call(src1, src2, dst, op, mask, rows, cols, stream);
+    }
+
+    template <class Policy, typename SrcType, typename DstType, class UnOp, class MaskPtr>
+    __host__ void transform(const GlobPtr<SrcType>& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        TransformDispatcher<VecTraits<SrcType>::cn == 1 && VecTraits<DstType>::cn == 1 && Policy::shift != 1, Policy>::call(src, dst, op, mask, rows, cols, stream);
+    }
+
+    template <class Policy, typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
+    __host__ void transform(const GlobPtr<SrcType1>& src1, const GlobPtr<SrcType2>& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        TransformDispatcher<VecTraits<SrcType1>::cn == 1 && VecTraits<SrcType2>::cn == 1 && VecTraits<DstType>::cn == 1 && Policy::shift != 1, Policy>::call(src1, src2, dst, op, mask, rows, cols, stream);
+    }
+
+    // transform_tuple
+
+    template <int count> struct Unroll
+    {
+        template <class SrcVal, class DstPtrTuple, class OpTuple>
+        __device__ static void transform(const SrcVal& srcVal, DstPtrTuple& dst, const OpTuple& op, int y, int x)
+        {
+            typedef typename tuple_element<count - 1, DstPtrTuple>::type dst_ptr_type;
+            typedef typename PtrTraits<dst_ptr_type>::value_type dst_type;
+
+            get<count - 1>(dst)(y, x) = saturate_cast<dst_type>(get<count - 1>(op)(srcVal));
+            Unroll<count - 1>::transform(srcVal, dst, op, y, x);
+        }
+    };
+    template <> struct Unroll<0>
+    {
+        template <class SrcVal, class DstPtrTuple, class OpTuple>
+        __device__ __forceinline__ static void transform(const SrcVal&, DstPtrTuple&, const OpTuple&, int, int)
+        {
+        }
+    };
+
+    template <class SrcPtr, class DstPtrTuple, class OpTuple, class MaskPtr>
+    __global__ void transform_tuple(const SrcPtr src, DstPtrTuple dst, const OpTuple op, const MaskPtr mask, const int rows, const int cols)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= cols || y >= rows || !mask(y, x))
+            return;
+
+        typename PtrTraits<SrcPtr>::value_type srcVal = src(y, x);
+
+        Unroll<tuple_size<DstPtrTuple>::value>::transform(srcVal, dst, op, y, x);
+    }
+
+    template <class Policy, class SrcPtrTuple, class DstPtrTuple, class OpTuple, class MaskPtr>
+    __host__ void transform_tuple(const SrcPtrTuple& src, const DstPtrTuple& dst, const OpTuple& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(Policy::block_size_x, Policy::block_size_y);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        transform_tuple<<<grid, block, 0, stream>>>(src, dst, op, mask, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/transpose.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/transpose.hpp
new file mode 100644
index 000000000..3854a1740
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/transpose.hpp
@@ -0,0 +1,130 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_TRANSPOSE_DETAIL_HPP__
+#define __OPENCV_CUDEV_GRID_TRANSPOSE_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/saturate_cast.hpp"
+#include "../../ptr2d/glob.hpp"
+#include "../../ptr2d/traits.hpp"
+
+namespace cv { namespace cudev {
+
+namespace transpose_detail
+{
+    const int TRANSPOSE_TILE_DIM   = 16;
+    const int TRANSPOSE_BLOCK_ROWS = 16;
+
+    template <class SrcPtr, typename DstType>
+    __global__ void transpose(const SrcPtr src, GlobPtr<DstType> dst, const int rows, const int cols)
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+
+        __shared__ src_type tile[TRANSPOSE_TILE_DIM][TRANSPOSE_TILE_DIM + 1];
+
+        int blockIdx_x, blockIdx_y;
+
+        // do diagonal reordering
+        if (gridDim.x == gridDim.y)
+        {
+            blockIdx_y = blockIdx.x;
+            blockIdx_x = (blockIdx.x + blockIdx.y) % gridDim.x;
+        }
+        else
+        {
+            int bid = blockIdx.x + gridDim.x * blockIdx.y;
+            blockIdx_y = bid % gridDim.y;
+            blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x;
+        }
+
+        int xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;
+        int yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;
+
+        if (xIndex < cols)
+        {
+            for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
+            {
+                if (yIndex + i < rows)
+                {
+                    tile[threadIdx.y + i][threadIdx.x] = src(yIndex + i, xIndex);
+                }
+            }
+        }
+
+        __syncthreads();
+
+        xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
+        yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
+
+        if (xIndex < rows)
+        {
+            for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
+            {
+                if (yIndex + i < cols)
+                {
+                    dst(yIndex + i, xIndex) = saturate_cast<DstType>(tile[threadIdx.x][threadIdx.y + i]);
+                }
+            }
+        }
+    }
+
+    template <class SrcPtr, typename DstType>
+    __host__ void transpose(const SrcPtr& src, const GlobPtr<DstType>& dst, int rows, int cols, cudaStream_t stream)
+    {
+        const dim3 block(TRANSPOSE_TILE_DIM, TRANSPOSE_TILE_DIM);
+        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+        transpose<<<grid, block, 0, stream>>>(src, dst, rows, cols);
+        CV_CUDEV_SAFE_CALL( cudaGetLastError() );
+
+        if (stream == 0)
+            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
+    }
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/glob_reduce.hpp b/modules/cudev/include/opencv2/cudev/grid/glob_reduce.hpp
new file mode 100644
index 000000000..8d3e0826d
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/glob_reduce.hpp
@@ -0,0 +1,314 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_GLOB_REDUCE_HPP__
+#define __OPENCV_CUDEV_GRID_GLOB_REDUCE_HPP__
+
+#include <limits>
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "../ptr2d/mask.hpp"
+#include "../ptr2d/transform.hpp"
+#include "detail/glob_reduce.hpp"
+
+namespace cv { namespace cudev {
+
+template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    dst.create(1, 1);
+    dst.setTo(0, stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_glob_reduce_detail::sum<Policy>(shrinkPtr(src),
+                                         dst[0],
+                                         shrinkPtr(mask),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename ResType>
+__host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    dst.create(1, 1);
+    dst.setTo(0, stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    grid_glob_reduce_detail::sum<Policy>(shrinkPtr(src),
+                                         dst[0],
+                                         WithOutMask(),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridFindMinVal_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    dst.create(1, 1);
+    dst.setTo(Scalar::all(std::numeric_limits<ResType>::max()), stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_glob_reduce_detail::minVal<Policy>(shrinkPtr(src),
+                                            dst[0],
+                                            shrinkPtr(mask),
+                                            rows, cols,
+                                            StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename ResType>
+__host__ void gridFindMinVal_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    dst.create(1, 1);
+    dst.setTo(Scalar::all(std::numeric_limits<ResType>::max()), stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    grid_glob_reduce_detail::minVal<Policy>(shrinkPtr(src),
+                                            dst[0],
+                                            WithOutMask(),
+                                            rows, cols,
+                                            StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridFindMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    dst.create(1, 1);
+    dst.setTo(Scalar::all(-std::numeric_limits<ResType>::max()), stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_glob_reduce_detail::maxVal<Policy>(shrinkPtr(src),
+                                            dst[0],
+                                            shrinkPtr(mask),
+                                            rows, cols,
+                                            StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename ResType>
+__host__ void gridFindMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    dst.create(1, 1);
+    dst.setTo(Scalar::all(-std::numeric_limits<ResType>::max()), stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    grid_glob_reduce_detail::maxVal<Policy>(shrinkPtr(src),
+                                            dst[0],
+                                            WithOutMask(),
+                                            rows, cols,
+                                            StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridFindMinMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    dst.create(1, 2);
+    dst.col(0).setTo(Scalar::all(std::numeric_limits<ResType>::max()), stream);
+    dst.col(1).setTo(Scalar::all(-std::numeric_limits<ResType>::max()), stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_glob_reduce_detail::minMaxVal<Policy>(shrinkPtr(src),
+                                               dst[0],
+                                               shrinkPtr(mask),
+                                               rows, cols,
+                                               StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename ResType>
+__host__ void gridFindMinMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    dst.create(1, 2);
+    dst.col(0).setTo(Scalar::all(std::numeric_limits<ResType>::max()), stream);
+    dst.col(1).setTo(Scalar::all(-std::numeric_limits<ResType>::max()), stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    grid_glob_reduce_detail::minMaxVal<Policy>(shrinkPtr(src),
+                                               dst[0],
+                                               WithOutMask(),
+                                               rows, cols,
+                                               StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridCountNonZero_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    dst.create(1, 1);
+    dst.setTo(0, stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    typedef typename PtrTraits<SrcPtr>::value_type src_type;
+    not_equal_to<src_type> ne_op;
+    const src_type zero = VecTraits<src_type>::all(0);
+
+    grid_glob_reduce_detail::sum<Policy>(shrinkPtr(transformPtr(src, bind2nd(ne_op, zero))),
+                                         dst[0],
+                                         shrinkPtr(mask),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename ResType>
+__host__ void gridCountNonZero_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    dst.create(1, 1);
+    dst.setTo(0, stream);
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    typedef typename PtrTraits<SrcPtr>::value_type src_type;
+    not_equal_to<src_type> ne_op;
+    const src_type zero = VecTraits<src_type>::all(0);
+
+    grid_glob_reduce_detail::sum<Policy>(shrinkPtr(transformPtr(src, bind2nd(ne_op, zero))),
+                                         dst[0],
+                                         WithOutMask(),
+                                         rows, cols,
+                                         StreamAccessor::getStream(stream));
+}
+
+// default policy
+
+struct DefaultGlobReducePolicy
+{
+    enum {
+        block_size_x = 32,
+        block_size_y = 8,
+
+        patch_size_x = 4,
+        patch_size_y = 4
+    };
+};
+
+template <class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridCalcSum(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCalcSum_<DefaultGlobReducePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename ResType>
+__host__ void gridCalcSum(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    gridCalcSum_<DefaultGlobReducePolicy>(src, dst, stream);
+}
+
+template <class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridFindMinVal(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridFindMinVal_<DefaultGlobReducePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename ResType>
+__host__ void gridFindMinVal(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    gridFindMinVal_<DefaultGlobReducePolicy>(src, dst, stream);
+}
+
+template <class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridFindMaxVal(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridFindMaxVal_<DefaultGlobReducePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename ResType>
+__host__ void gridFindMaxVal(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    gridFindMaxVal_<DefaultGlobReducePolicy>(src, dst, stream);
+}
+
+template <class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridFindMinMaxVal(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridFindMinMaxVal_<DefaultGlobReducePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename ResType>
+__host__ void gridFindMinMaxVal(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    gridFindMinMaxVal_<DefaultGlobReducePolicy>(src, dst, stream);
+}
+
+template <class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridCountNonZero(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridCountNonZero_<DefaultGlobReducePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename ResType>
+__host__ void gridCountNonZero(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    gridCountNonZero_<DefaultGlobReducePolicy>(src, dst, stream);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/histogram.hpp b/modules/cudev/include/opencv2/cudev/grid/histogram.hpp
new file mode 100644
index 000000000..ecb1a19c8
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/histogram.hpp
@@ -0,0 +1,119 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_HISTOGRAM_HPP__
+#define __OPENCV_CUDEV_GRID_HISTOGRAM_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "../ptr2d/mask.hpp"
+#include "detail/histogram.hpp"
+
+namespace cv { namespace cudev {
+
+template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_Assert( deviceSupports(SHARED_ATOMICS) );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    dst.create(1, BIN_COUNT);
+    dst.setTo(0, stream);
+
+    grid_histogram_detail::histogram<BIN_COUNT, Policy>(shrinkPtr(src),
+                                                        dst[0],
+                                                        shrinkPtr(mask),
+                                                        rows, cols,
+                                                        StreamAccessor::getStream(stream));
+}
+
+template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType>
+__host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    CV_Assert( deviceSupports(SHARED_ATOMICS) );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst.create(1, BIN_COUNT);
+    dst.setTo(0, stream);
+
+    grid_histogram_detail::histogram<BIN_COUNT, Policy>(shrinkPtr(src),
+                                                        dst[0],
+                                                        WithOutMask(),
+                                                        rows, cols,
+                                                        StreamAccessor::getStream(stream));
+}
+
+// default policy
+
+struct DefaultHistogramPolicy
+{
+    enum {
+        block_size_x = 32,
+        block_size_y = 8
+    };
+};
+
+template <int BIN_COUNT, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridHistogram(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridHistogram_<BIN_COUNT, DefaultHistogramPolicy>(src, dst, mask, stream);
+}
+
+template <int BIN_COUNT, class SrcPtr, typename ResType>
+__host__ void gridHistogram(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    gridHistogram_<BIN_COUNT, DefaultHistogramPolicy>(src, dst, stream);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/integral.hpp b/modules/cudev/include/opencv2/cudev/grid/integral.hpp
new file mode 100644
index 000000000..d948c1267
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/integral.hpp
@@ -0,0 +1,69 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_INTEGRAL_HPP__
+#define __OPENCV_CUDEV_GRID_INTEGRAL_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "detail/integral.hpp"
+
+namespace cv { namespace cudev {
+
+template <class SrcPtr, typename DstType>
+__host__ void gridIntegral(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst.create(rows, cols);
+
+    integral_detail::integral(shrinkPtr(src), shrinkPtr(dst), rows, cols, StreamAccessor::getStream(stream));
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/pyramids.hpp b/modules/cudev/include/opencv2/cudev/grid/pyramids.hpp
new file mode 100644
index 000000000..99833bd3f
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/pyramids.hpp
@@ -0,0 +1,88 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_PYRAMIDS_HPP__
+#define __OPENCV_CUDEV_GRID_PYRAMIDS_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "../ptr2d/extrapolation.hpp"
+#include "detail/pyr_down.hpp"
+#include "detail/pyr_up.hpp"
+
+namespace cv { namespace cudev {
+
+template <class Brd, class SrcPtr, typename DstType>
+__host__ void gridPyrDown_(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst.create(divUp(rows, 2), divUp(cols, 2));
+
+    pyramids_detail::pyrDown<Brd>(shrinkPtr(src), shrinkPtr(dst), rows, cols, dst.rows, dst.cols, StreamAccessor::getStream(stream));
+}
+
+template <class SrcPtr, typename DstType>
+__host__ void gridPyrDown(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
+{
+    gridPyrDown_<BrdReflect101>(src, dst, stream);
+}
+
+template <class SrcPtr, typename DstType>
+__host__ void gridPyrUp(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst.create(rows * 2, cols * 2);
+
+    pyramids_detail::pyrUp(shrinkPtr(src), shrinkPtr(dst), rows, cols, dst.rows, dst.cols, StreamAccessor::getStream(stream));
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp b/modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp
new file mode 100644
index 000000000..f9e351242
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp
@@ -0,0 +1,209 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_REDUCE_TO_VEC_HPP__
+#define __OPENCV_CUDEV_GRID_REDUCE_TO_VEC_HPP__
+
+#include "../common.hpp"
+#include "../util/vec_traits.hpp"
+#include "../util/limits.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "../ptr2d/mask.hpp"
+#include "../functional/functional.hpp"
+#include "detail/reduce_to_column.hpp"
+#include "detail/reduce_to_row.hpp"
+
+namespace cv { namespace cudev {
+
+template <typename T> struct Sum : plus<T>
+{
+    typedef T work_type;
+
+    __device__ __forceinline__ static T initialValue()
+    {
+        return VecTraits<T>::all(0);
+    }
+
+    __device__ __forceinline__ static T result(T r, int)
+    {
+        return r;
+    }
+};
+
+template <typename T> struct Avg : plus<T>
+{
+    typedef T work_type;
+
+    __device__ __forceinline__ static T initialValue()
+    {
+        return VecTraits<T>::all(0);
+    }
+
+    __device__ __forceinline__ static T result(T r, int sz)
+    {
+        return r / sz;
+    }
+};
+
+template <typename T> struct Min : minimum<T>
+{
+    typedef T work_type;
+
+    __device__ __forceinline__ static T initialValue()
+    {
+        return VecTraits<T>::all(numeric_limits<typename VecTraits<T>::elem_type>::max());
+    }
+
+    __device__ __forceinline__ static T result(T r, int)
+    {
+        return r;
+    }
+};
+
+template <typename T> struct Max : maximum<T>
+{
+    typedef T work_type;
+
+    __device__ __forceinline__ static T initialValue()
+    {
+        return VecTraits<T>::all(-numeric_limits<typename VecTraits<T>::elem_type>::max());
+    }
+
+    __device__ __forceinline__ static T result(T r, int)
+    {
+        return r;
+    }
+};
+
+template <class Reductor, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridReduceToRow(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    dst.create(1, cols);
+
+    grid_reduce_to_vec_detail::reduceToRow<Reductor>(shrinkPtr(src),
+                                                     dst[0],
+                                                     shrinkPtr(mask),
+                                                     rows, cols,
+                                                     StreamAccessor::getStream(stream));
+}
+
+template <class Reductor, class SrcPtr, typename ResType>
+__host__ void gridReduceToRow(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst.create(1, cols);
+
+    grid_reduce_to_vec_detail::reduceToRow<Reductor>(shrinkPtr(src),
+                                                     dst[0],
+                                                     WithOutMask(),
+                                                     rows, cols,
+                                                     StreamAccessor::getStream(stream));
+}
+
+template <class Reductor, class Policy, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    createContinuous(rows, 1, DataType<ResType>::type, dst);
+
+    grid_reduce_to_vec_detail::reduceToColumn<Reductor, Policy>(shrinkPtr(src),
+                                                                dst[0],
+                                                                shrinkPtr(mask),
+                                                                rows, cols,
+                                                                StreamAccessor::getStream(stream));
+}
+
+template <class Reductor, class Policy, class SrcPtr, typename ResType>
+__host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    createContinuous(rows, 1, DataType<ResType>::type, dst);
+
+    grid_reduce_to_vec_detail::reduceToColumn<Reductor, Policy>(shrinkPtr(src),
+                                                                dst[0],
+                                                                WithOutMask(),
+                                                                rows, cols,
+                                                                StreamAccessor::getStream(stream));
+}
+
+// default policy
+
+struct DefaultReduceToVecPolicy
+{
+    enum {
+        block_size_x = 32,
+        block_size_y = 8
+    };
+};
+
+template <class Reductor, class SrcPtr, typename ResType, class MaskPtr>
+__host__ void gridReduceToColumn(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridReduceToColumn_<Reductor, DefaultReduceToVecPolicy>(src, dst, mask, stream);
+}
+
+template <class Reductor, class SrcPtr, typename ResType>
+__host__ void gridReduceToColumn(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
+{
+    gridReduceToColumn_<Reductor, DefaultReduceToVecPolicy>(src, dst, stream);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/split_merge.hpp b/modules/cudev/include/opencv2/cudev/grid/split_merge.hpp
new file mode 100644
index 000000000..78584aad8
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/split_merge.hpp
@@ -0,0 +1,407 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_SPLIT_MERGE_HPP__
+#define __OPENCV_CUDEV_GRID_SPLIT_MERGE_HPP__
+
+#include "../common.hpp"
+#include "../util/tuple.hpp"
+#include "../util/vec_traits.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "../ptr2d/mask.hpp"
+#include "detail/split_merge.hpp"
+
+namespace cv { namespace cudev {
+
+template <class Policy, class SrcPtrTuple, typename DstType, class MaskPtr>
+__host__ void gridMerge_(const SrcPtrTuple& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<DstType>::cn == tuple_size<SrcPtrTuple>::value, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    dst.create(rows, cols);
+
+    grid_split_merge_detail::MergeImpl<VecTraits<DstType>::cn, Policy>::merge(shrinkPtr(src),
+                                                                              shrinkPtr(dst),
+                                                                              shrinkPtr(mask),
+                                                                              rows, cols,
+                                                                              StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtrTuple, typename DstType>
+__host__ void gridMerge_(const SrcPtrTuple& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<DstType>::cn == tuple_size<SrcPtrTuple>::value, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst.create(rows, cols);
+
+    grid_split_merge_detail::MergeImpl<VecTraits<DstType>::cn, Policy>::merge(shrinkPtr(src),
+                                                                              shrinkPtr(dst),
+                                                                              WithOutMask(),
+                                                                              rows, cols,
+                                                                              StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)),
+                                           shrinkPtr(mask),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[2], const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    dst[0].create(rows, cols);
+    dst[1].create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(dst[0]), shrinkPtr(dst[1]),
+                                           shrinkPtr(mask),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType>
+__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)),
+                                           WithOutMask(),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType>
+__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[2], Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst[0].create(rows, cols);
+    dst[1].create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(dst[0]), shrinkPtr(dst[1]),
+                                           WithOutMask(),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)), shrinkPtr(get<2>(dst)),
+                                           shrinkPtr(mask),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[3], const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    dst[0].create(rows, cols);
+    dst[1].create(rows, cols);
+    dst[2].create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]),
+                                           shrinkPtr(mask),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType>
+__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)), shrinkPtr(get<2>(dst)),
+                                           WithOutMask(),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType>
+__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[3], Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst[0].create(rows, cols);
+    dst[1].create(rows, cols);
+    dst[2].create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]),
+                                           WithOutMask(),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+    get<3>(dst).create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)), shrinkPtr(get<2>(dst)), shrinkPtr(get<3>(dst)),
+                                           shrinkPtr(mask),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[4], const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    dst[0].create(rows, cols);
+    dst[1].create(rows, cols);
+    dst[2].create(rows, cols);
+    dst[4].create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[4]),
+                                           shrinkPtr(mask),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType>
+__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+    get<3>(dst).create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(get<0>(dst)), shrinkPtr(get<1>(dst)), shrinkPtr(get<2>(dst)), shrinkPtr(get<3>(dst)),
+                                           WithOutMask(),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType>
+__host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[4], Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst[0].create(rows, cols);
+    dst[1].create(rows, cols);
+    dst[2].create(rows, cols);
+    dst[4].create(rows, cols);
+
+    grid_split_merge_detail::split<Policy>(shrinkPtr(src),
+                                           shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[4]),
+                                           WithOutMask(),
+                                           rows, cols,
+                                           StreamAccessor::getStream(stream));
+}
+
+// Default Policy
+
+struct DefaultSplitMergePolicy
+{
+    enum {
+        block_size_x = 32,
+        block_size_y = 8
+    };
+};
+
+template <class SrcPtrTuple, typename DstType, class MaskPtr>
+__host__ void gridMerge(const SrcPtrTuple& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridMerge_<DefaultSplitMergePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtrTuple, typename DstType>
+__host__ void gridMerge(const SrcPtrTuple& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
+{
+    gridMerge_<DefaultSplitMergePolicy>(src, dst, stream);
+}
+
+template <class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename DstType>
+__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
+{
+    gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
+}
+
+template <class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename DstType>
+__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
+{
+    gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
+}
+
+template <class SrcPtr, typename DstType, class MaskPtr>
+__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename DstType>
+__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
+{
+    gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
+}
+
+template <class SrcPtr, typename DstType, int COUNT, class MaskPtr>
+__host__ void gridSplit(const SrcPtr& src, GpuMat_<DstType> (&dst)[COUNT], const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
+}
+
+template <class SrcPtr, typename DstType, int COUNT>
+__host__ void gridSplit(const SrcPtr& src, GpuMat_<DstType> (&dst)[COUNT], Stream& stream = Stream::Null())
+{
+    gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/grid/transform.hpp b/modules/cudev/include/opencv2/cudev/grid/transform.hpp
new file mode 100644
index 000000000..0da5e2d8a
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/grid/transform.hpp
@@ -0,0 +1,541 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_GRID_TRANSFORM_HPP__
+#define __OPENCV_CUDEV_GRID_TRANSFORM_HPP__
+
+#include "../common.hpp"
+#include "../util/tuple.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "../ptr2d/glob.hpp"
+#include "../ptr2d/mask.hpp"
+#include "../ptr2d/zip.hpp"
+#include "detail/transform.hpp"
+
+namespace cv { namespace cudev {
+
+template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    dst.create(rows, cols);
+
+    grid_transform_detail::transform<Policy>(shrinkPtr(src), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_transform_detail::transform<Policy>(shrinkPtr(src), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class UnOp>
+__host__ void gridTransform_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnOp& op, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    dst.create(rows, cols);
+
+    grid_transform_detail::transform<Policy>(shrinkPtr(src), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename DstType, class UnOp>
+__host__ void gridTransform_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const UnOp& op, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
+
+    grid_transform_detail::transform<Policy>(shrinkPtr(src), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const BinOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src1);
+    const int cols = getCols(src1);
+
+    CV_Assert( getRows(src2) == rows && getCols(src2) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    dst.create(rows, cols);
+
+    grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtrSz<DstType>& dst, const BinOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src1);
+    const int cols = getCols(src1);
+
+    CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
+    CV_Assert( getRows(src2) == rows && getCols(src2) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp>
+__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const BinOp& op, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src1);
+    const int cols = getCols(src1);
+
+    CV_Assert( getRows(src2) == rows && getCols(src2) == cols );
+
+    dst.create(rows, cols);
+
+    grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp>
+__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GlobPtrSz<DstType>& dst, const BinOp& op, Stream& stream = Stream::Null())
+{
+    const int rows = getRows(src1);
+    const int cols = getCols(src1);
+
+    CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
+    CV_Assert( getRows(src2) == rows && getCols(src2) == cols );
+
+    grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
+                                                   op,
+                                                   shrinkPtr(mask),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
+                                                   op,
+                                                   shrinkPtr(mask),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
+                                                   op,
+                                                   WithOutMask(),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))),
+                                                   op,
+                                                   WithOutMask(),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
+                                                   op,
+                                                   shrinkPtr(mask),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
+                                                   op,
+                                                   shrinkPtr(mask),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
+                                                   op,
+                                                   WithOutMask(),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols );
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))),
+                                                   op,
+                                                   WithOutMask(),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+    get<3>(dst).create(rows, cols);
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
+                                                   op,
+                                                   shrinkPtr(mask),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols );
+    CV_Assert( getRows(get<3>(dst)) == rows && getCols(get<3>(dst)) == cols );
+    CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
+                                                   op,
+                                                   shrinkPtr(mask),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    get<0>(dst).create(rows, cols);
+    get<1>(dst).create(rows, cols);
+    get<2>(dst).create(rows, cols);
+    get<3>(dst).create(rows, cols);
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
+                                                   op,
+                                                   WithOutMask(),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
+__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );
+
+    const int rows = getRows(src);
+    const int cols = getCols(src);
+
+    CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols );
+    CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols );
+    CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols );
+    CV_Assert( getRows(get<3>(dst)) == rows && getCols(get<3>(dst)) == cols );
+
+    grid_transform_detail::transform_tuple<Policy>(shrinkPtr(src),
+                                                   shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))),
+                                                   op,
+                                                   WithOutMask(),
+                                                   rows, cols,
+                                                   StreamAccessor::getStream(stream));
+}
+
+// Default Policy
+
+struct DefaultTransformPolicy
+{
+    enum {
+        block_size_x = 32,
+        block_size_y = 8,
+        shift = 4
+    };
+};
+
+template <class SrcPtr, typename DstType, class Op, class MaskPtr>
+__host__ void gridTransform(const SrcPtr& src, GpuMat_<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
+}
+
+template <class SrcPtr, typename DstType, class Op, class MaskPtr>
+__host__ void gridTransform(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
+}
+
+template <class SrcPtr, typename DstType, class Op>
+__host__ void gridTransform(const SrcPtr& src, GpuMat_<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
+}
+
+template <class SrcPtr, typename DstType, class Op>
+__host__ void gridTransform(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
+}
+
+template <class SrcPtr1, class SrcPtr2, typename DstType, class Op, class MaskPtr>
+__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, GpuMat_<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, mask, stream);
+}
+
+template <class SrcPtr1, class SrcPtr2, typename DstType, class Op, class MaskPtr>
+__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, const GlobPtrSz<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, mask, stream);
+}
+
+template <class SrcPtr1, class SrcPtr2, typename DstType, class Op>
+__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, GpuMat_<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, stream);
+}
+
+template <class SrcPtr1, class SrcPtr2, typename DstType, class Op>
+__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, const GlobPtrSz<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, class OpTuple>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, class OpTuple>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
+}
+
+template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
+__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
+{
+    gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
+}
+
+}}
+
+#endif
diff --git a/modules/core/src/cuda/matrix_operations.hpp b/modules/cudev/include/opencv2/cudev/grid/transpose.hpp
similarity index 80%
rename from modules/core/src/cuda/matrix_operations.hpp
rename to modules/cudev/include/opencv2/cudev/grid/transpose.hpp
index 4e451061b..5e8f24868 100644
--- a/modules/core/src/cuda/matrix_operations.hpp
+++ b/modules/cudev/include/opencv2/cudev/grid/transpose.hpp
@@ -41,17 +41,29 @@
 //
 //M*/
 
-#include "opencv2/core/cuda/common.hpp"
+#pragma once
 
-namespace cv { namespace gpu { namespace cudev
+#ifndef __OPENCV_CUDEV_GRID_TRANSPOSE_HPP__
+#define __OPENCV_CUDEV_GRID_TRANSPOSE_HPP__
+
+#include "../common.hpp"
+#include "../ptr2d/traits.hpp"
+#include "../ptr2d/gpumat.hpp"
+#include "detail/transpose.hpp"
+
+namespace cv { namespace cudev {
+
+template <class SrcPtr, typename DstType>
+__host__ void gridTranspose(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
 {
-    void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);
+    const int rows = getRows(src);
+    const int cols = getCols(src);
 
-    template <typename T>
-    void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream);
+    dst.create(cols, rows);
 
-    template <typename T>
-    void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
+    transpose_detail::transpose(shrinkPtr(src), shrinkPtr(dst), rows, cols, StreamAccessor::getStream(stream));
+}
 
-    void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
-}}}
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/constant.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/constant.hpp
new file mode 100644
index 000000000..d3c56e771
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/constant.hpp
@@ -0,0 +1,93 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_CONSTANT_HPP__
+#define __OPENCV_CUDEV_PTR2D_CONSTANT_HPP__
+
+#include "../common.hpp"
+#include "traits.hpp"
+
+namespace cv { namespace cudev {
+
+template <typename T> struct ConstantPtr
+{
+    typedef T   value_type;
+    typedef int index_type;
+
+    T value;
+
+    __device__ __forceinline__ T operator ()(int, int) const { return value; }
+};
+
+template <typename T> struct ConstantPtrSz : ConstantPtr<T>
+{
+    int rows, cols;
+};
+
+template <typename T>
+__host__ ConstantPtr<T> constantPtr(T value)
+{
+    ConstantPtr<T> p;
+    p.value = value;
+    return p;
+}
+
+template <typename T> ConstantPtrSz<T>
+__host__ constantPtr(T value, int rows, int cols)
+{
+    ConstantPtrSz<T> p;
+    p.value = value;
+    p.rows = rows;
+    p.cols = cols;
+    return p;
+}
+
+template <typename T> struct PtrTraits< ConstantPtrSz<T> > : PtrTraitsBase< ConstantPtrSz<T>, ConstantPtr<T> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/deriv.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/deriv.hpp
new file mode 100644
index 000000000..097007400
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/deriv.hpp
@@ -0,0 +1,393 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_DERIV_HPP__
+#define __OPENCV_CUDEV_PTR2D_DERIV_HPP__
+
+#include "../common.hpp"
+#include "../grid/copy.hpp"
+#include "traits.hpp"
+#include "gpumat.hpp"
+
+namespace cv { namespace cudev {
+
+// derivX
+
+template <class SrcPtr> struct DerivXPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+    SrcPtr src;
+
+    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+    {
+        return src(y, x + 1) - src(y, x - 1);
+    }
+};
+
+template <class SrcPtr> struct DerivXPtrSz : DerivXPtr<SrcPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ DerivXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> derivXPtr(const SrcPtr& src)
+{
+    DerivXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> s;
+    s.src = shrinkPtr(src);
+    s.rows = getRows(src);
+    s.cols = getCols(src);
+    return s;
+}
+
+template <class SrcPtr> struct PtrTraits< DerivXPtrSz<SrcPtr> > : PtrTraitsBase<DerivXPtrSz<SrcPtr>, DerivXPtr<SrcPtr> >
+{
+};
+
+// derivY
+
+template <class SrcPtr> struct DerivYPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+    SrcPtr src;
+
+    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+    {
+        return src(y + 1, x) - src(y - 1, x);
+    }
+};
+
+template <class SrcPtr> struct DerivYPtrSz : DerivYPtr<SrcPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ DerivYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> derivYPtr(const SrcPtr& src)
+{
+    DerivYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> s;
+    s.src = shrinkPtr(src);
+    s.rows = getRows(src);
+    s.cols = getCols(src);
+    return s;
+}
+
+template <class SrcPtr> struct PtrTraits< DerivYPtrSz<SrcPtr> > : PtrTraitsBase<DerivYPtrSz<SrcPtr>, DerivYPtr<SrcPtr> >
+{
+};
+
+// sobelX
+
+template <class SrcPtr> struct SobelXPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+    SrcPtr src;
+
+    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+    {
+        typename PtrTraits<SrcPtr>::value_type vals[6] =
+        {
+            src(y - 1, x - 1), src(y - 1, x + 1),
+            src(y    , x - 1), src(y    , x + 1),
+            src(y + 1, x - 1), src(y + 1, x + 1),
+        };
+
+        return (vals[1] - vals[0]) + 2 * (vals[3] - vals[2]) + (vals[5] - vals[4]);
+    }
+};
+
+template <class SrcPtr> struct SobelXPtrSz : SobelXPtr<SrcPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ SobelXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> sobelXPtr(const SrcPtr& src)
+{
+    SobelXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> s;
+    s.src = shrinkPtr(src);
+    s.rows = getRows(src);
+    s.cols = getCols(src);
+    return s;
+}
+
+template <class SrcPtr> struct PtrTraits< SobelXPtrSz<SrcPtr> > : PtrTraitsBase<SobelXPtrSz<SrcPtr>, SobelXPtr<SrcPtr> >
+{
+};
+
+// sobelY
+
+template <class SrcPtr> struct SobelYPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+    SrcPtr src;
+
+    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+    {
+        typename PtrTraits<SrcPtr>::value_type vals[6] =
+        {
+            src(y - 1, x - 1), src(y - 1, x), src(y - 1, x + 1),
+            src(y + 1, x - 1), src(y + 1, x), src(y + 1, x + 1)
+        };
+
+        return (vals[3] - vals[0]) + 2 * (vals[4] - vals[1]) + (vals[5] - vals[2]);
+    }
+};
+
+template <class SrcPtr> struct SobelYPtrSz : SobelYPtr<SrcPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ SobelYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> sobelYPtr(const SrcPtr& src)
+{
+    SobelYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> s;
+    s.src = shrinkPtr(src);
+    s.rows = getRows(src);
+    s.cols = getCols(src);
+    return s;
+}
+
+template <class SrcPtr> struct PtrTraits< SobelYPtrSz<SrcPtr> > : PtrTraitsBase<SobelYPtrSz<SrcPtr>, SobelYPtr<SrcPtr> >
+{
+};
+
+// scharrX
+
+template <class SrcPtr> struct ScharrXPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+    SrcPtr src;
+
+    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+    {
+        typename PtrTraits<SrcPtr>::value_type vals[6] =
+        {
+            src(y - 1, x - 1), src(y - 1, x + 1),
+            src(y    , x - 1), src(y    , x + 1),
+            src(y + 1, x - 1), src(y + 1, x + 1),
+        };
+
+        return 3 * (vals[1] - vals[0]) + 10 * (vals[3] - vals[2]) + 3 * (vals[5] - vals[4]);
+    }
+};
+
+template <class SrcPtr> struct ScharrXPtrSz : ScharrXPtr<SrcPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ ScharrXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> scharrXPtr(const SrcPtr& src)
+{
+    ScharrXPtrSz<typename PtrTraits<SrcPtr>::ptr_type> s;
+    s.src = shrinkPtr(src);
+    s.rows = getRows(src);
+    s.cols = getCols(src);
+    return s;
+}
+
+template <class SrcPtr> struct PtrTraits< ScharrXPtrSz<SrcPtr> > : PtrTraitsBase<ScharrXPtrSz<SrcPtr>, ScharrXPtr<SrcPtr> >
+{
+};
+
+// scharrY
+
+template <class SrcPtr> struct ScharrYPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+    SrcPtr src;
+
+    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+    {
+        typename PtrTraits<SrcPtr>::value_type vals[6] =
+        {
+            src(y - 1, x - 1), src(y - 1, x), src(y - 1, x + 1),
+            src(y + 1, x - 1), src(y + 1, x), src(y + 1, x + 1)
+        };
+
+        return 3 * (vals[3] - vals[0]) + 10 * (vals[4] - vals[1]) + 3 * (vals[5] - vals[2]);
+    }
+};
+
+template <class SrcPtr> struct ScharrYPtrSz : ScharrYPtr<SrcPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ ScharrYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> scharrYPtr(const SrcPtr& src)
+{
+    ScharrYPtrSz<typename PtrTraits<SrcPtr>::ptr_type> s;
+    s.src = shrinkPtr(src);
+    s.rows = getRows(src);
+    s.cols = getCols(src);
+    return s;
+}
+
+template <class SrcPtr> struct PtrTraits< ScharrYPtrSz<SrcPtr> > : PtrTraitsBase<ScharrYPtrSz<SrcPtr>, ScharrYPtr<SrcPtr> >
+{
+};
+
+// laplacian
+
+template <int ksize, class SrcPtr> struct LaplacianPtr;
+
+template <class SrcPtr> struct LaplacianPtr<1, SrcPtr>
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+    SrcPtr src;
+
+    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+    {
+        typename PtrTraits<SrcPtr>::value_type vals[5] =
+        {
+                           src(y - 1, x),
+            src(y, x - 1), src(y    , x), src(y, x + 1),
+                           src(y + 1, x)
+        };
+
+        return (vals[0] + vals[1] + vals[3] + vals[4]) - 4 * vals[2];
+    }
+};
+
+template <class SrcPtr> struct LaplacianPtr<3, SrcPtr>
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+   SrcPtr src;
+
+   __device__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+   {
+       typename PtrTraits<SrcPtr>::value_type vals[5] =
+       {
+           src(y - 1, x - 1),            src(y - 1, x + 1),
+                              src(y, x),
+           src(y + 1, x - 1),            src(y + 1, x + 1)
+       };
+
+       return 2 * (vals[0] + vals[1] + vals[3] + vals[4]) - 8 * vals[2];
+   }
+};
+
+template <int ksize, class SrcPtr> struct LaplacianPtrSz : LaplacianPtr<ksize, SrcPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <int ksize, class SrcPtr>
+__host__ LaplacianPtrSz<ksize, typename PtrTraits<SrcPtr>::ptr_type> laplacianPtr(const SrcPtr& src)
+{
+    LaplacianPtrSz<ksize, typename PtrTraits<SrcPtr>::ptr_type> ptr;
+    ptr.src = shrinkPtr(src);
+    ptr.rows = getRows(src);
+    ptr.cols = getCols(src);
+    return ptr;
+}
+
+template <int ksize, class SrcPtr> struct PtrTraits< LaplacianPtrSz<ksize, SrcPtr> > : PtrTraitsBase<LaplacianPtrSz<ksize, SrcPtr>, LaplacianPtr<ksize, SrcPtr> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp
new file mode 100644
index 000000000..e378c5237
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp
@@ -0,0 +1,361 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_GPUMAT_DETAIL_HPP__
+#define __OPENCV_CUDEV_PTR2D_GPUMAT_DETAIL_HPP__
+
+#include "../gpumat.hpp"
+
+namespace cv { namespace cudev {
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_()
+    : GpuMat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<T>::type;
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(int arows, int acols)
+    : GpuMat(arows, acols, DataType<T>::type)
+{
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(Size asize)
+    : GpuMat(asize.height, asize.width, DataType<T>::type)
+{
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(int arows, int acols, Scalar val)
+    : GpuMat(arows, acols, DataType<T>::type, val)
+{
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(Size asize, Scalar val)
+    : GpuMat(asize.height, asize.width, DataType<T>::type, val)
+{
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(const GpuMat_& m)
+    : GpuMat(m)
+{
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(const GpuMat& m)
+    : GpuMat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<T>::type;
+
+    if (DataType<T>::type == m.type())
+    {
+        GpuMat::operator =(m);
+        return;
+    }
+
+    if (DataType<T>::depth == m.depth())
+    {
+        GpuMat::operator =(m.reshape(DataType<T>::channels, m.rows));
+        return;
+    }
+
+    CV_Assert( DataType<T>::channels == m.channels() );
+    m.convertTo(*this, type());
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(int arows, int acols, T* adata, size_t astep)
+    : GpuMat(arows, acols, DataType<T>::type, adata, astep)
+{
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(Size asize, T* adata, size_t astep)
+    : GpuMat(asize.height, asize.width, DataType<T>::type, adata, astep)
+{
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(const GpuMat_& m, Range arowRange, Range acolRange)
+    : GpuMat(m, arowRange, acolRange)
+{
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(const GpuMat_& m, Rect roi)
+    : GpuMat(m, roi)
+{
+}
+
+template <typename T>
+__host__ GpuMat_<T>::GpuMat_(InputArray arr)
+    : GpuMat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<T>::type;
+    upload(arr);
+}
+
+template <typename T>
+__host__ GpuMat_<T>& GpuMat_<T>::operator =(const GpuMat_& m)
+{
+    GpuMat::operator =(m);
+    return *this;
+}
+
+template <typename T>
+__host__ void GpuMat_<T>::create(int arows, int acols)
+{
+    GpuMat::create(arows, acols, DataType<T>::type);
+}
+
+template <typename T>
+__host__ void GpuMat_<T>::create(Size asize)
+{
+    GpuMat::create(asize, DataType<T>::type);
+}
+
+template <typename T>
+__host__ void GpuMat_<T>::swap(GpuMat_& mat)
+{
+    GpuMat::swap(mat);
+}
+
+template <typename T>
+__host__ void GpuMat_<T>::upload(InputArray arr)
+{
+    CV_Assert( arr.type() == DataType<T>::type );
+    GpuMat::upload(arr);
+}
+
+template <typename T>
+__host__ void GpuMat_<T>::upload(InputArray arr, Stream& stream)
+{
+    CV_Assert( arr.type() == DataType<T>::type );
+    GpuMat::upload(arr, stream);
+}
+
+template <typename T>
+__host__ GpuMat_<T>::operator GlobPtrSz<T>() const
+{
+    return globPtr((T*) data, step, rows, cols);
+}
+
+template <typename T>
+__host__ GpuMat_<T>::operator GlobPtr<T>() const
+{
+    return globPtr((T*) data, step);
+}
+
+template <typename T>
+__host__ GpuMat_<T> GpuMat_<T>::clone() const
+{
+    return GpuMat_(GpuMat::clone());
+}
+
+template <typename T>
+__host__ GpuMat_<T> GpuMat_<T>::row(int y) const
+{
+    return GpuMat_(*this, Range(y, y+1), Range::all());
+}
+
+template <typename T>
+__host__ GpuMat_<T> GpuMat_<T>::col(int x) const
+{
+    return GpuMat_(*this, Range::all(), Range(x, x+1));
+}
+
+template <typename T>
+__host__ GpuMat_<T> GpuMat_<T>::rowRange(int startrow, int endrow) const
+{
+    return GpuMat_(*this, Range(startrow, endrow), Range::all());
+}
+
+template <typename T>
+__host__ GpuMat_<T> GpuMat_<T>::rowRange(Range r) const
+{
+    return GpuMat_(*this, r, Range::all());
+}
+
+template <typename T>
+__host__ GpuMat_<T> GpuMat_<T>::colRange(int startcol, int endcol) const
+{
+    return GpuMat_(*this, Range::all(), Range(startcol, endcol));
+}
+
+template <typename T>
+__host__ GpuMat_<T> GpuMat_<T>::colRange(Range r) const
+{
+    return GpuMat_(*this, Range::all(), r);
+}
+
+template <typename T>
+__host__ GpuMat_<T> GpuMat_<T>::operator ()(Range _rowRange, Range _colRange) const
+{
+    return GpuMat_(*this, _rowRange, _colRange);
+}
+
+template <typename T>
+__host__ GpuMat_<T> GpuMat_<T>::operator ()(Rect roi) const
+{
+    return GpuMat_(*this, roi);
+}
+
+template <typename T>
+__host__ GpuMat_<T>& GpuMat_<T>::adjustROI(int dtop, int dbottom, int dleft, int dright)
+{
+    return (GpuMat_<T>&)(GpuMat::adjustROI(dtop, dbottom, dleft, dright));
+}
+
+template <typename T>
+__host__ size_t GpuMat_<T>::elemSize() const
+{
+    CV_DbgAssert( GpuMat::elemSize() == sizeof(T) );
+    return sizeof(T);
+}
+
+template <typename T>
+__host__ size_t GpuMat_<T>::elemSize1() const
+{
+    CV_DbgAssert( GpuMat::elemSize1() == sizeof(T) / DataType<T>::channels );
+    return sizeof(T) / DataType<T>::channels;
+}
+
+template <typename T>
+__host__ int GpuMat_<T>::type() const
+{
+    CV_DbgAssert( GpuMat::type() == DataType<T>::type );
+    return DataType<T>::type;
+}
+
+template <typename T>
+__host__ int GpuMat_<T>::depth() const
+{
+    CV_DbgAssert( GpuMat::depth() == DataType<T>::depth );
+    return DataType<T>::depth;
+}
+
+template <typename T>
+__host__ int GpuMat_<T>::channels() const
+{
+    CV_DbgAssert( GpuMat::channels() == DataType<T>::channels );
+    return DataType<T>::channels;
+}
+
+template <typename T>
+__host__ size_t GpuMat_<T>::stepT() const
+{
+    return step / elemSize();
+}
+
+template <typename T>
+__host__ size_t GpuMat_<T>::step1() const
+{
+    return step / elemSize1();
+}
+
+template <typename T>
+__host__ T* GpuMat_<T>::operator [](int y)
+{
+    return (T*)ptr(y);
+}
+
+template <typename T>
+__host__ const T* GpuMat_<T>::operator [](int y) const
+{
+    return (const T*)ptr(y);
+}
+
+template <typename T> template <class Body>
+__host__ GpuMat_<T>::GpuMat_(const Expr<Body>& expr)
+    : GpuMat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<T>::type;
+    *this = expr;
+}
+
+template <typename T> template <class Body>
+__host__ GpuMat_<T>& GpuMat_<T>::operator =(const Expr<Body>& expr)
+{
+    expr.body.assignTo(*this);
+    return *this;
+}
+
+template <typename T> template <class Body>
+__host__ GpuMat_<T>& GpuMat_<T>::assign(const Expr<Body>& expr, Stream& stream)
+{
+    expr.body.assignTo(*this, stream);
+    return *this;
+}
+
+}}
+
+// Input / Output Arrays
+
+namespace cv {
+
+template<typename _Tp>
+__host__ _InputArray::_InputArray(const cudev::GpuMat_<_Tp>& m)
+    : flags(FIXED_TYPE + GPU_MAT + DataType<_Tp>::type), obj((void*)&m)
+{}
+
+template<typename _Tp>
+__host__ _OutputArray::_OutputArray(cudev::GpuMat_<_Tp>& m)
+    : _InputArray(m)
+{}
+
+template<typename _Tp>
+__host__ _OutputArray::_OutputArray(const cudev::GpuMat_<_Tp>& m)
+    : _InputArray(m)
+{
+    flags |= FIXED_SIZE;
+}
+
+}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/extrapolation.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/extrapolation.hpp
new file mode 100644
index 000000000..c90e79174
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/extrapolation.hpp
@@ -0,0 +1,219 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_EXTRAPOLATION_HPP__
+#define __OPENCV_CUDEV_PTR2D_EXTRAPOLATION_HPP__
+
+#include "../common.hpp"
+#include "../util/vec_traits.hpp"
+#include "traits.hpp"
+
+namespace cv { namespace cudev {
+
+// BrdConstant
+
+template <class SrcPtr> struct BrdConstant
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+    SrcPtr src;
+    int rows, cols;
+    typename PtrTraits<SrcPtr>::value_type val;
+
+    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+    {
+        return (x >= 0 && x < cols && y >= 0 && y < rows) ? src(y, x) : val;
+    }
+};
+
+template <class SrcPtr>
+__host__ BrdConstant<typename PtrTraits<SrcPtr>::ptr_type> brdConstant(const SrcPtr& src, typename PtrTraits<SrcPtr>::value_type val)
+{
+    BrdConstant<typename PtrTraits<SrcPtr>::ptr_type> b;
+    b.src = shrinkPtr(src);
+    b.rows = getRows(src);
+    b.cols = getCols(src);
+    b.val = val;
+    return b;
+}
+
+template <class SrcPtr>
+__host__ BrdConstant<typename PtrTraits<SrcPtr>::ptr_type> brdConstant(const SrcPtr& src)
+{
+    return brdConstant(src, VecTraits<typename PtrTraits<SrcPtr>::value_type>::all(0));
+}
+
+// BrdBase
+
+template <class BrdImpl, class SrcPtr> struct BrdBase
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef int                                    index_type;
+
+    SrcPtr src;
+    int rows, cols;
+
+    __device__ __forceinline__ int idx_row(int y) const
+    {
+        return BrdImpl::idx_low(BrdImpl::idx_high(y, rows), rows);
+    }
+
+    __device__ __forceinline__ int idx_col(int x) const
+    {
+        return BrdImpl::idx_low(BrdImpl::idx_high(x, cols), cols);
+    }
+
+    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(int y, int x) const
+    {
+        return src(idx_row(y), idx_col(x));
+    }
+};
+
+// BrdReplicate
+
+struct BrdReplicate
+{
+    __device__ __forceinline__ static int idx_low(int i, int len)
+    {
+        return ::max(i, 0);
+    }
+
+    __device__ __forceinline__ static int idx_high(int i, int len)
+    {
+        return ::min(i, len - 1);
+    }
+};
+
+template <class SrcPtr>
+__host__ BrdBase<BrdReplicate, typename PtrTraits<SrcPtr>::ptr_type> brdReplicate(const SrcPtr& src)
+{
+    BrdBase<BrdReplicate, typename PtrTraits<SrcPtr>::ptr_type> b;
+    b.src = shrinkPtr(src);
+    b.rows = getRows(src);
+    b.cols = getCols(src);
+    return b;
+}
+
+// BrdReflect101
+
+struct BrdReflect101
+{
+    __device__ __forceinline__ static int idx_low(int i, int len)
+    {
+        return ::abs(i) % len;
+    }
+
+    __device__ __forceinline__ static int idx_high(int i, int len)
+    {
+        const int last_ind = len - 1;
+        return ::abs(last_ind - ::abs(last_ind - i)) % len;
+    }
+};
+
+template <class SrcPtr>
+__host__ BrdBase<BrdReflect101, typename PtrTraits<SrcPtr>::ptr_type> brdReflect101(const SrcPtr& src)
+{
+    BrdBase<BrdReflect101, typename PtrTraits<SrcPtr>::ptr_type> b;
+    b.src = shrinkPtr(src);
+    b.rows = getRows(src);
+    b.cols = getCols(src);
+    return b;
+}
+
+// BrdReflect
+
+struct BrdReflect
+{
+    __device__ __forceinline__ static int idx_low(int i, int len)
+    {
+        return (::abs(i) - (i < 0)) % len;
+    }
+
+    __device__ __forceinline__ static int idx_high(int i, int len)
+    {
+        const int last_ind = len - 1;
+        return (last_ind - ::abs(last_ind - i) + (i > last_ind));
+    }
+};
+
+template <class SrcPtr>
+__host__ BrdBase<BrdReflect, typename PtrTraits<SrcPtr>::ptr_type> brdReflect(const SrcPtr& src)
+{
+    BrdBase<BrdReflect, typename PtrTraits<SrcPtr>::ptr_type> b;
+    b.src = shrinkPtr(src);
+    b.rows = getRows(src);
+    b.cols = getCols(src);
+    return b;
+}
+
+// BrdWrap
+
+struct BrdWrap
+{
+    __device__ __forceinline__ static int idx_low(int i, int len)
+    {
+        return (i >= 0) * i + (i < 0) * (i - ((i - len + 1) / len) * len);
+    }
+
+    __device__ __forceinline__ static int idx_high(int i, int len)
+    {
+        return (i < len) * i + (i >= len) * (i % len);
+    }
+};
+
+template <class SrcPtr>
+__host__ BrdBase<BrdWrap, typename PtrTraits<SrcPtr>::ptr_type> brdWrap(const SrcPtr& src)
+{
+    BrdBase<BrdWrap, typename PtrTraits<SrcPtr>::ptr_type> b;
+    b.src = shrinkPtr(src);
+    b.rows = getRows(src);
+    b.cols = getCols(src);
+    return b;
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp
new file mode 100644
index 000000000..7304a8c7f
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp
@@ -0,0 +1,111 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_GLOB_HPP__
+#define __OPENCV_CUDEV_PTR2D_GLOB_HPP__
+
+#include "../common.hpp"
+#include "traits.hpp"
+
+namespace cv { namespace cudev {
+
+template <typename T> struct GlobPtr
+{
+    typedef T   value_type;
+    typedef int index_type;
+
+    T* data;
+    size_t step;
+
+    __device__ __forceinline__       T* row(int y)       { return (      T*)( (      uchar*)data + y * step); }
+    __device__ __forceinline__ const T* row(int y) const { return (const T*)( (const uchar*)data + y * step); }
+
+    __device__ __forceinline__       T& operator ()(int y, int x)       { return row(y)[x]; }
+    __device__ __forceinline__ const T& operator ()(int y, int x) const { return row(y)[x]; }
+};
+
+template <typename T> struct GlobPtrSz : GlobPtr<T>
+{
+    int rows, cols;
+};
+
+template <typename T>
+__host__ GlobPtr<T> globPtr(T* data, size_t step)
+{
+    GlobPtr<T> p;
+    p.data = data;
+    p.step = step;
+    return p;
+}
+
+template <typename T>
+__host__ GlobPtrSz<T> globPtr(T* data, size_t step, int rows, int cols)
+{
+    GlobPtrSz<T> p;
+    p.data = data;
+    p.step = step;
+    p.rows = rows;
+    p.cols = cols;
+    return p;
+}
+
+template <typename T>
+__host__ GlobPtrSz<T> globPtr(const GpuMat& mat)
+{
+    GlobPtrSz<T> p;
+    p.data = (T*) mat.data;
+    p.step = mat.step;
+    p.rows = mat.rows;
+    p.cols = mat.cols;
+    return p;
+}
+
+template <typename T> struct PtrTraits< GlobPtrSz<T> > : PtrTraitsBase<GlobPtrSz<T>, GlobPtr<T> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/gpumat.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/gpumat.hpp
new file mode 100644
index 000000000..50bb0874f
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/gpumat.hpp
@@ -0,0 +1,161 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_GPUMAT_HPP__
+#define __OPENCV_CUDEV_PTR2D_GPUMAT_HPP__
+
+#include "../common.hpp"
+#include "../util/vec_traits.hpp"
+#include "../expr/expr.hpp"
+#include "glob.hpp"
+
+namespace cv { namespace cudev {
+
+template <typename T>
+class GpuMat_ : public GpuMat
+{
+public:
+    typedef T value_type;
+
+    //! default constructor
+    __host__ GpuMat_();
+
+    //! constructs GpuMat of the specified size
+    __host__ GpuMat_(int arows, int acols);
+    __host__ explicit GpuMat_(Size asize);
+
+    //! constucts GpuMat and fills it with the specified value
+    __host__ GpuMat_(int arows, int acols, Scalar val);
+    __host__ GpuMat_(Size asize, Scalar val);
+
+    //! copy constructor
+    __host__ GpuMat_(const GpuMat_& m);
+
+    //! copy/conversion contructor. If m is of different type, it's converted
+    __host__ explicit GpuMat_(const GpuMat& m);
+
+    //! constructs a matrix on top of user-allocated data. step is in bytes(!!!), regardless of the type
+    __host__ GpuMat_(int arows, int acols, T* adata, size_t astep = Mat::AUTO_STEP);
+    __host__ GpuMat_(Size asize, T* adata, size_t astep = Mat::AUTO_STEP);
+
+    //! selects a submatrix
+    __host__ GpuMat_(const GpuMat_& m, Range arowRange, Range acolRange);
+    __host__ GpuMat_(const GpuMat_& m, Rect roi);
+
+    //! builds GpuMat from host memory (Blocking call)
+    __host__ explicit GpuMat_(InputArray arr);
+
+    //! assignment operators
+    __host__ GpuMat_& operator =(const GpuMat_& m);
+
+    //! allocates new GpuMat data unless the GpuMat already has specified size and type
+    __host__ void create(int arows, int acols);
+    __host__ void create(Size asize);
+
+    //! swaps with other smart pointer
+    __host__ void swap(GpuMat_& mat);
+
+    //! pefroms upload data to GpuMat (Blocking call)
+    __host__ void upload(InputArray arr);
+
+    //! pefroms upload data to GpuMat (Non-Blocking call)
+    __host__ void upload(InputArray arr, Stream& stream);
+
+    //! convert to GlobPtr
+    __host__ operator GlobPtrSz<T>() const;
+    __host__ operator GlobPtr<T>() const;
+
+    //! overridden forms of GpuMat::row() etc.
+    __host__ GpuMat_ clone() const;
+    __host__ GpuMat_ row(int y) const;
+    __host__ GpuMat_ col(int x) const;
+    __host__ GpuMat_ rowRange(int startrow, int endrow) const;
+    __host__ GpuMat_ rowRange(Range r) const;
+    __host__ GpuMat_ colRange(int startcol, int endcol) const;
+    __host__ GpuMat_ colRange(Range r) const;
+    __host__ GpuMat_ operator ()(Range rowRange, Range colRange) const;
+    __host__ GpuMat_ operator ()(Rect roi) const;
+    __host__ GpuMat_& adjustROI(int dtop, int dbottom, int dleft, int dright);
+
+    //! overridden forms of GpuMat::elemSize() etc.
+    __host__ size_t elemSize() const;
+    __host__ size_t elemSize1() const;
+    __host__ int type() const;
+    __host__ int depth() const;
+    __host__ int channels() const;
+    __host__ size_t step1() const;
+
+    //! returns step()/sizeof(T)
+    __host__ size_t stepT() const;
+
+    //! more convenient forms of row and element access operators
+    __host__ T* operator [](int y);
+    __host__ const T* operator [](int y) const;
+
+    //! expression templates
+    template <class Body> __host__ GpuMat_(const Expr<Body>& expr);
+    template <class Body> __host__ GpuMat_& operator =(const Expr<Body>& expr);
+    template <class Body> __host__ GpuMat_& assign(const Expr<Body>& expr, Stream& stream);
+};
+
+//! creates alternative GpuMat header for the same data, with different
+//! number of channels and/or different number of rows. see cvReshape.
+template <int cn, typename T>
+__host__ GpuMat_<typename MakeVec<typename VecTraits<T>::elem_type, cn>::type>
+reshape_(const GpuMat_<T>& mat, int rows = 0)
+{
+    GpuMat_<typename MakeVec<typename VecTraits<T>::elem_type, cn>::type> dst(mat.reshape(cn, rows));
+    return dst;
+}
+
+template <typename T> struct PtrTraits< GpuMat_<T> > : PtrTraitsBase<GpuMat_<T>, GlobPtr<T> >
+{
+};
+
+}}
+
+#include "detail/gpumat.hpp"
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/interpolation.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/interpolation.hpp
new file mode 100644
index 000000000..e86d7191e
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/interpolation.hpp
@@ -0,0 +1,385 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_INTERPOLATION_HPP__
+#define __OPENCV_CUDEV_PTR2D_INTERPOLATION_HPP__
+
+#include "../common.hpp"
+#include "../util/vec_traits.hpp"
+#include "../util/saturate_cast.hpp"
+#include "../util/type_traits.hpp"
+#include "../util/limits.hpp"
+#include "traits.hpp"
+
+namespace cv { namespace cudev {
+
+// Nearest
+
+template <class SrcPtr> struct NearestInterPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef float                                  index_type;
+
+    SrcPtr src;
+
+    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
+    {
+        return src(__float2int_rn(y), __float2int_rn(x));
+    }
+};
+
+template <class SrcPtr> struct NearestInterPtrSz : NearestInterPtr<SrcPtr>
+{
+    int rows, cols;
+};
+
+template <class SrcPtr>
+__host__ NearestInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interNearest(const SrcPtr& src)
+{
+    NearestInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> i;
+    i.src = shrinkPtr(src);
+    i.rows = getRows(src);
+    i.cols = getCols(src);
+    return i;
+}
+
+template <class SrcPtr> struct PtrTraits< NearestInterPtrSz<SrcPtr> > : PtrTraitsBase<NearestInterPtrSz<SrcPtr>, NearestInterPtr<SrcPtr> >
+{
+};
+
+// Linear
+
+template <typename SrcPtr> struct LinearInterPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef float                                  index_type;
+
+    SrcPtr src;
+
+    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        typedef typename VecTraits<src_type>::elem_type src_elem_type;
+        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
+        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;
+
+        work_type out = VecTraits<work_type>::all(0);
+
+        const int x1 = __float2int_rd(x);
+        const int y1 = __float2int_rd(y);
+        const int x2 = x1 + 1;
+        const int y2 = y1 + 1;
+
+        typename PtrTraits<SrcPtr>::value_type src_reg = src(y1, x1);
+        out = out + src_reg * static_cast<work_elem_type>((x2 - x) * (y2 - y));
+
+        src_reg = src(y1, x2);
+        out = out + src_reg * static_cast<work_elem_type>((x - x1) * (y2 - y));
+
+        src_reg = src(y2, x1);
+        out = out + src_reg * static_cast<work_elem_type>((x2 - x) * (y - y1));
+
+        src_reg = src(y2, x2);
+        out = out + src_reg * static_cast<work_elem_type>((x - x1) * (y - y1));
+
+        return saturate_cast<typename PtrTraits<SrcPtr>::value_type>(out);
+    }
+};
+
+template <class SrcPtr> struct LinearInterPtrSz : LinearInterPtr<SrcPtr>
+{
+    int rows, cols;
+};
+
+template <class SrcPtr>
+__host__ LinearInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interLinear(const SrcPtr& src)
+{
+    LinearInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> i;
+    i.src = shrinkPtr(src);
+    i.rows = getRows(src);
+    i.cols = getCols(src);
+    return i;
+}
+
+template <class SrcPtr> struct PtrTraits< LinearInterPtrSz<SrcPtr> > : PtrTraitsBase<LinearInterPtrSz<SrcPtr>, LinearInterPtr<SrcPtr> >
+{
+};
+
+// Cubic
+
+template <typename SrcPtr> struct CubicInterPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef float                                  index_type;
+
+    SrcPtr src;
+
+    __device__ static float bicubicCoeff(float x_)
+    {
+        float x = ::fabsf(x_);
+        if (x <= 1.0f)
+        {
+            return x * x * (1.5f * x - 2.5f) + 1.0f;
+        }
+        else if (x < 2.0f)
+        {
+            return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
+        }
+        else
+        {
+            return 0.0f;
+        }
+    }
+
+    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        typedef typename VecTraits<src_type>::elem_type src_elem_type;
+        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
+        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;
+
+        const float xmin = ::ceilf(x - 2.0f);
+        const float xmax = ::floorf(x + 2.0f);
+
+        const float ymin = ::ceilf(y - 2.0f);
+        const float ymax = ::floorf(y + 2.0f);
+
+        work_type sum = VecTraits<work_type>::all(0);
+        float wsum = 0.0f;
+
+        for (float cy = ymin; cy <= ymax; cy += 1.0f)
+        {
+            for (float cx = xmin; cx <= xmax; cx += 1.0f)
+            {
+                typename PtrTraits<SrcPtr>::value_type src_reg = src(__float2int_rd(cy), __float2int_rd(cx));
+                const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
+
+                sum = sum + static_cast<work_elem_type>(w) * src_reg;
+                wsum += w;
+            }
+        }
+
+        work_type res = (wsum > numeric_limits<float>::epsilon()) ? VecTraits<work_type>::all(0) : sum / static_cast<work_elem_type>(wsum);
+
+        return saturate_cast<typename PtrTraits<SrcPtr>::value_type>(res);
+    }
+};
+
+template <class SrcPtr> struct CubicInterPtrSz : CubicInterPtr<SrcPtr>
+{
+    int rows, cols;
+};
+
+template <class SrcPtr>
+__host__ CubicInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interCubic(const SrcPtr& src)
+{
+    CubicInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> i;
+    i.src = shrinkPtr(src);
+    i.rows = getRows(src);
+    i.cols = getCols(src);
+    return i;
+}
+
+template <class SrcPtr> struct PtrTraits< CubicInterPtrSz<SrcPtr> > : PtrTraitsBase<CubicInterPtrSz<SrcPtr>, CubicInterPtr<SrcPtr> >
+{
+};
+
+// IntegerArea
+
+template <typename SrcPtr> struct IntegerAreaInterPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef float                                  index_type;
+
+    SrcPtr src;
+    int area_width, area_height;
+
+    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        typedef typename VecTraits<src_type>::elem_type src_elem_type;
+        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
+        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;
+
+        const int sx1 = __float2int_rd(x);
+        const int sx2 = sx1 + area_width;
+
+        const int sy1 = __float2int_rd(y);
+        const int sy2 = sy1 + area_height;
+
+        work_type out = VecTraits<work_type>::all(0);
+
+        for (int dy = sy1; dy < sy2; ++dy)
+        {
+            for (int dx = sx1; dx < sx2; ++dx)
+            {
+                out = out + saturate_cast<work_type>(src(dy, dx));
+            }
+        }
+
+        const work_elem_type scale = 1.0f / (area_width * area_height);
+
+        return saturate_cast<typename PtrTraits<SrcPtr>::value_type>(out * scale);
+    }
+};
+
+template <class SrcPtr> struct IntegerAreaInterPtrSz : IntegerAreaInterPtr<SrcPtr>
+{
+    int rows, cols;
+};
+
+template <class SrcPtr>
+__host__ IntegerAreaInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interArea(const SrcPtr& src, Size areaSize)
+{
+    IntegerAreaInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> i;
+    i.src = shrinkPtr(src);
+    i.area_width = areaSize.width;
+    i.area_height = areaSize.height;
+    i.rows = getRows(src);
+    i.cols = getCols(src);
+    return i;
+}
+
+template <class SrcPtr> struct PtrTraits< IntegerAreaInterPtrSz<SrcPtr> > : PtrTraitsBase<IntegerAreaInterPtrSz<SrcPtr>, IntegerAreaInterPtr<SrcPtr> >
+{
+};
+
+// CommonArea
+
+template <typename SrcPtr> struct CommonAreaInterPtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef float                                  index_type;
+
+    SrcPtr src;
+    float area_width, area_height;
+
+    __device__ typename PtrTraits<SrcPtr>::value_type operator ()(float y, float x) const
+    {
+        typedef typename PtrTraits<SrcPtr>::value_type src_type;
+        typedef typename VecTraits<src_type>::elem_type src_elem_type;
+        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
+        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;
+
+        const float fsx1 = x;
+        const float fsx2 = fsx1 + area_width;
+
+        const int sx1 = __float2int_rd(fsx1);
+        const int sx2 = __float2int_ru(fsx2);
+
+        const float fsy1 = y;
+        const float fsy2 = fsy1 + area_height;
+
+        const int sy1 = __float2int_rd(fsy1);
+        const int sy2 = __float2int_ru(fsy2);
+
+        work_type out = VecTraits<work_type>::all(0);
+
+        for (int dy = sy1; dy < sy2; ++dy)
+        {
+            for (int dx = sx1; dx < sx2; ++dx)
+                out = out + saturate_cast<work_type>(src(dy, dx));
+
+            if (sx1 > fsx1)
+                out = out + saturate_cast<work_type>(src(dy, sx1 - 1)) * static_cast<work_elem_type>(sx1 - fsx1);
+
+            if (sx2 < fsx2)
+                out = out + saturate_cast<work_type>(src(dy, sx2)) * static_cast<work_elem_type>(fsx2 - sx2);
+        }
+
+        if (sy1 > fsy1)
+        {
+            for (int dx = sx1; dx < sx2; ++dx)
+                out = out + saturate_cast<work_type>(src(sy1 - 1, dx)) * static_cast<work_elem_type>(sy1 - fsy1);
+        }
+
+        if (sy2 < fsy2)
+        {
+            for (int dx = sx1; dx < sx2; ++dx)
+                out = out + saturate_cast<work_type>(src(sy2, dx)) * static_cast<work_elem_type>(fsy2 - sy2);
+        }
+
+        if ((sy1 > fsy1) && (sx1 > fsx1))
+            out = out + saturate_cast<work_type>(src(sy1 - 1, sx1 - 1)) * static_cast<work_elem_type>((sy1 - fsy1) * (sx1 - fsx1));
+
+        if ((sy1 > fsy1) && (sx2 < fsx2))
+            out = out + saturate_cast<work_type>(src(sy1 - 1, sx2)) * static_cast<work_elem_type>((sy1 - fsy1) * (fsx2 - sx2));
+
+        if ((sy2 < fsy2) && (sx2 < fsx2))
+            out = out + saturate_cast<work_type>(src(sy2, sx2)) * static_cast<work_elem_type>((fsy2 - sy2) * (fsx2 - sx2));
+
+        if ((sy2 < fsy2) && (sx1 > fsx1))
+            out = out + saturate_cast<work_type>(src(sy2, sx1 - 1)) * static_cast<work_elem_type>((fsy2 - sy2) * (sx1 - fsx1));
+
+        const work_elem_type scale = 1.0f / (area_width * area_height);
+
+        return saturate_cast<typename PtrTraits<SrcPtr>::value_type>(out * scale);
+    }
+};
+
+template <class SrcPtr> struct CommonAreaInterPtrSz : CommonAreaInterPtr<SrcPtr>
+{
+    int rows, cols;
+};
+
+template <class SrcPtr>
+__host__ CommonAreaInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> interArea(const SrcPtr& src, Size2f areaSize)
+{
+    CommonAreaInterPtrSz<typename PtrTraits<SrcPtr>::ptr_type> i;
+    i.src = shrinkPtr(src);
+    i.area_width = areaSize.width;
+    i.area_height = areaSize.height;
+    i.rows = getRows(src);
+    i.cols = getCols(src);
+    return i;
+}
+
+template <class SrcPtr> struct PtrTraits< CommonAreaInterPtrSz<SrcPtr> > : PtrTraitsBase<CommonAreaInterPtrSz<SrcPtr>, CommonAreaInterPtr<SrcPtr> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp
new file mode 100644
index 000000000..e47719c6e
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp
@@ -0,0 +1,100 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_LUT_HPP__
+#define __OPENCV_CUDEV_PTR2D_LUT_HPP__
+
+#include "../common.hpp"
+#include "../grid/copy.hpp"
+#include "traits.hpp"
+#include "gpumat.hpp"
+
+namespace cv { namespace cudev {
+
+template <class SrcPtr, class TablePtr> struct LutPtr
+{
+    typedef typename PtrTraits<TablePtr>::value_type value_type;
+    typedef typename PtrTraits<SrcPtr>::index_type   index_type;
+
+    SrcPtr src;
+    TablePtr tbl;
+
+    __device__ __forceinline__ typename PtrTraits<TablePtr>::value_type operator ()(typename PtrTraits<SrcPtr>::index_type y, typename PtrTraits<SrcPtr>::index_type x) const
+    {
+        return tbl(0, src(y, x));
+    }
+};
+
+template <class SrcPtr, class TablePtr> struct LutPtrSz : LutPtr<SrcPtr, TablePtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr, class TablePtr>
+__host__ LutPtrSz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<TablePtr>::ptr_type> lutPtr(const SrcPtr& src, const TablePtr& tbl)
+{
+    CV_Assert( getRows(tbl) == 1 );
+
+    LutPtrSz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<TablePtr>::ptr_type> ptr;
+    ptr.src = shrinkPtr(src);
+    ptr.tbl = shrinkPtr(tbl);
+    ptr.rows = getRows(src);
+    ptr.cols = getCols(src);
+    return ptr;
+}
+
+template <class SrcPtr, class TablePtr> struct PtrTraits< LutPtrSz<SrcPtr, TablePtr> > : PtrTraitsBase<LutPtrSz<SrcPtr, TablePtr>, LutPtr<SrcPtr, TablePtr> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/mask.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/mask.hpp
new file mode 100644
index 000000000..1ff337193
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/mask.hpp
@@ -0,0 +1,67 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_MASK_HPP__
+#define __OPENCV_CUDEV_PTR2D_MASK_HPP__
+
+#include "../common.hpp"
+#include "traits.hpp"
+
+namespace cv { namespace cudev {
+
+struct WithOutMask
+{
+    typedef bool value_type;
+    typedef int  index_type;
+
+    __device__ __forceinline__ bool operator ()(int, int) const
+    {
+        return true;
+    }
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/remap.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/remap.hpp
new file mode 100644
index 000000000..db2669a40
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/remap.hpp
@@ -0,0 +1,154 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_REMAP_HPP__
+#define __OPENCV_CUDEV_PTR2D_REMAP_HPP__
+
+#include "opencv2/core/base.hpp"
+#include "../common.hpp"
+#include "../grid/copy.hpp"
+#include "traits.hpp"
+#include "gpumat.hpp"
+
+namespace cv { namespace cudev {
+
+template <class SrcPtr, class MapPtr> struct RemapPtr1
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef typename PtrTraits<MapPtr>::index_type index_type;
+
+    SrcPtr src;
+    MapPtr map;
+
+    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(typename PtrTraits<MapPtr>::index_type y, typename PtrTraits<MapPtr>::index_type x) const
+    {
+        const typename PtrTraits<MapPtr>::value_type coord = map(y, x);
+        return src(coord.y, coord.x);
+    }
+};
+
+template <class SrcPtr, class MapXPtr, class MapYPtr> struct RemapPtr2
+{
+    typedef typename PtrTraits<SrcPtr>::value_type  value_type;
+    typedef typename PtrTraits<MapXPtr>::index_type index_type;
+
+    SrcPtr src;
+    MapXPtr mapx;
+    MapYPtr mapy;
+
+    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(typename PtrTraits<MapXPtr>::index_type y, typename PtrTraits<MapXPtr>::index_type x) const
+    {
+        const typename PtrTraits<MapXPtr>::value_type nx = mapx(y, x);
+        const typename PtrTraits<MapYPtr>::value_type ny = mapy(y, x);
+        return src(ny, nx);
+    }
+};
+
+template <class SrcPtr, class MapPtr> struct RemapPtr1Sz : RemapPtr1<SrcPtr, MapPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr, class MapXPtr, class MapYPtr> struct RemapPtr2Sz : RemapPtr2<SrcPtr, MapXPtr, MapYPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr, class MapPtr>
+__host__ RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapPtr>::ptr_type>
+remapPtr(const SrcPtr& src, const MapPtr& map)
+{
+    const int rows = getRows(map);
+    const int cols = getCols(map);
+
+    RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapPtr>::ptr_type> r;
+    r.src = shrinkPtr(src);
+    r.map = shrinkPtr(map);
+    r.rows = rows;
+    r.cols = cols;
+    return r;
+}
+
+template <class SrcPtr, class MapXPtr, class MapYPtr>
+__host__ RemapPtr2Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapXPtr>::ptr_type, typename PtrTraits<MapYPtr>::ptr_type>
+remapPtr(const SrcPtr& src, const MapXPtr& mapx, const MapYPtr& mapy)
+{
+    const int rows = getRows(mapx);
+    const int cols = getCols(mapx);
+
+    CV_Assert( getRows(mapy) == rows && getCols(mapy) == cols );
+
+    RemapPtr2Sz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<MapXPtr>::ptr_type, typename PtrTraits<MapYPtr>::ptr_type> r;
+    r.src = shrinkPtr(src);
+    r.mapx = shrinkPtr(mapx);
+    r.mapy = shrinkPtr(mapy);
+    r.rows = rows;
+    r.cols = cols;
+    return r;
+}
+
+template <class SrcPtr, class MapPtr> struct PtrTraits< RemapPtr1Sz<SrcPtr, MapPtr> > : PtrTraitsBase<RemapPtr1Sz<SrcPtr, MapPtr>, RemapPtr1<SrcPtr, MapPtr> >
+{
+};
+
+template <class SrcPtr, class MapXPtr, class MapYPtr> struct PtrTraits< RemapPtr2Sz<SrcPtr, MapXPtr, MapYPtr> > : PtrTraitsBase<RemapPtr2Sz<SrcPtr, MapXPtr, MapYPtr>, RemapPtr2<SrcPtr, MapXPtr, MapYPtr> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/resize.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/resize.hpp
new file mode 100644
index 000000000..10a4bad90
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/resize.hpp
@@ -0,0 +1,103 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_RESIZE_HPP__
+#define __OPENCV_CUDEV_PTR2D_RESIZE_HPP__
+
+#include "opencv2/core/base.hpp"
+#include "../common.hpp"
+#include "../grid/copy.hpp"
+#include "traits.hpp"
+#include "gpumat.hpp"
+
+namespace cv { namespace cudev {
+
+template <class SrcPtr> struct ResizePtr
+{
+    typedef typename PtrTraits<SrcPtr>::value_type value_type;
+    typedef typename PtrTraits<SrcPtr>::index_type index_type;
+
+    SrcPtr src;
+    float fx, fy;
+
+    __device__ __forceinline__ typename PtrTraits<SrcPtr>::value_type operator ()(typename PtrTraits<SrcPtr>::index_type y, typename PtrTraits<SrcPtr>::index_type x) const
+    {
+        const float yn = static_cast<float>(y * fy);
+        const float xn = static_cast<float>(x * fx);
+
+        return src(yn, xn);
+    }
+};
+
+template <class SrcPtr> struct ResizePtrSz : ResizePtr<SrcPtr>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr>
+__host__ ResizePtrSz<typename PtrTraits<SrcPtr>::ptr_type> resizePtr(const SrcPtr& src, float fx, float fy)
+{
+    ResizePtrSz<typename PtrTraits<SrcPtr>::ptr_type> r;
+    r.src = shrinkPtr(src);
+    r.fx = 1.0f / fx;
+    r.fy = 1.0f / fy;
+    r.rows = cv::saturate_cast<int>(getRows(src) * fy);
+    r.cols = cv::saturate_cast<int>(getCols(src) * fx);
+    return r;
+}
+
+template <class SrcPtr> struct PtrTraits< ResizePtrSz<SrcPtr> > : PtrTraitsBase<ResizePtrSz<SrcPtr>, ResizePtr<SrcPtr> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp
new file mode 100644
index 000000000..095864fcb
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp
@@ -0,0 +1,176 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_TEXTURE_HPP__
+#define __OPENCV_CUDEV_PTR2D_TEXTURE_HPP__
+
+#include <cstring>
+#include "../common.hpp"
+#include "glob.hpp"
+#include "gpumat.hpp"
+#include "traits.hpp"
+
+namespace
+{
+    template <typename T> struct CvCudevTextureRef
+    {
+        typedef texture<T, cudaTextureType2D, cudaReadModeElementType> TexRef;
+
+        static TexRef ref;
+
+        __host__ static void bind(const cv::cudev::GlobPtrSz<T>& mat,
+                                  bool normalizedCoords = false,
+                                  cudaTextureFilterMode filterMode = cudaFilterModePoint,
+                                  cudaTextureAddressMode addressMode = cudaAddressModeClamp)
+        {
+            ref.normalized = normalizedCoords;
+            ref.filterMode = filterMode;
+            ref.addressMode[0] = addressMode;
+            ref.addressMode[1] = addressMode;
+            ref.addressMode[2] = addressMode;
+
+            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
+
+            CV_CUDEV_SAFE_CALL( cudaBindTexture2D(0, &ref, mat.data, &desc, mat.cols, mat.rows, mat.step) );
+        }
+
+        __host__ static void unbind()
+        {
+            CV_CUDEV_SAFE_CALL( cudaUnbindTexture(ref) );
+        }
+    };
+
+    template <typename T>
+    typename CvCudevTextureRef<T>::TexRef CvCudevTextureRef<T>::ref;
+}
+
+namespace cv { namespace cudev {
+
+template <typename T> struct TexturePtr
+{
+    typedef T     value_type;
+    typedef float index_type;
+
+    cudaTextureObject_t texObj;
+
+    __device__ __forceinline__ T operator ()(float y, float x) const
+    {
+    #if CV_CUDEV_ARCH < 300
+        // Use the texture reference
+        return tex2D(CvCudevTextureRef<T>::ref, x, y);
+    #else
+        // Use the texture object
+        return tex2D<T>(texObj, x, y);
+    #endif
+    }
+};
+
+template <typename T> struct Texture : TexturePtr<T>
+{
+    int rows, cols;
+    bool cc30;
+
+    __host__ explicit Texture(const GlobPtrSz<T>& mat,
+                              bool normalizedCoords = false,
+                              cudaTextureFilterMode filterMode = cudaFilterModePoint,
+                              cudaTextureAddressMode addressMode = cudaAddressModeClamp)
+    {
+        cc30 = deviceSupports(FEATURE_SET_COMPUTE_30);
+
+        rows = mat.rows;
+        cols = mat.cols;
+
+        if (cc30)
+        {
+            // Use the texture object
+            cudaResourceDesc texRes;
+            std::memset(&texRes, 0, sizeof(texRes));
+            texRes.resType = cudaResourceTypePitch2D;
+            texRes.res.pitch2D.devPtr = mat.data;
+            texRes.res.pitch2D.height = mat.rows;
+            texRes.res.pitch2D.width = mat.cols;
+            texRes.res.pitch2D.pitchInBytes = mat.step;
+            texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();
+
+            cudaTextureDesc texDescr;
+            std::memset(&texDescr, 0, sizeof(texDescr));
+            texDescr.normalizedCoords = normalizedCoords;
+            texDescr.filterMode = filterMode;
+            texDescr.addressMode[0] = addressMode;
+            texDescr.addressMode[1] = addressMode;
+            texDescr.addressMode[2] = addressMode;
+            texDescr.readMode = cudaReadModeElementType;
+
+            CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
+        }
+        else
+        {
+            // Use the texture reference
+            CvCudevTextureRef<T>::bind(mat, normalizedCoords, filterMode, addressMode);
+        }
+    }
+
+    __host__ ~Texture()
+    {
+        if (cc30)
+        {
+            // Use the texture object
+            cudaDestroyTextureObject(this->texObj);
+        }
+        else
+        {
+            // Use the texture reference
+            CvCudevTextureRef<T>::unbind();
+        }
+    }
+};
+
+template <typename T> struct PtrTraits< Texture<T> > : PtrTraitsBase<Texture<T>, TexturePtr<T> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/traits.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/traits.hpp
new file mode 100644
index 000000000..7fb4b32b1
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/traits.hpp
@@ -0,0 +1,101 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_TRAITS_HPP__
+#define __OPENCV_CUDEV_PTR2D_TRAITS_HPP__
+
+#include "../common.hpp"
+
+namespace cv { namespace cudev {
+
+template <class Ptr2DSz, class Ptr2D> struct PtrTraitsBase
+{
+    typedef Ptr2DSz ptr_sz_type;
+    typedef Ptr2D   ptr_type;
+
+    typedef typename Ptr2D::value_type value_type;
+    typedef typename Ptr2D::index_type index_type;
+
+    __host__ static Ptr2D shrinkPtr(const Ptr2DSz& ptr)
+    {
+        return ptr;
+    }
+
+    __host__ static int getRows(const Ptr2DSz& ptr)
+    {
+        return ptr.rows;
+    }
+
+    __host__ static int getCols(const Ptr2DSz& ptr)
+    {
+        return ptr.cols;
+    }
+};
+
+template <class Ptr2DSz> struct PtrTraits : PtrTraitsBase<Ptr2DSz, Ptr2DSz>
+{
+};
+
+template <class Ptr2DSz>
+__host__ typename PtrTraits<Ptr2DSz>::ptr_type shrinkPtr(const Ptr2DSz& ptr)
+{
+    return PtrTraits<Ptr2DSz>::shrinkPtr(ptr);
+}
+
+template <class Ptr2DSz>
+__host__ int getRows(const Ptr2DSz& ptr)
+{
+    return PtrTraits<Ptr2DSz>::getRows(ptr);
+}
+
+template <class Ptr2DSz>
+__host__ int getCols(const Ptr2DSz& ptr)
+{
+    return PtrTraits<Ptr2DSz>::getCols(ptr);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/transform.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/transform.hpp
new file mode 100644
index 000000000..f540e7521
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/transform.hpp
@@ -0,0 +1,151 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_TRANSFORM_HPP__
+#define __OPENCV_CUDEV_PTR2D_TRANSFORM_HPP__
+
+#include "../common.hpp"
+#include "../grid/copy.hpp"
+#include "traits.hpp"
+#include "gpumat.hpp"
+
+namespace cv { namespace cudev {
+
+// UnaryTransformPtr
+
+template <class SrcPtr, class Op> struct UnaryTransformPtr
+{
+    typedef typename Op::result_type               value_type;
+    typedef typename PtrTraits<SrcPtr>::index_type index_type;
+
+    SrcPtr src;
+    Op op;
+
+    __device__ __forceinline__ typename Op::result_type operator ()(typename PtrTraits<SrcPtr>::index_type y, typename PtrTraits<SrcPtr>::index_type x) const
+    {
+        return op(src(y, x));
+    }
+};
+
+template <class SrcPtr, class Op> struct UnaryTransformPtrSz : UnaryTransformPtr<SrcPtr, Op>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class SrcPtr, class Op>
+__host__ UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, Op>
+transformPtr(const SrcPtr& src, const Op& op)
+{
+    UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, Op> ptr;
+    ptr.src = shrinkPtr(src);
+    ptr.op = op;
+    ptr.rows = getRows(src);
+    ptr.cols = getCols(src);
+    return ptr;
+}
+
+template <class SrcPtr, class Op> struct PtrTraits< UnaryTransformPtrSz<SrcPtr, Op> > : PtrTraitsBase<UnaryTransformPtrSz<SrcPtr, Op>, UnaryTransformPtr<SrcPtr, Op> >
+{
+};
+
+// BinaryTransformPtr
+
+template <class Src1Ptr, class Src2Ptr, class Op> struct BinaryTransformPtr
+{
+    typedef typename Op::result_type                value_type;
+    typedef typename PtrTraits<Src1Ptr>::index_type index_type;
+
+    Src1Ptr src1;
+    Src2Ptr src2;
+    Op op;
+
+    __device__ __forceinline__ typename Op::result_type operator ()(typename PtrTraits<Src1Ptr>::index_type y, typename PtrTraits<Src1Ptr>::index_type x) const
+    {
+        return op(src1(y, x), src2(y, x));
+    }
+};
+
+template <class Src1Ptr, class Src2Ptr, class Op> struct BinaryTransformPtrSz : BinaryTransformPtr<Src1Ptr, Src2Ptr, Op>
+{
+    int rows, cols;
+
+    template <typename T>
+    __host__ void assignTo(GpuMat_<T>& dst, Stream& stream = Stream::Null()) const
+    {
+        gridCopy(*this, dst, stream);
+    }
+};
+
+template <class Src1Ptr, class Src2Ptr, class Op>
+__host__ BinaryTransformPtrSz<typename PtrTraits<Src1Ptr>::ptr_type, typename PtrTraits<Src2Ptr>::ptr_type, Op>
+transformPtr(const Src1Ptr& src1, const Src2Ptr& src2, const Op& op)
+{
+    const int rows = getRows(src1);
+    const int cols = getCols(src1);
+
+    CV_Assert( getRows(src2) == rows && getCols(src2) == cols );
+
+    BinaryTransformPtrSz<typename PtrTraits<Src1Ptr>::ptr_type, typename PtrTraits<Src2Ptr>::ptr_type, Op> ptr;
+    ptr.src1 = shrinkPtr(src1);
+    ptr.src2 = shrinkPtr(src2);
+    ptr.op = op;
+    ptr.rows = rows;
+    ptr.cols = cols;
+    return ptr;
+}
+
+template <class Src1Ptr, class Src2Ptr, class Op> struct PtrTraits< BinaryTransformPtrSz<Src1Ptr, Src2Ptr, Op> > : PtrTraitsBase<BinaryTransformPtrSz<Src1Ptr, Src2Ptr, Op>, BinaryTransformPtr<Src1Ptr, Src2Ptr, Op> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/warping.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/warping.hpp
new file mode 100644
index 000000000..80e5fbeef
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/warping.hpp
@@ -0,0 +1,152 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_WARPING_HPP__
+#define __OPENCV_CUDEV_PTR2D_WARPING_HPP__
+
+#include "../common.hpp"
+#include "traits.hpp"
+#include "remap.hpp"
+#include "gpumat.hpp"
+
+namespace cv { namespace cudev {
+
+// affine
+
+struct AffineMapPtr
+{
+    typedef float2 value_type;
+    typedef float  index_type;
+
+    const float* warpMat;
+
+    __device__ __forceinline__ float2 operator ()(float y, float x) const
+    {
+        const float xcoo = warpMat[0] * x + warpMat[1] * y + warpMat[2];
+        const float ycoo = warpMat[3] * x + warpMat[4] * y + warpMat[5];
+
+        return make_float2(xcoo, ycoo);
+    }
+};
+
+struct AffineMapPtrSz : AffineMapPtr
+{
+    int rows, cols;
+};
+
+template <> struct PtrTraits<AffineMapPtrSz> : PtrTraitsBase<AffineMapPtrSz, AffineMapPtr>
+{
+};
+
+__host__ static AffineMapPtrSz affineMap(Size dstSize, const GpuMat_<float>& warpMat)
+{
+    CV_Assert( warpMat.rows == 2 && warpMat.cols == 3 );
+    CV_Assert( warpMat.isContinuous() );
+
+    AffineMapPtrSz map;
+    map.warpMat = warpMat[0];
+    map.rows = dstSize.height;
+    map.cols = dstSize.width;
+    return map;
+}
+
+template <class SrcPtr>
+__host__ RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, AffineMapPtr>
+warpAffinePtr(const SrcPtr& src, Size dstSize, const GpuMat_<float>& warpMat)
+{
+    return remapPtr(src, affineMap(dstSize, warpMat));
+}
+
+// perspective
+
+struct PerspectiveMapPtr
+{
+    typedef float2 value_type;
+    typedef float  index_type;
+
+    const float* warpMat;
+
+    __device__ __forceinline__ float2 operator ()(float y, float x) const
+    {
+        const float coeff = 1.0f / (warpMat[6] * x + warpMat[7] * y + warpMat[8]);
+
+        const float xcoo = coeff * (warpMat[0] * x + warpMat[1] * y + warpMat[2]);
+        const float ycoo = coeff * (warpMat[3] * x + warpMat[4] * y + warpMat[5]);
+
+        return make_float2(xcoo, ycoo);
+    }
+};
+
+struct PerspectiveMapPtrSz : PerspectiveMapPtr
+{
+    int rows, cols;
+};
+
+template <> struct PtrTraits<PerspectiveMapPtrSz> : PtrTraitsBase<PerspectiveMapPtrSz, PerspectiveMapPtr>
+{
+};
+
+__host__ static PerspectiveMapPtrSz perspectiveMap(Size dstSize, const GpuMat_<float>& warpMat)
+{
+    CV_Assert( warpMat.rows == 3 && warpMat.cols == 3 );
+    CV_Assert( warpMat.isContinuous() );
+
+    PerspectiveMapPtrSz map;
+    map.warpMat = warpMat[0];
+    map.rows = dstSize.height;
+    map.cols = dstSize.width;
+    return map;
+}
+
+template <class SrcPtr>
+__host__ RemapPtr1Sz<typename PtrTraits<SrcPtr>::ptr_type, PerspectiveMapPtr>
+warpPerspectivePtr(const SrcPtr& src, Size dstSize, const GpuMat_<float>& warpMat)
+{
+    return remapPtr(src, perspectiveMap(dstSize, warpMat));
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/zip.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/zip.hpp
new file mode 100644
index 000000000..934939f62
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/zip.hpp
@@ -0,0 +1,173 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_PTR2D_ZIP_HPP__
+#define __OPENCV_CUDEV_PTR2D_ZIP_HPP__
+
+#include "../common.hpp"
+#include "../util/tuple.hpp"
+#include "traits.hpp"
+
+namespace cv { namespace cudev {
+
+template <class PtrTuple> struct ZipPtr;
+
+template <class Ptr0, class Ptr1> struct ZipPtr< tuple<Ptr0, Ptr1> > : tuple<Ptr0, Ptr1>
+{
+    typedef tuple<typename PtrTraits<Ptr0>::value_type,
+                  typename PtrTraits<Ptr1>::value_type> value_type;
+    typedef typename PtrTraits<Ptr0>::index_type        index_type;
+
+    __host__ __device__ __forceinline__ ZipPtr() {}
+    __host__ __device__ __forceinline__ ZipPtr(const tuple<Ptr0, Ptr1>& t) : tuple<Ptr0, Ptr1>(t) {}
+
+    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
+    {
+        return make_tuple(cv::cudev::get<0>(*this)(y, x), cv::cudev::get<1>(*this)(y, x));
+    }
+};
+
+template <class Ptr0, class Ptr1, class Ptr2> struct ZipPtr< tuple<Ptr0, Ptr1, Ptr2> > : tuple<Ptr0, Ptr1, Ptr2>
+{
+    typedef tuple<typename PtrTraits<Ptr0>::value_type,
+                  typename PtrTraits<Ptr1>::value_type,
+                  typename PtrTraits<Ptr2>::value_type> value_type;
+    typedef typename PtrTraits<Ptr0>::index_type        index_type;
+
+    __host__ __device__ __forceinline__ ZipPtr() {}
+    __host__ __device__ __forceinline__ ZipPtr(const tuple<Ptr0, Ptr1, Ptr2>& t) : tuple<Ptr0, Ptr1, Ptr2>(t) {}
+
+    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
+    {
+        return make_tuple(cv::cudev::get<0>(*this)(y, x), cv::cudev::get<1>(*this)(y, x), cv::cudev::get<2>(*this)(y, x));
+    }
+};
+
+template <class Ptr0, class Ptr1, class Ptr2, class Ptr3> struct ZipPtr< tuple<Ptr0, Ptr1, Ptr2, Ptr3> > : tuple<Ptr0, Ptr1, Ptr2, Ptr3>
+{
+    typedef tuple<typename PtrTraits<Ptr0>::value_type,
+                  typename PtrTraits<Ptr1>::value_type,
+                  typename PtrTraits<Ptr2>::value_type,
+                  typename PtrTraits<Ptr3>::value_type> value_type;
+    typedef typename PtrTraits<Ptr0>::index_type        index_type;
+
+    __host__ __device__ __forceinline__ ZipPtr() {}
+    __host__ __device__ __forceinline__ ZipPtr(const tuple<Ptr0, Ptr1, Ptr2, Ptr3>& t) : tuple<Ptr0, Ptr1, Ptr2, Ptr3>(t) {}
+
+    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
+    {
+        return make_tuple(cv::cudev::get<0>(*this)(y, x), cv::cudev::get<1>(*this)(y, x), cv::cudev::get<2>(*this)(y, x), cv::cudev::get<3>(*this)(y, x));
+    }
+};
+
+template <class PtrTuple> struct ZipPtrSz : ZipPtr<PtrTuple>
+{
+    int rows, cols;
+
+    __host__ __device__ __forceinline__ ZipPtrSz() {}
+    __host__ __device__ __forceinline__ ZipPtrSz(const PtrTuple& t) : ZipPtr<PtrTuple>(t) {}
+};
+
+template <class Ptr0, class Ptr1>
+__host__ ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type, typename PtrTraits<Ptr1>::ptr_type> >
+zipPtr(const Ptr0& ptr0, const Ptr1& ptr1)
+{
+    const int rows = getRows(ptr0);
+    const int cols = getCols(ptr0);
+
+    CV_Assert( getRows(ptr1) == rows && getCols(ptr1) == cols );
+
+    ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type, typename PtrTraits<Ptr1>::ptr_type> >
+            z(make_tuple(shrinkPtr(ptr0), shrinkPtr(ptr1)));
+    z.rows = rows;
+    z.cols = cols;
+
+    return z;
+}
+
+template <class Ptr0, class Ptr1, class Ptr2>
+__host__ ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type, typename PtrTraits<Ptr1>::ptr_type, typename PtrTraits<Ptr2>::ptr_type> >
+zipPtr(const Ptr0& ptr0, const Ptr1& ptr1, const Ptr2& ptr2)
+{
+    const int rows = getRows(ptr0);
+    const int cols = getCols(ptr0);
+
+    CV_Assert( getRows(ptr1) == rows && getCols(ptr1) == cols );
+    CV_Assert( getRows(ptr2) == rows && getCols(ptr2) == cols );
+
+    ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type, typename PtrTraits<Ptr1>::ptr_type, typename PtrTraits<Ptr2>::ptr_type> >
+            z(make_tuple(shrinkPtr(ptr0), shrinkPtr(ptr1), shrinkPtr(ptr2)));
+    z.rows = rows;
+    z.cols = cols;
+
+    return z;
+}
+
+template <class Ptr0, class Ptr1, class Ptr2, class Ptr3>
+__host__ ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type, typename PtrTraits<Ptr1>::ptr_type, typename PtrTraits<Ptr2>::ptr_type, typename PtrTraits<Ptr3>::ptr_type> >
+zipPtr(const Ptr0& ptr0, const Ptr1& ptr1, const Ptr2& ptr2, const Ptr3& ptr3)
+{
+    const int rows = getRows(ptr0);
+    const int cols = getCols(ptr0);
+
+    CV_Assert( getRows(ptr1) == rows && getCols(ptr1) == cols );
+    CV_Assert( getRows(ptr2) == rows && getCols(ptr2) == cols );
+    CV_Assert( getRows(ptr3) == rows && getCols(ptr3) == cols );
+
+    ZipPtrSz< tuple<typename PtrTraits<Ptr0>::ptr_type, typename PtrTraits<Ptr1>::ptr_type, typename PtrTraits<Ptr2>::ptr_type, typename PtrTraits<Ptr3>::ptr_type> >
+            z(make_tuple(shrinkPtr(ptr0), shrinkPtr(ptr1), shrinkPtr(ptr2), shrinkPtr(ptr3)));
+    z.rows = rows;
+    z.cols = cols;
+
+    return z;
+}
+
+template <class PtrTuple> struct PtrTraits< ZipPtrSz<PtrTuple> > : PtrTraitsBase<ZipPtrSz<PtrTuple>, ZipPtr<PtrTuple> >
+{
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/atomic.hpp b/modules/cudev/include/opencv2/cudev/util/atomic.hpp
new file mode 100644
index 000000000..2da110231
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/atomic.hpp
@@ -0,0 +1,197 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_ATOMIC_HPP__
+#define __OPENCV_CUDEV_UTIL_ATOMIC_HPP__
+
+#include "../common.hpp"
+
+namespace cv { namespace cudev {
+
+// atomicAdd
+
+__device__ __forceinline__ int atomicAdd(int* address, int val)
+{
+    return ::atomicAdd(address, val);
+}
+
+__device__ __forceinline__ uint atomicAdd(uint* address, uint val)
+{
+    return ::atomicAdd(address, val);
+}
+
+__device__ __forceinline__ float atomicAdd(float* address, float val)
+{
+#if CV_CUDEV_ARCH >= 200
+    return ::atomicAdd(address, val);
+#else
+    int* address_as_i = (int*) address;
+    int old = *address_as_i, assumed;
+    do {
+        assumed = old;
+        old = ::atomicCAS(address_as_i, assumed,
+            __float_as_int(val + __int_as_float(assumed)));
+    } while (assumed != old);
+    return __int_as_float(old);
+#endif
+}
+
+__device__ static double atomicAdd(double* address, double val)
+{
+#if CV_CUDEV_ARCH >= 130
+    unsigned long long int* address_as_ull = (unsigned long long int*) address;
+    unsigned long long int old = *address_as_ull, assumed;
+    do {
+        assumed = old;
+        old = ::atomicCAS(address_as_ull, assumed,
+            __double_as_longlong(val + __longlong_as_double(assumed)));
+    } while (assumed != old);
+    return __longlong_as_double(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0;
+#endif
+}
+
+// atomicMin
+
+__device__ __forceinline__ int atomicMin(int* address, int val)
+{
+    return ::atomicMin(address, val);
+}
+
+__device__ __forceinline__ uint atomicMin(uint* address, uint val)
+{
+    return ::atomicMin(address, val);
+}
+
+__device__ static float atomicMin(float* address, float val)
+{
+#if CV_CUDEV_ARCH >= 120
+    int* address_as_i = (int*) address;
+    int old = *address_as_i, assumed;
+    do {
+        assumed = old;
+        old = ::atomicCAS(address_as_i, assumed,
+            __float_as_int(::fminf(val, __int_as_float(assumed))));
+    } while (assumed != old);
+    return __int_as_float(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0f;
+#endif
+}
+
+__device__ static double atomicMin(double* address, double val)
+{
+#if CV_CUDEV_ARCH >= 130
+    unsigned long long int* address_as_ull = (unsigned long long int*) address;
+    unsigned long long int old = *address_as_ull, assumed;
+    do {
+        assumed = old;
+        old = ::atomicCAS(address_as_ull, assumed,
+            __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
+    } while (assumed != old);
+    return __longlong_as_double(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0;
+#endif
+}
+
+// atomicMax
+
+__device__ __forceinline__ int atomicMax(int* address, int val)
+{
+    return ::atomicMax(address, val);
+}
+
+__device__ __forceinline__ uint atomicMax(uint* address, uint val)
+{
+    return ::atomicMax(address, val);
+}
+
+__device__ static float atomicMax(float* address, float val)
+{
+#if CV_CUDEV_ARCH >= 120
+    int* address_as_i = (int*) address;
+    int old = *address_as_i, assumed;
+    do {
+        assumed = old;
+        old = ::atomicCAS(address_as_i, assumed,
+            __float_as_int(::fmaxf(val, __int_as_float(assumed))));
+    } while (assumed != old);
+    return __int_as_float(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0f;
+#endif
+}
+
+__device__ static double atomicMax(double* address, double val)
+{
+#if CV_CUDEV_ARCH >= 130
+    unsigned long long int* address_as_ull = (unsigned long long int*) address;
+    unsigned long long int old = *address_as_ull, assumed;
+    do {
+        assumed = old;
+        old = ::atomicCAS(address_as_ull, assumed,
+            __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
+    } while (assumed != old);
+    return __longlong_as_double(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0;
+#endif
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/detail/tuple.hpp b/modules/cudev/include/opencv2/cudev/util/detail/tuple.hpp
new file mode 100644
index 000000000..2fc84946b
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/detail/tuple.hpp
@@ -0,0 +1,175 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_TUPLE_DETAIL_HPP__
+#define __OPENCV_CUDEV_UTIL_TUPLE_DETAIL_HPP__
+
+#include <thrust/tuple.h>
+
+namespace cv { namespace cudev {
+
+namespace tuple_detail
+{
+    using thrust::tuple;
+    using thrust::tuple_size;
+    using thrust::get;
+    using thrust::tuple_element;
+    using thrust::make_tuple;
+    using thrust::tie;
+
+    template <class Tuple, int SIZE, template <typename T> class CvtOp> struct ConvertTuple;
+
+    template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 2, CvtOp>
+    {
+        typedef tuple<
+            typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<1, Tuple>::type>::type
+        > type;
+    };
+
+    template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 3, CvtOp>
+    {
+        typedef tuple<
+            typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<2, Tuple>::type>::type
+        > type;
+    };
+
+    template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 4, CvtOp>
+    {
+        typedef tuple<
+            typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<3, Tuple>::type>::type
+        > type;
+    };
+
+    template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 5, CvtOp>
+    {
+        typedef tuple<
+            typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<4, Tuple>::type>::type
+        > type;
+    };
+
+    template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 6, CvtOp>
+    {
+        typedef tuple<
+            typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<5, Tuple>::type>::type
+        > type;
+    };
+
+    template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 7, CvtOp>
+    {
+        typedef tuple<
+            typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<5, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<6, Tuple>::type>::type
+        > type;
+    };
+
+    template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 8, CvtOp>
+    {
+        typedef tuple<
+            typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<5, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<6, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<7, Tuple>::type>::type
+        > type;
+    };
+
+    template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 9, CvtOp>
+    {
+        typedef tuple<
+            typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<5, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<6, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<7, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<8, Tuple>::type>::type
+        > type;
+    };
+
+    template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple<Tuple, 10, CvtOp>
+    {
+        typedef tuple<
+            typename CvtOp<typename tuple_element<0, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<1, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<2, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<3, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<4, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<5, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<6, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<7, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<8, Tuple>::type>::type,
+            typename CvtOp<typename tuple_element<9, Tuple>::type>::type
+        > type;
+    };
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/detail/type_traits.hpp b/modules/cudev/include/opencv2/cudev/util/detail/type_traits.hpp
new file mode 100644
index 000000000..d71164283
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/detail/type_traits.hpp
@@ -0,0 +1,238 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_TYPE_TRAITS_DETAIL_HPP__
+#define __OPENCV_CUDEV_UTIL_TYPE_TRAITS_DETAIL_HPP__
+
+#include "../../common.hpp"
+
+namespace cv { namespace cudev {
+
+namespace type_traits_detail
+{
+    template <typename T> struct IsSignedIntergral { enum {value = 0}; };
+    template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
+    template <> struct IsSignedIntergral<short> { enum {value = 1}; };
+    template <> struct IsSignedIntergral<int> { enum {value = 1}; };
+
+    template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
+    template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
+    template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
+    template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
+
+    template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
+    template <> struct IsIntegral<char> { enum {value = 1}; };
+    template <> struct IsIntegral<bool> { enum {value = 1}; };
+
+    template <typename T> struct IsFloat { enum {value = 0}; };
+    template <> struct IsFloat<float> { enum {value = 1}; };
+    template <> struct IsFloat<double> { enum {value = 1}; };
+
+    template <typename T> struct IsVec { enum {value = 0}; };
+    template <> struct IsVec<uchar1> { enum {value = 1}; };
+    template <> struct IsVec<uchar2> { enum {value = 1}; };
+    template <> struct IsVec<uchar3> { enum {value = 1}; };
+    template <> struct IsVec<uchar4> { enum {value = 1}; };
+    template <> struct IsVec<char1> { enum {value = 1}; };
+    template <> struct IsVec<char2> { enum {value = 1}; };
+    template <> struct IsVec<char3> { enum {value = 1}; };
+    template <> struct IsVec<char4> { enum {value = 1}; };
+    template <> struct IsVec<ushort1> { enum {value = 1}; };
+    template <> struct IsVec<ushort2> { enum {value = 1}; };
+    template <> struct IsVec<ushort3> { enum {value = 1}; };
+    template <> struct IsVec<ushort4> { enum {value = 1}; };
+    template <> struct IsVec<short1> { enum {value = 1}; };
+    template <> struct IsVec<short2> { enum {value = 1}; };
+    template <> struct IsVec<short3> { enum {value = 1}; };
+    template <> struct IsVec<short4> { enum {value = 1}; };
+    template <> struct IsVec<uint1> { enum {value = 1}; };
+    template <> struct IsVec<uint2> { enum {value = 1}; };
+    template <> struct IsVec<uint3> { enum {value = 1}; };
+    template <> struct IsVec<uint4> { enum {value = 1}; };
+    template <> struct IsVec<int1> { enum {value = 1}; };
+    template <> struct IsVec<int2> { enum {value = 1}; };
+    template <> struct IsVec<int3> { enum {value = 1}; };
+    template <> struct IsVec<int4> { enum {value = 1}; };
+    template <> struct IsVec<float1> { enum {value = 1}; };
+    template <> struct IsVec<float2> { enum {value = 1}; };
+    template <> struct IsVec<float3> { enum {value = 1}; };
+    template <> struct IsVec<float4> { enum {value = 1}; };
+    template <> struct IsVec<double1> { enum {value = 1}; };
+    template <> struct IsVec<double2> { enum {value = 1}; };
+    template <> struct IsVec<double3> { enum {value = 1}; };
+    template <> struct IsVec<double4> { enum {value = 1}; };
+
+    template <class U> struct AddParameterType { typedef const U& type; };
+    template <class U> struct AddParameterType<U&> { typedef U& type; };
+    template <> struct AddParameterType<void> { typedef void type; };
+
+    // ReferenceTraits
+
+    template <class U> struct ReferenceTraits
+    {
+        enum { value = 0 };
+        typedef U type;
+    };
+    template <class U> struct ReferenceTraits<U&>
+    {
+        enum { value = 1 };
+        typedef U type;
+    };
+
+    // PointerTraits
+
+    template <class U> struct PointerTraits
+    {
+        enum { value = 0 };
+        typedef void type;
+    };
+    template <class U> struct PointerTraits<U*>
+    {
+        enum { value = 1 };
+        typedef U type;
+    };
+    template <class U> struct PointerTraits<U*&>
+    {
+        enum { value = 1 };
+        typedef U type;
+    };
+
+    // UnConst
+
+    template <class U> struct UnConst
+    {
+        typedef U type;
+        enum { value = 0 };
+    };
+    template <class U> struct UnConst<const U>
+    {
+        typedef U type;
+        enum { value = 1 };
+    };
+    template <class U> struct UnConst<const U&>
+    {
+        typedef U& type;
+        enum { value = 1 };
+    };
+
+    // UnVolatile
+
+    template <class U> struct UnVolatile
+    {
+        typedef U type;
+        enum { value = 0 };
+    };
+    template <class U> struct UnVolatile<volatile U>
+    {
+        typedef U type;
+        enum { value = 1 };
+    };
+    template <class U> struct UnVolatile<volatile U&>
+    {
+        typedef U& type;
+        enum { value = 1 };
+    };
+
+    // IsSimpleParameter
+
+    template <typename T> struct IsSimpleParameter
+    {
+        enum { value = IsIntegral<T>::value
+               || IsFloat<T>::value
+               || PointerTraits<typename ReferenceTraits<T>::type>::value};
+    };
+
+    // LargerDepth
+
+    template <bool, typename ThenType, typename ElseType> struct SelectIf
+    {
+        typedef ThenType type;
+    };
+    template <typename ThenType, typename ElseType> struct SelectIf<false, ThenType, ElseType>
+    {
+        typedef ElseType type;
+    };
+
+    template <typename A, typename B> struct LargerDepth
+    {
+        typedef typename SelectIf<sizeof(A) >= sizeof(B), A, B>::type type;
+    };
+    template <typename A> struct LargerDepth<A, float>
+    {
+        typedef float type;
+    };
+    template <typename A> struct LargerDepth<float, A>
+    {
+        typedef float type;
+    };
+    template <typename A> struct LargerDepth<A, double>
+    {
+        typedef double type;
+    };
+    template <typename A> struct LargerDepth<double, A>
+    {
+        typedef double type;
+    };
+    template <> struct LargerDepth<float, float>
+    {
+        typedef float type;
+    };
+    template <> struct LargerDepth<float, double>
+    {
+        typedef double type;
+    };
+    template <> struct LargerDepth<double, float>
+    {
+        typedef double type;
+    };
+    template <> struct LargerDepth<double, double>
+    {
+        typedef double type;
+    };
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/limits.hpp b/modules/cudev/include/opencv2/cudev/util/limits.hpp
new file mode 100644
index 000000000..58faca6b5
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/limits.hpp
@@ -0,0 +1,124 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_LIMITS_HPP__
+#define __OPENCV_CUDEV_UTIL_LIMITS_HPP__
+
+#include <limits.h>
+#include <float.h>
+#include "../common.hpp"
+
+namespace cv { namespace cudev {
+
+template <class T> struct numeric_limits;
+
+template <> struct numeric_limits<bool>
+{
+    __device__ __forceinline__ static bool min() { return false; }
+    __device__ __forceinline__ static bool max() { return true;  }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<schar>
+{
+    __device__ __forceinline__ static schar min() { return SCHAR_MIN; }
+    __device__ __forceinline__ static schar max() { return SCHAR_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<uchar>
+{
+    __device__ __forceinline__ static uchar min() { return 0; }
+    __device__ __forceinline__ static uchar max() { return UCHAR_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<short>
+{
+    __device__ __forceinline__ static short min() { return SHRT_MIN; }
+    __device__ __forceinline__ static short max() { return SHRT_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<ushort>
+{
+    __device__ __forceinline__ static ushort min() { return 0; }
+    __device__ __forceinline__ static ushort max() { return USHRT_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<int>
+{
+    __device__ __forceinline__ static int min() { return INT_MIN; }
+    __device__ __forceinline__ static int max() { return INT_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<uint>
+{
+    __device__ __forceinline__ static uint min() { return 0; }
+    __device__ __forceinline__ static uint max() { return UINT_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<float>
+{
+    __device__ __forceinline__ static float min() { return FLT_MIN; }
+    __device__ __forceinline__ static float max() { return FLT_MAX; }
+    __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<double>
+{
+    __device__ __forceinline__ static double min() { return DBL_MIN; }
+    __device__ __forceinline__ static double max() { return DBL_MAX; }
+    __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
+    static const bool is_signed = true;
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp b/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp
new file mode 100644
index 000000000..ff7ce8598
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp
@@ -0,0 +1,272 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_SATURATE_CAST_HPP__
+#define __OPENCV_CUDEV_UTIL_SATURATE_CAST_HPP__
+
+#include "../common.hpp"
+
+namespace cv { namespace cudev {
+
+template <typename T> __device__ __forceinline__ T saturate_cast(uchar v) { return T(v); }
+template <typename T> __device__ __forceinline__ T saturate_cast(schar v) { return T(v); }
+template <typename T> __device__ __forceinline__ T saturate_cast(ushort v) { return T(v); }
+template <typename T> __device__ __forceinline__ T saturate_cast(short v) { return T(v); }
+template <typename T> __device__ __forceinline__ T saturate_cast(uint v) { return T(v); }
+template <typename T> __device__ __forceinline__ T saturate_cast(int v) { return T(v); }
+template <typename T> __device__ __forceinline__ T saturate_cast(float v) { return T(v); }
+template <typename T> __device__ __forceinline__ T saturate_cast(double v) { return T(v); }
+
+template <> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
+{
+    uint res = 0;
+    int vi = v;
+    asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
+    return res;
+}
+template <> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
+{
+    uint res = 0;
+    asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
+    return res;
+}
+template <> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
+{
+    uint res = 0;
+    asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
+    return res;
+}
+template <> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
+{
+    uint res = 0;
+    asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
+{
+    uint res = 0;
+    asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
+{
+    uint res = 0;
+    asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
+    return res;
+}
+template <> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
+{
+    uint res = 0;
+    asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
+    return res;
+}
+
+template <> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
+{
+    uint res = 0;
+    uint vi = v;
+    asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
+    return res;
+}
+template <> __device__ __forceinline__ schar saturate_cast<schar>(short v)
+{
+    uint res = 0;
+    asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
+    return res;
+}
+template <> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
+{
+    uint res = 0;
+    asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
+    return res;
+}
+template <> __device__ __forceinline__ schar saturate_cast<schar>(int v)
+{
+    uint res = 0;
+    asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
+{
+    uint res = 0;
+    asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ schar saturate_cast<schar>(float v)
+{
+    uint res = 0;
+    asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
+    return res;
+}
+template <> __device__ __forceinline__ schar saturate_cast<schar>(double v)
+{
+    uint res = 0;
+    asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
+    return res;
+}
+
+template <> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
+{
+    ushort res = 0;
+    int vi = v;
+    asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
+    return res;
+}
+template <> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
+{
+    ushort res = 0;
+    asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
+    return res;
+}
+template <> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
+{
+    ushort res = 0;
+    asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
+{
+    ushort res = 0;
+    asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
+{
+    ushort res = 0;
+    asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
+    return res;
+}
+template <> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
+{
+    ushort res = 0;
+    asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
+    return res;
+}
+
+template <> __device__ __forceinline__ short saturate_cast<short>(ushort v)
+{
+    short res = 0;
+    asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
+    return res;
+}
+template <> __device__ __forceinline__ short saturate_cast<short>(int v)
+{
+    short res = 0;
+    asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ short saturate_cast<short>(uint v)
+{
+    short res = 0;
+    asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ short saturate_cast<short>(float v)
+{
+    short res = 0;
+    asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
+    return res;
+}
+template <> __device__ __forceinline__ short saturate_cast<short>(double v)
+{
+    short res = 0;
+    asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
+    return res;
+}
+
+template <> __device__ __forceinline__ int saturate_cast<int>(uint v)
+{
+    int res = 0;
+    asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ int saturate_cast<int>(float v)
+{
+    return __float2int_rn(v);
+}
+template <> __device__ __forceinline__ int saturate_cast<int>(double v)
+{
+#if CV_CUDEV_ARCH >= 130
+    return __double2int_rn(v);
+#else
+    return saturate_cast<int>((float) v);
+#endif
+}
+
+template <> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
+{
+    uint res = 0;
+    int vi = v;
+    asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
+    return res;
+}
+template <> __device__ __forceinline__ uint saturate_cast<uint>(short v)
+{
+    uint res = 0;
+    asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
+    return res;
+}
+template <> __device__ __forceinline__ uint saturate_cast<uint>(int v)
+{
+    uint res = 0;
+    asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
+    return res;
+}
+template <> __device__ __forceinline__ uint saturate_cast<uint>(float v)
+{
+    return __float2uint_rn(v);
+}
+template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
+{
+#if CV_CUDEV_ARCH >= 130
+    return __double2uint_rn(v);
+#else
+    return saturate_cast<uint>((float) v);
+#endif
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/simd_functions.hpp b/modules/cudev/include/opencv2/cudev/util/simd_functions.hpp
new file mode 100644
index 000000000..db63f5180
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/simd_functions.hpp
@@ -0,0 +1,913 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*
+ * Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ *   Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ *   Neither the name of NVIDIA Corporation nor the names of its contributors
+ *   may be used to endorse or promote products derived from this software
+ *   without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_SIMD_FUNCTIONS_HPP__
+#define __OPENCV_CUDEV_UTIL_SIMD_FUNCTIONS_HPP__
+
+#include "../common.hpp"
+
+/*
+  This header file contains inline functions that implement intra-word SIMD
+  operations, that are hardware accelerated on sm_3x (Kepler) GPUs. Efficient
+  emulation code paths are provided for earlier architectures (sm_1x, sm_2x)
+  to make the code portable across all GPUs supported by CUDA. The following
+  functions are currently implemented:
+
+  vadd2(a,b)      per-halfword unsigned addition, with wrap-around: a + b
+  vsub2(a,b)      per-halfword unsigned subtraction, with wrap-around: a - b
+  vabsdiff2(a,b)  per-halfword unsigned absolute difference: |a - b|
+  vavg2(a,b)      per-halfword unsigned average: (a + b) / 2
+  vavrg2(a,b)     per-halfword unsigned rounded average: (a + b + 1) / 2
+  vseteq2(a,b)    per-halfword unsigned comparison: a == b ? 1 : 0
+  vcmpeq2(a,b)    per-halfword unsigned comparison: a == b ? 0xffff : 0
+  vsetge2(a,b)    per-halfword unsigned comparison: a >= b ? 1 : 0
+  vcmpge2(a,b)    per-halfword unsigned comparison: a >= b ? 0xffff : 0
+  vsetgt2(a,b)    per-halfword unsigned comparison: a > b ? 1 : 0
+  vcmpgt2(a,b)    per-halfword unsigned comparison: a > b ? 0xffff : 0
+  vsetle2(a,b)    per-halfword unsigned comparison: a <= b ? 1 : 0
+  vcmple2(a,b)    per-halfword unsigned comparison: a <= b ? 0xffff : 0
+  vsetlt2(a,b)    per-halfword unsigned comparison: a < b ? 1 : 0
+  vcmplt2(a,b)    per-halfword unsigned comparison: a < b ? 0xffff : 0
+  vsetne2(a,b)    per-halfword unsigned comparison: a != b ? 1 : 0
+  vcmpne2(a,b)    per-halfword unsigned comparison: a != b ? 0xffff : 0
+  vmax2(a,b)      per-halfword unsigned maximum: max(a, b)
+  vmin2(a,b)      per-halfword unsigned minimum: min(a, b)
+
+  vadd4(a,b)      per-byte unsigned addition, with wrap-around: a + b
+  vsub4(a,b)      per-byte unsigned subtraction, with wrap-around: a - b
+  vabsdiff4(a,b)  per-byte unsigned absolute difference: |a - b|
+  vavg4(a,b)      per-byte unsigned average: (a + b) / 2
+  vavrg4(a,b)     per-byte unsigned rounded average: (a + b + 1) / 2
+  vseteq4(a,b)    per-byte unsigned comparison: a == b ? 1 : 0
+  vcmpeq4(a,b)    per-byte unsigned comparison: a == b ? 0xff : 0
+  vsetge4(a,b)    per-byte unsigned comparison: a >= b ? 1 : 0
+  vcmpge4(a,b)    per-byte unsigned comparison: a >= b ? 0xff : 0
+  vsetgt4(a,b)    per-byte unsigned comparison: a > b ? 1 : 0
+  vcmpgt4(a,b)    per-byte unsigned comparison: a > b ? 0xff : 0
+  vsetle4(a,b)    per-byte unsigned comparison: a <= b ? 1 : 0
+  vcmple4(a,b)    per-byte unsigned comparison: a <= b ? 0xff : 0
+  vsetlt4(a,b)    per-byte unsigned comparison: a < b ? 1 : 0
+  vcmplt4(a,b)    per-byte unsigned comparison: a < b ? 0xff : 0
+  vsetne4(a,b)    per-byte unsigned comparison: a != b ? 1: 0
+  vcmpne4(a,b)    per-byte unsigned comparison: a != b ? 0xff: 0
+  vmax4(a,b)      per-byte unsigned maximum: max(a, b)
+  vmin4(a,b)      per-byte unsigned minimum: min(a, b)
+*/
+
+namespace cv { namespace cudev {
+
+// 2
+
+__device__ __forceinline__ uint vadd2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s;
+    s = a ^ b;          // sum bits
+    r = a + b;          // actual sum
+    s = s ^ r;          // determine carry-ins for each bit position
+    s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
+    r = r - s;          // subtract out carry-out from low word
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsub2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s;
+    s = a ^ b;          // sum bits
+    r = a - b;          // actual sum
+    s = s ^ r;          // determine carry-ins for each bit position
+    s = s & 0x00010000; // borrow to high word
+    r = r + s;          // compensate for borrow from low word
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vabsdiff2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s, t, u, v;
+    s = a & 0x0000ffff; // extract low halfword
+    r = b & 0x0000ffff; // extract low halfword
+    u = ::max(r, s);    // maximum of low halfwords
+    v = ::min(r, s);    // minimum of low halfwords
+    s = a & 0xffff0000; // extract high halfword
+    r = b & 0xffff0000; // extract high halfword
+    t = ::max(r, s);    // maximum of high halfwords
+    s = ::min(r, s);    // minimum of high halfwords
+    r = u | t;          // maximum of both halfwords
+    s = v | s;          // minimum of both halfwords
+    r = r - s;          // |a - b| = max(a,b) - min(a,b);
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vavg2(uint a, uint b)
+{
+    uint r, s;
+
+    // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
+    // (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
+    s = a ^ b;
+    r = a & b;
+    s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries
+    s = s >> 1;
+    s = r + s;
+
+    return s;
+}
+
+__device__ __forceinline__ uint vavrg2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
+    // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
+    uint s;
+    s = a ^ b;
+    r = a | b;
+    s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
+    s = s >> 1;
+    r = r - s;
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vseteq2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    // inspired by Alan Mycroft's null-byte detection algorithm:
+    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+    uint c;
+    r = a ^ b;          // 0x0000 if a == b
+    c = r | 0x80008000; // set msbs, to catch carry out
+    r = r ^ c;          // extract msbs, msb = 1 if r < 0x8000
+    c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
+    c = r & ~c;         // msb = 1, if r was 0x0000
+    r = c >> 15;        // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmpeq2(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vseteq2(a, b);
+    c = r << 16;        // convert bool
+    r = c - r;          //  into mask
+#else
+    // inspired by Alan Mycroft's null-byte detection algorithm:
+    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+    r = a ^ b;          // 0x0000 if a == b
+    c = r | 0x80008000; // set msbs, to catch carry out
+    r = r ^ c;          // extract msbs, msb = 1 if r < 0x8000
+    c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
+    c = r & ~c;         // msb = 1, if r was 0x0000
+    r = c >> 15;        // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetge2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint c;
+    asm("not.b32 %0, %0;" : "+r"(b));
+    c = vavrg2(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
+    c = c & 0x80008000; // msb = carry-outs
+    r = c >> 15;        // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmpge2(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetge2(a, b);
+    c = r << 16;        // convert bool
+    r = c - r;          //  into mask
+#else
+    asm("not.b32 %0, %0;" : "+r"(b));
+    c = vavrg2(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
+    c = c & 0x80008000; // msb = carry-outs
+    r = c >> 15;        // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetgt2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint c;
+    asm("not.b32 %0, %0;" : "+r"(b));
+    c = vavg2(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+    c = c & 0x80008000; // msbs = carry-outs
+    r = c >> 15;        // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmpgt2(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetgt2(a, b);
+    c = r << 16;        // convert bool
+    r = c - r;          //  into mask
+#else
+    asm("not.b32 %0, %0;" : "+r"(b));
+    c = vavg2(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+    c = c & 0x80008000; // msbs = carry-outs
+    r = c >> 15;        // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetle2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint c;
+    asm("not.b32 %0, %0;" : "+r"(a));
+    c = vavrg2(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+    c = c & 0x80008000; // msb = carry-outs
+    r = c >> 15;        // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmple2(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetle2(a, b);
+    c = r << 16;        // convert bool
+    r = c - r;          //  into mask
+#else
+    asm("not.b32 %0, %0;" : "+r"(a));
+    c = vavrg2(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+    c = c & 0x80008000; // msb = carry-outs
+    r = c >> 15;        // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetlt2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint c;
+    asm("not.b32 %0, %0;" : "+r"(a));
+    c = vavg2(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+    c = c & 0x80008000; // msb = carry-outs
+    r = c >> 15;        // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmplt2(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetlt2(a, b);
+    c = r << 16;        // convert bool
+    r = c - r;          //  into mask
+#else
+    asm("not.b32 %0, %0;" : "+r"(a));
+    c = vavg2(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+    c = c & 0x80008000; // msb = carry-outs
+    r = c >> 15;        // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetne2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    // inspired by Alan Mycroft's null-byte detection algorithm:
+    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+    uint c;
+    r = a ^ b;          // 0x0000 if a == b
+    c = r | 0x80008000; // set msbs, to catch carry out
+    c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
+    c = r | c;          // msb = 1, if r was not 0x0000
+    c = c & 0x80008000; // extract msbs
+    r = c >> 15;        // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmpne2(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetne2(a, b);
+    c = r << 16;        // convert bool
+    r = c - r;          //  into mask
+#else
+    // inspired by Alan Mycroft's null-byte detection algorithm:
+    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+    r = a ^ b;          // 0x0000 if a == b
+    c = r | 0x80008000; // set msbs, to catch carry out
+    c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
+    c = r | c;          // msb = 1, if r was not 0x0000
+    c = c & 0x80008000; // extract msbs
+    r = c >> 15;        // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vmax2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s, t, u;
+    r = a & 0x0000ffff; // extract low halfword
+    s = b & 0x0000ffff; // extract low halfword
+    t = ::max(r, s);    // maximum of low halfwords
+    r = a & 0xffff0000; // extract high halfword
+    s = b & 0xffff0000; // extract high halfword
+    u = ::max(r, s);    // maximum of high halfwords
+    r = t | u;          // combine halfword maximums
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vmin2(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s, t, u;
+    r = a & 0x0000ffff; // extract low halfword
+    s = b & 0x0000ffff; // extract low halfword
+    t = ::min(r, s);    // minimum of low halfwords
+    r = a & 0xffff0000; // extract high halfword
+    s = b & 0xffff0000; // extract high halfword
+    u = ::min(r, s);    // minimum of high halfwords
+    r = t | u;          // combine halfword minimums
+#endif
+
+    return r;
+}
+
+// 4
+
+__device__ __forceinline__ uint vadd4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s, t;
+    s = a ^ b;          // sum bits
+    r = a & 0x7f7f7f7f; // clear msbs
+    t = b & 0x7f7f7f7f; // clear msbs
+    s = s & 0x80808080; // msb sum bits
+    r = r + t;          // add without msbs, record carry-out in msbs
+    r = r ^ s;          // sum of msb sum and carry-in bits, w/o carry-out
+#endif /* CV_CUDEV_ARCH >= 300 */
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsub4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s, t;
+    s = a ^ ~b;         // inverted sum bits
+    r = a | 0x80808080; // set msbs
+    t = b & 0x7f7f7f7f; // clear msbs
+    s = s & 0x80808080; // inverted msb sum bits
+    r = r - t;          // subtract w/o msbs, record inverted borrows in msb
+    r = r ^ s;          // combine inverted msb sum bits and borrows
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vavg4(uint a, uint b)
+{
+    uint r, s;
+
+    // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
+    // (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
+    s = a ^ b;
+    r = a & b;
+    s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
+    s = s >> 1;
+    s = r + s;
+
+    return s;
+}
+
+__device__ __forceinline__ uint vavrg4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
+    // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
+    uint c;
+    c = a ^ b;
+    r = a | b;
+    c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
+    c = c >> 1;
+    r = r - c;
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vseteq4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    // inspired by Alan Mycroft's null-byte detection algorithm:
+    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+    uint c;
+    r = a ^ b;          // 0x00 if a == b
+    c = r | 0x80808080; // set msbs, to catch carry out
+    r = r ^ c;          // extract msbs, msb = 1 if r < 0x80
+    c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
+    c = r & ~c;         // msb = 1, if r was 0x00
+    r = c >> 7;         // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmpeq4(uint a, uint b)
+{
+    uint r, t;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vseteq4(a, b);
+    t = r << 8;         // convert bool
+    r = t - r;          //  to mask
+#else
+    // inspired by Alan Mycroft's null-byte detection algorithm:
+    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+    t = a ^ b;          // 0x00 if a == b
+    r = t | 0x80808080; // set msbs, to catch carry out
+    t = t ^ r;          // extract msbs, msb = 1 if t < 0x80
+    r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
+    r = t & ~r;         // msb = 1, if t was 0x00
+    t = r >> 7;         // build mask
+    t = r - t;          //  from
+    r = t | r;          //   msbs
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetle4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint c;
+    asm("not.b32 %0, %0;" : "+r"(a));
+    c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+    c = c & 0x80808080; // msb = carry-outs
+    r = c >> 7;         // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmple4(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetle4(a, b);
+    c = r << 8;         // convert bool
+    r = c - r;          //  to mask
+#else
+    asm("not.b32 %0, %0;" : "+r"(a));
+    c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+    c = c & 0x80808080; // msbs = carry-outs
+    r = c >> 7;         // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetlt4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint c;
+    asm("not.b32 %0, %0;" : "+r"(a));
+    c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+    c = c & 0x80808080; // msb = carry-outs
+    r = c >> 7;         // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmplt4(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetlt4(a, b);
+    c = r << 8;         // convert bool
+    r = c - r;          //  to mask
+#else
+    asm("not.b32 %0, %0;" : "+r"(a));
+    c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+    c = c & 0x80808080; // msbs = carry-outs
+    r = c >> 7;         // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetge4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint c;
+    asm("not.b32 %0, %0;" : "+r"(b));
+    c = vavrg4(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
+    c = c & 0x80808080; // msb = carry-outs
+    r = c >> 7;         // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmpge4(uint a, uint b)
+{
+    uint r, s;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetge4(a, b);
+    s = r << 8;         // convert bool
+    r = s - r;          //  to mask
+#else
+    asm ("not.b32 %0,%0;" : "+r"(b));
+    r = vavrg4 (a, b);  // (a + ~b + 1) / 2 = (a - b) / 2
+    r = r & 0x80808080; // msb = carry-outs
+    s = r >> 7;         // build mask
+    s = r - s;          //  from
+    r = s | r;          //   msbs
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetgt4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint c;
+    asm("not.b32 %0, %0;" : "+r"(b));
+    c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+    c = c & 0x80808080; // msb = carry-outs
+    r = c >> 7;         // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmpgt4(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetgt4(a, b);
+    c = r << 8;         // convert bool
+    r = c - r;          //  to mask
+#else
+    asm("not.b32 %0, %0;" : "+r"(b));
+    c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+    c = c & 0x80808080; // msb = carry-outs
+    r = c >> 7;         // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vsetne4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    // inspired by Alan Mycroft's null-byte detection algorithm:
+    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+    uint c;
+    r = a ^ b;          // 0x00 if a == b
+    c = r | 0x80808080; // set msbs, to catch carry out
+    c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
+    c = r | c;          // msb = 1, if r was not 0x00
+    c = c & 0x80808080; // extract msbs
+    r = c >> 7;         // convert to bool
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vcmpne4(uint a, uint b)
+{
+    uint r, c;
+
+#if CV_CUDEV_ARCH >= 300
+    r = vsetne4(a, b);
+    c = r << 8;         // convert bool
+    r = c - r;          //  to mask
+#else
+    // inspired by Alan Mycroft's null-byte detection algorithm:
+    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+    r = a ^ b;          // 0x00 if a == b
+    c = r | 0x80808080; // set msbs, to catch carry out
+    c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
+    c = r | c;          // msb = 1, if r was not 0x00
+    c = c & 0x80808080; // extract msbs
+    r = c >> 7;         // convert
+    r = c - r;          //  msbs to
+    r = c | r;          //   mask
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vabsdiff4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s;
+    s = vcmpge4(a, b);  // mask = 0xff if a >= b
+    r = a ^ b;          //
+    s = (r &  s) ^ b;   // select a when a >= b, else select b => max(a,b)
+    r = s ^ r;          // select a when b >= a, else select b => min(a,b)
+    r = s - r;          // |a - b| = max(a,b) - min(a,b);
+#endif
+
+    return r;
+}
+
+__device__ __forceinline__ uint vmax4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s;
+    s = vcmpge4(a, b);  // mask = 0xff if a >= b
+    r = a & s;          // select a when b >= a
+    s = b & ~s;         // select b when b < a
+    r = r | s;          // combine byte selections
+#endif
+
+    return r;           // byte-wise unsigned maximum
+}
+
+__device__ __forceinline__ uint vmin4(uint a, uint b)
+{
+    uint r = 0;
+
+#if CV_CUDEV_ARCH >= 300
+    asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#elif CV_CUDEV_ARCH >= 200
+    asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+#else
+    uint s;
+    s = vcmpge4(b, a);  // mask = 0xff if a >= b
+    r = a & s;          // select a when b >= a
+    s = b & ~s;         // select b when b < a
+    r = r | s;          // combine byte selections
+#endif
+
+    return r;
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/tuple.hpp b/modules/cudev/include/opencv2/cudev/util/tuple.hpp
new file mode 100644
index 000000000..b015ff344
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/tuple.hpp
@@ -0,0 +1,80 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_TUPLE_HPP__
+#define __OPENCV_CUDEV_UTIL_TUPLE_HPP__
+
+#include "../common.hpp"
+#include "detail/tuple.hpp"
+
+namespace cv { namespace cudev {
+
+using tuple_detail::tuple;
+using tuple_detail::tuple_size;
+using tuple_detail::get;
+using tuple_detail::tuple_element;
+using tuple_detail::make_tuple;
+using tuple_detail::tie;
+
+template <typename T> struct TupleTraits
+{
+    enum { is_tuple = 0 };
+    enum { size = 1 };
+};
+template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9>
+struct TupleTraits< tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >
+{
+    enum { is_tuple = 1 };
+    enum { size = tuple_size< tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value };
+};
+
+template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple
+{
+    typedef typename tuple_detail::ConvertTuple<Tuple, tuple_size<Tuple>::value, CvtOp>::type type;
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/type_traits.hpp b/modules/cudev/include/opencv2/cudev/util/type_traits.hpp
new file mode 100644
index 000000000..ca800c0b7
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/type_traits.hpp
@@ -0,0 +1,169 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_TYPE_TRAITS_HPP__
+#define __OPENCV_CUDEV_UTIL_TYPE_TRAITS_HPP__
+
+#include "../common.hpp"
+#include "vec_traits.hpp"
+#include "detail/type_traits.hpp"
+
+namespace cv { namespace cudev {
+
+// NullType
+
+struct NullType {};
+
+// Int2Type
+
+template <int A> struct Int2Type
+{
+   enum { value = A };
+};
+
+// ArrayWrapper
+
+template <typename T, int COUNT> struct ArrayWrapper
+{
+    T array[COUNT];
+};
+
+// Log2 (compile time calculation)
+
+template <int N, int CURRENT_VAL = N, int COUNT = 0> struct Log2
+{
+    enum { value = Log2<N, (CURRENT_VAL >> 1), COUNT + 1>::VALUE };
+};
+template <int N, int COUNT> struct Log2<N, 0, COUNT>
+{
+    enum { value = (1 << (COUNT - 1) < N) ? COUNT : COUNT - 1 };
+};
+
+// IsPowerOf2
+
+template <int N> struct IsPowerOf2
+{
+    enum { value = ((N != 0) && !(N & (N - 1))) };
+};
+
+// SelectIf
+
+template <bool, typename ThenType, typename ElseType> struct SelectIf
+{
+    typedef ThenType type;
+};
+template <typename ThenType, typename ElseType> struct SelectIf<false, ThenType, ElseType>
+{
+    typedef ElseType type;
+};
+
+// EnableIf
+
+template <bool, typename T = void> struct EnableIf {};
+template <typename T> struct EnableIf<true, T> { typedef T type; };
+
+// DisableIf
+
+template <bool, typename T = void> struct DisableIf {};
+template <typename T> struct DisableIf<false, T> { typedef T type; };
+
+// TypesEquals
+
+template <typename A, typename B> struct TypesEquals
+{
+    enum { value = 0 };
+};
+template <typename A> struct TypesEquals<A, A>
+{
+    enum { value = 1 };
+};
+
+// TypeTraits
+
+template <typename T> struct TypeTraits
+{
+    typedef typename type_traits_detail::UnConst<T>::type                                                non_const_type;
+    typedef typename type_traits_detail::UnVolatile<T>::type                                             non_volatile_type;
+    typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type unqualified_type;
+    typedef typename type_traits_detail::PointerTraits<unqualified_type>::type                           pointee_type;
+    typedef typename type_traits_detail::ReferenceTraits<T>::type                                        referred_type;
+
+    enum { is_const          = type_traits_detail::UnConst<T>::value };
+    enum { is_volatile       = type_traits_detail::UnVolatile<T>::value };
+
+    enum { is_reference      = type_traits_detail::ReferenceTraits<unqualified_type>::value };
+    enum { is_pointer        = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<unqualified_type>::type>::value };
+
+    enum { is_unsigned_int   = type_traits_detail::IsUnsignedIntegral<unqualified_type>::value };
+    enum { is_signed_int     = type_traits_detail::IsSignedIntergral<unqualified_type>::value };
+    enum { is_integral       = type_traits_detail::IsIntegral<unqualified_type>::value };
+    enum { is_float          = type_traits_detail::IsFloat<unqualified_type>::value };
+    enum { is_scalar         = is_integral || is_float };
+    enum { is_vec            = type_traits_detail::IsVec<unqualified_type>::value };
+
+    typedef typename SelectIf<type_traits_detail::IsSimpleParameter<unqualified_type>::value,
+        T, typename type_traits_detail::AddParameterType<T>::type>::type parameter_type;
+};
+
+// LargerType
+
+template <typename A, typename B> struct LargerType
+{
+    typedef typename SelectIf<
+        unsigned(VecTraits<A>::cn) != unsigned(VecTraits<B>::cn),
+        void,
+        typename MakeVec<
+            typename type_traits_detail::LargerDepth<
+                typename VecTraits<A>::elem_type,
+                typename VecTraits<B>::elem_type
+            >::type,
+            VecTraits<A>::cn
+        >::type
+    >::type type;
+};
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/vec_math.hpp b/modules/cudev/include/opencv2/cudev/util/vec_math.hpp
new file mode 100644
index 000000000..3ce265e27
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/vec_math.hpp
@@ -0,0 +1,923 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_VEC_MATH_HPP__
+#define __OPENCV_CUDEV_UTIL_VEC_MATH_HPP__
+
+#include "vec_traits.hpp"
+#include "saturate_cast.hpp"
+
+namespace cv { namespace cudev {
+
+// saturate_cast
+
+namespace vec_math_detail
+{
+    template <int cn, typename VecD> struct SatCastHelper;
+
+    template <typename VecD> struct SatCastHelper<1, VecD>
+    {
+        template <typename VecS> __device__ __forceinline__ static VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x));
+        }
+    };
+
+    template <typename VecD> struct SatCastHelper<2, VecD>
+    {
+        template <typename VecS> __device__ __forceinline__ static VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
+        }
+    };
+
+    template <typename VecD> struct SatCastHelper<3, VecD>
+    {
+        template <typename VecS> __device__ __forceinline__ static VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
+        }
+    };
+
+    template <typename VecD> struct SatCastHelper<4, VecD>
+    {
+        template <typename VecS> __device__ __forceinline__ static VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
+        }
+    };
+}
+
+template<typename T> __device__ __forceinline__ T saturate_cast(const uchar1& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const char1& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const ushort1& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const short1& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const uint1& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const int1& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const float1& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const double1& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+
+template<typename T> __device__ __forceinline__ T saturate_cast(const uchar2& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const char2& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const ushort2& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const short2& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const uint2& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const int2& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const float2& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const double2& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+
+template<typename T> __device__ __forceinline__ T saturate_cast(const uchar3& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const char3& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const ushort3& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const short3& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const uint3& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const int3& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const float3& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const double3& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+
+template<typename T> __device__ __forceinline__ T saturate_cast(const uchar4& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const char4& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const ushort4& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const short4& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const uint4& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const int4& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const float4& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+template<typename T> __device__ __forceinline__ T saturate_cast(const double4& v) { return vec_math_detail::SatCastHelper<VecTraits<T>::cn, T>::cast(v); }
+
+// unary operators
+
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
+    { \
+        return VecTraits<output_type ## 1>::make(op (a.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
+    { \
+        return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
+    { \
+        return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
+    { \
+        return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
+
+// unary functions
+
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (a.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
+    { \
+        return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
+    { \
+        return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
+
+// binary operators (vec & vec)
+
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(a.x op b.x); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
+
+// binary operators (vec & scalar)
+
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 1>::make(a.x op s); \
+    } \
+    __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(s op b.x); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
+    }
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
+
+// binary function (vec & vec)
+
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
+
+// binary function (vec & scalar)
+
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/util/vec_traits.hpp b/modules/cudev/include/opencv2/cudev/util/vec_traits.hpp
new file mode 100644
index 000000000..bdff958d8
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/util/vec_traits.hpp
@@ -0,0 +1,320 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_UTIL_VEC_TRAITS_HPP__
+#define __OPENCV_CUDEV_UTIL_VEC_TRAITS_HPP__
+
+#include "../common.hpp"
+
+namespace cv { namespace cudev {
+
+// MakeVec
+
+template<typename T, int CN> struct MakeVec;
+
+#define CV_CUDEV_MAKE_VEC_INST(elem_type) \
+    template<> struct MakeVec<elem_type, 1> { typedef elem_type      type; }; \
+    template<> struct MakeVec<elem_type, 2> { typedef elem_type ## 2 type; }; \
+    template<> struct MakeVec<elem_type, 3> { typedef elem_type ## 3 type; }; \
+    template<> struct MakeVec<elem_type, 4> { typedef elem_type ## 4 type; };
+
+CV_CUDEV_MAKE_VEC_INST(uchar)
+CV_CUDEV_MAKE_VEC_INST(ushort)
+CV_CUDEV_MAKE_VEC_INST(short)
+CV_CUDEV_MAKE_VEC_INST(int)
+CV_CUDEV_MAKE_VEC_INST(uint)
+CV_CUDEV_MAKE_VEC_INST(float)
+CV_CUDEV_MAKE_VEC_INST(double)
+
+#undef CV_CUDEV_MAKE_VEC_INST
+
+template<> struct MakeVec<schar, 1> { typedef char  type; };
+template<> struct MakeVec<schar, 2> { typedef char2 type; };
+template<> struct MakeVec<schar, 3> { typedef char3 type; };
+template<> struct MakeVec<schar, 4> { typedef char4 type; };
+
+template<> struct MakeVec<bool, 1> { typedef uchar  type; };
+template<> struct MakeVec<bool, 2> { typedef uchar2 type; };
+template<> struct MakeVec<bool, 3> { typedef uchar3 type; };
+template<> struct MakeVec<bool, 4> { typedef uchar4 type; };
+
+// VecTraits
+
+template<typename T> struct VecTraits;
+
+#define CV_CUDEV_VEC_TRAITS_INST(type) \
+    template <> struct VecTraits<type> \
+    { \
+        typedef type elem_type; \
+        enum {cn=1}; \
+        __host__ __device__ __forceinline__ static type all(type v) {return v;} \
+        __host__ __device__ __forceinline__ static type make(type x) {return x;} \
+        __host__ __device__ __forceinline__ static type make(const type* v) {return *v;} \
+    }; \
+    template <> struct VecTraits<type ## 1> \
+    { \
+        typedef type elem_type; \
+        enum {cn=1}; \
+        __host__ __device__ __forceinline__ static type ## 1 all(type v) {return make_ ## type ## 1(v);} \
+        __host__ __device__ __forceinline__ static type ## 1 make(type x) {return make_ ## type ## 1(x);} \
+        __host__ __device__ __forceinline__ static type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
+    }; \
+    template <> struct VecTraits<type ## 2> \
+    { \
+        typedef type elem_type; \
+        enum {cn=2}; \
+        __host__ __device__ __forceinline__ static type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
+        __host__ __device__ __forceinline__ static type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
+        __host__ __device__ __forceinline__ static type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
+    }; \
+    template <> struct VecTraits<type ## 3> \
+    { \
+        typedef type elem_type; \
+        enum {cn=3}; \
+        __host__ __device__ __forceinline__ static type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
+        __host__ __device__ __forceinline__ static type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
+        __host__ __device__ __forceinline__ static type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
+    }; \
+    template <> struct VecTraits<type ## 4> \
+    { \
+        typedef type elem_type; \
+        enum {cn=4}; \
+        __host__ __device__ __forceinline__ static type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
+        __host__ __device__ __forceinline__ static type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
+        __host__ __device__ __forceinline__ static type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
+    };
+
+CV_CUDEV_VEC_TRAITS_INST(uchar)
+CV_CUDEV_VEC_TRAITS_INST(ushort)
+CV_CUDEV_VEC_TRAITS_INST(short)
+CV_CUDEV_VEC_TRAITS_INST(int)
+CV_CUDEV_VEC_TRAITS_INST(uint)
+CV_CUDEV_VEC_TRAITS_INST(float)
+CV_CUDEV_VEC_TRAITS_INST(double)
+
+#undef CV_CUDEV_VEC_TRAITS_INST
+
+template<> struct VecTraits<schar>
+{
+    typedef schar elem_type;
+    enum {cn=1};
+    __host__ __device__ __forceinline__ static schar all(schar v) {return v;}
+    __host__ __device__ __forceinline__ static schar make(schar x) {return x;}
+    __host__ __device__ __forceinline__ static schar make(const schar* x) {return *x;}
+};
+template<> struct VecTraits<char1>
+{
+    typedef schar elem_type;
+    enum {cn=1};
+    __host__ __device__ __forceinline__ static char1 all(schar v) {return make_char1(v);}
+    __host__ __device__ __forceinline__ static char1 make(schar x) {return make_char1(x);}
+    __host__ __device__ __forceinline__ static char1 make(const schar* v) {return make_char1(v[0]);}
+};
+template<> struct VecTraits<char2>
+{
+    typedef schar elem_type;
+    enum {cn=2};
+    __host__ __device__ __forceinline__ static char2 all(schar v) {return make_char2(v, v);}
+    __host__ __device__ __forceinline__ static char2 make(schar x, schar y) {return make_char2(x, y);}
+    __host__ __device__ __forceinline__ static char2 make(const schar* v) {return make_char2(v[0], v[1]);}
+};
+template<> struct VecTraits<char3>
+{
+    typedef schar elem_type;
+    enum {cn=3};
+    __host__ __device__ __forceinline__ static char3 all(schar v) {return make_char3(v, v, v);}
+    __host__ __device__ __forceinline__ static char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
+    __host__ __device__ __forceinline__ static char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
+};
+template<> struct VecTraits<char4>
+{
+    typedef schar elem_type;
+    enum {cn=4};
+    __host__ __device__ __forceinline__ static char4 all(schar v) {return make_char4(v, v, v, v);}
+    __host__ __device__ __forceinline__ static char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
+    __host__ __device__ __forceinline__ static char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
+};
+
+}}
+
+// DataType
+
+namespace cv {
+
+template <> class DataType<uint>
+{
+public:
+    typedef uint         value_type;
+    typedef value_type   work_type;
+    typedef value_type   channel_type;
+    typedef value_type   vec_type;
+    enum { generic_type = 0,
+           depth        = CV_32S,
+           channels     = 1,
+           fmt          = (int)'i',
+           type         = CV_MAKE_TYPE(depth, channels)
+         };
+};
+
+#define CV_CUDEV_DATA_TYPE_INST(_depth_type, _channel_num) \
+    template <> class DataType< _depth_type ## _channel_num > \
+    { \
+    public: \
+        typedef _depth_type ## _channel_num     value_type; \
+        typedef value_type                      work_type; \
+        typedef _depth_type                     channel_type; \
+        typedef value_type                      vec_type; \
+        enum { generic_type = 0, \
+               depth        = DataType<channel_type>::depth, \
+               channels     = _channel_num, \
+               fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8), \
+               type         = CV_MAKE_TYPE(depth, channels) \
+             }; \
+    };
+
+CV_CUDEV_DATA_TYPE_INST(uchar, 1)
+CV_CUDEV_DATA_TYPE_INST(uchar, 2)
+CV_CUDEV_DATA_TYPE_INST(uchar, 3)
+CV_CUDEV_DATA_TYPE_INST(uchar, 4)
+
+CV_CUDEV_DATA_TYPE_INST(ushort, 1)
+CV_CUDEV_DATA_TYPE_INST(ushort, 2)
+CV_CUDEV_DATA_TYPE_INST(ushort, 3)
+CV_CUDEV_DATA_TYPE_INST(ushort, 4)
+
+CV_CUDEV_DATA_TYPE_INST(short, 1)
+CV_CUDEV_DATA_TYPE_INST(short, 2)
+CV_CUDEV_DATA_TYPE_INST(short, 3)
+CV_CUDEV_DATA_TYPE_INST(short, 4)
+
+CV_CUDEV_DATA_TYPE_INST(int, 1)
+CV_CUDEV_DATA_TYPE_INST(int, 2)
+CV_CUDEV_DATA_TYPE_INST(int, 3)
+CV_CUDEV_DATA_TYPE_INST(int, 4)
+
+CV_CUDEV_DATA_TYPE_INST(uint, 1)
+CV_CUDEV_DATA_TYPE_INST(uint, 2)
+CV_CUDEV_DATA_TYPE_INST(uint, 3)
+CV_CUDEV_DATA_TYPE_INST(uint, 4)
+
+CV_CUDEV_DATA_TYPE_INST(float, 1)
+CV_CUDEV_DATA_TYPE_INST(float, 2)
+CV_CUDEV_DATA_TYPE_INST(float, 3)
+CV_CUDEV_DATA_TYPE_INST(float, 4)
+
+CV_CUDEV_DATA_TYPE_INST(double, 1)
+CV_CUDEV_DATA_TYPE_INST(double, 2)
+CV_CUDEV_DATA_TYPE_INST(double, 3)
+CV_CUDEV_DATA_TYPE_INST(double, 4)
+
+#undef CV_CUDEV_DATA_TYPE_INST
+
+template<> class DataType<char1>
+{
+public:
+    typedef char1      value_type;
+    typedef value_type work_type;
+    typedef schar      channel_type;
+    typedef value_type vec_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 1,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKE_TYPE(depth, channels)
+         };
+};
+
+template<> class DataType<char2>
+{
+public:
+    typedef char2      value_type;
+    typedef value_type work_type;
+    typedef schar      channel_type;
+    typedef value_type vec_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKE_TYPE(depth, channels)
+         };
+};
+
+template<> class DataType<char3>
+{
+public:
+    typedef char3      value_type;
+    typedef value_type work_type;
+    typedef schar      channel_type;
+    typedef value_type vec_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 3,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKE_TYPE(depth, channels)
+         };
+};
+
+template<> class DataType<char4>
+{
+public:
+    typedef char4      value_type;
+    typedef value_type work_type;
+    typedef schar      channel_type;
+    typedef value_type vec_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 4,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKE_TYPE(depth, channels)
+         };
+};
+
+}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp b/modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp
new file mode 100644
index 000000000..7f9faf9ca
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp
@@ -0,0 +1,222 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_WARP_REDUCE_DETAIL_HPP__
+#define __OPENCV_CUDEV_WARP_REDUCE_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/tuple.hpp"
+#include "../../warp/shuffle.hpp"
+
+namespace cv { namespace cudev {
+
+namespace warp_reduce_detail
+{
+    // GetType
+
+    template <typename T> struct GetType;
+
+    template <typename T> struct GetType<T*>
+    {
+        typedef T type;
+    };
+
+    template <typename T> struct GetType<volatile T*>
+    {
+        typedef T type;
+    };
+
+    template <typename T> struct GetType<T&>
+    {
+        typedef T type;
+    };
+
+    // For
+
+    template <int I, int N> struct For
+    {
+        template <class PointerTuple, class ValTuple>
+        __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
+        {
+            get<I>(smem)[tid] = get<I>(val);
+
+            For<I + 1, N>::loadToSmem(smem, val, tid);
+        }
+
+        template <class PointerTuple, class ValTuple, class OpTuple>
+        __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op)
+        {
+            typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta];
+            get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg);
+
+            For<I + 1, N>::merge(smem, val, tid, delta, op);
+        }
+
+#if CV_CUDEV_ARCH >= 300
+        template <class ValTuple, class OpTuple>
+        __device__ static void mergeShfl(const ValTuple& val, uint delta, uint width, const OpTuple& op)
+        {
+            typename GetType<typename tuple_element<I, ValTuple>::type>::type reg = shfl_down(get<I>(val), delta, width);
+            get<I>(val) = get<I>(op)(get<I>(val), reg);
+
+            For<I + 1, N>::mergeShfl(val, delta, width, op);
+        }
+#endif
+    };
+
+    template <int N> struct For<N, N>
+    {
+        template <class PointerTuple, class ValTuple>
+        __device__ __forceinline__ static void loadToSmem(const PointerTuple&, const ValTuple&, uint)
+        {
+        }
+
+        template <class PointerTuple, class ValTuple, class OpTuple>
+        __device__ __forceinline__ static void merge(const PointerTuple&, const ValTuple&, uint, uint, const OpTuple&)
+        {
+        }
+
+#if CV_CUDEV_ARCH >= 300
+        template <class ValTuple, class OpTuple>
+        __device__ __forceinline__ static void mergeShfl(const ValTuple&, uint, uint, const OpTuple&)
+        {
+        }
+#endif
+    };
+
+    // loadToSmem
+
+    template <typename T>
+    __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid)
+    {
+        smem[tid] = val;
+    }
+
+    template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+    __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                               const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                               uint tid)
+    {
+        For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
+    }
+
+    // merge
+
+    template <typename T, class Op>
+    __device__ __forceinline__ void merge(volatile T* smem, T& val, uint tid, uint delta, const Op& op)
+    {
+        T reg = smem[tid + delta];
+        smem[tid] = val = op(val, reg);
+    }
+
+    template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+              class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+    __device__ __forceinline__ void merge(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                          const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                          uint tid,
+                                          uint delta,
+                                          const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+    {
+        For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
+    }
+
+    // mergeShfl
+
+#if CV_CUDEV_ARCH >= 300
+    template <typename T, class Op>
+    __device__ __forceinline__ void mergeShfl(T& val, uint delta, uint width, const Op& op)
+    {
+        T reg = shfl_down(val, delta, width);
+        val = op(val, reg);
+    }
+
+    template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+              class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+    __device__ __forceinline__ void mergeShfl(const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                              uint delta,
+                                              uint width,
+                                              const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+    {
+        For<0, tuple_size<tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
+    }
+#endif
+
+    // WarpReductor
+
+    struct WarpReductor
+    {
+        template <typename Pointer, typename Reference, class Op>
+        __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op)
+        {
+        #if CV_CUDEV_ARCH >= 300
+            (void) smem;
+            (void) tid;
+
+            mergeShfl(val, 16, 32, op);
+            mergeShfl(val, 8, 32, op);
+            mergeShfl(val, 4, 32, op);
+            mergeShfl(val, 2, 32, op);
+            mergeShfl(val, 1, 32, op);
+        #else
+            loadToSmem(smem, val, tid);
+
+            if (tid < 16)
+            {
+                merge(smem, val, tid, 16, op);
+                merge(smem, val, tid, 8, op);
+                merge(smem, val, tid, 4, op);
+                merge(smem, val, tid, 2, op);
+                merge(smem, val, tid, 1, op);
+            }
+        #endif
+        }
+    };
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp b/modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp
new file mode 100644
index 000000000..770179575
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp
@@ -0,0 +1,239 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_WARP_REDUCE_KEY_VAL_DETAIL_HPP__
+#define __OPENCV_CUDEV_WARP_REDUCE_KEY_VAL_DETAIL_HPP__
+
+#include "../../common.hpp"
+#include "../../util/tuple.hpp"
+
+namespace cv { namespace cudev {
+
+namespace warp_reduce_key_val_detail
+{
+    // GetType
+
+    template <typename T> struct GetType;
+
+    template <typename T> struct GetType<T*>
+    {
+        typedef T type;
+    };
+
+    template <typename T> struct GetType<volatile T*>
+    {
+        typedef T type;
+    };
+
+    template <typename T> struct GetType<T&>
+    {
+        typedef T type;
+    };
+
+    // For
+
+    template <int I, int N> struct For
+    {
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
+        {
+            get<I>(smem)[tid] = get<I>(data);
+
+            For<I + 1, N>::loadToSmem(smem, data, tid);
+        }
+
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ static void copy(const PointerTuple& svals, const ReferenceTuple& val, uint tid, uint delta)
+        {
+            get<I>(svals)[tid] = get<I>(val) = get<I>(svals)[tid + delta];
+
+            For<I + 1, N>::copy(svals, val, tid, delta);
+        }
+
+        template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+        __device__ static void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
+                                     const ValPointerTuple& svals, const ValReferenceTuple& val,
+                                     const CmpTuple& cmp,
+                                     uint tid, uint delta)
+        {
+            typename GetType<typename tuple_element<I, KeyPointerTuple>::type>::type reg = get<I>(skeys)[tid + delta];
+
+            if (get<I>(cmp)(reg, get<I>(key)))
+            {
+                get<I>(skeys)[tid] = get<I>(key) = reg;
+                get<I>(svals)[tid] = get<I>(val) = get<I>(svals)[tid + delta];
+            }
+
+            For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
+        }
+    };
+
+    template <int N> struct For<N, N>
+    {
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ __forceinline__ static void loadToSmem(const PointerTuple&, const ReferenceTuple&, uint)
+        {
+        }
+
+        template <class PointerTuple, class ReferenceTuple>
+        __device__ __forceinline__ static void copy(const PointerTuple&, const ReferenceTuple&, uint, uint)
+        {
+        }
+
+        template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+        __device__ __forceinline__ static void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
+                                                     const ValPointerTuple&, const ValReferenceTuple&,
+                                                     const CmpTuple&,
+                                                     uint, uint)
+        {
+        }
+    };
+
+    // loadToSmem
+
+    template <typename T>
+    __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid)
+    {
+        smem[tid] = data;
+    }
+
+    template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+    __device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
+                                               const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
+                                               uint tid)
+    {
+        For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
+    }
+
+    // copyVals
+
+    template <typename V>
+    __device__ __forceinline__ void copyVals(volatile V* svals, V& val, uint tid, uint delta)
+    {
+        svals[tid] = val = svals[tid + delta];
+    }
+
+    template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+    __device__ __forceinline__ void copyVals(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                             const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                             uint tid, uint delta)
+    {
+        For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
+    }
+
+    // merge
+
+    template <typename K, typename V, class Cmp>
+    __device__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, uint tid, uint delta)
+    {
+        K reg = skeys[tid + delta];
+
+        if (cmp(reg, key))
+        {
+            skeys[tid] = key = reg;
+            copyVals(svals, val, tid, delta);
+        }
+    }
+
+    template <typename K,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp>
+    __device__ void merge(volatile K* skeys, K& key,
+                          const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                          const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                          const Cmp& cmp, uint tid, uint delta)
+    {
+        K reg = skeys[tid + delta];
+
+        if (cmp(reg, key))
+        {
+            skeys[tid] = key = reg;
+            copyVals(svals, val, tid, delta);
+        }
+    }
+
+    template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+              typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+    __device__ __forceinline__ void merge(const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                          const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                          const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                          const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                          const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
+                                          uint tid, uint delta)
+    {
+        For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
+    }
+
+    // WarpReductor
+
+    struct WarpReductor
+    {
+        template <class KP, class KR, class VP, class VR, class Cmp>
+        __device__ static void reduce(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp)
+        {
+            loadToSmem(skeys, key, tid);
+            loadToSmem(svals, val, tid);
+
+            if (tid < 16)
+            {
+                merge(skeys, key, svals, val, cmp, tid, 16);
+                merge(skeys, key, svals, val, cmp, tid, 8);
+                merge(skeys, key, svals, val, cmp, tid, 4);
+                merge(skeys, key, svals, val, cmp, tid, 2);
+                merge(skeys, key, svals, val, cmp, tid, 1);
+            }
+        }
+    };
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/warp/reduce.hpp b/modules/cudev/include/opencv2/cudev/warp/reduce.hpp
new file mode 100644
index 000000000..089ef92d0
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/warp/reduce.hpp
@@ -0,0 +1,206 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_WARP_REDUCE_HPP__
+#define __OPENCV_CUDEV_WARP_REDUCE_HPP__
+
+#include "../common.hpp"
+#include "../util/tuple.hpp"
+#include "detail/reduce.hpp"
+#include "detail/reduce_key_val.hpp"
+
+namespace cv { namespace cudev {
+
+// warpReduce
+
+template <typename T, class Op>
+__device__ __forceinline__ void warpReduce(volatile T* smem, T& val, uint tid, const Op& op)
+{
+    warp_reduce_detail::WarpReductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
+}
+
+template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+          typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+          class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+__device__ __forceinline__ void warpReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                           const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                           uint tid,
+                                           const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+{
+    warp_reduce_detail::WarpReductor::template reduce<
+            const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
+            const tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
+            const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
+}
+
+// warpReduceKeyVal
+
+template <typename K, typename V, class Cmp>
+__device__ __forceinline__ void warpReduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, uint tid, const Cmp& cmp)
+{
+    warp_reduce_key_val_detail::WarpReductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
+}
+
+template <typename K,
+          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+          class Cmp>
+__device__ __forceinline__ void warpReduceKeyVal(volatile K* skeys, K& key,
+                                                 const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 uint tid, const Cmp& cmp)
+{
+    warp_reduce_key_val_detail::WarpReductor::template reduce<volatile K*, K&,
+            const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+            const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+            const Cmp&>(skeys, key, svals, val, tid, cmp);
+}
+
+template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+          typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+          class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+__device__ __forceinline__ void warpReduceKeyVal(const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                                 const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                                 const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 uint tid,
+                                                 const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
+{
+    warp_reduce_key_val_detail::WarpReductor::template reduce<
+            const tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
+            const tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
+            const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+            const tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+            const tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
+            >(skeys, key, svals, val, tid, cmp);
+}
+
+// smem_tuple
+
+template <typename T0>
+__device__ __forceinline__
+tuple<volatile T0*>
+smem_tuple(T0* t0)
+{
+    return make_tuple((volatile T0*) t0);
+}
+
+template <typename T0, typename T1>
+__device__ __forceinline__
+tuple<volatile T0*, volatile T1*>
+smem_tuple(T0* t0, T1* t1)
+{
+    return make_tuple((volatile T0*) t0, (volatile T1*) t1);
+}
+
+template <typename T0, typename T1, typename T2>
+__device__ __forceinline__
+tuple<volatile T0*, volatile T1*, volatile T2*>
+smem_tuple(T0* t0, T1* t1, T2* t2)
+{
+    return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
+}
+
+template <typename T0, typename T1, typename T2, typename T3>
+__device__ __forceinline__
+tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
+smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
+{
+    return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4>
+__device__ __forceinline__
+tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
+smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
+{
+    return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
+__device__ __forceinline__
+tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
+smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
+{
+    return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
+__device__ __forceinline__
+tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
+smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
+{
+    return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
+__device__ __forceinline__
+tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
+smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
+{
+    return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
+__device__ __forceinline__
+tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
+smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
+{
+    return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
+__device__ __forceinline__
+tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
+smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
+{
+    return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/warp/scan.hpp b/modules/cudev/include/opencv2/cudev/warp/scan.hpp
new file mode 100644
index 000000000..acd032fb0
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/warp/scan.hpp
@@ -0,0 +1,99 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_WARP_SCAN_HPP__
+#define __OPENCV_CUDEV_WARP_SCAN_HPP__
+
+#include "../common.hpp"
+#include "warp.hpp"
+#include "shuffle.hpp"
+
+namespace cv { namespace cudev {
+
+template <typename T>
+__device__ T warpScanInclusive(T data, volatile T* smem, uint tid)
+{
+#if CV_CUDEV_ARCH >= 300
+    (void) smem;
+    (void) tid;
+
+    const uint laneId = Warp::laneId();
+
+    // scan on shufl functions
+    #pragma unroll
+    for (int i = 1; i <= (WARP_SIZE / 2); i *= 2)
+    {
+        const T val = shfl_up(data, i);
+        if (laneId >= i)
+              data += val;
+    }
+
+    return data;
+#else
+    uint pos = 2 * tid - (tid & (WARP_SIZE - 1));
+    smem[pos] = 0;
+
+    pos += WARP_SIZE;
+    smem[pos] = data;
+
+    smem[pos] += smem[pos - 1];
+    smem[pos] += smem[pos - 2];
+    smem[pos] += smem[pos - 4];
+    smem[pos] += smem[pos - 8];
+    smem[pos] += smem[pos - 16];
+
+    return smem[pos];
+#endif
+}
+
+template <typename T>
+__device__ __forceinline__ T warpScanExclusive(T data, volatile T* smem, uint tid)
+{
+    return warpScanInclusive(data, smem, tid) - data;
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp b/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp
new file mode 100644
index 000000000..a6aae5b90
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp
@@ -0,0 +1,424 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_WARP_SHUFFLE_HPP__
+#define __OPENCV_CUDEV_WARP_SHUFFLE_HPP__
+
+#include "../common.hpp"
+#include "../util/vec_traits.hpp"
+
+namespace cv { namespace cudev {
+
+#if CV_CUDEV_ARCH >= 300
+
+// shfl
+
+__device__ __forceinline__ uchar shfl(uchar val, int srcLane, int width = warpSize)
+{
+    return (uchar) __shfl((int) val, srcLane, width);
+}
+
+__device__ __forceinline__ schar shfl(schar val, int srcLane, int width = warpSize)
+{
+    return (schar) __shfl((int) val, srcLane, width);
+}
+
+__device__ __forceinline__ ushort shfl(ushort val, int srcLane, int width = warpSize)
+{
+    return (ushort) __shfl((int) val, srcLane, width);
+}
+
+__device__ __forceinline__ short shfl(short val, int srcLane, int width = warpSize)
+{
+    return (short) __shfl((int) val, srcLane, width);
+}
+
+__device__ __forceinline__ int shfl(int val, int srcLane, int width = warpSize)
+{
+    return __shfl(val, srcLane, width);
+}
+
+__device__ __forceinline__ uint shfl(uint val, int srcLane, int width = warpSize)
+{
+    return (uint) __shfl((int) val, srcLane, width);
+}
+
+__device__ __forceinline__ float shfl(float val, int srcLane, int width = warpSize)
+{
+    return __shfl(val, srcLane, width);
+}
+
+__device__ double shfl(double val, int srcLane, int width = warpSize)
+{
+    int lo = __double2loint(val);
+    int hi = __double2hiint(val);
+
+    lo = __shfl(lo, srcLane, width);
+    hi = __shfl(hi, srcLane, width);
+
+    return __hiloint2double(hi, lo);
+}
+
+#define CV_CUDEV_SHFL_VEC_INST(input_type) \
+    __device__ __forceinline__ input_type ## 1 shfl(const input_type ## 1 & val, int srcLane, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 1>::make( \
+                        shfl(val.x, srcLane, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 2 shfl(const input_type ## 2 & val, int srcLane, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 2>::make( \
+                        shfl(val.x, srcLane, width), \
+                        shfl(val.y, srcLane, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 3 shfl(const input_type ## 3 & val, int srcLane, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 3>::make( \
+                        shfl(val.x, srcLane, width), \
+                        shfl(val.y, srcLane, width), \
+                        shfl(val.z, srcLane, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 4 shfl(const input_type ## 4 & val, int srcLane, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 4>::make( \
+                        shfl(val.x, srcLane, width), \
+                        shfl(val.y, srcLane, width), \
+                        shfl(val.z, srcLane, width), \
+                        shfl(val.w, srcLane, width) \
+                    ); \
+    }
+
+CV_CUDEV_SHFL_VEC_INST(uchar)
+CV_CUDEV_SHFL_VEC_INST(char)
+CV_CUDEV_SHFL_VEC_INST(ushort)
+CV_CUDEV_SHFL_VEC_INST(short)
+CV_CUDEV_SHFL_VEC_INST(uint)
+CV_CUDEV_SHFL_VEC_INST(int)
+CV_CUDEV_SHFL_VEC_INST(float)
+CV_CUDEV_SHFL_VEC_INST(double)
+
+#undef CV_CUDEV_SHFL_VEC_INST
+
+// shfl_up
+
+__device__ __forceinline__ uchar shfl_up(uchar val, uint delta, int width = warpSize)
+{
+    return (uchar) __shfl_up((int) val, delta, width);
+}
+
+__device__ __forceinline__ schar shfl_up(schar val, uint delta, int width = warpSize)
+{
+    return (schar) __shfl_up((int) val, delta, width);
+}
+
+__device__ __forceinline__ ushort shfl_up(ushort val, uint delta, int width = warpSize)
+{
+    return (ushort) __shfl_up((int) val, delta, width);
+}
+
+__device__ __forceinline__ short shfl_up(short val, uint delta, int width = warpSize)
+{
+    return (short) __shfl_up((int) val, delta, width);
+}
+
+__device__ __forceinline__ int shfl_up(int val, uint delta, int width = warpSize)
+{
+    return __shfl_up(val, delta, width);
+}
+
+__device__ __forceinline__ uint shfl_up(uint val, uint delta, int width = warpSize)
+{
+    return (uint) __shfl_up((int) val, delta, width);
+}
+
+__device__ __forceinline__ float shfl_up(float val, uint delta, int width = warpSize)
+{
+    return __shfl_up(val, delta, width);
+}
+
+__device__ double shfl_up(double val, uint delta, int width = warpSize)
+{
+    int lo = __double2loint(val);
+    int hi = __double2hiint(val);
+
+    lo = __shfl_up(lo, delta, width);
+    hi = __shfl_up(hi, delta, width);
+
+    return __hiloint2double(hi, lo);
+}
+
+#define CV_CUDEV_SHFL_UP_VEC_INST(input_type) \
+    __device__ __forceinline__ input_type ## 1 shfl_up(const input_type ## 1 & val, uint delta, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 1>::make( \
+                        shfl_up(val.x, delta, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 2 shfl_up(const input_type ## 2 & val, uint delta, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 2>::make( \
+                        shfl_up(val.x, delta, width), \
+                        shfl_up(val.y, delta, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 3 shfl_up(const input_type ## 3 & val, uint delta, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 3>::make( \
+                        shfl_up(val.x, delta, width), \
+                        shfl_up(val.y, delta, width), \
+                        shfl_up(val.z, delta, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 4 shfl_up(const input_type ## 4 & val, uint delta, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 4>::make( \
+                        shfl_up(val.x, delta, width), \
+                        shfl_up(val.y, delta, width), \
+                        shfl_up(val.z, delta, width), \
+                        shfl_up(val.w, delta, width) \
+                    ); \
+    }
+
+CV_CUDEV_SHFL_UP_VEC_INST(uchar)
+CV_CUDEV_SHFL_UP_VEC_INST(char)
+CV_CUDEV_SHFL_UP_VEC_INST(ushort)
+CV_CUDEV_SHFL_UP_VEC_INST(short)
+CV_CUDEV_SHFL_UP_VEC_INST(uint)
+CV_CUDEV_SHFL_UP_VEC_INST(int)
+CV_CUDEV_SHFL_UP_VEC_INST(float)
+CV_CUDEV_SHFL_UP_VEC_INST(double)
+
+#undef CV_CUDEV_SHFL_UP_VEC_INST
+
+// shfl_down
+
+__device__ __forceinline__ uchar shfl_down(uchar val, uint delta, int width = warpSize)
+{
+    return (uchar) __shfl_down((int) val, delta, width);
+}
+
+__device__ __forceinline__ schar shfl_down(schar val, uint delta, int width = warpSize)
+{
+    return (schar) __shfl_down((int) val, delta, width);
+}
+
+__device__ __forceinline__ ushort shfl_down(ushort val, uint delta, int width = warpSize)
+{
+    return (ushort) __shfl_down((int) val, delta, width);
+}
+
+__device__ __forceinline__ short shfl_down(short val, uint delta, int width = warpSize)
+{
+    return (short) __shfl_down((int) val, delta, width);
+}
+
+__device__ __forceinline__ int shfl_down(int val, uint delta, int width = warpSize)
+{
+    return __shfl_down(val, delta, width);
+}
+
+__device__ __forceinline__ uint shfl_down(uint val, uint delta, int width = warpSize)
+{
+    return (uint) __shfl_down((int) val, delta, width);
+}
+
+__device__ __forceinline__ float shfl_down(float val, uint delta, int width = warpSize)
+{
+    return __shfl_down(val, delta, width);
+}
+
+__device__ double shfl_down(double val, uint delta, int width = warpSize)
+{
+    int lo = __double2loint(val);
+    int hi = __double2hiint(val);
+
+    lo = __shfl_down(lo, delta, width);
+    hi = __shfl_down(hi, delta, width);
+
+    return __hiloint2double(hi, lo);
+}
+
+#define CV_CUDEV_SHFL_DOWN_VEC_INST(input_type) \
+    __device__ __forceinline__ input_type ## 1 shfl_down(const input_type ## 1 & val, uint delta, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 1>::make( \
+                        shfl_down(val.x, delta, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 2 shfl_down(const input_type ## 2 & val, uint delta, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 2>::make( \
+                        shfl_down(val.x, delta, width), \
+                        shfl_down(val.y, delta, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 3 shfl_down(const input_type ## 3 & val, uint delta, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 3>::make( \
+                        shfl_down(val.x, delta, width), \
+                        shfl_down(val.y, delta, width), \
+                        shfl_down(val.z, delta, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 4 shfl_down(const input_type ## 4 & val, uint delta, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 4>::make( \
+                        shfl_down(val.x, delta, width), \
+                        shfl_down(val.y, delta, width), \
+                        shfl_down(val.z, delta, width), \
+                        shfl_down(val.w, delta, width) \
+                    ); \
+    }
+
+CV_CUDEV_SHFL_DOWN_VEC_INST(uchar)
+CV_CUDEV_SHFL_DOWN_VEC_INST(char)
+CV_CUDEV_SHFL_DOWN_VEC_INST(ushort)
+CV_CUDEV_SHFL_DOWN_VEC_INST(short)
+CV_CUDEV_SHFL_DOWN_VEC_INST(uint)
+CV_CUDEV_SHFL_DOWN_VEC_INST(int)
+CV_CUDEV_SHFL_DOWN_VEC_INST(float)
+CV_CUDEV_SHFL_DOWN_VEC_INST(double)
+
+#undef CV_CUDEV_SHFL_DOWN_VEC_INST
+
+// shfl_xor
+
+__device__ __forceinline__ uchar shfl_xor(uchar val, int laneMask, int width = warpSize)
+{
+    return (uchar) __shfl_xor((int) val, laneMask, width);
+}
+
+__device__ __forceinline__ schar shfl_xor(schar val, int laneMask, int width = warpSize)
+{
+    return (schar) __shfl_xor((int) val, laneMask, width);
+}
+
+__device__ __forceinline__ ushort shfl_xor(ushort val, int laneMask, int width = warpSize)
+{
+    return (ushort) __shfl_xor((int) val, laneMask, width);
+}
+
+__device__ __forceinline__ short shfl_xor(short val, int laneMask, int width = warpSize)
+{
+    return (short) __shfl_xor((int) val, laneMask, width);
+}
+
+__device__ __forceinline__ int shfl_xor(int val, int laneMask, int width = warpSize)
+{
+    return __shfl_xor(val, laneMask, width);
+}
+
+__device__ __forceinline__ uint shfl_xor(uint val, int laneMask, int width = warpSize)
+{
+    return (uint) __shfl_xor((int) val, laneMask, width);
+}
+
+__device__ __forceinline__ float shfl_xor(float val, int laneMask, int width = warpSize)
+{
+    return __shfl_xor(val, laneMask, width);
+}
+
+__device__ double shfl_xor(double val, int laneMask, int width = warpSize)
+{
+    int lo = __double2loint(val);
+    int hi = __double2hiint(val);
+
+    lo = __shfl_xor(lo, laneMask, width);
+    hi = __shfl_xor(hi, laneMask, width);
+
+    return __hiloint2double(hi, lo);
+}
+
+#define CV_CUDEV_SHFL_XOR_VEC_INST(input_type) \
+    __device__ __forceinline__ input_type ## 1 shfl_xor(const input_type ## 1 & val, int laneMask, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 1>::make( \
+                        shfl_xor(val.x, laneMask, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 2 shfl_xor(const input_type ## 2 & val, int laneMask, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 2>::make( \
+                        shfl_xor(val.x, laneMask, width), \
+                        shfl_xor(val.y, laneMask, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 3 shfl_xor(const input_type ## 3 & val, int laneMask, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 3>::make( \
+                        shfl_xor(val.x, laneMask, width), \
+                        shfl_xor(val.y, laneMask, width), \
+                        shfl_xor(val.z, laneMask, width) \
+                    ); \
+    } \
+    __device__ __forceinline__ input_type ## 4 shfl_xor(const input_type ## 4 & val, int laneMask, int width = warpSize) \
+    { \
+        return VecTraits<input_type ## 4>::make( \
+                        shfl_xor(val.x, laneMask, width), \
+                        shfl_xor(val.y, laneMask, width), \
+                        shfl_xor(val.z, laneMask, width), \
+                        shfl_xor(val.w, laneMask, width) \
+                    ); \
+    }
+
+CV_CUDEV_SHFL_XOR_VEC_INST(uchar)
+CV_CUDEV_SHFL_XOR_VEC_INST(char)
+CV_CUDEV_SHFL_XOR_VEC_INST(ushort)
+CV_CUDEV_SHFL_XOR_VEC_INST(short)
+CV_CUDEV_SHFL_XOR_VEC_INST(uint)
+CV_CUDEV_SHFL_XOR_VEC_INST(int)
+CV_CUDEV_SHFL_XOR_VEC_INST(float)
+CV_CUDEV_SHFL_XOR_VEC_INST(double)
+
+#undef CV_CUDEV_SHFL_XOR_VEC_INST
+
+#endif // CV_CUDEV_ARCH >= 300
+
+}}
+
+#endif
diff --git a/modules/cudev/include/opencv2/cudev/warp/warp.hpp b/modules/cudev/include/opencv2/cudev/warp/warp.hpp
new file mode 100644
index 000000000..c7649880f
--- /dev/null
+++ b/modules/cudev/include/opencv2/cudev/warp/warp.hpp
@@ -0,0 +1,122 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma once
+
+#ifndef __OPENCV_CUDEV_WARP_WARP_HPP__
+#define __OPENCV_CUDEV_WARP_WARP_HPP__
+
+#include "../common.hpp"
+
+namespace cv { namespace cudev {
+
+enum
+{
+    LOG_WARP_SIZE = 5,
+    WARP_SIZE     = 1 << LOG_WARP_SIZE
+};
+
+struct Warp
+{
+    __device__ __forceinline__ static uint laneId()
+    {
+        uint ret;
+        asm("mov.u32 %0, %laneid;" : "=r"(ret));
+        return ret;
+    }
+
+    __device__ __forceinline__ static uint warpId()
+    {
+        const uint tid = (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
+        return tid / WARP_SIZE;
+    }
+};
+
+template <class It, typename T>
+__device__ __forceinline__ void warpFill(It beg, It end, const T& value)
+{
+    for(It t = beg + Warp::laneId(); t < end; t += WARP_SIZE)
+        *t = value;
+}
+
+template <class InIt, class OutIt>
+__device__ __forceinline__ OutIt warpCopy(InIt beg, InIt end, OutIt out)
+{
+    for(InIt t = beg + Warp::laneId(); t < end; t += WARP_SIZE, out += WARP_SIZE)
+        *out = *t;
+    return out;
+}
+
+template <class InIt, class OutIt, class UnOp>
+__device__ __forceinline__ OutIt warpTransform(InIt beg, InIt end, OutIt out, const UnOp& op)
+{
+    for(InIt t = beg + Warp::laneId(); t < end; t += WARP_SIZE, out += WARP_SIZE)
+        *out = op(*t);
+    return out;
+}
+
+template <class InIt1, class InIt2, class OutIt, class BinOp>
+__device__ __forceinline__ OutIt warpTransform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, const BinOp& op)
+{
+    uint lane = Warp::laneId();
+
+    InIt1 t1 = beg1 + lane;
+    InIt2 t2 = beg2 + lane;
+    for(; t1 < end1; t1 += WARP_SIZE, t2 += WARP_SIZE, out += WARP_SIZE)
+        *out = op(*t1, *t2);
+    return out;
+}
+
+template<typename OutIt, typename T>
+__device__ __forceinline__ void warpYota(OutIt beg, OutIt end, T value)
+{
+    uint lane = Warp::laneId();
+    value += lane;
+
+    for(OutIt t = beg + lane; t < end; t += WARP_SIZE, value += WARP_SIZE)
+        *t = value;
+}
+
+}}
+
+#endif
diff --git a/modules/cudev/src/stub.cpp b/modules/cudev/src/stub.cpp
new file mode 100644
index 000000000..ec060aded
--- /dev/null
+++ b/modules/cudev/src/stub.cpp
@@ -0,0 +1,11 @@
+#include <opencv2/core/cvdef.h>
+
+namespace cv { namespace cudev {
+
+CV_EXPORTS void stubFunc();
+
+}}
+
+void cv::cudev::stubFunc()
+{
+}
diff --git a/modules/cudev/test/CMakeLists.txt b/modules/cudev/test/CMakeLists.txt
new file mode 100644
index 000000000..438e0a64c
--- /dev/null
+++ b/modules/cudev/test/CMakeLists.txt
@@ -0,0 +1,48 @@
+set(test_deps opencv_cudev opencv_core opencv_imgproc opencv_highgui opencv_ts ${OPENCV_MODULE_opencv_ts_DEPS})
+
+ocv_check_dependencies(${test_deps})
+
+if(OCV_DEPENDENCIES_FOUND)
+  set(the_target "opencv_test_${name}")
+
+  ocv_module_include_directories("${test_deps}" "${the_module}")
+
+  file(GLOB test_srcs "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/*.cu")
+  file(GLOB test_hdrs "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp")
+  source_group("Src" FILES ${test_srcs})
+  source_group("Include" FILES ${test_hdrs})
+  set(OPENCV_TEST_${the_module}_SOURCES ${test_srcs} ${test_hdrs})
+
+  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
+    # we remove /EHa as it generates warnings under windows
+    string(REPLACE "/EHa" "" ${var} "${${var}}")
+
+    # we remove -ggdb3 flag as it leads to preprocessor errors when compiling CUDA files (CUDA 4.1)
+    string(REPLACE "-ggdb3" "" ${var} "${${var}}")
+
+    # we remove -Wsign-promo as it generates warnings under linux
+    string(REPLACE "-Wsign-promo" "" ${var} "${${var}}")
+
+    # we remove -fvisibility-inlines-hidden because it's used for C++ compiler
+    # but NVCC uses C compiler by default
+    string(REPLACE "-fvisibility-inlines-hidden" "" ${var} "${${var}}")
+  endforeach()
+
+  CUDA_ADD_EXECUTABLE(${the_target} ${OPENCV_TEST_${the_module}_SOURCES})
+  target_link_libraries(${the_target} ${test_deps} ${OPENCV_LINKER_LIBS} ${CUDA_LIBRARIES})
+  add_dependencies(opencv_tests ${the_target})
+
+  # Additional target properties
+  set_target_properties(${the_target} PROPERTIES
+    DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
+    RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}"
+  )
+
+  if(ENABLE_SOLUTION_FOLDERS)
+    set_target_properties(${the_target} PROPERTIES FOLDER "tests accuracy")
+  endif()
+
+  enable_testing()
+  get_target_property(LOC ${the_target} LOCATION)
+  add_test(${the_target} "${LOC}")
+endif()
diff --git a/modules/cudev/test/test_arithm_func.cu b/modules/cudev/test/test_arithm_func.cu
new file mode 100644
index 000000000..bb73b0452
--- /dev/null
+++ b/modules/cudev/test/test_arithm_func.cu
@@ -0,0 +1,168 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+////////////////////////////////////////////////////////////////////////////////
+// SqrtTest
+
+template <typename T>
+class SqrtTest : public ::testing::Test
+{
+public:
+    void test_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+
+        GpuMat_<T> dst = sqrt_(d_src);
+
+        Mat dst_gold;
+        cv::sqrt(src, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_expr()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst = sqrt_(d_src1 * d_src2);
+
+        Mat dst_gold;
+        cv::multiply(src1, src2, dst_gold);
+        cv::sqrt(dst_gold, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(SqrtTest, float);
+
+TYPED_TEST(SqrtTest, GpuMat)
+{
+    SqrtTest<TypeParam>::test_gpumat();
+}
+
+TYPED_TEST(SqrtTest, Expr)
+{
+    SqrtTest<TypeParam>::test_expr();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MagnitudeTest
+
+template <typename T>
+class MagnitudeTest : public ::testing::Test
+{
+public:
+    void test_accuracy()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst1 = hypot_(d_src1, d_src2);
+        GpuMat_<T> dst2 = magnitude_(d_src1, d_src2);
+        GpuMat_<T> dst3 = sqrt_(sqr_(d_src1) + sqr_(d_src2));
+
+        EXPECT_MAT_NEAR(dst1, dst2, 1e-4);
+        EXPECT_MAT_NEAR(dst2, dst3, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(MagnitudeTest, float);
+
+TYPED_TEST(MagnitudeTest, Accuracy)
+{
+    MagnitudeTest<TypeParam>::test_accuracy();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// PowTest
+
+template <typename T>
+class PowTest : public ::testing::Test
+{
+public:
+    void test_accuracy()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+
+        GpuMat_<T> dst1 = pow_(d_src, 0.5);
+        GpuMat_<T> dst2 = sqrt_(d_src);
+
+        EXPECT_MAT_NEAR(dst1, dst2, 1e-5);
+    }
+};
+
+TYPED_TEST_CASE(PowTest, float);
+
+TYPED_TEST(PowTest, Accuracy)
+{
+    PowTest<TypeParam>::test_accuracy();
+}
diff --git a/modules/cudev/test/test_arithm_op.cu b/modules/cudev/test/test_arithm_op.cu
new file mode 100644
index 000000000..a904c54c7
--- /dev/null
+++ b/modules/cudev/test/test_arithm_op.cu
@@ -0,0 +1,395 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+typedef ::testing::Types<uchar, ushort, short, int, float> AllTypes;
+typedef ::testing::Types<short, int, float> SignedTypes;
+
+////////////////////////////////////////////////////////////////////////////////
+// UnaryMinusTest
+
+template <typename T>
+class UnaryMinusTest : public ::testing::Test
+{
+public:
+    void test_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+
+        GpuMat_<T> dst = -d_src;
+
+        Mat dst_gold;
+        src.convertTo(dst_gold, src.depth(), -1);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_globptr()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+        GlobPtrSz<T> d_src_ptr = d_src;
+
+        GpuMat_<T> dst = -d_src_ptr;
+
+        Mat dst_gold;
+        src.convertTo(dst_gold, src.depth(), -1);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_texptr()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+        Texture<T> tex_src(d_src);
+
+        GpuMat_<T> dst = -tex_src;
+
+        Mat dst_gold;
+        src.convertTo(dst_gold, src.depth(), -1);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_expr()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst = -(d_src1 + d_src2);
+
+        Mat dst_gold;
+        cv::add(src1, src2, dst_gold);
+        dst_gold.convertTo(dst_gold, dst_gold.depth(), -1);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(UnaryMinusTest, SignedTypes);
+
+TYPED_TEST(UnaryMinusTest, GpuMat)
+{
+    UnaryMinusTest<TypeParam>::test_gpumat();
+}
+
+TYPED_TEST(UnaryMinusTest, GlobPtrSz)
+{
+    UnaryMinusTest<TypeParam>::test_globptr();
+}
+
+TYPED_TEST(UnaryMinusTest, TexturePtr)
+{
+    UnaryMinusTest<TypeParam>::test_texptr();
+}
+
+TYPED_TEST(UnaryMinusTest, Expr)
+{
+    UnaryMinusTest<TypeParam>::test_expr();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// PlusTest
+
+template <typename T>
+class PlusTest : public ::testing::Test
+{
+public:
+    void test_gpumat_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst = d_src1 + d_src2;
+
+        Mat dst_gold;
+        cv::add(src1, src2, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_texptr_scalar()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+        Texture<T> tex_src(d_src);
+
+        GpuMat_<T> dst = tex_src + static_cast<T>(5);
+
+        Mat dst_gold;
+        cv::add(src, 5, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_expr_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+        Mat src3 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2), d_src3(src3);
+
+        GpuMat_<T> dst = d_src1 + d_src2 + d_src3;
+
+        Mat dst_gold;
+        cv::add(src1, src2, dst_gold);
+        cv::add(dst_gold, src3, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_scalar_expr()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst = static_cast<T>(5) + (d_src1 + d_src2);
+
+        Mat dst_gold;
+        cv::add(src1, src2, dst_gold);
+        cv::add(dst_gold, 5, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(PlusTest, AllTypes);
+
+TYPED_TEST(PlusTest, GpuMat_GpuMat)
+{
+    PlusTest<TypeParam>::test_gpumat_gpumat();
+}
+
+TYPED_TEST(PlusTest, TexturePtr_Scalar)
+{
+    PlusTest<TypeParam>::test_texptr_scalar();
+}
+
+TYPED_TEST(PlusTest, Expr_GpuMat)
+{
+    PlusTest<TypeParam>::test_expr_gpumat();
+}
+
+TYPED_TEST(PlusTest, Scalar_Expr)
+{
+    PlusTest<TypeParam>::test_scalar_expr();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MinusTest
+
+template <typename T>
+class MinusTest : public ::testing::Test
+{
+public:
+    void test_gpumat_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst = d_src1 - d_src2;
+
+        Mat dst_gold;
+        cv::subtract(src1, src2, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_texptr_scalar()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+        Texture<T> tex_src(d_src);
+
+        GpuMat_<T> dst = tex_src - static_cast<T>(5);
+
+        Mat dst_gold;
+        cv::subtract(src, 5, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_expr_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+        Mat src3 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2), d_src3(src3);
+
+        GpuMat_<T> dst = (d_src1 + d_src2) - d_src3;
+
+        Mat dst_gold;
+        cv::add(src1, src2, dst_gold);
+        cv::subtract(dst_gold, src3, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_scalar_expr()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst = static_cast<T>(5) - (d_src1 + d_src2);
+
+        Mat dst_gold;
+        cv::add(src1, src2, dst_gold);
+        cv::subtract(5, dst_gold, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(MinusTest, SignedTypes);
+
+TYPED_TEST(MinusTest, GpuMat_GpuMat)
+{
+    MinusTest<TypeParam>::test_gpumat_gpumat();
+}
+
+TYPED_TEST(MinusTest, TexturePtr_Scalar)
+{
+    MinusTest<TypeParam>::test_texptr_scalar();
+}
+
+TYPED_TEST(MinusTest, Expr_GpuMat)
+{
+    MinusTest<TypeParam>::test_expr_gpumat();
+}
+
+TYPED_TEST(MinusTest, Scalar_Expr)
+{
+    MinusTest<TypeParam>::test_scalar_expr();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// AbsDiffTest
+
+template <typename T>
+class AbsDiffTest : public ::testing::Test
+{
+public:
+    void test_accuracy()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst1 = absdiff_(d_src1, d_src2);
+        GpuMat_<T> dst2 = abs_(d_src1 - d_src2);
+
+        EXPECT_MAT_NEAR(dst1, dst2, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(AbsDiffTest, SignedTypes);
+
+TYPED_TEST(AbsDiffTest, Accuracy)
+{
+    AbsDiffTest<TypeParam>::test_accuracy();
+}
diff --git a/modules/cudev/test/test_bitwize_op.cu b/modules/cudev/test/test_bitwize_op.cu
new file mode 100644
index 000000000..908d46d66
--- /dev/null
+++ b/modules/cudev/test/test_bitwize_op.cu
@@ -0,0 +1,146 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+typedef ::testing::Types<uchar, ushort, short, int> IntTypes;
+
+////////////////////////////////////////////////////////////////////////////////
+// BitNotTest
+
+template <typename T>
+class BitNotTest : public ::testing::Test
+{
+public:
+    void test_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+
+        GpuMat_<T> dst = ~d_src;
+
+        Mat dst_gold;
+        cv::bitwise_not(src, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(BitNotTest, IntTypes);
+
+TYPED_TEST(BitNotTest, GpuMat)
+{
+    BitNotTest<TypeParam>::test_gpumat();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// BitAndTest
+
+template <typename T>
+class BitAndTest : public ::testing::Test
+{
+public:
+    void test_gpumat_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst = d_src1 & d_src2;
+
+        Mat dst_gold;
+        cv::bitwise_and(src1, src2, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(BitAndTest, IntTypes);
+
+TYPED_TEST(BitAndTest, GpuMat_GpuMat)
+{
+    BitAndTest<TypeParam>::test_gpumat_gpumat();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// LShiftTest
+
+template <typename T>
+class LShiftTest : public ::testing::Test
+{
+public:
+    void test_accuracy()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+
+        GpuMat_<T> dst1 = d_src << 2;
+        GpuMat_<T> dst2 = d_src * 4;
+
+        EXPECT_MAT_NEAR(dst1, dst2, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(LShiftTest, int);
+
+TYPED_TEST(LShiftTest, Accuracy)
+{
+    LShiftTest<TypeParam>::test_accuracy();
+}
diff --git a/modules/cudev/test/test_cmp_op.cu b/modules/cudev/test/test_cmp_op.cu
new file mode 100644
index 000000000..4d557b652
--- /dev/null
+++ b/modules/cudev/test/test_cmp_op.cu
@@ -0,0 +1,151 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+typedef ::testing::Types<uchar, ushort, short, int, float> AllTypes;
+
+////////////////////////////////////////////////////////////////////////////////
+// LessTest
+
+template <typename T>
+class LessTest : public ::testing::Test
+{
+public:
+    void test_gpumat_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<uchar> dst = (d_src1 < d_src2) * 255;
+
+        Mat dst_gold;
+        cv::compare(src1, src2, dst_gold, CMP_LT);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(LessTest, AllTypes);
+
+TYPED_TEST(LessTest, GpuMat_GpuMat)
+{
+    LessTest<TypeParam>::test_gpumat_gpumat();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MinTest
+
+template <typename T>
+class MinTest : public ::testing::Test
+{
+public:
+    void test_gpumat_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, type);
+        Mat src2 = randomMat(size, type);
+
+        GpuMat_<T> d_src1(src1), d_src2(src2);
+
+        GpuMat_<T> dst = min_(d_src1, d_src2);
+
+        Mat dst_gold;
+        cv::min(src1, src2, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(MinTest, AllTypes);
+
+TYPED_TEST(MinTest, GpuMat_GpuMat)
+{
+    MinTest<TypeParam>::test_gpumat_gpumat();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// ThreshBinaryTest
+
+typedef ::testing::Types<uchar, short, float> ThreshTypes;
+
+template <typename T>
+class ThreshBinaryTest : public ::testing::Test
+{
+public:
+    void test_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+
+        GpuMat_<T> dst = threshBinary_(d_src, 128, 0);
+
+        Mat dst_gold;
+        cv::threshold(src, dst_gold, 128, 0, THRESH_BINARY);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(ThreshBinaryTest, ThreshTypes);
+
+TYPED_TEST(ThreshBinaryTest, GpuMat)
+{
+    ThreshBinaryTest<TypeParam>::test_gpumat();
+}
diff --git a/modules/cudev/test/test_color_cvt.cu b/modules/cudev/test/test_color_cvt.cu
new file mode 100644
index 000000000..70d904b49
--- /dev/null
+++ b/modules/cudev/test/test_color_cvt.cu
@@ -0,0 +1,180 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+namespace cv {
+
+enum {
+    COLOR_BGR2BGR = COLOR_BGR2RGB,
+    COLOR_BGR2LRGB = COLOR_BGR2RGB,
+    COLOR_BGR2LBGR = COLOR_BGR2RGB
+};
+
+}
+
+#define CVT_COLOR_TEST(src_space, dst_space, src_cn, dst_cn) \
+    TEST(CvtColor, src_space ## _to_ ## dst_space) \
+    { \
+        const Size size = randomSize(100, 400); \
+        Mat bgrb = randomMat(size, CV_8UC3); \
+        Mat srcb; \
+        cv::cvtColor(bgrb, srcb, COLOR_BGR ## 2 ## src_space, src_cn); \
+        GpuMat_<SelectIf<src_cn == 1, uchar, uchar ## src_cn>::type> d_srcb(srcb); \
+        GpuMat_<SelectIf<dst_cn == 1, uchar, uchar ## dst_cn>::type> dstb = src_space ## _to_ ## dst_space ## _(d_srcb); \
+        Mat dstb_gold; \
+        cv::cvtColor(srcb, dstb_gold, COLOR_ ## src_space ## 2 ## dst_space); \
+        EXPECT_MAT_NEAR(dstb_gold, dstb, 1.0); \
+        Mat bgrf = randomMat(size, CV_32FC3, 0, 1); \
+        Mat srcf; \
+        cv::cvtColor(bgrf, srcf, COLOR_BGR ## 2 ## src_space, src_cn); \
+        GpuMat_<SelectIf<src_cn == 1, float, float ## src_cn>::type> d_srcf(srcf); \
+        GpuMat_<SelectIf<dst_cn == 1, float, float ## dst_cn>::type> dstf = src_space ## _to_ ## dst_space ## _(d_srcf); \
+        Mat dstf_gold; \
+        cv::cvtColor(srcf, dstf_gold, COLOR_ ## src_space ## 2 ## dst_space); \
+        EXPECT_MAT_NEAR(dstf_gold, dstf, 1.0); \
+    }
+
+// RGB <-> BGR
+
+CVT_COLOR_TEST(BGR, RGB, 3, 3)
+CVT_COLOR_TEST(BGR, BGRA, 3, 4)
+CVT_COLOR_TEST(BGR, RGBA, 3, 4)
+CVT_COLOR_TEST(BGRA, BGR, 4, 3)
+CVT_COLOR_TEST(BGRA, RGB, 4, 3)
+CVT_COLOR_TEST(BGRA, RGBA, 4, 4)
+
+// RGB <-> Gray
+
+CVT_COLOR_TEST(BGR, GRAY, 3, 1)
+CVT_COLOR_TEST(RGB, GRAY, 3, 1)
+CVT_COLOR_TEST(BGRA, GRAY, 4, 1)
+CVT_COLOR_TEST(RGBA, GRAY, 4, 1)
+
+CVT_COLOR_TEST(GRAY, BGR, 1, 3)
+CVT_COLOR_TEST(GRAY, BGRA, 1, 4)
+
+// RGB <-> YUV
+
+CVT_COLOR_TEST(RGB, YUV, 3, 3)
+CVT_COLOR_TEST(BGR, YUV, 3, 3)
+
+CVT_COLOR_TEST(YUV, RGB, 3, 3)
+CVT_COLOR_TEST(YUV, BGR, 3, 3)
+
+// RGB <-> YCrCb
+
+CVT_COLOR_TEST(RGB, YCrCb, 3, 3)
+CVT_COLOR_TEST(BGR, YCrCb, 3, 3)
+
+CVT_COLOR_TEST(YCrCb, RGB, 3, 3)
+CVT_COLOR_TEST(YCrCb, BGR, 3, 3)
+
+// RGB <-> XYZ
+
+CVT_COLOR_TEST(RGB, XYZ, 3, 3)
+CVT_COLOR_TEST(BGR, XYZ, 3, 3)
+
+CVT_COLOR_TEST(XYZ, RGB, 3, 3)
+CVT_COLOR_TEST(XYZ, BGR, 3, 3)
+
+// RGB <-> HSV
+
+CVT_COLOR_TEST(RGB, HSV, 3, 3)
+CVT_COLOR_TEST(BGR, HSV, 3, 3)
+
+CVT_COLOR_TEST(HSV, RGB, 3, 3)
+CVT_COLOR_TEST(HSV, BGR, 3, 3)
+
+CVT_COLOR_TEST(RGB, HSV_FULL, 3, 3)
+CVT_COLOR_TEST(BGR, HSV_FULL, 3, 3)
+
+CVT_COLOR_TEST(HSV, RGB_FULL, 3, 3)
+CVT_COLOR_TEST(HSV, BGR_FULL, 3, 3)
+
+// RGB <-> HLS
+
+CVT_COLOR_TEST(RGB, HLS, 3, 3)
+CVT_COLOR_TEST(BGR, HLS, 3, 3)
+
+CVT_COLOR_TEST(HLS, RGB, 3, 3)
+CVT_COLOR_TEST(HLS, BGR, 3, 3)
+
+CVT_COLOR_TEST(RGB, HLS_FULL, 3, 3)
+CVT_COLOR_TEST(BGR, HLS_FULL, 3, 3)
+
+CVT_COLOR_TEST(HLS, RGB_FULL, 3, 3)
+CVT_COLOR_TEST(HLS, BGR_FULL, 3, 3)
+
+// RGB <-> Lab
+
+CVT_COLOR_TEST(RGB, Lab, 3, 3)
+CVT_COLOR_TEST(BGR, Lab, 3, 3)
+
+CVT_COLOR_TEST(Lab, RGB, 3, 3)
+CVT_COLOR_TEST(Lab, BGR, 3, 3)
+
+CVT_COLOR_TEST(LRGB, Lab, 3, 3)
+CVT_COLOR_TEST(LBGR, Lab, 3, 3)
+
+CVT_COLOR_TEST(Lab, LRGB, 3, 3)
+CVT_COLOR_TEST(Lab, LBGR, 3, 3)
+
+// RGB <-> Luv
+
+CVT_COLOR_TEST(RGB, Luv, 3, 3)
+CVT_COLOR_TEST(BGR, Luv, 3, 3)
+
+CVT_COLOR_TEST(Luv, RGB, 3, 3)
+CVT_COLOR_TEST(Luv, BGR, 3, 3)
+
+CVT_COLOR_TEST(LRGB, Luv, 3, 3)
+CVT_COLOR_TEST(LBGR, Luv, 3, 3)
+
+CVT_COLOR_TEST(Luv, LRGB, 3, 3)
+CVT_COLOR_TEST(Luv, LBGR, 3, 3)
diff --git a/modules/cudev/test/test_cvt.cu b/modules/cudev/test/test_cvt.cu
new file mode 100644
index 000000000..bdfa493bd
--- /dev/null
+++ b/modules/cudev/test/test_cvt.cu
@@ -0,0 +1,83 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+typedef ::testing::Types<uchar, ushort, short, int, float> AllTypes;
+
+////////////////////////////////////////////////////////////////////////////////
+// CvtTest
+
+template <typename T>
+class CvtTest : public ::testing::Test
+{
+public:
+    void test_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+
+        GpuMat_<T> d_src(src);
+
+        GpuMat_<T> dst = cvt_<T>(cvt_<float>(d_src) * 2.0f - 10.0f);
+
+        Mat dst_gold;
+        src.convertTo(dst_gold, src.depth(), 2, -10);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(CvtTest, AllTypes);
+
+TYPED_TEST(CvtTest, GpuMat)
+{
+    CvtTest<TypeParam>::test_gpumat();
+}
diff --git a/modules/cudev/test/test_deriv.cu b/modules/cudev/test/test_deriv.cu
new file mode 100644
index 000000000..8ef9fb76a
--- /dev/null
+++ b/modules/cudev/test/test_deriv.cu
@@ -0,0 +1,109 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+TEST(Sobel, Accuracy)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+    Texture<uchar> tex_src(d_src);
+
+    GpuMat_<short> dx = sobelX_(cvt_<int>(tex_src));
+    GpuMat_<short> dy = sobelY_(cvt_<int>(tex_src));
+
+    Mat dx_gold, dy_gold;
+    cv::Sobel(src, dx_gold, CV_16S, 1, 0, 3, 1, 0, BORDER_REPLICATE);
+    cv::Sobel(src, dy_gold, CV_16S, 0, 1, 3, 1, 0, BORDER_REPLICATE);
+
+    EXPECT_MAT_NEAR(dx_gold, dx, 0.0);
+    EXPECT_MAT_NEAR(dy_gold, dy, 0.0);
+}
+
+TEST(Scharr, Accuracy)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+    Texture<uchar> tex_src(d_src);
+
+    GpuMat_<short> dx = scharrX_(cvt_<int>(tex_src));
+    GpuMat_<short> dy = scharrY_(cvt_<int>(tex_src));
+
+    Mat dx_gold, dy_gold;
+    cv::Scharr(src, dx_gold, CV_16S, 1, 0, 1, 0, BORDER_REPLICATE);
+    cv::Scharr(src, dy_gold, CV_16S, 0, 1, 1, 0, BORDER_REPLICATE);
+
+    EXPECT_MAT_NEAR(dx_gold, dx, 0.0);
+    EXPECT_MAT_NEAR(dy_gold, dy, 0.0);
+}
+
+TEST(Laplacian, Accuracy)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+    Texture<uchar> tex_src(d_src);
+
+    GpuMat_<short> dst1 = laplacian_<1>(cvt_<int>(tex_src));
+    GpuMat_<short> dst3 = laplacian_<3>(cvt_<int>(tex_src));
+
+    Mat dst1_gold, dst3_gold;
+    cv::Laplacian(src, dst1_gold, CV_16S, 1, 1, 0, BORDER_REPLICATE);
+    cv::Laplacian(src, dst3_gold, CV_16S, 3, 1, 0, BORDER_REPLICATE);
+
+    EXPECT_MAT_NEAR(dst1_gold, dst1, 0.0);
+    EXPECT_MAT_NEAR(dst3_gold, dst3, 0.0);
+}
diff --git a/modules/cudev/test/test_integral.cu b/modules/cudev/test/test_integral.cu
new file mode 100644
index 000000000..190fc354a
--- /dev/null
+++ b/modules/cudev/test/test_integral.cu
@@ -0,0 +1,103 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+TEST(Integral, _8u)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<uint> dst = integral_(d_src);
+
+    Mat dst_gold;
+    cv::integral(src, dst_gold);
+
+    dst_gold = dst_gold(Rect(1, 1, size.width, size.height));
+
+    ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST(Integral, _32f)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_32FC1, 0, 1);
+
+    GpuMat_<float> d_src(src);
+
+    GpuMat_<float> dst = integral_(d_src);
+
+    Mat dst_gold;
+    cv::integral(src, dst_gold, CV_32F);
+
+    dst_gold = dst_gold(Rect(1, 1, size.width, size.height));
+
+    ASSERT_PRED_FORMAT2(cvtest::MatComparator(1e-5, 0), dst_gold, Mat(dst));
+}
+
+TEST(Integral, _8u_opt)
+{
+    const Size size(640, 480);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<uint> dst = integral_(d_src);
+
+    Mat dst_gold;
+    cv::integral(src, dst_gold);
+
+    dst_gold = dst_gold(Rect(1, 1, size.width, size.height));
+
+    ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
+}
diff --git a/modules/cudev/test/test_lut.cu b/modules/cudev/test/test_lut.cu
new file mode 100644
index 000000000..d2548ec24
--- /dev/null
+++ b/modules/cudev/test/test_lut.cu
@@ -0,0 +1,82 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+////////////////////////////////////////////////////////////////////////////////
+// LutTest
+
+template <typename T>
+class LutTest : public ::testing::Test
+{
+public:
+    void test_gpumat()
+    {
+        const Size size = randomSize(100, 400);
+        const int type = DataType<T>::type;
+
+        Mat src = randomMat(size, type);
+        Mat tbl = randomMat(Size(256, 1), type);
+
+        GpuMat_<T> d_src(src), d_tbl(tbl);
+
+        GpuMat_<T> dst = lut_(d_src, d_tbl);
+
+        Mat dst_gold;
+        cv::LUT(src, tbl, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(LutTest, uchar);
+
+TYPED_TEST(LutTest, GpuMat)
+{
+    LutTest<TypeParam>::test_gpumat();
+}
diff --git a/modules/stitching/src/precomp.cpp b/modules/cudev/test/test_main.cpp
similarity index 95%
rename from modules/stitching/src/precomp.cpp
rename to modules/cudev/test/test_main.cpp
index 390dbfbc6..fc7f8a3b3 100644
--- a/modules/stitching/src/precomp.cpp
+++ b/modules/cudev/test/test_main.cpp
@@ -12,6 +12,7 @@
 //
 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -40,4 +41,6 @@
 //
 //M*/
 
-#include "precomp.hpp"
\ No newline at end of file
+#include "opencv2/ts.hpp"
+
+CV_TEST_MAIN("cv")
diff --git a/modules/cudev/test/test_precomp.hpp b/modules/cudev/test/test_precomp.hpp
new file mode 100644
index 000000000..18e7cc1ce
--- /dev/null
+++ b/modules/cudev/test/test_precomp.hpp
@@ -0,0 +1,57 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/cudev.hpp"
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "cvconfig.h"
+
+#endif
diff --git a/modules/gpubgsegm/test/test_precomp.cpp b/modules/cudev/test/test_pyramids.cu
similarity index 73%
rename from modules/gpubgsegm/test/test_precomp.cpp
rename to modules/cudev/test/test_pyramids.cu
index 0fb652180..c196c923c 100644
--- a/modules/gpubgsegm/test/test_precomp.cpp
+++ b/modules/cudev/test/test_pyramids.cu
@@ -7,11 +7,12 @@
 //  copy or use the software.
 //
 //
-//                           License Agreement
+//                          License Agreement
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -41,3 +42,40 @@
 //M*/
 
 #include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+TEST(PyrDown, _8uc1)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<uchar> dst = pyrDown_(d_src);
+
+    Mat dst_gold;
+    cv::pyrDown(src, dst_gold);
+
+    ASSERT_MAT_NEAR(dst_gold, dst, 1.0);
+}
+
+TEST(PyrUp, _32fc4)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_32FC4);
+
+    GpuMat_<float4> d_src(src);
+
+    GpuMat_<float4> dst = pyrDown_(d_src);
+
+    Mat dst_gold;
+    cv::pyrDown(src, dst_gold);
+
+    ASSERT_MAT_NEAR(dst_gold, dst, 1e-4);
+}
diff --git a/modules/cudev/test/test_reduction.cu b/modules/cudev/test/test_reduction.cu
new file mode 100644
index 000000000..22cadbebc
--- /dev/null
+++ b/modules/cudev/test/test_reduction.cu
@@ -0,0 +1,312 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+TEST(Sum, GpuMat)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<float> dst = sum_(d_src);
+    float res;
+    dst.download(_OutputArray(&res, 1));
+
+    Scalar dst_gold = cv::sum(src);
+
+    ASSERT_FLOAT_EQ(static_cast<float>(dst_gold[0]), res);
+}
+
+TEST(Sum, Expr)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src1 = randomMat(size, CV_32FC1, 0, 1);
+    Mat src2 = randomMat(size, CV_32FC1, 0, 1);
+
+    GpuMat_<float> d_src1(src1), d_src2(src2);
+
+    GpuMat_<float> dst = sum_(abs_(d_src1 - d_src2));
+    float res;
+    dst.download(_OutputArray(&res, 1));
+
+    Scalar dst_gold = cv::norm(src1, src2, NORM_L1);
+
+    ASSERT_FLOAT_EQ(static_cast<float>(dst_gold[0]), res);
+}
+
+TEST(MinVal, GpuMat)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<float> dst = minVal_(d_src);
+    float res;
+    dst.download(_OutputArray(&res, 1));
+
+    double res_gold;
+    cv::minMaxLoc(src, &res_gold, 0);
+
+    ASSERT_FLOAT_EQ(static_cast<float>(res_gold), res);
+}
+
+TEST(MaxVal, Expr)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src1 = randomMat(size, CV_32SC1);
+    Mat src2 = randomMat(size, CV_32SC1);
+
+    GpuMat_<int> d_src1(src1), d_src2(src2);
+
+    GpuMat_<float> dst = maxVal_(abs_(d_src1 - d_src2));
+    float res;
+    dst.download(_OutputArray(&res, 1));
+
+    double res_gold = cv::norm(src1, src2, NORM_INF);
+
+    ASSERT_FLOAT_EQ(static_cast<float>(res_gold), res);
+}
+
+TEST(MinMaxVal, GpuMat)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<float> dst = minMaxVal_(d_src);
+    float res[2];
+    dst.download(Mat(1, 2, CV_32FC1, res));
+
+    double res_gold[2];
+    cv::minMaxLoc(src, &res_gold[0], &res_gold[1]);
+
+    ASSERT_FLOAT_EQ(static_cast<float>(res_gold[0]), res[0]);
+    ASSERT_FLOAT_EQ(static_cast<float>(res_gold[1]), res[1]);
+}
+
+TEST(NonZeroCount, Accuracy)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1, 0, 5);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<int> dst1 = countNonZero_(d_src);
+    GpuMat_<int> dst2 = sum_(cvt_<int>(d_src) != 0);
+
+    EXPECT_MAT_NEAR(dst1, dst2, 0.0);
+}
+
+TEST(ReduceToRow, Sum)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<int> dst = reduceToRow_<Sum<int> >(d_src);
+
+    Mat dst_gold;
+    cv::reduce(src, dst_gold, 0, REDUCE_SUM, CV_32S);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST(ReduceToRow, Avg)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<float> dst = reduceToRow_<Avg<float> >(d_src);
+
+    Mat dst_gold;
+    cv::reduce(src, dst_gold, 0, REDUCE_AVG, CV_32F);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
+}
+
+TEST(ReduceToRow, Min)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<uchar> dst = reduceToRow_<Min<uchar> >(d_src);
+
+    Mat dst_gold;
+    cv::reduce(src, dst_gold, 0, REDUCE_MIN);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST(ReduceToRow, Max)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<uchar> dst = reduceToRow_<Max<uchar> >(d_src);
+
+    Mat dst_gold;
+    cv::reduce(src, dst_gold, 0, REDUCE_MAX);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST(ReduceToColumn, Sum)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<int> dst = reduceToColumn_<Sum<int> >(d_src);
+
+    Mat dst_gold;
+    cv::reduce(src, dst_gold, 1, REDUCE_SUM, CV_32S);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST(ReduceToColumn, Avg)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<float> dst = reduceToColumn_<Avg<float> >(d_src);
+
+    Mat dst_gold;
+    cv::reduce(src, dst_gold, 1, REDUCE_AVG, CV_32F);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
+}
+
+TEST(ReduceToColumn, Min)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<uchar> dst = reduceToColumn_<Min<uchar> >(d_src);
+
+    Mat dst_gold;
+    cv::reduce(src, dst_gold, 1, REDUCE_MIN);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST(ReduceToColumn, Max)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<uchar> dst = reduceToColumn_<Max<uchar> >(d_src);
+
+    Mat dst_gold;
+    cv::reduce(src, dst_gold, 1, REDUCE_MAX);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+static void calcHistGold(const cv::Mat& src, cv::Mat& hist)
+{
+    hist.create(1, 256, CV_32SC1);
+    hist.setTo(cv::Scalar::all(0));
+
+    int* hist_row = hist.ptr<int>();
+    for (int y = 0; y < src.rows; ++y)
+    {
+        const uchar* src_row = src.ptr(y);
+
+        for (int x = 0; x < src.cols; ++x)
+            ++hist_row[src_row[x]];
+    }
+}
+
+TEST(Histogram, GpuMat)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<int> dst = histogram_<256>(d_src);
+
+    Mat dst_gold;
+    calcHistGold(src, dst_gold);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
diff --git a/modules/cudev/test/test_split_merge.cu b/modules/cudev/test/test_split_merge.cu
new file mode 100644
index 000000000..3af24f588
--- /dev/null
+++ b/modules/cudev/test/test_split_merge.cu
@@ -0,0 +1,180 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+typedef ::testing::Types<uchar, ushort, short, int, float> AllTypes;
+
+////////////////////////////////////////////////////////////////////////////////
+// MergeTest
+
+template <typename T>
+class MergeTest : public ::testing::Test
+{
+public:
+    void test_c2()
+    {
+        const Size size = randomSize(100, 400);
+
+        const int src_type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, src_type);
+        Mat src2 = randomMat(size, src_type);
+
+        GpuMat_<T> d_src1(src1);
+        GpuMat_<T> d_src2(src2);
+
+        GpuMat_<typename MakeVec<T, 2>::type> dst;
+        gridMerge(zipPtr(d_src1, d_src2), dst);
+
+        Mat dst_gold;
+        Mat srcs[] = {src1, src2};
+        cv::merge(srcs, 2, dst_gold);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+
+    void test_c3()
+    {
+        const Size size = randomSize(100, 400);
+
+        const int src_type = DataType<T>::type;
+
+        Mat src1 = randomMat(size, src_type);
+        Mat src2 = randomMat(size, src_type);
+        Mat src3 = randomMat(size, src_type);
+
+        GpuMat_<T> d_src1(src1);
+        GpuMat_<T> d_src2(src2);
+        GpuMat_<T> d_src3(src3);
+
+        GpuMat_<typename MakeVec<T, 3>::type> dst;
+        gridMerge(zipPtr(d_src1, d_src2, d_src3), dst);
+
+        Mat dst_gold;
+        Mat srcs[] = {src1, src2, src3};
+        cv::merge(srcs, 3, dst_gold);
+
+        ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(MergeTest, AllTypes);
+
+TYPED_TEST(MergeTest, C2)
+{
+    MergeTest<TypeParam>::test_c2();
+}
+
+TYPED_TEST(MergeTest, C3)
+{
+    MergeTest<TypeParam>::test_c3();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// SplitTest
+
+template <typename T>
+class SplitTest : public ::testing::Test
+{
+public:
+    void test_c3()
+    {
+        const Size size = randomSize(100, 400);
+
+        const int src_type = CV_MAKE_TYPE(DataType<T>::depth, 3);
+
+        Mat src = randomMat(size, src_type);
+
+        GpuMat_<typename MakeVec<T, 3>::type> d_src(src);
+
+        GpuMat_<T> dst1, dst2, dst3;
+        gridSplit(d_src, tie(dst1, dst2, dst3));
+
+        std::vector<Mat> dst;
+        cv::split(src, dst);
+
+        ASSERT_MAT_NEAR(dst[0], dst1, 0.0);
+        ASSERT_MAT_NEAR(dst[1], dst2, 0.0);
+        ASSERT_MAT_NEAR(dst[2], dst3, 0.0);
+    }
+
+    void test_c4()
+    {
+        const Size size = randomSize(100, 400);
+
+        const int src_type = CV_MAKE_TYPE(DataType<T>::depth, 4);
+
+        Mat src = randomMat(size, src_type);
+
+        GpuMat_<typename MakeVec<T, 4>::type> d_src(src);
+
+        GpuMat_<T> dst1, dst2, dst3, dst4;
+        gridSplit(d_src, tie(dst1, dst2, dst3, dst4));
+
+        std::vector<Mat> dst;
+        cv::split(src, dst);
+
+        ASSERT_MAT_NEAR(dst[0], dst1, 0.0);
+        ASSERT_MAT_NEAR(dst[1], dst2, 0.0);
+        ASSERT_MAT_NEAR(dst[2], dst3, 0.0);
+        ASSERT_MAT_NEAR(dst[3], dst4, 0.0);
+    }
+};
+
+TYPED_TEST_CASE(SplitTest, AllTypes);
+
+TYPED_TEST(SplitTest, C3)
+{
+    SplitTest<TypeParam>::test_c3();
+}
+
+TYPED_TEST(SplitTest, C4)
+{
+    SplitTest<TypeParam>::test_c4();
+}
diff --git a/modules/cudev/test/test_warp.cu b/modules/cudev/test/test_warp.cu
new file mode 100644
index 000000000..8777867bf
--- /dev/null
+++ b/modules/cudev/test/test_warp.cu
@@ -0,0 +1,256 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+// remap
+
+enum { HALF_SIZE=0, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH };
+
+static void generateMap(Mat& mapx, Mat& mapy, int remapMode)
+{
+    for (int j = 0; j < mapx.rows; ++j)
+    {
+        for (int i = 0; i < mapx.cols; ++i)
+        {
+            switch (remapMode)
+            {
+            case HALF_SIZE:
+                if (i > mapx.cols*0.25 && i < mapx.cols*0.75 && j > mapx.rows*0.25 && j < mapx.rows*0.75)
+                {
+                    mapx.at<float>(j,i) = 2.f * (i - mapx.cols * 0.25f) + 0.5f;
+                    mapy.at<float>(j,i) = 2.f * (j - mapx.rows * 0.25f) + 0.5f;
+                }
+                else
+                {
+                    mapx.at<float>(j,i) = 0.f;
+                    mapy.at<float>(j,i) = 0.f;
+                }
+                break;
+            case UPSIDE_DOWN:
+                mapx.at<float>(j,i) = static_cast<float>(i);
+                mapy.at<float>(j,i) = static_cast<float>(mapx.rows - j);
+                break;
+            case REFLECTION_X:
+                mapx.at<float>(j,i) = static_cast<float>(mapx.cols - i);
+                mapy.at<float>(j,i) = static_cast<float>(j);
+                break;
+            case REFLECTION_BOTH:
+                mapx.at<float>(j,i) = static_cast<float>(mapx.cols - i);
+                mapy.at<float>(j,i) = static_cast<float>(mapx.rows - j);
+                break;
+            } // end of switch
+        }
+    }
+}
+
+static void test_remap(int remapMode)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_32FC1, 0, 1);
+
+    Mat mapx(size, CV_32FC1);
+    Mat mapy(size, CV_32FC1);
+    generateMap(mapx, mapy, remapMode);
+
+    GpuMat_<float> d_src(src);
+    GpuMat_<float> d_mapx(mapx);
+    GpuMat_<float> d_mapy(mapy);
+
+    GpuMat_<float> dst = remap_(interNearest(brdReplicate(d_src)), d_mapx, d_mapy);
+
+    Mat dst_gold;
+    cv::remap(src, dst_gold, mapx, mapy, INTER_NEAREST, BORDER_REPLICATE);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST(Remap, HALF_SIZE)
+{
+    test_remap(HALF_SIZE);
+}
+
+TEST(Remap, UPSIDE_DOWN)
+{
+    test_remap(UPSIDE_DOWN);
+}
+
+TEST(Remap, REFLECTION_X)
+{
+    test_remap(REFLECTION_X);
+}
+
+TEST(Remap, REFLECTION_BOTH)
+{
+    test_remap(REFLECTION_BOTH);
+}
+
+// resize
+
+TEST(Resize, Upscale)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_32FC1, 0, 1);
+
+    GpuMat_<float> d_src(src);
+    Texture<float> tex_src(d_src);
+
+    GpuMat_<float> dst1 = resize_(interCubic(tex_src), 2, 2);
+
+    Mat mapx(size.height * 2, size.width * 2, CV_32FC1);
+    Mat mapy(size.height * 2, size.width * 2, CV_32FC1);
+
+    for (int y = 0; y < mapx.rows; ++y)
+    {
+        for (int x = 0; x < mapx.cols; ++x)
+        {
+            mapx.at<float>(y, x) = static_cast<float>(x / 2);
+            mapy.at<float>(y, x) = static_cast<float>(y / 2);
+        }
+    }
+
+    GpuMat_<float> d_mapx(mapx);
+    GpuMat_<float> d_mapy(mapy);
+
+    GpuMat_<float> dst2 = remap_(interCubic(brdReplicate(d_src)), d_mapx, d_mapy);
+
+    EXPECT_MAT_NEAR(dst1, dst2, 0.0);
+}
+
+TEST(Resize, Downscale)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_32FC1, 0, 1);
+    const float fx = 1.0f / 3.0f;
+    const float fy = 1.0f / 3.0f;
+
+    GpuMat_<float> d_src(src);
+    Texture<float> tex_src(d_src);
+
+    GpuMat_<float> dst1 = resize_(interArea(tex_src, Size(3, 3)), fx, fy);
+
+    Mat mapx(cv::saturate_cast<int>(size.height * fy), cv::saturate_cast<int>(size.width * fx), CV_32FC1);
+    Mat mapy(cv::saturate_cast<int>(size.height * fy), cv::saturate_cast<int>(size.width * fx), CV_32FC1);
+
+    for (int y = 0; y < mapx.rows; ++y)
+    {
+        for (int x = 0; x < mapx.cols; ++x)
+        {
+            mapx.at<float>(y, x) = x / fx;
+            mapy.at<float>(y, x) = y / fy;
+        }
+    }
+
+    GpuMat_<float> d_mapx(mapx);
+    GpuMat_<float> d_mapy(mapy);
+
+    GpuMat_<float> dst2 = remap_(interArea(brdReplicate(d_src), Size(3, 3)), d_mapx, d_mapy);
+
+    EXPECT_MAT_NEAR(dst1, dst2, 0.0);
+}
+
+// warpAffine & warpPerspective
+
+Mat createAffineTransfomMatrix(Size srcSize, float angle, bool perspective)
+{
+    cv::Mat M(perspective ? 3 : 2, 3, CV_32FC1);
+
+    {
+        M.at<float>(0, 0) = std::cos(angle); M.at<float>(0, 1) = -std::sin(angle); M.at<float>(0, 2) = static_cast<float>(srcSize.width / 2);
+        M.at<float>(1, 0) = std::sin(angle); M.at<float>(1, 1) =  std::cos(angle); M.at<float>(1, 2) = 0.0f;
+    }
+    if (perspective)
+    {
+        M.at<float>(2, 0) = 0.0f           ; M.at<float>(2, 1) =  0.0f           ; M.at<float>(2, 2) = 1.0f;
+    }
+
+    return M;
+}
+
+TEST(WarpAffine, Rotation)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_32FC1, 0, 1);
+    Mat M = createAffineTransfomMatrix(size, static_cast<float>(CV_PI / 4), false);
+
+    GpuMat_<float> d_src(src);
+    GpuMat_<float> d_M;
+    createContinuous(M.size(), M.type(), d_M);
+    d_M.upload(M);
+
+    GpuMat_<float> dst = warpAffine_(interNearest(brdConstant(d_src)), size, d_M);
+
+    Mat dst_gold;
+    cv::warpAffine(src, dst_gold, M, size, INTER_NEAREST | WARP_INVERSE_MAP);
+
+    EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-3);
+}
+
+TEST(WarpPerspective, Rotation)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_32FC1, 0, 1);
+    Mat M = createAffineTransfomMatrix(size, static_cast<float>(CV_PI / 4), true);
+
+    GpuMat_<float> d_src(src);
+    GpuMat_<float> d_M;
+    createContinuous(M.size(), M.type(), d_M);
+    d_M.upload(M);
+
+    GpuMat_<float> dst = warpPerspective_(interNearest(brdConstant(d_src)), size, d_M);
+
+    Mat dst_gold;
+    cv::warpPerspective(src, dst_gold, M, size, INTER_NEAREST | WARP_INVERSE_MAP);
+
+    EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-3);
+}
diff --git a/modules/cudev/test/transpose.cu b/modules/cudev/test/transpose.cu
new file mode 100644
index 000000000..a1e477c22
--- /dev/null
+++ b/modules/cudev/test/transpose.cu
@@ -0,0 +1,81 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+using namespace cv::cudev;
+using namespace cvtest;
+
+TEST(Transpose, _8uc1)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_8UC1);
+
+    GpuMat_<uchar> d_src(src);
+
+    GpuMat_<uchar> dst = transpose_(d_src);
+
+    Mat dst_gold;
+    cv::transpose(src, dst_gold);
+
+    ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST(Transpose, _32fc3)
+{
+    const Size size = randomSize(100, 400);
+
+    Mat src = randomMat(size, CV_32FC3);
+
+    GpuMat_<float3> d_src(src);
+
+    GpuMat_<float3> dst = transpose_(d_src);
+
+    Mat dst_gold;
+    cv::transpose(src, dst_gold);
+
+    ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
+}
diff --git a/modules/features2d/CMakeLists.txt b/modules/features2d/CMakeLists.txt
index 7d36a58ac..0b080cfb9 100644
--- a/modules/features2d/CMakeLists.txt
+++ b/modules/features2d/CMakeLists.txt
@@ -1,3 +1,2 @@
 set(the_description "2D Features Framework")
 ocv_define_module(features2d opencv_imgproc opencv_flann OPTIONAL opencv_highgui)
-
diff --git a/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst b/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst
index a1ac7b95e..83688c0a3 100644
--- a/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst
+++ b/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst
@@ -9,7 +9,10 @@ represented as vectors in a multidimensional space. All objects that implement t
 descriptor extractors inherit the
 :ocv:class:`DescriptorExtractor` interface.
 
+.. note::
 
+   * An example explaining keypoint extraction can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+   * An example on descriptor evaluation can be found at opencv_source_code/samples/cpp/detector_descriptor_evaluation.cpp
 
 DescriptorExtractor
 -------------------
@@ -57,6 +60,8 @@ Computes the descriptors for a set of keypoints detected in an image (first vari
 
 .. ocv:function:: void DescriptorExtractor::compute( const vector<Mat>& images, vector<vector<KeyPoint> >& keypoints, vector<Mat>& descriptors ) const
 
+.. ocv:pyfunction:: cv2.DescriptorExtractor_create.compute(image, keypoints[, descriptors]) -> keypoints, descriptors
+
     :param image: Image.
 
     :param images: Image set.
@@ -72,15 +77,18 @@ Creates a descriptor extractor by name.
 
 .. ocv:function:: Ptr<DescriptorExtractor>  DescriptorExtractor::create( const String& descriptorExtractorType )
 
+.. ocv:pyfunction:: cv2.DescriptorExtractor_create(descriptorExtractorType) -> retval
+
     :param descriptorExtractorType: Descriptor extractor type.
 
 The current implementation supports the following types of a descriptor extractor:
 
  * ``"SIFT"`` -- :ocv:class:`SIFT`
  * ``"SURF"`` -- :ocv:class:`SURF`
- * ``"ORB"`` -- :ocv:class:`ORB`
- * ``"BRISK"`` -- :ocv:class:`BRISK`
  * ``"BRIEF"`` -- :ocv:class:`BriefDescriptorExtractor`
+ * ``"BRISK"`` -- :ocv:class:`BRISK`
+ * ``"ORB"`` -- :ocv:class:`ORB`
+ * ``"FREAK"`` -- :ocv:class:`FREAK`
 
 A combined format is also supported: descriptor extractor adapter name ( ``"Opponent"`` --
 :ocv:class:`OpponentColorDescriptorExtractor` ) + descriptor extractor name (see above),
@@ -137,4 +145,6 @@ Strecha C., Fua P. *BRIEF: Binary Robust Independent Elementary Features* ,
         ...
     };
 
+.. note::
 
+   * A complete BRIEF extractor sample can be found at opencv_source_code/samples/cpp/brief_match_test.cpp
diff --git a/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst
index 4c49cad61..2c2cf28f8 100644
--- a/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst
+++ b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst
@@ -9,6 +9,11 @@ that are represented as vectors in a multidimensional space. All objects that im
 descriptor matchers inherit the
 :ocv:class:`DescriptorMatcher` interface.
 
+.. note::
+
+   * An example explaining keypoint matching can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+   * An example on descriptor matching evaluation can be found at opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp
+   * An example on one to many image matching can be found at opencv_source_code/samples/cpp/matching_to_many_images.cpp
 
 DescriptorMatcher
 -----------------
@@ -271,4 +276,3 @@ Flann-based descriptor matcher. This matcher trains :ocv:class:`flann::Index_` o
     };
 
 ..
-
diff --git a/modules/features2d/doc/common_interfaces_of_feature_detectors.rst b/modules/features2d/doc/common_interfaces_of_feature_detectors.rst
index 3bbaa8aca..434585d1e 100644
--- a/modules/features2d/doc/common_interfaces_of_feature_detectors.rst
+++ b/modules/features2d/doc/common_interfaces_of_feature_detectors.rst
@@ -8,6 +8,9 @@ between different algorithms solving the same problem. All objects that implemen
 inherit the
 :ocv:class:`FeatureDetector` interface.
 
+.. note::
+
+   * An example explaining keypoint detection can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
 
 FeatureDetector
 ---------------
@@ -44,6 +47,8 @@ Detects keypoints in an image (first variant) or image set (second variant).
 
 .. ocv:function:: void FeatureDetector::detect( const vector<Mat>& images, vector<vector<KeyPoint> >& keypoints, const vector<Mat>& masks=vector<Mat>() ) const
 
+.. ocv:pyfunction:: cv2.FeatureDetector_create.detect(image[, mask]) -> keypoints
+
     :param image: Image.
 
     :param images: Image set.
@@ -60,6 +65,8 @@ Creates a feature detector by its name.
 
 .. ocv:function:: Ptr<FeatureDetector> FeatureDetector::create( const String& detectorType )
 
+.. ocv:pyfunction:: cv2.FeatureDetector_create(detectorType) -> retval
+
     :param detectorType: Feature detector type.
 
 The following detector types are supported:
@@ -162,7 +169,7 @@ StarFeatureDetector
 -------------------
 .. ocv:class:: StarFeatureDetector : public FeatureDetector
 
-The class implements the keypoint detector introduced by K. Konolige, synonym of ``StarDetector``.  ::
+The class implements the keypoint detector introduced by [Agrawal08]_, synonym of ``StarDetector``.  ::
 
     class StarFeatureDetector : public FeatureDetector
     {
@@ -176,6 +183,9 @@ The class implements the keypoint detector introduced by K. Konolige, synonym of
         ...
     };
 
+.. [Agrawal08] Agrawal, M., Konolige, K., & Blas, M. R. (2008). Censure: Center surround extremas for realtime feature detection and matching. In Computer Vision–ECCV 2008 (pp. 102-115). Springer Berlin Heidelberg.
+
+
 DenseFeatureDetector
 --------------------
 .. ocv:class:: DenseFeatureDetector : public FeatureDetector
diff --git a/modules/features2d/doc/common_interfaces_of_generic_descriptor_matchers.rst b/modules/features2d/doc/common_interfaces_of_generic_descriptor_matchers.rst
index a306c6606..5a7f952bc 100644
--- a/modules/features2d/doc/common_interfaces_of_generic_descriptor_matchers.rst
+++ b/modules/features2d/doc/common_interfaces_of_generic_descriptor_matchers.rst
@@ -11,7 +11,11 @@ Every descriptor with the
 :ocv:class:`VectorDescriptorMatcher` ).
 There are descriptors such as the One-way descriptor and Ferns that have the ``GenericDescriptorMatcher`` interface implemented but do not support ``DescriptorExtractor``.
 
+.. note::
 
+   * An example explaining keypoint description can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+   * An example on descriptor matching evaluation can be found at opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp
+   * An example on one to many image matching can be found at opencv_source_code/samples/cpp/matching_to_many_images.cpp
 
 GenericDescriptorMatcher
 ------------------------
@@ -270,5 +274,3 @@ Example: ::
 
     VectorDescriptorMatcher matcher( new SurfDescriptorExtractor,
                                      new BruteForceMatcher<L2<float> > );
-
-
diff --git a/modules/features2d/doc/drawing_function_of_keypoints_and_matches.rst b/modules/features2d/doc/drawing_function_of_keypoints_and_matches.rst
index 2669ab9f2..68c68fc6c 100644
--- a/modules/features2d/doc/drawing_function_of_keypoints_and_matches.rst
+++ b/modules/features2d/doc/drawing_function_of_keypoints_and_matches.rst
@@ -11,6 +11,10 @@ Draws the found matches of keypoints from two images.
 
 .. ocv:function:: void drawMatches( const Mat& img1, const vector<KeyPoint>& keypoints1, const Mat& img2, const vector<KeyPoint>& keypoints2, const vector<vector<DMatch> >& matches1to2, Mat& outImg, const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), const vector<vector<char> >& matchesMask=vector<vector<char> >(), int flags=DrawMatchesFlags::DEFAULT )
 
+.. ocv:pyfunction:: cv2.drawMatches(img1, keypoints1, img2, keypoints2, matches1to2[, outImg[, matchColor[, singlePointColor[, matchesMask[, flags]]]]]) -> outImg
+
+.. ocv:pyfunction:: cv2.drawMatchesKnn(img1, keypoints1, img2, keypoints2, matches1to2[, outImg[, matchColor[, singlePointColor[, matchesMask[, flags]]]]]) -> outImg
+
 
     :param img1: First source image.
 
@@ -67,6 +71,8 @@ Draws keypoints.
 
 .. ocv:function:: void drawKeypoints( const Mat& image, const vector<KeyPoint>& keypoints, Mat& outImage, const Scalar& color=Scalar::all(-1), int flags=DrawMatchesFlags::DEFAULT )
 
+.. ocv:pyfunction:: cv2.drawKeypoints(image, keypoints[, outImage[, color[, flags]]]) -> outImage
+
     :param image: Source image.
 
     :param keypoints: Keypoints from the source image.
@@ -77,3 +83,4 @@ Draws keypoints.
 
     :param flags: Flags setting drawing features. Possible  ``flags``  bit values are defined by  ``DrawMatchesFlags``. See details above in  :ocv:func:`drawMatches` .
 
+.. note:: For Python API, flags are modified as `cv2.DRAW_MATCHES_FLAGS_DEFAULT`, `cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS`, `cv2.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG`, `cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS`
diff --git a/modules/features2d/doc/feature_detection_and_description.rst b/modules/features2d/doc/feature_detection_and_description.rst
index f265ab3c4..a6fe7c8fa 100644
--- a/modules/features2d/doc/feature_detection_and_description.rst
+++ b/modules/features2d/doc/feature_detection_and_description.rst
@@ -3,6 +3,10 @@ Feature Detection and Description
 
 .. highlight:: cpp
 
+.. note::
+
+   * An example explaining keypoint detection and description can be found at opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+
 FAST
 ----
 Detects corners using the FAST algorithm
@@ -10,6 +14,11 @@ Detects corners using the FAST algorithm
 .. ocv:function:: void FAST( InputArray image, vector<KeyPoint>& keypoints, int threshold, bool nonmaxSupression=true )
 .. ocv:function:: void FAST( InputArray image, vector<KeyPoint>& keypoints, int threshold, bool nonmaxSupression, int type )
 
+.. ocv:pyfunction:: cv2.FastFeatureDetector([, threshold[, nonmaxSuppression]]) -> <FastFeatureDetector object>
+.. ocv:pyfunction:: cv2.FastFeatureDetector(threshold, nonmaxSuppression, type) -> <FastFeatureDetector object>
+.. ocv:pyfunction:: cv2.FastFeatureDetector.detect(image[, mask]) -> keypoints
+
+
     :param image: grayscale image where keypoints (corners) are detected.
 
     :param keypoints: keypoints detected on the image.
@@ -22,6 +31,9 @@ Detects corners using the FAST algorithm
 
 Detects corners using the FAST algorithm by [Rosten06]_.
 
+..note:: In Python API, types are given as ``cv2.FAST_FEATURE_DETECTOR_TYPE_5_8``, ``cv2.FAST_FEATURE_DETECTOR_TYPE_7_12`` and  ``cv2.FAST_FEATURE_DETECTOR_TYPE_9_16``. For corner detection, use ``cv2.FAST.detect()`` method.
+
+
 .. [Rosten06] E. Rosten. Machine Learning for High-speed Corner Detection, 2006.
 
 
@@ -50,6 +62,10 @@ Maximally stable extremal region extractor. ::
 The class encapsulates all the parameters of the MSER extraction algorithm (see
 http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions). Also see http://code.opencv.org/projects/opencv/wiki/MSER for useful comments and parameters description.
 
+.. note::
+
+   * (Python) A complete example showing the use of the MSER detector can be found at opencv_source_code/samples/python2/mser.py
+
 
 ORB
 ---
@@ -65,6 +81,9 @@ The ORB constructor
 
 .. ocv:function:: ORB::ORB(int nfeatures = 500, float scaleFactor = 1.2f, int nlevels = 8, int edgeThreshold = 31, int firstLevel = 0, int WTA_K=2, int scoreType=ORB::HARRIS_SCORE, int patchSize=31)
 
+.. ocv:pyfunction:: cv2.ORB([, nfeatures[, scaleFactor[, nlevels[, edgeThreshold[, firstLevel[, WTA_K[, scoreType[, patchSize]]]]]]]]) -> <ORB object>
+
+
     :param nfeatures: The maximum number of features to retain.
 
     :param scaleFactor: Pyramid decimation ratio, greater than 1. ``scaleFactor==2`` means the classical pyramid, where each next level has 4x less pixels than the previous, but such a big scale factor will degrade feature matching scores dramatically. On the other hand, too close to 1 scale factor will mean that to cover certain scale range you will need more pyramid levels and so the speed will suffer.
@@ -87,6 +106,11 @@ Finds keypoints in an image and computes their descriptors
 
 .. ocv:function:: void ORB::operator()(InputArray image, InputArray mask, vector<KeyPoint>& keypoints, OutputArray descriptors, bool useProvidedKeypoints=false ) const
 
+.. ocv:pyfunction:: cv2.ORB.detect(image[, mask]) -> keypoints
+.. ocv:pyfunction:: cv2.ORB.compute(image, keypoints[, descriptors]) -> keypoints, descriptors
+.. ocv:pyfunction:: cv2.ORB.detectAndCompute(image, mask[, descriptors[, useProvidedKeypoints]]) -> keypoints, descriptors
+
+
     :param image: The input 8-bit grayscale image.
 
     :param mask: The operation mask.
@@ -97,6 +121,7 @@ Finds keypoints in an image and computes their descriptors
 
     :param useProvidedKeypoints: If it is true, then the method will use the provided vector of keypoints instead of detecting them.
 
+
 BRISK
 -----
 .. ocv:class:: BRISK : public Feature2D
@@ -111,6 +136,8 @@ The BRISK constructor
 
 .. ocv:function:: BRISK::BRISK(int thresh=30, int octaves=3, float patternScale=1.0f)
 
+.. ocv:pyfunction:: cv2.BRISK([, thresh[, octaves[, patternScale]]]) -> <BRISK object>
+
     :param thresh: FAST/AGAST detection threshold score.
 
     :param octaves: detection octaves. Use 0 to do single scale.
@@ -123,6 +150,8 @@ The BRISK constructor for a custom pattern
 
 .. ocv:function:: BRISK::BRISK(std::vector<float> &radiusList, std::vector<int> &numberList, float dMax=5.85f, float dMin=8.2f, std::vector<int> indexChange=std::vector<int>())
 
+.. ocv:pyfunction:: cv2.BRISK(radiusList, numberList[, dMax[, dMin[, indexChange]]]) -> <BRISK object>
+
     :param radiusList: defines the radii (in pixels) where the samples around a keypoint are taken (for keypoint scale 1).
 
     :param numberList: defines the number of sampling points on the sampling circle. Must be the same size as radiusList..
@@ -139,6 +168,10 @@ Finds keypoints in an image and computes their descriptors
 
 .. ocv:function:: void BRISK::operator()(InputArray image, InputArray mask, vector<KeyPoint>& keypoints, OutputArray descriptors, bool useProvidedKeypoints=false ) const
 
+.. ocv:pyfunction:: cv2.BRISK.detect(image[, mask]) -> keypoints
+.. ocv:pyfunction:: cv2.BRISK.compute(image, keypoints[, descriptors]) -> keypoints, descriptors
+.. ocv:pyfunction:: cv2.BRISK.detectAndCompute(image, mask[, descriptors[, useProvidedKeypoints]]) -> keypoints, descriptors
+
     :param image: The input 8-bit grayscale image.
 
     :param mask: The operation mask.
@@ -157,6 +190,10 @@ Class implementing the FREAK (*Fast Retina Keypoint*) keypoint descriptor, descr
 
 .. [AOV12] A. Alahi, R. Ortiz, and P. Vandergheynst. FREAK: Fast Retina Keypoint. In IEEE Conference on Computer Vision and Pattern Recognition, 2012. CVPR 2012 Open Source Award Winner.
 
+.. note::
+
+   * An example on how to use the FREAK descriptor can be found at opencv_source_code/samples/cpp/freak_demo.cpp
+
 FREAK::FREAK
 ------------
 The FREAK constructor
diff --git a/modules/features2d/doc/object_categorization.rst b/modules/features2d/doc/object_categorization.rst
index 73089ce7a..644634fd0 100644
--- a/modules/features2d/doc/object_categorization.rst
+++ b/modules/features2d/doc/object_categorization.rst
@@ -5,6 +5,12 @@ Object Categorization
 
 This section describes approaches based on local 2D features and used to categorize objects.
 
+.. note::
+
+   * A complete Bag-Of-Words sample can be found at opencv_source_code/samples/cpp/bagofwords_classification.cpp
+
+   * (Python) An example using the features2D framework to perform object categorization can be found at opencv_source_code/samples/python2/find_obj.py
+
 BOWTrainer
 ----------
 .. ocv:class:: BOWTrainer
@@ -198,4 +204,3 @@ BOWImgDescriptorExtractor::descriptorType
 Returns an image descriptor type.
 
 .. ocv:function:: int BOWImgDescriptorExtractor::descriptorType() const
-
diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp
index 4b21ddb09..af687e308 100644
--- a/modules/features2d/include/opencv2/features2d.hpp
+++ b/modules/features2d/include/opencv2/features2d.hpp
@@ -535,11 +535,11 @@ protected:
 };
 
 
-class CV_EXPORTS GFTTDetector : public FeatureDetector
+class CV_EXPORTS_W GFTTDetector : public FeatureDetector
 {
 public:
-    GFTTDetector( int maxCorners=1000, double qualityLevel=0.01, double minDistance=1,
-                  int blockSize=3, bool useHarrisDetector=false, double k=0.04 );
+    CV_WRAP GFTTDetector( int maxCorners=1000, double qualityLevel=0.01, double minDistance=1,
+                          int blockSize=3, bool useHarrisDetector=false, double k=0.04 );
     AlgorithmInfo* info() const;
 
 protected:
@@ -646,7 +646,7 @@ public:
      * gridRows            Grid rows count.
      * gridCols            Grid column count.
      */
-    CV_WRAP GridAdaptedFeatureDetector( const Ptr<FeatureDetector>& detector=0,
+    CV_WRAP GridAdaptedFeatureDetector( const Ptr<FeatureDetector>& detector=Ptr<FeatureDetector>(),
                                         int maxTotalKeypoints=1000,
                                         int gridRows=4, int gridCols=4 );
 
@@ -961,7 +961,7 @@ struct CV_EXPORTS Hamming
 
 typedef Hamming HammingLUT;
 
-template<int cellsize> struct CV_EXPORTS HammingMultilevel
+template<int cellsize> struct HammingMultilevel
 {
     enum { normType = NORM_HAMMING + (cellsize>1) };
     typedef unsigned char ValueType;
@@ -1143,8 +1143,8 @@ protected:
 class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher
 {
 public:
-    CV_WRAP FlannBasedMatcher( const Ptr<flann::IndexParams>& indexParams=new flann::KDTreeIndexParams(),
-                       const Ptr<flann::SearchParams>& searchParams=new flann::SearchParams() );
+    CV_WRAP FlannBasedMatcher( const Ptr<flann::IndexParams>& indexParams=makePtr<flann::KDTreeIndexParams>(),
+                       const Ptr<flann::SearchParams>& searchParams=makePtr<flann::SearchParams>() );
 
     virtual void add( const std::vector<Mat>& descriptors );
     virtual void clear();
@@ -1404,15 +1404,15 @@ CV_EXPORTS_W void drawKeypoints( const Mat& image, const std::vector<KeyPoint>&
                                const Scalar& color=Scalar::all(-1), int flags=DrawMatchesFlags::DEFAULT );
 
 // Draws matches of keypints from two images on output image.
-CV_EXPORTS void drawMatches( const Mat& img1, const std::vector<KeyPoint>& keypoints1,
+CV_EXPORTS_W void drawMatches( const Mat& img1, const std::vector<KeyPoint>& keypoints1,
                              const Mat& img2, const std::vector<KeyPoint>& keypoints2,
-                             const std::vector<DMatch>& matches1to2, Mat& outImg,
+                             const std::vector<DMatch>& matches1to2, CV_OUT Mat& outImg,
                              const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1),
                              const std::vector<char>& matchesMask=std::vector<char>(), int flags=DrawMatchesFlags::DEFAULT );
 
-CV_EXPORTS void drawMatches( const Mat& img1, const std::vector<KeyPoint>& keypoints1,
+CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( const Mat& img1, const std::vector<KeyPoint>& keypoints1,
                              const Mat& img2, const std::vector<KeyPoint>& keypoints2,
-                             const std::vector<std::vector<DMatch> >& matches1to2, Mat& outImg,
+                             const std::vector<std::vector<DMatch> >& matches1to2, CV_OUT Mat& outImg,
                              const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1),
                              const std::vector<std::vector<char> >& matchesMask=std::vector<std::vector<char> >(), int flags=DrawMatchesFlags::DEFAULT );
 
diff --git a/modules/features2d/include/opencv2/features2d/features2d.hpp b/modules/features2d/include/opencv2/features2d/features2d.hpp
index c825295ec..e81df0ad0 100644
--- a/modules/features2d/include/opencv2/features2d/features2d.hpp
+++ b/modules/features2d/include/opencv2/features2d/features2d.hpp
@@ -45,4 +45,4 @@
 #error this is a compatibility header which should not be used inside the OpenCV library
 #endif
 
-#include "opencv2/features2d.hpp"
\ No newline at end of file
+#include "opencv2/features2d.hpp"
diff --git a/modules/features2d/perf/perf_fast.cpp b/modules/features2d/perf/perf_fast.cpp
index fe7396183..25b82bd41 100644
--- a/modules/features2d/perf/perf_fast.cpp
+++ b/modules/features2d/perf/perf_fast.cpp
@@ -41,4 +41,3 @@ PERF_TEST_P(fast, detect, testing::Combine(
 
     SANITY_CHECK_KEYPOINTS(points);
 }
-
diff --git a/modules/features2d/perf/perf_precomp.cpp b/modules/features2d/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/features2d/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/features2d/src/bagofwords.cpp b/modules/features2d/src/bagofwords.cpp
index b27b85123..a257f6cbe 100644
--- a/modules/features2d/src/bagofwords.cpp
+++ b/modules/features2d/src/bagofwords.cpp
@@ -147,7 +147,7 @@ void BOWImgDescriptorExtractor::compute( const Mat& image, std::vector<KeyPoint>
     int clusterCount = descriptorSize(); // = vocabulary.rows
 
     // Compute descriptors for the image.
-    Mat descriptors = _descriptors ? *_descriptors : Mat();
+    Mat descriptors;
     dextractor->compute( image, keypoints, descriptors );
 
     // Match keypoint descriptors to cluster center (to vocabulary)
@@ -176,6 +176,11 @@ void BOWImgDescriptorExtractor::compute( const Mat& image, std::vector<KeyPoint>
 
     // Normalize image descriptor.
     imgDescriptor /= descriptors.rows;
+
+    // Add the descriptors of image keypoints
+    if (_descriptors) {
+        *_descriptors = descriptors.clone();
+    }
 }
 
 int BOWImgDescriptorExtractor::descriptorSize() const
diff --git a/modules/features2d/src/brisk.cpp b/modules/features2d/src/brisk.cpp
index 5513bad85..76bded666 100644
--- a/modules/features2d/src/brisk.cpp
+++ b/modules/features2d/src/brisk.cpp
@@ -525,7 +525,11 @@ BRISK::operator()( InputArray _image, InputArray _mask, std::vector<KeyPoint>& k
   bool doOrientation=true;
   if (useProvidedKeypoints)
     doOrientation = false;
-  computeDescriptorsAndOrOrientation(_image, _mask, keypoints, _descriptors, true, doOrientation,
+
+  // If the user specified cv::noArray(), this will yield false. Otherwise it will return true.
+  bool doDescriptors = _descriptors.needed();
+
+  computeDescriptorsAndOrOrientation(_image, _mask, keypoints, _descriptors, doDescriptors, doOrientation,
                                        useProvidedKeypoints);
 }
 
@@ -1999,7 +2003,7 @@ BriskLayer::BriskLayer(const cv::Mat& img_in, float scale_in, float offset_in)
   scale_ = scale_in;
   offset_ = offset_in;
   // create an agast detector
-  fast_9_16_ = new FastFeatureDetector(1, true, FastFeatureDetector::TYPE_9_16);
+  fast_9_16_ = makePtr<FastFeatureDetector>(1, true, FastFeatureDetector::TYPE_9_16);
   makeOffsets(pixel_5_8_, (int)img_.step, 8);
   makeOffsets(pixel_9_16_, (int)img_.step, 16);
 }
@@ -2021,7 +2025,7 @@ BriskLayer::BriskLayer(const BriskLayer& layer, int mode)
     offset_ = 0.5f * scale_ - 0.5f;
   }
   scores_ = cv::Mat::zeros(img_.rows, img_.cols, CV_8U);
-  fast_9_16_ = new FastFeatureDetector(1, false, FastFeatureDetector::TYPE_9_16);
+  fast_9_16_ = makePtr<FastFeatureDetector>(1, false, FastFeatureDetector::TYPE_9_16);
   makeOffsets(pixel_5_8_, (int)img_.step, 8);
   makeOffsets(pixel_9_16_, (int)img_.step, 16);
 }
diff --git a/modules/features2d/src/descriptors.cpp b/modules/features2d/src/descriptors.cpp
index 4f434032e..b79768ae1 100644
--- a/modules/features2d/src/descriptors.cpp
+++ b/modules/features2d/src/descriptors.cpp
@@ -99,7 +99,7 @@ Ptr<DescriptorExtractor> DescriptorExtractor::create(const String& descriptorExt
     {
         size_t pos = String("Opponent").size();
         String type = descriptorExtractorType.substr(pos);
-        return new OpponentColorDescriptorExtractor(DescriptorExtractor::create(type));
+        return makePtr<OpponentColorDescriptorExtractor>(DescriptorExtractor::create(type));
     }
 
     return Algorithm::create<DescriptorExtractor>("Feature2D." + descriptorExtractorType);
@@ -119,7 +119,7 @@ CV_WRAP void Feature2D::compute( const Mat& image, CV_OUT CV_IN_OUT std::vector<
 OpponentColorDescriptorExtractor::OpponentColorDescriptorExtractor( const Ptr<DescriptorExtractor>& _descriptorExtractor ) :
         descriptorExtractor(_descriptorExtractor)
 {
-    CV_Assert( !descriptorExtractor.empty() );
+    CV_Assert( descriptorExtractor );
 }
 
 static void convertBGRImageToOpponentColorSpace( const Mat& bgrImage, std::vector<Mat>& opponentChannels )
@@ -249,7 +249,7 @@ int OpponentColorDescriptorExtractor::descriptorType() const
 
 bool OpponentColorDescriptorExtractor::empty() const
 {
-    return descriptorExtractor.empty() || (DescriptorExtractor*)(descriptorExtractor)->empty();
+    return !descriptorExtractor || descriptorExtractor->empty();
 }
 
 }
diff --git a/modules/features2d/src/detectors.cpp b/modules/features2d/src/detectors.cpp
index c20d57355..63a882dd3 100644
--- a/modules/features2d/src/detectors.cpp
+++ b/modules/features2d/src/detectors.cpp
@@ -90,19 +90,19 @@ Ptr<FeatureDetector> FeatureDetector::create( const String& detectorType )
 {
     if( detectorType.find("Grid") == 0 )
     {
-        return new GridAdaptedFeatureDetector(FeatureDetector::create(
+        return makePtr<GridAdaptedFeatureDetector>(FeatureDetector::create(
                                 detectorType.substr(strlen("Grid"))));
     }
 
     if( detectorType.find("Pyramid") == 0 )
     {
-        return new PyramidAdaptedFeatureDetector(FeatureDetector::create(
+        return makePtr<PyramidAdaptedFeatureDetector>(FeatureDetector::create(
                                 detectorType.substr(strlen("Pyramid"))));
     }
 
     if( detectorType.find("Dynamic") == 0 )
     {
-        return new DynamicAdaptedFeatureDetector(AdjusterAdapter::create(
+        return makePtr<DynamicAdaptedFeatureDetector>(AdjusterAdapter::create(
                                 detectorType.substr(strlen("Dynamic"))));
     }
 
@@ -190,7 +190,7 @@ GridAdaptedFeatureDetector::GridAdaptedFeatureDetector( const Ptr<FeatureDetecto
 
 bool GridAdaptedFeatureDetector::empty() const
 {
-    return detector.empty() || (FeatureDetector*)detector->empty();
+    return !detector || detector->empty();
 }
 
 struct ResponseComparator
@@ -295,7 +295,7 @@ PyramidAdaptedFeatureDetector::PyramidAdaptedFeatureDetector( const Ptr<FeatureD
 
 bool PyramidAdaptedFeatureDetector::empty() const
 {
-    return detector.empty() || (FeatureDetector*)detector->empty();
+    return !detector || detector->empty();
 }
 
 void PyramidAdaptedFeatureDetector::detectImpl( const Mat& image, std::vector<KeyPoint>& keypoints, const Mat& mask ) const
diff --git a/modules/features2d/src/dynamic.cpp b/modules/features2d/src/dynamic.cpp
index d08434da4..6bd6ab4de 100644
--- a/modules/features2d/src/dynamic.cpp
+++ b/modules/features2d/src/dynamic.cpp
@@ -51,7 +51,7 @@ DynamicAdaptedFeatureDetector::DynamicAdaptedFeatureDetector(const Ptr<AdjusterA
 
 bool DynamicAdaptedFeatureDetector::empty() const
 {
-    return adjuster_.empty() || adjuster_->empty();
+    return !adjuster_ || adjuster_->empty();
 }
 
 void DynamicAdaptedFeatureDetector::detectImpl(const Mat& image, std::vector<KeyPoint>& keypoints, const Mat& mask) const
@@ -124,7 +124,7 @@ bool FastAdjuster::good() const
 
 Ptr<AdjusterAdapter> FastAdjuster::clone() const
 {
-    Ptr<AdjusterAdapter> cloned_obj = new FastAdjuster( init_thresh_, nonmax_, min_thresh_, max_thresh_ );
+    Ptr<AdjusterAdapter> cloned_obj(new FastAdjuster( init_thresh_, nonmax_, min_thresh_, max_thresh_ ));
     return cloned_obj;
 }
 
@@ -158,7 +158,7 @@ bool StarAdjuster::good() const
 
 Ptr<AdjusterAdapter> StarAdjuster::clone() const
 {
-    Ptr<AdjusterAdapter> cloned_obj = new StarAdjuster( init_thresh_, min_thresh_, max_thresh_ );
+    Ptr<AdjusterAdapter> cloned_obj(new StarAdjuster( init_thresh_, min_thresh_, max_thresh_ ));
     return cloned_obj;
 }
 
@@ -195,7 +195,7 @@ bool SurfAdjuster::good() const
 
 Ptr<AdjusterAdapter> SurfAdjuster::clone() const
 {
-    Ptr<AdjusterAdapter> cloned_obj = new SurfAdjuster( init_thresh_, min_thresh_, max_thresh_ );
+    Ptr<AdjusterAdapter> cloned_obj(new SurfAdjuster( init_thresh_, min_thresh_, max_thresh_ ));
     return cloned_obj;
 }
 
@@ -205,15 +205,15 @@ Ptr<AdjusterAdapter> AdjusterAdapter::create( const String& detectorType )
 
     if( !detectorType.compare( "FAST" ) )
     {
-        adapter = new FastAdjuster();
+        adapter = makePtr<FastAdjuster>();
     }
     else if( !detectorType.compare( "STAR" ) )
     {
-        adapter = new StarAdjuster();
+        adapter = makePtr<StarAdjuster>();
     }
     else if( !detectorType.compare( "SURF" ) )
     {
-        adapter = new SurfAdjuster();
+        adapter = makePtr<SurfAdjuster>();
     }
 
     return adapter;
diff --git a/modules/features2d/src/evaluation.cpp b/modules/features2d/src/evaluation.cpp
index cdc5834d4..5bde951d6 100644
--- a/modules/features2d/src/evaluation.cpp
+++ b/modules/features2d/src/evaluation.cpp
@@ -257,7 +257,7 @@ struct IntersectAreaCounter
     {
         CV_Assert( miny < maxy );
         CV_Assert( dr > FLT_EPSILON );
-        
+
         int temp_bua = bua, temp_bna = bna;
         for( int i = range.begin(); i != range.end(); i++ )
         {
@@ -461,7 +461,7 @@ void cv::evaluateFeatureDetector( const Mat& img1, const Mat& img2, const Mat& H
     keypoints1 = _keypoints1 != 0 ? _keypoints1 : &buf1;
     keypoints2 = _keypoints2 != 0 ? _keypoints2 : &buf2;
 
-    if( (keypoints1->empty() || keypoints2->empty()) && fdetector.empty() )
+    if( (keypoints1->empty() || keypoints2->empty()) && !fdetector )
         CV_Error( Error::StsBadArg, "fdetector must not be empty when keypoints1 or keypoints2 is empty" );
 
     if( keypoints1->empty() )
@@ -575,7 +575,7 @@ void cv::evaluateGenericDescriptorMatcher( const Mat& img1, const Mat& img2, con
     if( keypoints1.empty() )
         CV_Error( Error::StsBadArg, "keypoints1 must not be empty" );
 
-    if( matches1to2->empty() && dmatcher.empty() )
+    if( matches1to2->empty() && !dmatcher )
         CV_Error( Error::StsBadArg, "dmatch must not be empty when matches1to2 is empty" );
 
     bool computeKeypoints2ByPrj = keypoints2.empty();
diff --git a/modules/features2d/src/fast_score.cpp b/modules/features2d/src/fast_score.cpp
index 423b1f950..de697b7c9 100644
--- a/modules/features2d/src/fast_score.cpp
+++ b/modules/features2d/src/fast_score.cpp
@@ -357,4 +357,3 @@ int cornerScore<8>(const uchar* ptr, const int pixel[], int threshold)
 }
 
 } // namespace cv
-
diff --git a/modules/features2d/src/matchers.cpp b/modules/features2d/src/matchers.cpp
index 54da1833e..087c6a78b 100644
--- a/modules/features2d/src/matchers.cpp
+++ b/modules/features2d/src/matchers.cpp
@@ -326,7 +326,7 @@ BFMatcher::BFMatcher( int _normType, bool _crossCheck )
 
 Ptr<DescriptorMatcher> BFMatcher::clone( bool emptyTrainData ) const
 {
-    BFMatcher* matcher = new BFMatcher(normType, crossCheck);
+    Ptr<BFMatcher> matcher = makePtr<BFMatcher>(normType, crossCheck);
     if( !emptyTrainData )
     {
         matcher->trainDescCollection.resize(trainDescCollection.size());
@@ -458,31 +458,31 @@ void BFMatcher::radiusMatchImpl( const Mat& queryDescriptors, std::vector<std::v
  */
 Ptr<DescriptorMatcher> DescriptorMatcher::create( const String& descriptorMatcherType )
 {
-    DescriptorMatcher* dm = 0;
+    Ptr<DescriptorMatcher> dm;
     if( !descriptorMatcherType.compare( "FlannBased" ) )
     {
-        dm = new FlannBasedMatcher();
+        dm = makePtr<FlannBasedMatcher>();
     }
     else if( !descriptorMatcherType.compare( "BruteForce" ) ) // L2
     {
-        dm = new BFMatcher(NORM_L2);
+        dm = makePtr<BFMatcher>(int(NORM_L2)); // anonymous enums can't be template parameters
     }
     else if( !descriptorMatcherType.compare( "BruteForce-SL2" ) ) // Squared L2
     {
-        dm = new BFMatcher(NORM_L2SQR);
+        dm = makePtr<BFMatcher>(int(NORM_L2SQR));
     }
     else if( !descriptorMatcherType.compare( "BruteForce-L1" ) )
     {
-        dm = new BFMatcher(NORM_L1);
+        dm = makePtr<BFMatcher>(int(NORM_L1));
     }
     else if( !descriptorMatcherType.compare("BruteForce-Hamming") ||
              !descriptorMatcherType.compare("BruteForce-HammingLUT") )
     {
-        dm = new BFMatcher(NORM_HAMMING);
+        dm = makePtr<BFMatcher>(int(NORM_HAMMING));
     }
     else if( !descriptorMatcherType.compare("BruteForce-Hamming(2)") )
     {
-        dm = new BFMatcher(NORM_HAMMING2);
+        dm = makePtr<BFMatcher>(int(NORM_HAMMING2));
     }
     else
         CV_Error( Error::StsBadArg, "Unknown matcher name" );
@@ -497,8 +497,8 @@ Ptr<DescriptorMatcher> DescriptorMatcher::create( const String& descriptorMatche
 FlannBasedMatcher::FlannBasedMatcher( const Ptr<flann::IndexParams>& _indexParams, const Ptr<flann::SearchParams>& _searchParams )
     : indexParams(_indexParams), searchParams(_searchParams), addedDescCount(0)
 {
-    CV_Assert( !_indexParams.empty() );
-    CV_Assert( !_searchParams.empty() );
+    CV_Assert( _indexParams );
+    CV_Assert( _searchParams );
 }
 
 void FlannBasedMatcher::add( const std::vector<Mat>& descriptors )
@@ -522,17 +522,17 @@ void FlannBasedMatcher::clear()
 
 void FlannBasedMatcher::train()
 {
-    if( flannIndex.empty() || mergedDescriptors.size() < addedDescCount )
+    if( !flannIndex || mergedDescriptors.size() < addedDescCount )
     {
         mergedDescriptors.set( trainDescCollection );
-        flannIndex = new flann::Index( mergedDescriptors.getDescriptors(), *indexParams );
+        flannIndex = makePtr<flann::Index>( mergedDescriptors.getDescriptors(), *indexParams );
     }
 }
 
 void FlannBasedMatcher::read( const FileNode& fn)
 {
-     if (indexParams.empty())
-         indexParams = new flann::IndexParams();
+     if (!indexParams)
+         indexParams = makePtr<flann::IndexParams>();
 
      FileNode ip = fn["indexParams"];
      CV_Assert(ip.type() == FileNode::SEQ);
@@ -570,8 +570,8 @@ void FlannBasedMatcher::read( const FileNode& fn)
         };
      }
 
-     if (searchParams.empty())
-         searchParams = new flann::SearchParams();
+     if (!searchParams)
+         searchParams = makePtr<flann::SearchParams>();
 
      FileNode sp = fn["searchParams"];
      CV_Assert(sp.type() == FileNode::SEQ);
@@ -725,7 +725,7 @@ bool FlannBasedMatcher::isMaskSupported() const
 
 Ptr<DescriptorMatcher> FlannBasedMatcher::clone( bool emptyTrainData ) const
 {
-    FlannBasedMatcher* matcher = new FlannBasedMatcher(indexParams, searchParams);
+    Ptr<FlannBasedMatcher> matcher = makePtr<FlannBasedMatcher>(indexParams, searchParams);
     if( !emptyTrainData )
     {
         CV_Error( Error::StsNotImplemented, "deep clone functionality is not implemented, because "
@@ -1066,7 +1066,7 @@ Ptr<GenericDescriptorMatcher> GenericDescriptorMatcher::create( const String& ge
     Ptr<GenericDescriptorMatcher> descriptorMatcher =
         Algorithm::create<GenericDescriptorMatcher>("DescriptorMatcher." + genericDescritptorMatcherType);
 
-    if( !paramsFilename.empty() && !descriptorMatcher.empty() )
+    if( !paramsFilename.empty() && descriptorMatcher )
     {
         FileStorage fs = FileStorage( paramsFilename, FileStorage::READ );
         if( fs.isOpened() )
@@ -1086,7 +1086,7 @@ VectorDescriptorMatcher::VectorDescriptorMatcher( const Ptr<DescriptorExtractor>
                                                   const Ptr<DescriptorMatcher>& _matcher )
                                 : extractor( _extractor ), matcher( _matcher )
 {
-    CV_Assert( !extractor.empty() && !matcher.empty() );
+    CV_Assert( extractor && matcher );
 }
 
 VectorDescriptorMatcher::~VectorDescriptorMatcher()
@@ -1152,14 +1152,14 @@ void VectorDescriptorMatcher::write (FileStorage& fs) const
 
 bool VectorDescriptorMatcher::empty() const
 {
-    return extractor.empty() || extractor->empty() ||
-           matcher.empty() || matcher->empty();
+    return !extractor || extractor->empty() ||
+           !matcher || matcher->empty();
 }
 
 Ptr<GenericDescriptorMatcher> VectorDescriptorMatcher::clone( bool emptyTrainData ) const
 {
     // TODO clone extractor
-    return new VectorDescriptorMatcher( extractor, matcher->clone(emptyTrainData) );
+    return makePtr<VectorDescriptorMatcher>( extractor, matcher->clone(emptyTrainData) );
 }
 
 }
diff --git a/modules/features2d/src/precomp.cpp b/modules/features2d/src/precomp.cpp
deleted file mode 100644
index 3e0ec42de..000000000
--- a/modules/features2d/src/precomp.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-/* End of file. */
diff --git a/modules/features2d/test/test_brisk.cpp b/modules/features2d/test/test_brisk.cpp
index adac8e2e7..38e07c3f6 100644
--- a/modules/features2d/test/test_brisk.cpp
+++ b/modules/features2d/test/test_brisk.cpp
@@ -93,4 +93,3 @@ void CV_BRISKTest::run( int )
 }
 
 TEST(Features2d_BRISK, regression) { CV_BRISKTest test; test.safe_run(); }
-
diff --git a/modules/features2d/test/test_descriptors_regression.cpp b/modules/features2d/test/test_descriptors_regression.cpp
index 548e81884..08eb59eae 100644
--- a/modules/features2d/test/test_descriptors_regression.cpp
+++ b/modules/features2d/test/test_descriptors_regression.cpp
@@ -141,7 +141,7 @@ protected:
 
     void emptyDataTest()
     {
-        assert( !dextractor.empty() );
+        assert( dextractor );
 
         // One image.
         Mat image;
@@ -186,7 +186,7 @@ protected:
 
     void regressionTest()
     {
-        assert( !dextractor.empty() );
+        assert( dextractor );
 
         // Read the test image.
         string imgFilename =  string(ts->get_data_path()) + FEATURES2D_DIR + "/" + IMAGE_FILENAME;
@@ -267,7 +267,7 @@ protected:
     void run(int)
     {
         createDescriptorExtractor();
-        if( dextractor.empty() )
+        if( !dextractor )
         {
             ts->printf(cvtest::TS::LOG, "Descriptor extractor is empty.\n");
             ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA );
diff --git a/modules/features2d/test/test_detectors_regression.cpp b/modules/features2d/test/test_detectors_regression.cpp
index 9a88c42f7..8f34913a9 100644
--- a/modules/features2d/test/test_detectors_regression.cpp
+++ b/modules/features2d/test/test_detectors_regression.cpp
@@ -230,7 +230,7 @@ void CV_FeatureDetectorTest::regressionTest()
 
 void CV_FeatureDetectorTest::run( int /*start_from*/ )
 {
-    if( fdetector.empty() )
+    if( !fdetector )
     {
         ts->printf( cvtest::TS::LOG, "Feature detector is empty.\n" );
         ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA );
diff --git a/modules/features2d/test/test_fast.cpp b/modules/features2d/test/test_fast.cpp
index 76d791864..d500ce549 100644
--- a/modules/features2d/test/test_fast.cpp
+++ b/modules/features2d/test/test_fast.cpp
@@ -135,4 +135,3 @@ void CV_FastTest::run( int )
 }
 
 TEST(Features2d_FAST, regression) { CV_FastTest test; test.safe_run(); }
-
diff --git a/modules/features2d/test/test_keypoints.cpp b/modules/features2d/test/test_keypoints.cpp
index 7bc5c6e9a..e15d4fa17 100644
--- a/modules/features2d/test/test_keypoints.cpp
+++ b/modules/features2d/test/test_keypoints.cpp
@@ -62,7 +62,7 @@ protected:
     virtual void run(int)
     {
         cv::initModule_features2d();
-        CV_Assert(!detector.empty());
+        CV_Assert(detector);
         string imgFilename = string(ts->get_data_path()) + FEATURES2D_DIR + "/" + IMAGE_FILENAME;
 
         // Read the test image.
@@ -166,5 +166,3 @@ TEST(Features2d_Detector_Keypoints_Dense, validation)
     CV_FeatureDetectorKeypointsTest test(Algorithm::create<FeatureDetector>("Feature2D.Dense"));
     test.safe_run();
 }
-
-
diff --git a/modules/features2d/test/test_mser.cpp b/modules/features2d/test/test_mser.cpp
index 1627886ba..6b1b4ef63 100644
--- a/modules/features2d/test/test_mser.cpp
+++ b/modules/features2d/test/test_mser.cpp
@@ -205,4 +205,3 @@ void CV_MserTest::run(int)
 }
 
 TEST(Features2d_MSER, DISABLED_regression) { CV_MserTest test; test.safe_run(); }
-
diff --git a/modules/features2d/test/test_precomp.cpp b/modules/features2d/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/features2d/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/features2d/test/test_rotation_and_scale_invariance.cpp b/modules/features2d/test/test_rotation_and_scale_invariance.cpp
index dd0e48e7d..adfe42804 100644
--- a/modules/features2d/test/test_rotation_and_scale_invariance.cpp
+++ b/modules/features2d/test/test_rotation_and_scale_invariance.cpp
@@ -196,7 +196,7 @@ public:
         minKeyPointMatchesRatio(_minKeyPointMatchesRatio),
         minAngleInliersRatio(_minAngleInliersRatio)
     {
-        CV_Assert(!featureDetector.empty());
+        CV_Assert(featureDetector);
     }
 
 protected:
@@ -307,8 +307,8 @@ public:
         normType(_normType),
         minDescInliersRatio(_minDescInliersRatio)
     {
-        CV_Assert(!featureDetector.empty());
-        CV_Assert(!descriptorExtractor.empty());
+        CV_Assert(featureDetector);
+        CV_Assert(descriptorExtractor);
     }
 
 protected:
@@ -392,7 +392,7 @@ public:
         minKeyPointMatchesRatio(_minKeyPointMatchesRatio),
         minScaleInliersRatio(_minScaleInliersRatio)
     {
-        CV_Assert(!featureDetector.empty());
+        CV_Assert(featureDetector);
     }
 
 protected:
@@ -510,8 +510,8 @@ public:
         normType(_normType),
         minDescInliersRatio(_minDescInliersRatio)
     {
-        CV_Assert(!featureDetector.empty());
-        CV_Assert(!descriptorExtractor.empty());
+        CV_Assert(featureDetector);
+        CV_Assert(descriptorExtractor);
     }
 
 protected:
diff --git a/modules/flann/CMakeLists.txt b/modules/flann/CMakeLists.txt
index 645e8ce85..a6326c40a 100644
--- a/modules/flann/CMakeLists.txt
+++ b/modules/flann/CMakeLists.txt
@@ -1,3 +1,2 @@
 set(the_description "Clustering and Search in Multi-Dimensional Spaces")
 ocv_define_module(flann opencv_core)
-
diff --git a/modules/flann/include/opencv2/flann/any.h b/modules/flann/include/opencv2/flann/any.h
index 4042db67c..070d5a421 100644
--- a/modules/flann/include/opencv2/flann/any.h
+++ b/modules/flann/include/opencv2/flann/any.h
@@ -257,8 +257,7 @@ public:
     const T& cast() const
     {
         if (policy->type() != typeid(T)) throw anyimpl::bad_any_cast();
-        void* obj = const_cast<void*>(object);
-        T* r = reinterpret_cast<T*>(policy->get_value(&obj));
+        T* r = reinterpret_cast<T*>(policy->get_value(const_cast<void **>(&object)));
         return *r;
     }
 
diff --git a/modules/flann/include/opencv2/flann/dist.h b/modules/flann/include/opencv2/flann/dist.h
index e001da7cd..dd4cc421d 100644
--- a/modules/flann/include/opencv2/flann/dist.h
+++ b/modules/flann/include/opencv2/flann/dist.h
@@ -43,8 +43,12 @@ typedef unsigned __int64 uint64_t;
 
 #include "defines.h"
 
+#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
+# include <Intrin.h>
+#endif
+
 #ifdef __ARM_NEON__
-#include "arm_neon.h"
+# include "arm_neon.h"
 #endif
 
 namespace cvflann
diff --git a/modules/flann/include/opencv2/flann/flann.hpp b/modules/flann/include/opencv2/flann/flann.hpp
index f40aaf79f..227683f97 100644
--- a/modules/flann/include/opencv2/flann/flann.hpp
+++ b/modules/flann/include/opencv2/flann/flann.hpp
@@ -45,4 +45,4 @@
 #error this is a compatibility header which should not be used inside the OpenCV library
 #endif
 
-#include "opencv2/flann.hpp"
\ No newline at end of file
+#include "opencv2/flann.hpp"
diff --git a/modules/flann/include/opencv2/flann/random.h b/modules/flann/include/opencv2/flann/random.h
index 2a67352da..a3cf5ec53 100644
--- a/modules/flann/include/opencv2/flann/random.h
+++ b/modules/flann/include/opencv2/flann/random.h
@@ -131,5 +131,3 @@ public:
 }
 
 #endif //OPENCV_FLANN_RANDOM_H
-
-
diff --git a/modules/flann/include/opencv2/flann/result_set.h b/modules/flann/include/opencv2/flann/result_set.h
index 7bb709b76..3adad4659 100644
--- a/modules/flann/include/opencv2/flann/result_set.h
+++ b/modules/flann/include/opencv2/flann/result_set.h
@@ -540,4 +540,3 @@ private:
 }
 
 #endif //OPENCV_FLANN_RESULTSET_H
-
diff --git a/modules/flann/src/flann.cpp b/modules/flann/src/flann.cpp
index 43d0e1f2a..7ebb7eda7 100644
--- a/modules/flann/src/flann.cpp
+++ b/modules/flann/src/flann.cpp
@@ -54,4 +54,4 @@ namespace cvflann
     }
 
     void dummyfunc() {}
-}
\ No newline at end of file
+}
diff --git a/modules/flann/src/precomp.cpp b/modules/flann/src/precomp.cpp
deleted file mode 100644
index c149df18f..000000000
--- a/modules/flann/src/precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "precomp.hpp"
diff --git a/modules/flann/src/precomp.hpp b/modules/flann/src/precomp.hpp
index b16e4a518..1c4154285 100644
--- a/modules/flann/src/precomp.hpp
+++ b/modules/flann/src/precomp.hpp
@@ -23,4 +23,3 @@
 #include "opencv2/core/private.hpp"
 
 #endif
-
diff --git a/modules/flann/test/test_precomp.cpp b/modules/flann/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/flann/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/gpu/doc/data_structures.rst b/modules/gpu/doc/data_structures.rst
index 823d3c309..2a2171035 100644
--- a/modules/gpu/doc/data_structures.rst
+++ b/modules/gpu/doc/data_structures.rst
@@ -309,4 +309,3 @@ Class that enables getting ``cudaStream_t`` from :ocv:class:`gpu::Stream` and is
     {
         CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
     };
-
diff --git a/modules/gpu/doc/initalization_and_information.rst b/modules/gpu/doc/initalization_and_information.rst
index ad4b29d42..abfc0860c 100644
--- a/modules/gpu/doc/initalization_and_information.rst
+++ b/modules/gpu/doc/initalization_and_information.rst
@@ -147,10 +147,10 @@ Class providing functionality for querying the specified GPU properties. ::
         size_t totalConstMem() const;
 
         //! major compute capability
-        int major() const;
+        int majorVersion() const;
 
         //! minor compute capability
-        int minor() const;
+        int minorVersion() const;
 
         //! alignment requirement for textures
         size_t textureAlignment() const;
@@ -313,19 +313,19 @@ Returns the device name.
 
 
 
-gpu::DeviceInfo::major
-----------------------
+gpu::DeviceInfo::majorVersion
+-----------------------------
 Returns the major compute capability version.
 
-.. ocv:function:: int gpu::DeviceInfo::major()
+.. ocv:function:: int gpu::DeviceInfo::majorVersion()
 
 
 
-gpu::DeviceInfo::minor
-----------------------
+gpu::DeviceInfo::minorVersion
+-----------------------------
 Returns the minor compute capability version.
 
-.. ocv:function:: int gpu::DeviceInfo::minor()
+.. ocv:function:: int gpu::DeviceInfo::minorVersion()
 
 
 
diff --git a/modules/gpu/doc/introduction.rst b/modules/gpu/doc/introduction.rst
index ef34c369b..a1237629c 100644
--- a/modules/gpu/doc/introduction.rst
+++ b/modules/gpu/doc/introduction.rst
@@ -60,4 +60,3 @@ With this algorithm, a dual GPU gave a 180
 %
 performance increase comparing to the single Fermi GPU. For a source code example, see
 http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/gpu/.
-
diff --git a/modules/gpu/doc/object_detection.rst b/modules/gpu/doc/object_detection.rst
index 010124410..52755608b 100644
--- a/modules/gpu/doc/object_detection.rst
+++ b/modules/gpu/doc/object_detection.rst
@@ -62,7 +62,12 @@ The class implements Histogram of Oriented Gradients ([Dalal2005]_) object detec
 
 Interfaces of all methods are kept similar to the ``CPU HOG`` descriptor and detector analogues as much as possible.
 
+.. note::
 
+   * An example applying the HOG descriptor for people detection can be found at opencv_source_code/samples/cpp/peopledetect.cpp
+   * A GPU example applying the HOG descriptor for people detection can be found at opencv_source_code/samples/gpu/hog.cpp
+
+   * (Python) An example applying the HOG descriptor for people detection can be found at opencv_source_code/samples/python2/peopledetect.py
 
 gpu::HOGDescriptor::HOGDescriptor
 -------------------------------------
@@ -229,7 +234,10 @@ Cascade classifier class used for object detection. Supports HAAR and LBP cascad
             Size getClassifierSize() const;
     };
 
+.. note::
 
+   * A cascade classifier example can be found at opencv_source_code/samples/gpu/cascadeclassifier.cpp
+   * A Nvidea API specific cascade classifier example can be found at opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
 
 gpu::CascadeClassifier_GPU::CascadeClassifier_GPU
 -----------------------------------------------------
diff --git a/modules/gpu/misc/mark_nvidia.py b/modules/gpu/misc/mark_nvidia.py
index 08743fb13..80dc7f9ce 100755
--- a/modules/gpu/misc/mark_nvidia.py
+++ b/modules/gpu/misc/mark_nvidia.py
@@ -257,4 +257,3 @@ if __name__ == "__main__":
     outputFile = open(sys.argv[2], 'w')
     outputFile.writelines(lines)
     outputFile.close()
-
diff --git a/modules/gpu/perf/perf_main.cpp b/modules/gpu/perf/perf_main.cpp
index a7ac1ccce..802d559af 100644
--- a/modules/gpu/perf/perf_main.cpp
+++ b/modules/gpu/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpu, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpu)
diff --git a/modules/gpu/perf/perf_precomp.cpp b/modules/gpu/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpu/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpu/perf4au/main.cpp b/modules/gpu/perf4au/main.cpp
index d86f7b8f3..707251c42 100644
--- a/modules/gpu/perf4au/main.cpp
+++ b/modules/gpu/perf4au/main.cpp
@@ -52,16 +52,12 @@
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_perf.hpp"
 
-int main(int argc, char* argv[])
-{
-    perf::printCudaInfo();
+static const char * impls[] = {
+    "cuda",
+    "plain"
+};
 
-    perf::Regression::Init("gpu_perf4au");
-    perf::TestBase::Init(argc, argv);
-    testing::InitGoogleTest(&argc, argv);
-
-    return RUN_ALL_TESTS();
-}
+CV_PERF_TEST_MAIN_WITH_IMPLS(gpu_perf4au, impls, perf::printCudaInfo())
 
 //////////////////////////////////////////////////////////
 // HoughLinesP
@@ -86,13 +82,14 @@ PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg"))
     {
         cv::gpu::GpuMat d_image(image);
         cv::gpu::GpuMat d_lines;
-        cv::gpu::HoughLinesBuf d_buf;
 
-        cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
+        cv::Ptr<cv::gpu::HoughSegmentDetector> hough = cv::gpu::createHoughSegmentDetector(rho, theta, minLineLenght, maxLineGap);
+
+        hough->detect(d_image, d_lines);
 
         TEST_CYCLE()
         {
-            cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
+            hough->detect(d_image, d_lines);
         }
     }
     else
@@ -147,17 +144,17 @@ PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, k);
+        cv::Ptr<cv::gpu::CornersDetector> detector = cv::gpu::createGoodFeaturesToTrackDetector(src.type(), maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, k);
 
         cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_mask(mask);
         cv::gpu::GpuMat d_pts;
 
-        d_detector(d_src, d_pts, d_mask);
+        detector->detect(d_src, d_pts, d_mask);
 
         TEST_CYCLE()
         {
-            d_detector(d_src, d_pts, d_mask);
+            detector->detect(d_src, d_pts, d_mask);
         }
     }
     else
diff --git a/modules/gpu/src/calib3d.cpp b/modules/gpu/src/calib3d.cpp
index 9e912582a..631174218 100644
--- a/modules/gpu/src/calib3d.cpp
+++ b/modules/gpu/src/calib3d.cpp
@@ -290,5 +290,3 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
 }
 
 #endif
-
-
diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp
index 74867b48d..fc5eafbf4 100644
--- a/modules/gpu/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@@ -207,8 +207,8 @@ private:
         ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
 
         // Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
-        gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice, static_cast<int>(devProp.textureAlignment));
-        cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, static_cast<int>(devProp.textureAlignment));
+        gpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeDevice, static_cast<int>(devProp.textureAlignment));
+        cpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeHostPinned, static_cast<int>(devProp.textureAlignment));
 
         ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
         ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);
@@ -217,9 +217,9 @@ private:
         ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
         ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR);
 
-        h_haarStages   = new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages);
-        h_haarNodes    = new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes);
-        h_haarFeatures = new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures);
+        h_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages));
+        h_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes));
+        h_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures));
 
         ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
         ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
@@ -228,9 +228,9 @@ private:
         ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures);
         ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", NCV_FILE_ERROR);
 
-        d_haarStages   = new NCVVectorAlloc<HaarStage64>(*gpuCascadeAllocator, haarNumStages);
-        d_haarNodes    = new NCVVectorAlloc<HaarClassifierNode128>(*gpuCascadeAllocator, haarNumNodes);
-        d_haarFeatures = new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures);
+        d_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*gpuCascadeAllocator, haarNumStages));
+        d_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*gpuCascadeAllocator, haarNumNodes));
+        d_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures));
 
         ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
         ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
@@ -279,8 +279,8 @@ private:
         ncvAssertReturnNcvStat(ncvStat);
         ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
 
-        gpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
-        cpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
+        gpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
+        cpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
 
         ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
         ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);
diff --git a/modules/gpu/src/precomp.cpp b/modules/gpu/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpu/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpu/test/test_precomp.cpp b/modules/gpu/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpu/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp
index f2b0bf405..0a9c1df3d 100644
--- a/modules/gpu/test/test_precomp.hpp
+++ b/modules/gpu/test/test_precomp.hpp
@@ -62,4 +62,6 @@
 #include "opencv2/calib3d.hpp"
 #include "opencv2/objdetect.hpp"
 
+#include "cvconfig.h"
+
 #endif
diff --git a/modules/gpu/test/test_stream.cpp b/modules/gpu/test/test_stream.cpp
index a34a4e840..4ce29db75 100644
--- a/modules/gpu/test/test_stream.cpp
+++ b/modules/gpu/test/test_stream.cpp
@@ -44,10 +44,12 @@
 
 #ifdef HAVE_CUDA
 
-using namespace cvtest;
+#include <cuda_runtime.h>
 
 #if CUDART_VERSION >= 5000
 
+using namespace cvtest;
+
 struct Async : testing::TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::CudaMem src;
@@ -61,20 +63,21 @@ struct Async : testing::TestWithParam<cv::gpu::DeviceInfo>
         cv::gpu::DeviceInfo devInfo = GetParam();
         cv::gpu::setDevice(devInfo.deviceID());
 
+        src = cv::gpu::CudaMem(cv::gpu::CudaMem::PAGE_LOCKED);
+
         cv::Mat m = randomMat(cv::Size(128, 128), CV_8UC1);
-        src.create(m.size(), m.type(), cv::gpu::CudaMem::ALLOC_PAGE_LOCKED);
-        m.copyTo(src.createMatHeader());
+        m.copyTo(src);
     }
 };
 
-void checkMemSet(cv::gpu::Stream&, int status, void* userData)
+void checkMemSet(int status, void* userData)
 {
     ASSERT_EQ(cudaSuccess, status);
 
     Async* test = reinterpret_cast<Async*>(userData);
 
-    cv::Mat src = test->src;
-    cv::Mat dst = test->dst;
+    cv::gpu::CudaMem src = test->src;
+    cv::gpu::CudaMem dst = test->dst;
 
     cv::Mat dst_gold = cv::Mat::zeros(src.size(), src.type());
 
@@ -87,8 +90,8 @@ GPU_TEST_P(Async, MemSet)
 
     d_dst.upload(src);
 
-    stream.enqueueMemSet(d_dst, cv::Scalar::all(0));
-    stream.enqueueDownload(d_dst, dst);
+    d_dst.setTo(cv::Scalar::all(0), stream);
+    d_dst.download(dst, stream);
 
     Async* test = this;
     stream.enqueueHostCallback(checkMemSet, test);
@@ -96,17 +99,17 @@ GPU_TEST_P(Async, MemSet)
     stream.waitForCompletion();
 }
 
-void checkConvert(cv::gpu::Stream&, int status, void* userData)
+void checkConvert(int status, void* userData)
 {
     ASSERT_EQ(cudaSuccess, status);
 
     Async* test = reinterpret_cast<Async*>(userData);
 
-    cv::Mat src = test->src;
-    cv::Mat dst = test->dst;
+    cv::gpu::CudaMem src = test->src;
+    cv::gpu::CudaMem dst = test->dst;
 
     cv::Mat dst_gold;
-    src.convertTo(dst_gold, CV_32S);
+    src.createMatHeader().convertTo(dst_gold, CV_32S);
 
     ASSERT_MAT_NEAR(dst_gold, dst, 0);
 }
@@ -115,9 +118,9 @@ GPU_TEST_P(Async, Convert)
 {
     cv::gpu::Stream stream;
 
-    stream.enqueueUpload(src, d_src);
-    stream.enqueueConvert(d_src, d_dst, CV_32S);
-    stream.enqueueDownload(d_dst, dst);
+    d_src.upload(src, stream);
+    d_src.convertTo(d_dst, CV_32S, stream);
+    d_dst.download(dst, stream);
 
     Async* test = this;
     stream.enqueueHostCallback(checkConvert, test);
@@ -127,6 +130,6 @@ GPU_TEST_P(Async, Convert)
 
 INSTANTIATE_TEST_CASE_P(GPU_Stream, Async, ALL_DEVICES);
 
-#endif
+#endif // CUDART_VERSION >= 5000
 
 #endif // HAVE_CUDA
diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
index 8fbe296d8..dabb4e2ab 100644
--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -49,17 +49,6 @@
 
 #include "opencv2/core/gpu.hpp"
 
-#if defined __GNUC__
-    #define __OPENCV_GPUARITHM_DEPR_BEFORE__
-    #define __OPENCV_GPUARITHM_DEPR_AFTER__ __attribute__ ((deprecated))
-#elif (defined WIN32 || defined _WIN32)
-    #define __OPENCV_GPUARITHM_DEPR_BEFORE__ __declspec(deprecated)
-    #define __OPENCV_GPUARITHM_DEPR_AFTER__
-#else
-    #define __OPENCV_GPUARITHM_DEPR_BEFORE__
-    #define __OPENCV_GPUARITHM_DEPR_AFTER__
-#endif
-
 namespace cv { namespace gpu {
 
 //! adds one matrix to another (dst = src1 + src2)
@@ -197,13 +186,8 @@ class CV_EXPORTS LookUpTable : public Algorithm
 public:
     virtual void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0;
 };
-CV_EXPORTS Ptr<LookUpTable> createLookUpTable(InputArray lut);
 
-__OPENCV_GPUARITHM_DEPR_BEFORE__ void LUT(InputArray src, InputArray lut, OutputArray dst, Stream& stream = Stream::Null()) __OPENCV_GPUARITHM_DEPR_AFTER__;
-inline void LUT(InputArray src, InputArray lut, OutputArray dst, Stream& stream)
-{
-    createLookUpTable(lut)->transform(src, dst, stream);
-}
+CV_EXPORTS Ptr<LookUpTable> createLookUpTable(InputArray lut);
 
 //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
 CV_EXPORTS void copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType,
@@ -382,38 +366,9 @@ class CV_EXPORTS Convolution : public Algorithm
 public:
     virtual void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) = 0;
 };
+
 CV_EXPORTS Ptr<Convolution> createConvolution(Size user_block_size = Size());
 
-__OPENCV_GPUARITHM_DEPR_BEFORE__ void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) __OPENCV_GPUARITHM_DEPR_AFTER__;
-inline void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr , Stream& stream)
-{
-    createConvolution()->convolve(image, templ, result, ccorr, stream);
-}
-
-struct ConvolveBuf
-{
-    Size result_size;
-    Size block_size;
-    Size user_block_size;
-    Size dft_size;
-    int spect_len;
-
-    GpuMat image_spect, templ_spect, result_spect;
-    GpuMat image_block, templ_block, result_data;
-
-    void create(Size, Size){}
-    static Size estimateBlockSize(Size, Size){ return Size(); }
-};
-
-__OPENCV_GPUARITHM_DEPR_BEFORE__ void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null()) __OPENCV_GPUARITHM_DEPR_AFTER__;
-inline void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr, ConvolveBuf& buf, Stream& stream)
-{
-    createConvolution(buf.user_block_size)->convolve(image, templ, result, ccorr, stream);
-}
-
 }} // namespace cv { namespace gpu {
 
-#undef __OPENCV_GPUARITHM_DEPR_BEFORE__
-#undef __OPENCV_GPUARITHM_DEPR_AFTER__
-
 #endif /* __OPENCV_GPUARITHM_HPP__ */
diff --git a/modules/gpuarithm/perf/perf_arithm.cpp b/modules/gpuarithm/perf/perf_arithm.cpp
index dfeafa0fa..b18c8a8c0 100644
--- a/modules/gpuarithm/perf/perf_arithm.cpp
+++ b/modules/gpuarithm/perf/perf_arithm.cpp
@@ -84,7 +84,7 @@ PERF_TEST_P(Sz_Type_Flags, GEMM,
 
         TEST_CYCLE() cv::gpu::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, dst, flags);
 
-        GPU_SANITY_CHECK(dst, 1e-6);
+        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
     else
     {
@@ -234,7 +234,7 @@ PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve,
 
         TEST_CYCLE() convolution->convolve(d_image, d_templ, dst, ccorr);
 
-        GPU_SANITY_CHECK(dst);
+        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
     else
     {
diff --git a/modules/gpuarithm/perf/perf_main.cpp b/modules/gpuarithm/perf/perf_main.cpp
index b35791cda..552cf5c16 100644
--- a/modules/gpuarithm/perf/perf_main.cpp
+++ b/modules/gpuarithm/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpuarithm, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpuarithm)
diff --git a/modules/gpuarithm/perf/perf_precomp.cpp b/modules/gpuarithm/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpuarithm/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
index 6045cf5ba..78071ba8a 100644
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -173,7 +173,7 @@ void cv::gpu::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
     (void) _dst;
     (void) flags;
     (void) stream;
-    CV_Error(:Error::StsNotImplemented, "The library was build without CUBLAS");
+    CV_Error(Error::StsNotImplemented, "The library was build without CUBLAS");
 #else
     // CUBLAS works with column-major matrices
 
@@ -624,12 +624,12 @@ namespace
 
 Ptr<Convolution> cv::gpu::createConvolution(Size user_block_size)
 {
-#ifndef HAVE_CUBLAS
+#ifndef HAVE_CUFFT
     (void) user_block_size;
-    CV_Error(cv::Error::StsNotImplemented, "The library was build without CUFFT");
-    return Ptr<BLAS>();
+    CV_Error(Error::StsNotImplemented, "The library was build without CUFFT");
+    return Ptr<Convolution>();
 #else
-    return new ConvolutionImpl(user_block_size);
+    return makePtr<ConvolutionImpl>(user_block_size);
 #endif
 }
 
diff --git a/modules/gpuarithm/src/core.cpp b/modules/gpuarithm/src/core.cpp
index 22887796a..05ea5741d 100644
--- a/modules/gpuarithm/src/core.cpp
+++ b/modules/gpuarithm/src/core.cpp
@@ -497,7 +497,7 @@ namespace
 
 Ptr<LookUpTable> cv::gpu::createLookUpTable(InputArray lut)
 {
-    return new LookUpTableImpl(lut);
+    return makePtr<LookUpTableImpl>(lut);
 }
 
 ////////////////////////////////////////////////////////////////////////
diff --git a/modules/gpuarithm/src/cuda/reduce.cu b/modules/gpuarithm/src/cuda/reduce.cu
index 51c354cf9..8588a3b23 100644
--- a/modules/gpuarithm/src/cuda/reduce.cu
+++ b/modules/gpuarithm/src/cuda/reduce.cu
@@ -72,7 +72,7 @@ namespace reduce
         }
 
         template <typename T>
-        __device__ __forceinline__ T result(T r, double) const
+        __device__ __forceinline__ T result(T r, int) const
         {
             return r;
         }
@@ -81,6 +81,15 @@ namespace reduce
         __host__ __device__ __forceinline__ Sum(const Sum&) {}
     };
 
+    template <typename T> struct OutputType
+    {
+        typedef float type;
+    };
+    template <> struct OutputType<double>
+    {
+        typedef double type;
+    };
+
     struct Avg
     {
         template <typename T>
@@ -96,7 +105,7 @@ namespace reduce
         }
 
         template <typename T>
-        __device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T r, double sz) const
+        __device__ __forceinline__ typename TypeVec<typename OutputType<typename VecTraits<T>::elem_type>::type, VecTraits<T>::cn>::vec_type result(T r, float sz) const
         {
             return r / sz;
         }
@@ -121,7 +130,7 @@ namespace reduce
         }
 
         template <typename T>
-        __device__ __forceinline__ T result(T r, double) const
+        __device__ __forceinline__ T result(T r, int) const
         {
             return r;
         }
@@ -146,7 +155,7 @@ namespace reduce
         }
 
         template <typename T>
-        __device__ __forceinline__ T result(T r, double) const
+        __device__ __forceinline__ T result(T r, int) const
         {
             return r;
         }
diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp
index 3ec4f84f6..20473de38 100644
--- a/modules/gpuarithm/src/element_operations.cpp
+++ b/modules/gpuarithm/src/element_operations.cpp
@@ -1912,7 +1912,7 @@ void cv::gpu::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, S
     }
     else
     {
-        bitMatNot<unsigned short>(
+        bitMatNot<unsigned char>(
                     PtrStepSzb(src.rows, bcols, src.data, src.step),
                     PtrStepSzb(src.rows, bcols, dst.data, dst.step),
                     mask, stream);
diff --git a/modules/gpuarithm/src/precomp.cpp b/modules/gpuarithm/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpuarithm/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpuarithm/test/test_precomp.cpp b/modules/gpuarithm/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpuarithm/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpuarithm/test/test_precomp.hpp b/modules/gpuarithm/test/test_precomp.hpp
index f9d3ad43c..faa0f5adb 100644
--- a/modules/gpuarithm/test/test_precomp.hpp
+++ b/modules/gpuarithm/test/test_precomp.hpp
@@ -51,6 +51,8 @@
 #ifndef __OPENCV_TEST_PRECOMP_HPP__
 #define __OPENCV_TEST_PRECOMP_HPP__
 
+#include <functional>
+
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_test.hpp"
 
@@ -58,4 +60,6 @@
 #include "opencv2/core.hpp"
 #include "opencv2/imgproc.hpp"
 
+#include "cvconfig.h"
+
 #endif
diff --git a/modules/gpubgsegm/CMakeLists.txt b/modules/gpubgsegm/CMakeLists.txt
index f37ec595c..f87f3cb88 100644
--- a/modules/gpubgsegm/CMakeLists.txt
+++ b/modules/gpubgsegm/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Background Segmentation")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpubgsegm opencv_video opencv_imgproc opencv_legacy opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc)
+ocv_define_module(gpubgsegm opencv_video OPTIONAL opencv_legacy opencv_imgproc opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc)
diff --git a/modules/gpubgsegm/doc/background_segmentation.rst b/modules/gpubgsegm/doc/background_segmentation.rst
index f892ee666..53e67ead6 100644
--- a/modules/gpubgsegm/doc/background_segmentation.rst
+++ b/modules/gpubgsegm/doc/background_segmentation.rst
@@ -5,404 +5,123 @@ Background Segmentation
 
 
 
-gpu::FGDStatModel
------------------
-.. ocv:class:: gpu::FGDStatModel
+gpu::BackgroundSubtractorMOG
+----------------------------
+Gaussian Mixture-based Background/Foreground Segmentation Algorithm.
 
-Class used for background/foreground segmentation. ::
-
-    class FGDStatModel
-    {
-    public:
-        struct Params
-        {
-            ...
-        };
-
-        explicit FGDStatModel(int out_cn = 3);
-        explicit FGDStatModel(const cv::gpu::GpuMat& firstFrame, const Params& params = Params(), int out_cn = 3);
-
-        ~FGDStatModel();
-
-        void create(const cv::gpu::GpuMat& firstFrame, const Params& params = Params());
-        void release();
-
-        int update(const cv::gpu::GpuMat& curFrame);
-
-        //8UC3 or 8UC4 reference background image
-        cv::gpu::GpuMat background;
-
-        //8UC1 foreground image
-        cv::gpu::GpuMat foreground;
-
-        std::vector< std::vector<cv::Point> > foreground_regions;
-    };
-
-  The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [FGD2003]_.
-
-  The results are available through the class fields:
-
-    .. ocv:member:: cv::gpu::GpuMat background
-
-        The output background image.
-
-    .. ocv:member:: cv::gpu::GpuMat foreground
-
-        The output foreground mask as an 8-bit binary image.
-
-    .. ocv:member:: cv::gpu::GpuMat foreground_regions
-
-        The output foreground regions calculated by :ocv:func:`findContours`.
-
-
-
-gpu::FGDStatModel::FGDStatModel
--------------------------------
-Constructors.
-
-.. ocv:function:: gpu::FGDStatModel::FGDStatModel(int out_cn = 3)
-.. ocv:function:: gpu::FGDStatModel::FGDStatModel(const cv::gpu::GpuMat& firstFrame, const Params& params = Params(), int out_cn = 3)
-
-    :param firstFrame: First frame from video stream. Supports 3- and 4-channels input ( ``CV_8UC3`` and ``CV_8UC4`` ).
-
-    :param params: Algorithm's parameters. See [FGD2003]_ for explanation.
-
-    :param out_cn: Channels count in output result and inner buffers. Can be 3 or 4. 4-channels version requires more memory, but works a bit faster.
-
-.. seealso:: :ocv:func:`gpu::FGDStatModel::create`
-
-
-
-gpu::FGDStatModel::create
--------------------------
-Initializes background model.
-
-.. ocv:function:: void gpu::FGDStatModel::create(const cv::gpu::GpuMat& firstFrame, const Params& params = Params())
-
-    :param firstFrame: First frame from video stream. Supports 3- and 4-channels input ( ``CV_8UC3`` and ``CV_8UC4`` ).
-
-    :param params: Algorithm's parameters. See [FGD2003]_ for explanation.
-
-
-
-gpu::FGDStatModel::release
---------------------------
-Releases all inner buffer's memory.
-
-.. ocv:function:: void gpu::FGDStatModel::release()
-
-
-
-gpu::FGDStatModel::update
---------------------------
-Updates the background model and returns foreground regions count.
-
-.. ocv:function:: int gpu::FGDStatModel::update(const cv::gpu::GpuMat& curFrame)
-
-    :param curFrame: Next video frame.
-
-
-
-gpu::MOG_GPU
-------------
-.. ocv:class:: gpu::MOG_GPU
-
-Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm. ::
-
-    class MOG_GPU
-    {
-    public:
-        MOG_GPU(int nmixtures = -1);
-
-        void initialize(Size frameSize, int frameType);
-
-        void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = 0.0f, Stream& stream = Stream::Null());
-
-        void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
-
-        void release();
-
-        int history;
-        float varThreshold;
-        float backgroundRatio;
-        float noiseSigma;
-    };
+.. ocv:class:: gpu::BackgroundSubtractorMOG : public cv::BackgroundSubtractorMOG
 
 The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2001]_.
 
 .. seealso:: :ocv:class:`BackgroundSubtractorMOG`
 
+.. note::
+
+   * An example on gaussian mixture based background/foreground segmantation can be found at opencv_source_code/samples/gpu/bgfg_segm.cpp
 
 
-gpu::MOG_GPU::MOG_GPU
----------------------
-The constructor.
+gpu::createBackgroundSubtractorMOG
+----------------------------------
+Creates mixture-of-gaussian background subtractor
 
-.. ocv:function:: gpu::MOG_GPU::MOG_GPU(int nmixtures = -1)
+.. ocv:function:: Ptr<gpu::BackgroundSubtractorMOG> gpu::createBackgroundSubtractorMOG(int history=200, int nmixtures=5, double backgroundRatio=0.7, double noiseSigma=0)
+
+    :param history: Length of the history.
 
     :param nmixtures: Number of Gaussian mixtures.
 
-Default constructor sets all parameters to default values.
+    :param backgroundRatio: Background ratio.
+
+    :param noiseSigma: Noise strength (standard deviation of the brightness or each color channel). 0 means some automatic value.
 
 
 
-gpu::MOG_GPU::operator()
-------------------------
-Updates the background model and returns the foreground mask.
+gpu::BackgroundSubtractorMOG2
+-----------------------------
+Gaussian Mixture-based Background/Foreground Segmentation Algorithm.
 
-.. ocv:function:: void gpu::MOG_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = 0.0f, Stream& stream = Stream::Null())
+.. ocv:class:: gpu::BackgroundSubtractorMOG2 : public cv::BackgroundSubtractorMOG2
 
-    :param frame: Next video frame.
-
-    :param fgmask: The output foreground mask as an 8-bit binary image.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::MOG_GPU::getBackgroundImage
---------------------------------
-Computes a background image.
-
-.. ocv:function:: void gpu::MOG_GPU::getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const
-
-    :param backgroundImage: The output background image.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::MOG_GPU::release
----------------------
-Releases all inner buffer's memory.
-
-.. ocv:function:: void gpu::MOG_GPU::release()
-
-
-
-gpu::MOG2_GPU
--------------
-.. ocv:class:: gpu::MOG2_GPU
-
-Gaussian Mixture-based Background/Foreground Segmentation Algorithm. ::
-
-    class MOG2_GPU
-    {
-    public:
-        MOG2_GPU(int nmixtures = -1);
-
-        void initialize(Size frameSize, int frameType);
-
-        void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = 0.0f, Stream& stream = Stream::Null());
-
-        void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
-
-        void release();
-
-        // parameters
-        ...
-    };
-
-  The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2004]_.
-
-  Here are important members of the class that control the algorithm, which you can set after constructing the class instance:
-
-    .. ocv:member:: float backgroundRatio
-
-        Threshold defining whether the component is significant enough to be included into the background model ( corresponds to ``TB=1-cf`` from the paper??which paper??). ``cf=0.1 => TB=0.9`` is default. For ``alpha=0.001``, it means that the mode should exist for approximately 105 frames before it is considered foreground.
-
-    .. ocv:member:: float varThreshold
-
-        Threshold for the squared Mahalanobis distance that helps decide when a sample is close to the existing components (corresponds to ``Tg``). If it is not close to any component, a new component is generated. ``3 sigma => Tg=3*3=9`` is default. A smaller ``Tg`` value generates more components. A higher ``Tg`` value may result in a small number of components but they can grow too large.
-
-    .. ocv:member:: float fVarInit
-
-        Initial variance for the newly generated components. It affects the speed of adaptation. The parameter value is based on your estimate of the typical standard deviation from the images. OpenCV uses 15 as a reasonable value.
-
-    .. ocv:member:: float fVarMin
-
-        Parameter used to further control the variance.
-
-    .. ocv:member:: float fVarMax
-
-        Parameter used to further control the variance.
-
-    .. ocv:member:: float fCT
-
-        Complexity reduction parameter. This parameter defines the number of samples needed to accept to prove the component exists. ``CT=0.05`` is a default value for all the samples. By setting ``CT=0`` you get an algorithm very similar to the standard Stauffer&Grimson algorithm.
-
-    .. ocv:member:: uchar nShadowDetection
-
-        The value for marking shadow pixels in the output foreground mask. Default value is 127.
-
-    .. ocv:member:: float fTau
-
-        Shadow threshold. The shadow is detected if the pixel is a darker version of the background. ``Tau`` is a threshold defining how much darker the shadow can be. ``Tau= 0.5`` means that if a pixel is more than twice darker then it is not shadow. See [ShadowDetect2003]_.
-
-    .. ocv:member:: bool bShadowDetection
-
-        Parameter defining whether shadow detection should be enabled.
+The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2004]_.
 
 .. seealso:: :ocv:class:`BackgroundSubtractorMOG2`
 
 
 
-gpu::MOG2_GPU::MOG2_GPU
------------------------
-The constructor.
+gpu::createBackgroundSubtractorMOG2
+-----------------------------------
+Creates MOG2 Background Subtractor
 
-.. ocv:function:: gpu::MOG2_GPU::MOG2_GPU(int nmixtures = -1)
+.. ocv:function:: Ptr<gpu::BackgroundSubtractorMOG2> gpu::createBackgroundSubtractorMOG2( int history=500, double varThreshold=16, bool detectShadows=true )
 
-    :param nmixtures: Number of Gaussian mixtures.
+  :param history: Length of the history.
 
-Default constructor sets all parameters to default values.
+  :param varThreshold: Threshold on the squared Mahalanobis distance between the pixel and the model to decide whether a pixel is well described by the background model. This parameter does not affect the background update.
+
+  :param detectShadows: If true, the algorithm will detect shadows and mark them. It decreases the speed a bit, so if you do not need this feature, set the parameter to false.
 
 
 
-gpu::MOG2_GPU::operator()
--------------------------
-Updates the background model and returns the foreground mask.
+gpu::BackgroundSubtractorGMG
+----------------------------
+Background/Foreground Segmentation Algorithm.
 
-.. ocv:function:: void gpu::MOG2_GPU::operator()( const GpuMat& frame, GpuMat& fgmask, float learningRate=-1.0f, Stream& stream=Stream::Null() )
+.. ocv:class:: gpu::BackgroundSubtractorGMG : public cv::BackgroundSubtractorGMG
 
-    :param frame: Next video frame.
-
-    :param fgmask: The output foreground mask as an 8-bit binary image.
-
-    :param stream: Stream for the asynchronous version.
+The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [GMG2012]_.
 
 
 
-gpu::MOG2_GPU::getBackgroundImage
----------------------------------
-Computes a background image.
+gpu::createBackgroundSubtractorGMG
+----------------------------------
+Creates GMG Background Subtractor
 
-.. ocv:function:: void gpu::MOG2_GPU::getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const
+.. ocv:function:: Ptr<gpu::BackgroundSubtractorGMG> gpu::createBackgroundSubtractorGMG(int initializationFrames = 120, double decisionThreshold = 0.8)
 
-    :param backgroundImage: The output background image.
+    :param initializationFrames: Number of frames of video to use to initialize histograms.
 
-    :param stream: Stream for the asynchronous version.
+    :param decisionThreshold: Value above which pixel is determined to be FG.
 
 
 
-gpu::MOG2_GPU::release
-----------------------
-Releases all inner buffer's memory.
+gpu::BackgroundSubtractorFGD
+----------------------------
 
-.. ocv:function:: void gpu::MOG2_GPU::release()
+.. ocv:class:: gpu::BackgroundSubtractorFGD : public cv::BackgroundSubtractor
 
+The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [FGD2003]_. ::
 
-
-gpu::GMG_GPU
-------------
-.. ocv:class:: gpu::GMG_GPU
-
-  Class used for background/foreground segmentation. ::
-
-    class GMG_GPU_GPU
+    class CV_EXPORTS BackgroundSubtractorFGD : public cv::BackgroundSubtractor
     {
     public:
-        GMG_GPU();
-
-        void initialize(Size frameSize, float min = 0.0f, float max = 255.0f);
-
-        void operator ()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
-
-        void release();
-
-        int    maxFeatures;
-        float  learningRate;
-        int    numInitializationFrames;
-        int    quantizationLevels;
-        float  backgroundPrior;
-        float  decisionThreshold;
-        int    smoothingRadius;
-
-        ...
+        virtual void getForegroundRegions(OutputArrayOfArrays foreground_regions) = 0;
     };
 
-  The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [GMG2012]_.
-
-  Here are important members of the class that control the algorithm, which you can set after constructing the class instance:
-
-    .. ocv:member:: int maxFeatures
-
-        Total number of distinct colors to maintain in histogram.
-
-    .. ocv:member:: float learningRate
-
-        Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms.
-
-    .. ocv:member:: int numInitializationFrames
-
-        Number of frames of video to use to initialize histograms.
-
-    .. ocv:member:: int quantizationLevels
-
-        Number of discrete levels in each channel to be used in histograms.
-
-    .. ocv:member:: float backgroundPrior
-
-        Prior probability that any given pixel is a background pixel. A sensitivity parameter.
-
-    .. ocv:member:: float decisionThreshold
-
-        Value above which pixel is determined to be FG.
-
-    .. ocv:member:: float smoothingRadius
-
-        Smoothing radius, in pixels, for cleaning up FG image.
+.. seealso:: :ocv:class:`BackgroundSubtractor`
 
 
 
-gpu::GMG_GPU::GMG_GPU
----------------------
-The default constructor.
+gpu::BackgroundSubtractorFGD::getForegroundRegions
+--------------------------------------------------
+Returns the output foreground regions calculated by :ocv:func:`findContours`.
 
-.. ocv:function:: gpu::GMG_GPU::GMG_GPU()
+.. ocv:function:: void gpu::BackgroundSubtractorFGD::getForegroundRegions(OutputArrayOfArrays foreground_regions)
 
-Default constructor sets all parameters to default values.
+    :params foreground_regions: Output array (CPU memory).
 
 
 
-gpu::GMG_GPU::initialize
-------------------------
-Initialize background model and allocates all inner buffers.
+gpu::createBackgroundSubtractorFGD
+----------------------------------
+Creates FGD Background Subtractor
 
-.. ocv:function:: void gpu::GMG_GPU::initialize(Size frameSize, float min = 0.0f, float max = 255.0f)
+.. ocv:function:: Ptr<gpu::BackgroundSubtractorGMG> gpu::createBackgroundSubtractorFGD(const FGDParams& params = FGDParams())
 
-    :param frameSize: Input frame size.
-
-    :param min: Minimum value taken on by pixels in image sequence. Usually 0.
-
-    :param max: Maximum value taken on by pixels in image sequence, e.g. 1.0 or 255.
-
-
-
-gpu::GMG_GPU::operator()
-------------------------
-Updates the background model and returns the foreground mask
-
-.. ocv:function:: void gpu::GMG_GPU::operator ()( const GpuMat& frame, GpuMat& fgmask, float learningRate=-1.0f, Stream& stream=Stream::Null() )
-
-    :param frame: Next video frame.
-
-    :param fgmask: The output foreground mask as an 8-bit binary image.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::GMG_GPU::release
----------------------
-Releases all inner buffer's memory.
-
-.. ocv:function:: void gpu::GMG_GPU::release()
+    :param params: Algorithm's parameters. See [FGD2003]_ for explanation.
 
 
 
 .. [FGD2003] Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian. *Foreground Object Detection from Videos Containing Complex Background*. ACM MM2003 9p, 2003.
 .. [MOG2001] P. KadewTraKuPong and R. Bowden. *An improved adaptive background mixture model for real-time tracking with shadow detection*. Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001
 .. [MOG2004] Z. Zivkovic. *Improved adaptive Gausian mixture model for background subtraction*. International Conference Pattern Recognition, UK, August, 2004
-.. [ShadowDetect2003] Prati, Mikic, Trivedi and Cucchiarra. *Detecting Moving Shadows...*. IEEE PAMI, 2003
 .. [GMG2012] A. Godbehere, A. Matsukawa and K. Goldberg. *Visual Tracking of Human Visitors under Variable-Lighting Conditions for a Responsive Audio Art Installation*. American Control Conference, Montreal, June 2012
diff --git a/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
index 3fe62ec94..c6d9223dd 100644
--- a/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
+++ b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
@@ -47,284 +47,106 @@
 #  error gpubgsegm.hpp header must be compiled as C++
 #endif
 
-#include <memory>
-
 #include "opencv2/core/gpu.hpp"
-#include "opencv2/gpufilters.hpp"
+#include "opencv2/video/background_segm.hpp"
 
 namespace cv { namespace gpu {
 
-// Foreground Object Detection from Videos Containing Complex Background.
-// Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian.
-// ACM MM2003 9p
-class CV_EXPORTS FGDStatModel
+////////////////////////////////////////////////////
+// MOG
+
+class CV_EXPORTS BackgroundSubtractorMOG : public cv::BackgroundSubtractorMOG
 {
 public:
-    struct CV_EXPORTS Params
-    {
-        int Lc;  // Quantized levels per 'color' component. Power of two, typically 32, 64 or 128.
-        int N1c; // Number of color vectors used to model normal background color variation at a given pixel.
-        int N2c; // Number of color vectors retained at given pixel.  Must be > N1c, typically ~ 5/3 of N1c.
-        // Used to allow the first N1c vectors to adapt over time to changing background.
+    using cv::BackgroundSubtractorMOG::apply;
+    using cv::BackgroundSubtractorMOG::getBackgroundImage;
 
-        int Lcc;  // Quantized levels per 'color co-occurrence' component.  Power of two, typically 16, 32 or 64.
-        int N1cc; // Number of color co-occurrence vectors used to model normal background color variation at a given pixel.
-        int N2cc; // Number of color co-occurrence vectors retained at given pixel.  Must be > N1cc, typically ~ 5/3 of N1cc.
-        // Used to allow the first N1cc vectors to adapt over time to changing background.
+    virtual void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream) = 0;
 
-        bool is_obj_without_holes; // If TRUE we ignore holes within foreground blobs. Defaults to TRUE.
-        int perform_morphing;     // Number of erode-dilate-erode foreground-blob cleanup iterations.
-        // These erase one-pixel junk blobs and merge almost-touching blobs. Default value is 1.
-
-        float alpha1; // How quickly we forget old background pixel values seen. Typically set to 0.1.
-        float alpha2; // "Controls speed of feature learning". Depends on T. Typical value circa 0.005.
-        float alpha3; // Alternate to alpha2, used (e.g.) for quicker initial convergence. Typical value 0.1.
-
-        float delta;   // Affects color and color co-occurrence quantization, typically set to 2.
-        float T;       // A percentage value which determines when new features can be recognized as new background. (Typically 0.9).
-        float minArea; // Discard foreground blobs whose bounding box is smaller than this threshold.
-
-        // default Params
-        Params();
-    };
-
-    // out_cn - channels count in output result (can be 3 or 4)
-    // 4-channels require more memory, but a bit faster
-    explicit FGDStatModel(int out_cn = 3);
-    explicit FGDStatModel(const cv::gpu::GpuMat& firstFrame, const Params& params = Params(), int out_cn = 3);
-
-    ~FGDStatModel();
-
-    void create(const cv::gpu::GpuMat& firstFrame, const Params& params = Params());
-    void release();
-
-    int update(const cv::gpu::GpuMat& curFrame);
-
-    //8UC3 or 8UC4 reference background image
-    cv::gpu::GpuMat background;
-
-    //8UC1 foreground image
-    cv::gpu::GpuMat foreground;
-
-    std::vector< std::vector<cv::Point> > foreground_regions;
-
-private:
-    FGDStatModel(const FGDStatModel&);
-    FGDStatModel& operator=(const FGDStatModel&);
-
-    class Impl;
-    std::auto_ptr<Impl> impl_;
+    virtual void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const = 0;
 };
 
-/*!
- Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm
+CV_EXPORTS Ptr<gpu::BackgroundSubtractorMOG>
+    createBackgroundSubtractorMOG(int history = 200, int nmixtures = 5,
+                                  double backgroundRatio = 0.7, double noiseSigma = 0);
 
- The class implements the following algorithm:
- "An improved adaptive background mixture model for real-time tracking with shadow detection"
- P. KadewTraKuPong and R. Bowden,
- Proc. 2nd European Workshp on Advanced Video-Based Surveillance Systems, 2001."
- http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
-*/
-class CV_EXPORTS MOG_GPU
+////////////////////////////////////////////////////
+// MOG2
+
+class CV_EXPORTS BackgroundSubtractorMOG2 : public cv::BackgroundSubtractorMOG2
 {
 public:
-    //! the default constructor
-    MOG_GPU(int nmixtures = -1);
+    using cv::BackgroundSubtractorMOG2::apply;
+    using cv::BackgroundSubtractorMOG2::getBackgroundImage;
 
-    //! re-initiaization method
-    void initialize(Size frameSize, int frameType);
+    virtual void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream) = 0;
 
-    //! the update operator
-    void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = 0.0f, Stream& stream = Stream::Null());
-
-    //! computes a background image which are the mean of all background gaussians
-    void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
-
-    //! releases all inner buffers
-    void release();
-
-    int history;
-    float varThreshold;
-    float backgroundRatio;
-    float noiseSigma;
-
-private:
-    int nmixtures_;
-
-    Size frameSize_;
-    int frameType_;
-    int nframes_;
-
-    GpuMat weight_;
-    GpuMat sortKey_;
-    GpuMat mean_;
-    GpuMat var_;
+    virtual void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const = 0;
 };
 
-/*!
- The class implements the following algorithm:
- "Improved adaptive Gausian mixture model for background subtraction"
- Z.Zivkovic
- International Conference Pattern Recognition, UK, August, 2004.
- http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
-*/
-class CV_EXPORTS MOG2_GPU
+CV_EXPORTS Ptr<gpu::BackgroundSubtractorMOG2>
+    createBackgroundSubtractorMOG2(int history = 500, double varThreshold = 16,
+                                   bool detectShadows = true);
+
+////////////////////////////////////////////////////
+// GMG
+
+class CV_EXPORTS BackgroundSubtractorGMG : public cv::BackgroundSubtractorGMG
 {
 public:
-    //! the default constructor
-    MOG2_GPU(int nmixtures = -1);
+    using cv::BackgroundSubtractorGMG::apply;
 
-    //! re-initiaization method
-    void initialize(Size frameSize, int frameType);
-
-    //! the update operator
-    void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
-
-    //! computes a background image which are the mean of all background gaussians
-    void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
-
-    //! releases all inner buffers
-    void release();
-
-    // parameters
-    // you should call initialize after parameters changes
-
-    int history;
-
-    //! here it is the maximum allowed number of mixture components.
-    //! Actual number is determined dynamically per pixel
-    float varThreshold;
-    // threshold on the squared Mahalanobis distance to decide if it is well described
-    // by the background model or not. Related to Cthr from the paper.
-    // This does not influence the update of the background. A typical value could be 4 sigma
-    // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
-
-    /////////////////////////
-    // less important parameters - things you might change but be carefull
-    ////////////////////////
-
-    float backgroundRatio;
-    // corresponds to fTB=1-cf from the paper
-    // TB - threshold when the component becomes significant enough to be included into
-    // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
-    // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
-    // it is considered foreground
-    // float noiseSigma;
-    float varThresholdGen;
-
-    //correspondts to Tg - threshold on the squared Mahalan. dist. to decide
-    //when a sample is close to the existing components. If it is not close
-    //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
-    //Smaller Tg leads to more generated components and higher Tg might make
-    //lead to small number of components but they can grow too large
-    float fVarInit;
-    float fVarMin;
-    float fVarMax;
-
-    //initial variance  for the newly generated components.
-    //It will will influence the speed of adaptation. A good guess should be made.
-    //A simple way is to estimate the typical standard deviation from the images.
-    //I used here 10 as a reasonable value
-    // min and max can be used to further control the variance
-    float fCT; //CT - complexity reduction prior
-    //this is related to the number of samples needed to accept that a component
-    //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
-    //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
-
-    //shadow detection parameters
-    bool bShadowDetection; //default 1 - do shadow detection
-    unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
-    float fTau;
-    // Tau - shadow threshold. The shadow is detected if the pixel is darker
-    //version of the background. Tau is a threshold on how much darker the shadow can be.
-    //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
-    //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
-
-private:
-    int nmixtures_;
-
-    Size frameSize_;
-    int frameType_;
-    int nframes_;
-
-    GpuMat weight_;
-    GpuMat variance_;
-    GpuMat mean_;
-
-    GpuMat bgmodelUsedModes_; //keep track of number of modes per pixel
+    virtual void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream) = 0;
 };
 
+CV_EXPORTS Ptr<gpu::BackgroundSubtractorGMG>
+    createBackgroundSubtractorGMG(int initializationFrames = 120, double decisionThreshold = 0.8);
+
+////////////////////////////////////////////////////
+// FGD
+
 /**
- * Background Subtractor module. Takes a series of images and returns a sequence of mask (8UC1)
- * images of the same size, where 255 indicates Foreground and 0 represents Background.
- * This class implements an algorithm described in "Visual Tracking of Human Visitors under
- * Variable-Lighting Conditions for a Responsive Audio Art Installation," A. Godbehere,
- * A. Matsukawa, K. Goldberg, American Control Conference, Montreal, June 2012.
+ * Foreground Object Detection from Videos Containing Complex Background.
+ * Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian.
+ * ACM MM2003 9p
  */
-class CV_EXPORTS GMG_GPU
+class CV_EXPORTS BackgroundSubtractorFGD : public cv::BackgroundSubtractor
 {
 public:
-    GMG_GPU();
-
-    /**
-     * Validate parameters and set up data structures for appropriate frame size.
-     * @param frameSize Input frame size
-     * @param min       Minimum value taken on by pixels in image sequence. Usually 0
-     * @param max       Maximum value taken on by pixels in image sequence. e.g. 1.0 or 255
-     */
-    void initialize(Size frameSize, float min = 0.0f, float max = 255.0f);
-
-    /**
-     * Performs single-frame background subtraction and builds up a statistical background image
-     * model.
-     * @param frame        Input frame
-     * @param fgmask       Output mask image representing foreground and background pixels
-     * @param stream       Stream for the asynchronous version
-     */
-    void operator ()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
-
-    //! Releases all inner buffers
-    void release();
-
-    //! Total number of distinct colors to maintain in histogram.
-    int maxFeatures;
-
-    //! Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms.
-    float learningRate;
-
-    //! Number of frames of video to use to initialize histograms.
-    int numInitializationFrames;
-
-    //! Number of discrete levels in each channel to be used in histograms.
-    int quantizationLevels;
-
-    //! Prior probability that any given pixel is a background pixel. A sensitivity parameter.
-    float backgroundPrior;
-
-    //! Value above which pixel is determined to be FG.
-    float decisionThreshold;
-
-    //! Smoothing radius, in pixels, for cleaning up FG image.
-    int smoothingRadius;
-
-    //! Perform background model update.
-    bool updateBackgroundModel;
-
-private:
-    float maxVal_, minVal_;
-
-    Size frameSize_;
-
-    int frameNum_;
-
-    GpuMat nfeatures_;
-    GpuMat colors_;
-    GpuMat weights_;
-
-    Ptr<gpu::Filter> boxFilter_;
-    GpuMat buf_;
+    virtual void getForegroundRegions(OutputArrayOfArrays foreground_regions) = 0;
 };
 
+struct CV_EXPORTS FGDParams
+{
+    int Lc;  // Quantized levels per 'color' component. Power of two, typically 32, 64 or 128.
+    int N1c; // Number of color vectors used to model normal background color variation at a given pixel.
+    int N2c; // Number of color vectors retained at given pixel.  Must be > N1c, typically ~ 5/3 of N1c.
+    // Used to allow the first N1c vectors to adapt over time to changing background.
+
+    int Lcc;  // Quantized levels per 'color co-occurrence' component.  Power of two, typically 16, 32 or 64.
+    int N1cc; // Number of color co-occurrence vectors used to model normal background color variation at a given pixel.
+    int N2cc; // Number of color co-occurrence vectors retained at given pixel.  Must be > N1cc, typically ~ 5/3 of N1cc.
+    // Used to allow the first N1cc vectors to adapt over time to changing background.
+
+    bool is_obj_without_holes; // If TRUE we ignore holes within foreground blobs. Defaults to TRUE.
+    int perform_morphing;     // Number of erode-dilate-erode foreground-blob cleanup iterations.
+    // These erase one-pixel junk blobs and merge almost-touching blobs. Default value is 1.
+
+    float alpha1; // How quickly we forget old background pixel values seen. Typically set to 0.1.
+    float alpha2; // "Controls speed of feature learning". Depends on T. Typical value circa 0.005.
+    float alpha3; // Alternate to alpha2, used (e.g.) for quicker initial convergence. Typical value 0.1.
+
+    float delta;   // Affects color and color co-occurrence quantization, typically set to 2.
+    float T;       // A percentage value which determines when new features can be recognized as new background. (Typically 0.9).
+    float minArea; // Discard foreground blobs whose bounding box is smaller than this threshold.
+
+    // default Params
+    FGDParams();
+};
+
+CV_EXPORTS Ptr<gpu::BackgroundSubtractorFGD>
+    createBackgroundSubtractorFGD(const FGDParams& params = FGDParams());
+
 }} // namespace cv { namespace gpu {
 
 #endif /* __OPENCV_GPUBGSEGM_HPP__ */
diff --git a/modules/gpubgsegm/perf/perf_bgsegm.cpp b/modules/gpubgsegm/perf/perf_bgsegm.cpp
index 15842d59b..34b65f6be 100644
--- a/modules/gpubgsegm/perf/perf_bgsegm.cpp
+++ b/modules/gpubgsegm/perf/perf_bgsegm.cpp
@@ -41,7 +41,14 @@
 //M*/
 
 #include "perf_precomp.hpp"
-#include "opencv2/legacy.hpp"
+
+#ifdef HAVE_OPENCV_LEGACY
+#  include "opencv2/legacy.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUIMGPROC
+#  include "opencv2/gpuimgproc.hpp"
+#endif
 
 using namespace std;
 using namespace testing;
@@ -50,6 +57,7 @@ using namespace perf;
 #if defined(HAVE_XINE)         || \
     defined(HAVE_GSTREAMER)    || \
     defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
     defined(HAVE_AVFOUNDATION) || \
     defined(HAVE_FFMPEG)       || \
     defined(WIN32) /* assume that we have ffmpeg */
@@ -59,18 +67,22 @@ using namespace perf;
 #  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
 #endif
 
+//////////////////////////////////////////////////////
+// FGDStatModel
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+#ifdef HAVE_OPENCV_LEGACY
+
 namespace cv
 {
-    template<> void Ptr<CvBGStatModel>::delete_obj()
+    template<> void DefaultDeleter<CvBGStatModel>::operator ()(CvBGStatModel* obj) const
     {
         cvReleaseBGStatModel(&obj);
     }
 }
 
-//////////////////////////////////////////////////////
-// FGDStatModel
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+#endif
 
 DEF_PARAM_TEST_1(Video, string);
 
@@ -90,10 +102,10 @@ PERF_TEST_P(Video, FGDStatModel,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame(frame);
+        cv::gpu::GpuMat d_frame(frame), foreground;
 
-        cv::gpu::FGDStatModel d_model(4);
-        d_model.create(d_frame);
+        cv::Ptr<cv::gpu::BackgroundSubtractorFGD> d_fgd = cv::gpu::createBackgroundSubtractorFGD();
+        d_fgd->apply(d_frame, foreground);
 
         for (int i = 0; i < 10; ++i)
         {
@@ -103,18 +115,22 @@ PERF_TEST_P(Video, FGDStatModel,
             d_frame.upload(frame);
 
             startTimer(); next();
-            d_model.update(d_frame);
+            d_fgd->apply(d_frame, foreground);
             stopTimer();
         }
 
-        const cv::gpu::GpuMat background = d_model.background;
-        const cv::gpu::GpuMat foreground = d_model.foreground;
-
-        GPU_SANITY_CHECK(background, 1e-2, ERROR_RELATIVE);
         GPU_SANITY_CHECK(foreground, 1e-2, ERROR_RELATIVE);
+
+#ifdef HAVE_OPENCV_GPUIMGPROC
+        cv::gpu::GpuMat background3, background;
+        d_fgd->getBackgroundImage(background3);
+        cv::gpu::cvtColor(background3, background, cv::COLOR_BGR2BGRA);
+        GPU_SANITY_CHECK(background, 1e-2, ERROR_RELATIVE);
+#endif
     }
     else
     {
+#ifdef HAVE_OPENCV_LEGACY
         IplImage ipl_frame = frame;
         cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
 
@@ -135,6 +151,9 @@ PERF_TEST_P(Video, FGDStatModel,
 
         CPU_SANITY_CHECK(background);
         CPU_SANITY_CHECK(foreground);
+#else
+        FAIL_NO_CPU();
+#endif
     }
 }
 
@@ -176,11 +195,12 @@ PERF_TEST_P(Video_Cn_LearningRate, MOG,
 
     if (PERF_RUN_GPU())
     {
+        cv::Ptr<cv::BackgroundSubtractor> d_mog = cv::gpu::createBackgroundSubtractorMOG();
+
         cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::MOG_GPU d_mog;
         cv::gpu::GpuMat foreground;
 
-        d_mog(d_frame, foreground, learningRate);
+        d_mog->apply(d_frame, foreground, learningRate);
 
         for (int i = 0; i < 10; ++i)
         {
@@ -200,7 +220,7 @@ PERF_TEST_P(Video_Cn_LearningRate, MOG,
             d_frame.upload(frame);
 
             startTimer(); next();
-            d_mog(d_frame, foreground, learningRate);
+            d_mog->apply(d_frame, foreground, learningRate);
             stopTimer();
         }
 
@@ -273,13 +293,13 @@ PERF_TEST_P(Video_Cn, MOG2,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::MOG2_GPU d_mog2;
-        d_mog2.bShadowDetection = false;
+        cv::Ptr<cv::BackgroundSubtractorMOG2> d_mog2 = cv::gpu::createBackgroundSubtractorMOG2();
+        d_mog2->setDetectShadows(false);
 
         cv::gpu::GpuMat d_frame(frame);
         cv::gpu::GpuMat foreground;
 
-        d_mog2(d_frame, foreground);
+        d_mog2->apply(d_frame, foreground);
 
         for (int i = 0; i < 10; ++i)
         {
@@ -299,7 +319,7 @@ PERF_TEST_P(Video_Cn, MOG2,
             d_frame.upload(frame);
 
             startTimer(); next();
-            d_mog2(d_frame, foreground);
+            d_mog2->apply(d_frame, foreground);
             stopTimer();
         }
 
@@ -307,8 +327,8 @@ PERF_TEST_P(Video_Cn, MOG2,
     }
     else
     {
-        cv::Ptr<cv::BackgroundSubtractor> mog2 = cv::createBackgroundSubtractorMOG2();
-        mog2->set("detectShadows", false);
+        cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = cv::createBackgroundSubtractorMOG2();
+        mog2->setDetectShadows(false);
 
         cv::Mat foreground;
 
@@ -359,8 +379,9 @@ PERF_TEST_P(Video_Cn, MOG2GetBackgroundImage,
 
     if (PERF_RUN_GPU())
     {
+        cv::Ptr<cv::BackgroundSubtractor> d_mog2 = cv::gpu::createBackgroundSubtractorMOG2();
+
         cv::gpu::GpuMat d_frame;
-        cv::gpu::MOG2_GPU d_mog2;
         cv::gpu::GpuMat d_foreground;
 
         for (int i = 0; i < 10; ++i)
@@ -380,12 +401,12 @@ PERF_TEST_P(Video_Cn, MOG2GetBackgroundImage,
 
             d_frame.upload(frame);
 
-            d_mog2(d_frame, d_foreground);
+            d_mog2->apply(d_frame, d_foreground);
         }
 
         cv::gpu::GpuMat background;
 
-        TEST_CYCLE() d_mog2.getBackgroundImage(background);
+        TEST_CYCLE() d_mog2->getBackgroundImage(background);
 
         GPU_SANITY_CHECK(background, 1);
     }
@@ -460,10 +481,10 @@ PERF_TEST_P(Video_Cn_MaxFeatures, GMG,
         cv::gpu::GpuMat d_frame(frame);
         cv::gpu::GpuMat foreground;
 
-        cv::gpu::GMG_GPU d_gmg;
-        d_gmg.maxFeatures = maxFeatures;
+        cv::Ptr<cv::BackgroundSubtractorGMG> d_gmg = cv::gpu::createBackgroundSubtractorGMG();
+        d_gmg->setMaxFeatures(maxFeatures);
 
-        d_gmg(d_frame, foreground);
+        d_gmg->apply(d_frame, foreground);
 
         for (int i = 0; i < 150; ++i)
         {
@@ -488,7 +509,7 @@ PERF_TEST_P(Video_Cn_MaxFeatures, GMG,
             d_frame.upload(frame);
 
             startTimer(); next();
-            d_gmg(d_frame, foreground);
+            d_gmg->apply(d_frame, foreground);
             stopTimer();
         }
 
@@ -499,9 +520,8 @@ PERF_TEST_P(Video_Cn_MaxFeatures, GMG,
         cv::Mat foreground;
         cv::Mat zeros(frame.size(), CV_8UC1, cv::Scalar::all(0));
 
-        cv::Ptr<cv::BackgroundSubtractor> gmg = cv::createBackgroundSubtractorGMG();
-        gmg->set("maxFeatures", maxFeatures);
-        //gmg.initialize(frame.size(), 0.0, 255.0);
+        cv::Ptr<cv::BackgroundSubtractorGMG> gmg = cv::createBackgroundSubtractorGMG();
+        gmg->setMaxFeatures(maxFeatures);
 
         gmg->apply(frame, foreground);
 
diff --git a/modules/gpubgsegm/perf/perf_main.cpp b/modules/gpubgsegm/perf/perf_main.cpp
index 99066f450..f69d0ea8f 100644
--- a/modules/gpubgsegm/perf/perf_main.cpp
+++ b/modules/gpubgsegm/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpubgsegm, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpubgsegm)
diff --git a/modules/gpubgsegm/perf/perf_precomp.cpp b/modules/gpubgsegm/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpubgsegm/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpubgsegm/perf/perf_precomp.hpp b/modules/gpubgsegm/perf/perf_precomp.hpp
index 3a13b9200..83d0fd4f0 100644
--- a/modules/gpubgsegm/perf/perf_precomp.hpp
+++ b/modules/gpubgsegm/perf/perf_precomp.hpp
@@ -57,6 +57,8 @@
 #include "opencv2/gpubgsegm.hpp"
 #include "opencv2/video.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
 #endif
diff --git a/modules/gpubgsegm/src/cuda/fgd.cu b/modules/gpubgsegm/src/cuda/fgd.cu
index 3d5519945..7dd616c71 100644
--- a/modules/gpubgsegm/src/cuda/fgd.cu
+++ b/modules/gpubgsegm/src/cuda/fgd.cu
@@ -53,7 +53,7 @@
 using namespace cv::gpu;
 using namespace cv::gpu::cudev;
 
-namespace bgfg
+namespace fgd
 {
     ////////////////////////////////////////////////////////////////////////////
     // calcDiffHistogram
diff --git a/modules/gpubgsegm/src/cuda/fgd.hpp b/modules/gpubgsegm/src/cuda/fgd.hpp
index dd7151960..50b9838cd 100644
--- a/modules/gpubgsegm/src/cuda/fgd.hpp
+++ b/modules/gpubgsegm/src/cuda/fgd.hpp
@@ -45,7 +45,7 @@
 
 #include "opencv2/core/gpu_types.hpp"
 
-namespace bgfg
+namespace fgd
 {
     struct BGPixelStat
     {
diff --git a/modules/gpubgsegm/src/cuda/gmg.cu b/modules/gpubgsegm/src/cuda/gmg.cu
index 8ae9b037b..235c1f0e2 100644
--- a/modules/gpubgsegm/src/cuda/gmg.cu
+++ b/modules/gpubgsegm/src/cuda/gmg.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/limits.hpp"
 
 namespace cv { namespace gpu { namespace cudev {
-    namespace bgfg_gmg
+    namespace gmg
     {
         __constant__ int   c_width;
         __constant__ int   c_height;
diff --git a/modules/gpubgsegm/src/cuda/mog.cu b/modules/gpubgsegm/src/cuda/mog.cu
index 6508262d2..ed0720c30 100644
--- a/modules/gpubgsegm/src/cuda/mog.cu
+++ b/modules/gpubgsegm/src/cuda/mog.cu
@@ -111,14 +111,6 @@ namespace cv { namespace gpu { namespace cudev
                                 0.0f);
         }
 
-        template <class Ptr2D>
-        __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
-        {
-            typename Ptr2D::elem_type val = ptr(k * rows + y, x);
-            ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
-            ptr((k + 1) * rows + y, x) = val;
-        }
-
         ///////////////////////////////////////////////////////////////
         // MOG without learning
 
@@ -426,337 +418,6 @@ namespace cv { namespace gpu { namespace cudev
 
             funcs[cn](weight, mean, dst, nmixtures, backgroundRatio, stream);
         }
-
-        ///////////////////////////////////////////////////////////////
-        // MOG2
-
-        __constant__ int           c_nmixtures;
-        __constant__ float         c_Tb;
-        __constant__ float         c_TB;
-        __constant__ float         c_Tg;
-        __constant__ float         c_varInit;
-        __constant__ float         c_varMin;
-        __constant__ float         c_varMax;
-        __constant__ float         c_tau;
-        __constant__ unsigned char c_shadowVal;
-
-        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
-        {
-            varMin = ::fminf(varMin, varMax);
-            varMax = ::fmaxf(varMin, varMax);
-
-            cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
-        }
-
-        template <bool detectShadows, typename SrcT, typename WorkT>
-        __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
-                             PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
-                             const float alphaT, const float alpha1, const float prune)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= frame.cols || y >= frame.rows)
-                return;
-
-            WorkT pix = cvt(frame(y, x));
-
-            //calculate distances to the modes (+ sort)
-            //here we need to go in descending order!!!
-
-            bool background = false; // true - the pixel classified as background
-
-            //internal:
-
-            bool fitsPDF = false; //if it remains zero a new GMM mode will be added
-
-            int nmodes = modesUsed(y, x);
-            int nNewModes = nmodes; //current number of modes in GMM
-
-            float totalWeight = 0.0f;
-
-            //go through all modes
-
-            for (int mode = 0; mode < nmodes; ++mode)
-            {
-                //need only weight if fit is found
-                float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
-
-                //fit not found yet
-                if (!fitsPDF)
-                {
-                    //check if it belongs to some of the remaining modes
-                    float var = gmm_variance(mode * frame.rows + y, x);
-
-                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
-
-                    //calculate difference and distance
-                    WorkT diff = mean - pix;
-                    float dist2 = sqr(diff);
-
-                    //background? - Tb - usually larger than Tg
-                    if (totalWeight < c_TB && dist2 < c_Tb * var)
-                        background = true;
-
-                    //check fit
-                    if (dist2 < c_Tg * var)
-                    {
-                        //belongs to the mode
-                        fitsPDF = true;
-
-                        //update distribution
-
-                        //update weight
-                        weight += alphaT;
-                        float k = alphaT / weight;
-
-                        //update mean
-                        gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
-
-                        //update variance
-                        float varnew = var + k * (dist2 - var);
-
-                        //limit the variance
-                        varnew = ::fmaxf(varnew, c_varMin);
-                        varnew = ::fminf(varnew, c_varMax);
-
-                        gmm_variance(mode * frame.rows + y, x) = varnew;
-
-                        //sort
-                        //all other weights are at the same place and
-                        //only the matched (iModes) is higher -> just find the new place for it
-
-                        for (int i = mode; i > 0; --i)
-                        {
-                            //check one up
-                            if (weight < gmm_weight((i - 1) * frame.rows + y, x))
-                                break;
-
-                            //swap one up
-                            swap(gmm_weight, x, y, i - 1, frame.rows);
-                            swap(gmm_variance, x, y, i - 1, frame.rows);
-                            swap(gmm_mean, x, y, i - 1, frame.rows);
-                        }
-
-                        //belongs to the mode - bFitsPDF becomes 1
-                    }
-                } // !fitsPDF
-
-                //check prune
-                if (weight < -prune)
-                {
-                    weight = 0.0;
-                    nmodes--;
-                }
-
-                gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value
-                totalWeight += weight;
-            }
-
-            //renormalize weights
-
-            totalWeight = 1.f / totalWeight;
-            for (int mode = 0; mode < nmodes; ++mode)
-                gmm_weight(mode * frame.rows + y, x) *= totalWeight;
-
-            nmodes = nNewModes;
-
-            //make new mode if needed and exit
-
-            if (!fitsPDF)
-            {
-                // replace the weakest or add a new one
-                int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
-
-                if (nmodes == 1)
-                    gmm_weight(mode * frame.rows + y, x) = 1.f;
-                else
-                {
-                    gmm_weight(mode * frame.rows + y, x) = alphaT;
-
-                    // renormalize all other weights
-
-                    for (int i = 0; i < nmodes - 1; ++i)
-                        gmm_weight(i * frame.rows + y, x) *= alpha1;
-                }
-
-                // init
-
-                gmm_mean(mode * frame.rows + y, x) = pix;
-                gmm_variance(mode * frame.rows + y, x) = c_varInit;
-
-                //sort
-                //find the new place for it
-
-                for (int i = nmodes - 1; i > 0; --i)
-                {
-                    // check one up
-                    if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
-                        break;
-
-                    //swap one up
-                    swap(gmm_weight, x, y, i - 1, frame.rows);
-                    swap(gmm_variance, x, y, i - 1, frame.rows);
-                    swap(gmm_mean, x, y, i - 1, frame.rows);
-                }
-            }
-
-            //set the number of modes
-            modesUsed(y, x) = nmodes;
-
-            bool isShadow = false;
-            if (detectShadows && !background)
-            {
-                float tWeight = 0.0f;
-
-                // check all the components  marked as background:
-                for (int mode = 0; mode < nmodes; ++mode)
-                {
-                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
-
-                    WorkT pix_mean = pix * mean;
-
-                    float numerator = sum(pix_mean);
-                    float denominator = sqr(mean);
-
-                    // no division by zero allowed
-                    if (denominator == 0)
-                        break;
-
-                    // if tau < a < 1 then also check the color distortion
-                    if (numerator <= denominator && numerator >= c_tau * denominator)
-                    {
-                        float a = numerator / denominator;
-
-                        WorkT dD = a * mean - pix;
-
-                        if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
-                        {
-                            isShadow = true;
-                            break;
-                        }
-                    };
-
-                    tWeight += gmm_weight(mode * frame.rows + y, x);
-                    if (tWeight > c_TB)
-                        break;
-                }
-            }
-
-            fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
-        }
-
-        template <typename SrcT, typename WorkT>
-        void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
-                         float alphaT, float prune, bool detectShadows, cudaStream_t stream)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-
-            const float alpha1 = 1.0f - alphaT;
-
-            if (detectShadows)
-            {
-                cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
-
-                mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
-                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
-                                                                    alphaT, alpha1, prune);
-            }
-            else
-            {
-                cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
-
-                mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
-                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
-                                                                    alphaT, alpha1, prune);
-            }
-
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
-                      float alphaT, float prune, bool detectShadows, cudaStream_t stream)
-        {
-            typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
-
-            static const func_t funcs[] =
-            {
-                0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
-            };
-
-            funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
-        }
-
-        template <typename WorkT, typename OutT>
-        __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= modesUsed.cols || y >= modesUsed.rows)
-                return;
-
-            int nmodes = modesUsed(y, x);
-
-            WorkT meanVal = VecTraits<WorkT>::all(0.0f);
-            float totalWeight = 0.0f;
-
-            for (int mode = 0; mode < nmodes; ++mode)
-            {
-                float weight = gmm_weight(mode * modesUsed.rows + y, x);
-
-                WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
-                meanVal = meanVal + weight * mean;
-
-                totalWeight += weight;
-
-                if(totalWeight > c_TB)
-                    break;
-            }
-
-            meanVal = meanVal * (1.f / totalWeight);
-
-            dst(y, x) = saturate_cast<OutT>(meanVal);
-        }
-
-        template <typename WorkT, typename OutT>
-        void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
-
-            getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
-        {
-            typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
-
-            static const func_t funcs[] =
-            {
-                0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
-            };
-
-            funcs[cn](modesUsed, weight, mean, dst, stream);
-        }
     }
 }}}
 
diff --git a/modules/gpubgsegm/src/cuda/mog2.cu b/modules/gpubgsegm/src/cuda/mog2.cu
new file mode 100644
index 000000000..50cb9fa56
--- /dev/null
+++ b/modules/gpubgsegm/src/cuda/mog2.cu
@@ -0,0 +1,438 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/limits.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace mog2
+    {
+        ///////////////////////////////////////////////////////////////
+        // Utility
+
+        __device__ __forceinline__ float cvt(uchar val)
+        {
+            return val;
+        }
+        __device__ __forceinline__ float3 cvt(const uchar3& val)
+        {
+            return make_float3(val.x, val.y, val.z);
+        }
+        __device__ __forceinline__ float4 cvt(const uchar4& val)
+        {
+            return make_float4(val.x, val.y, val.z, val.w);
+        }
+
+        __device__ __forceinline__ float sqr(float val)
+        {
+            return val * val;
+        }
+        __device__ __forceinline__ float sqr(const float3& val)
+        {
+            return val.x * val.x + val.y * val.y + val.z * val.z;
+        }
+        __device__ __forceinline__ float sqr(const float4& val)
+        {
+            return val.x * val.x + val.y * val.y + val.z * val.z;
+        }
+
+        __device__ __forceinline__ float sum(float val)
+        {
+            return val;
+        }
+        __device__ __forceinline__ float sum(const float3& val)
+        {
+            return val.x + val.y + val.z;
+        }
+        __device__ __forceinline__ float sum(const float4& val)
+        {
+            return val.x + val.y + val.z;
+        }
+
+        template <class Ptr2D>
+        __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
+        {
+            typename Ptr2D::elem_type val = ptr(k * rows + y, x);
+            ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
+            ptr((k + 1) * rows + y, x) = val;
+        }
+
+        ///////////////////////////////////////////////////////////////
+        // MOG2
+
+        __constant__ int           c_nmixtures;
+        __constant__ float         c_Tb;
+        __constant__ float         c_TB;
+        __constant__ float         c_Tg;
+        __constant__ float         c_varInit;
+        __constant__ float         c_varMin;
+        __constant__ float         c_varMax;
+        __constant__ float         c_tau;
+        __constant__ unsigned char c_shadowVal;
+
+        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
+        {
+            varMin = ::fminf(varMin, varMax);
+            varMax = ::fmaxf(varMin, varMax);
+
+            cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
+        }
+
+        template <bool detectShadows, typename SrcT, typename WorkT>
+        __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
+                             PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
+                             const float alphaT, const float alpha1, const float prune)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= frame.cols || y >= frame.rows)
+                return;
+
+            WorkT pix = cvt(frame(y, x));
+
+            //calculate distances to the modes (+ sort)
+            //here we need to go in descending order!!!
+
+            bool background = false; // true - the pixel classified as background
+
+            //internal:
+
+            bool fitsPDF = false; //if it remains zero a new GMM mode will be added
+
+            int nmodes = modesUsed(y, x);
+            int nNewModes = nmodes; //current number of modes in GMM
+
+            float totalWeight = 0.0f;
+
+            //go through all modes
+
+            for (int mode = 0; mode < nmodes; ++mode)
+            {
+                //need only weight if fit is found
+                float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
+
+                //fit not found yet
+                if (!fitsPDF)
+                {
+                    //check if it belongs to some of the remaining modes
+                    float var = gmm_variance(mode * frame.rows + y, x);
+
+                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
+
+                    //calculate difference and distance
+                    WorkT diff = mean - pix;
+                    float dist2 = sqr(diff);
+
+                    //background? - Tb - usually larger than Tg
+                    if (totalWeight < c_TB && dist2 < c_Tb * var)
+                        background = true;
+
+                    //check fit
+                    if (dist2 < c_Tg * var)
+                    {
+                        //belongs to the mode
+                        fitsPDF = true;
+
+                        //update distribution
+
+                        //update weight
+                        weight += alphaT;
+                        float k = alphaT / weight;
+
+                        //update mean
+                        gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
+
+                        //update variance
+                        float varnew = var + k * (dist2 - var);
+
+                        //limit the variance
+                        varnew = ::fmaxf(varnew, c_varMin);
+                        varnew = ::fminf(varnew, c_varMax);
+
+                        gmm_variance(mode * frame.rows + y, x) = varnew;
+
+                        //sort
+                        //all other weights are at the same place and
+                        //only the matched (iModes) is higher -> just find the new place for it
+
+                        for (int i = mode; i > 0; --i)
+                        {
+                            //check one up
+                            if (weight < gmm_weight((i - 1) * frame.rows + y, x))
+                                break;
+
+                            //swap one up
+                            swap(gmm_weight, x, y, i - 1, frame.rows);
+                            swap(gmm_variance, x, y, i - 1, frame.rows);
+                            swap(gmm_mean, x, y, i - 1, frame.rows);
+                        }
+
+                        //belongs to the mode - bFitsPDF becomes 1
+                    }
+                } // !fitsPDF
+
+                //check prune
+                if (weight < -prune)
+                {
+                    weight = 0.0f;
+                    nmodes--;
+                }
+
+                gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value
+                totalWeight += weight;
+            }
+
+            //renormalize weights
+
+            totalWeight = 1.f / totalWeight;
+            for (int mode = 0; mode < nmodes; ++mode)
+                gmm_weight(mode * frame.rows + y, x) *= totalWeight;
+
+            nmodes = nNewModes;
+
+            //make new mode if needed and exit
+
+            if (!fitsPDF)
+            {
+                // replace the weakest or add a new one
+                int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
+
+                if (nmodes == 1)
+                    gmm_weight(mode * frame.rows + y, x) = 1.f;
+                else
+                {
+                    gmm_weight(mode * frame.rows + y, x) = alphaT;
+
+                    // renormalize all other weights
+
+                    for (int i = 0; i < nmodes - 1; ++i)
+                        gmm_weight(i * frame.rows + y, x) *= alpha1;
+                }
+
+                // init
+
+                gmm_mean(mode * frame.rows + y, x) = pix;
+                gmm_variance(mode * frame.rows + y, x) = c_varInit;
+
+                //sort
+                //find the new place for it
+
+                for (int i = nmodes - 1; i > 0; --i)
+                {
+                    // check one up
+                    if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
+                        break;
+
+                    //swap one up
+                    swap(gmm_weight, x, y, i - 1, frame.rows);
+                    swap(gmm_variance, x, y, i - 1, frame.rows);
+                    swap(gmm_mean, x, y, i - 1, frame.rows);
+                }
+            }
+
+            //set the number of modes
+            modesUsed(y, x) = nmodes;
+
+            bool isShadow = false;
+            if (detectShadows && !background)
+            {
+                float tWeight = 0.0f;
+
+                // check all the components  marked as background:
+                for (int mode = 0; mode < nmodes; ++mode)
+                {
+                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
+
+                    WorkT pix_mean = pix * mean;
+
+                    float numerator = sum(pix_mean);
+                    float denominator = sqr(mean);
+
+                    // no division by zero allowed
+                    if (denominator == 0)
+                        break;
+
+                    // if tau < a < 1 then also check the color distortion
+                    if (numerator <= denominator && numerator >= c_tau * denominator)
+                    {
+                        float a = numerator / denominator;
+
+                        WorkT dD = a * mean - pix;
+
+                        if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
+                        {
+                            isShadow = true;
+                            break;
+                        }
+                    };
+
+                    tWeight += gmm_weight(mode * frame.rows + y, x);
+                    if (tWeight > c_TB)
+                        break;
+                }
+            }
+
+            fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
+        }
+
+        template <typename SrcT, typename WorkT>
+        void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
+                         float alphaT, float prune, bool detectShadows, cudaStream_t stream)
+        {
+            dim3 block(32, 8);
+            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
+
+            const float alpha1 = 1.0f - alphaT;
+
+            if (detectShadows)
+            {
+                cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
+
+                mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
+                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
+                                                                    alphaT, alpha1, prune);
+            }
+            else
+            {
+                cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
+
+                mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
+                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
+                                                                    alphaT, alpha1, prune);
+            }
+
+            cudaSafeCall( cudaGetLastError() );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
+                      float alphaT, float prune, bool detectShadows, cudaStream_t stream)
+        {
+            typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
+
+            static const func_t funcs[] =
+            {
+                0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
+            };
+
+            funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
+        }
+
+        template <typename WorkT, typename OutT>
+        __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= modesUsed.cols || y >= modesUsed.rows)
+                return;
+
+            int nmodes = modesUsed(y, x);
+
+            WorkT meanVal = VecTraits<WorkT>::all(0.0f);
+            float totalWeight = 0.0f;
+
+            for (int mode = 0; mode < nmodes; ++mode)
+            {
+                float weight = gmm_weight(mode * modesUsed.rows + y, x);
+
+                WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
+                meanVal = meanVal + weight * mean;
+
+                totalWeight += weight;
+
+                if(totalWeight > c_TB)
+                    break;
+            }
+
+            meanVal = meanVal * (1.f / totalWeight);
+
+            dst(y, x) = saturate_cast<OutT>(meanVal);
+        }
+
+        template <typename WorkT, typename OutT>
+        void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
+        {
+            dim3 block(32, 8);
+            dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
+
+            getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
+            cudaSafeCall( cudaGetLastError() );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
+        {
+            typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
+
+            static const func_t funcs[] =
+            {
+                0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
+            };
+
+            funcs[cn](modesUsed, weight, mean, dst, stream);
+        }
+    }
+}}}
+
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpubgsegm/src/fgd.cpp b/modules/gpubgsegm/src/fgd.cpp
index fb14ff172..31cf370c1 100644
--- a/modules/gpubgsegm/src/fgd.cpp
+++ b/modules/gpubgsegm/src/fgd.cpp
@@ -42,329 +42,150 @@
 
 #include "precomp.hpp"
 
-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+using namespace cv;
+using namespace cv::gpu;
 
-class cv::gpu::FGDStatModel::Impl
-{
-};
+#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER) || !defined(HAVE_OPENCV_IMGPROC) || !defined(HAVE_OPENCV_GPUARITHM) || !defined(HAVE_OPENCV_GPUIMGPROC)
 
-cv::gpu::FGDStatModel::Params::Params() { throw_no_cuda(); }
+cv::gpu::FGDParams::FGDParams() { throw_no_cuda(); }
 
-cv::gpu::FGDStatModel::FGDStatModel(int) { throw_no_cuda(); }
-cv::gpu::FGDStatModel::FGDStatModel(const cv::gpu::GpuMat&, const Params&, int) { throw_no_cuda(); }
-cv::gpu::FGDStatModel::~FGDStatModel() {}
-void cv::gpu::FGDStatModel::create(const cv::gpu::GpuMat&, const Params&) { throw_no_cuda(); }
-void cv::gpu::FGDStatModel::release() {}
-int cv::gpu::FGDStatModel::update(const cv::gpu::GpuMat&) { throw_no_cuda(); return 0; }
+Ptr<gpu::BackgroundSubtractorFGD> cv::gpu::createBackgroundSubtractorFGD(const FGDParams&) { throw_no_cuda(); return Ptr<gpu::BackgroundSubtractorFGD>(); }
 
 #else
 
 #include "cuda/fgd.hpp"
 #include "opencv2/imgproc/imgproc_c.h"
 
-namespace
-{
-    class BGPixelStat
-    {
-    public:
-        void create(cv::Size size, const cv::gpu::FGDStatModel::Params& params, int out_cn);
-        void release();
-
-        void setTrained();
-
-        operator bgfg::BGPixelStat();
-
-    private:
-        cv::gpu::GpuMat Pbc_;
-        cv::gpu::GpuMat Pbcc_;
-        cv::gpu::GpuMat is_trained_st_model_;
-        cv::gpu::GpuMat is_trained_dyn_model_;
-
-        cv::gpu::GpuMat ctable_Pv_;
-        cv::gpu::GpuMat ctable_Pvb_;
-        cv::gpu::GpuMat ctable_v_;
-
-        cv::gpu::GpuMat cctable_Pv_;
-        cv::gpu::GpuMat cctable_Pvb_;
-        cv::gpu::GpuMat cctable_v1_;
-        cv::gpu::GpuMat cctable_v2_;
-    };
-
-    void BGPixelStat::create(cv::Size size, const cv::gpu::FGDStatModel::Params& params, int out_cn)
-    {
-        cv::gpu::ensureSizeIsEnough(size, CV_32FC1, Pbc_);
-        Pbc_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(size, CV_32FC1, Pbcc_);
-        Pbcc_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(size, CV_8UC1, is_trained_st_model_);
-        is_trained_st_model_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(size, CV_8UC1, is_trained_dyn_model_);
-        is_trained_dyn_model_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_32FC1, ctable_Pv_);
-        ctable_Pv_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_32FC1, ctable_Pvb_);
-        ctable_Pvb_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_8UC(out_cn), ctable_v_);
-        ctable_v_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_32FC1, cctable_Pv_);
-        cctable_Pv_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_32FC1, cctable_Pvb_);
-        cctable_Pvb_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_8UC(out_cn), cctable_v1_);
-        cctable_v1_.setTo(cv::Scalar::all(0));
-
-        cv::gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_8UC(out_cn), cctable_v2_);
-        cctable_v2_.setTo(cv::Scalar::all(0));
-    }
-
-    void BGPixelStat::release()
-    {
-        Pbc_.release();
-        Pbcc_.release();
-        is_trained_st_model_.release();
-        is_trained_dyn_model_.release();
-
-        ctable_Pv_.release();
-        ctable_Pvb_.release();
-        ctable_v_.release();
-
-        cctable_Pv_.release();
-        cctable_Pvb_.release();
-        cctable_v1_.release();
-        cctable_v2_.release();
-    }
-
-    void BGPixelStat::setTrained()
-    {
-        is_trained_st_model_.setTo(cv::Scalar::all(1));
-        is_trained_dyn_model_.setTo(cv::Scalar::all(1));
-    }
-
-    BGPixelStat::operator bgfg::BGPixelStat()
-    {
-        bgfg::BGPixelStat stat;
-
-        stat.rows_ = Pbc_.rows;
-
-        stat.Pbc_data_ = Pbc_.data;
-        stat.Pbc_step_ = Pbc_.step;
-
-        stat.Pbcc_data_ = Pbcc_.data;
-        stat.Pbcc_step_ = Pbcc_.step;
-
-        stat.is_trained_st_model_data_ = is_trained_st_model_.data;
-        stat.is_trained_st_model_step_ = is_trained_st_model_.step;
-
-        stat.is_trained_dyn_model_data_ = is_trained_dyn_model_.data;
-        stat.is_trained_dyn_model_step_ = is_trained_dyn_model_.step;
-
-        stat.ctable_Pv_data_ = ctable_Pv_.data;
-        stat.ctable_Pv_step_ = ctable_Pv_.step;
-
-        stat.ctable_Pvb_data_ = ctable_Pvb_.data;
-        stat.ctable_Pvb_step_ = ctable_Pvb_.step;
-
-        stat.ctable_v_data_ = ctable_v_.data;
-        stat.ctable_v_step_ = ctable_v_.step;
-
-        stat.cctable_Pv_data_ = cctable_Pv_.data;
-        stat.cctable_Pv_step_ = cctable_Pv_.step;
-
-        stat.cctable_Pvb_data_ = cctable_Pvb_.data;
-        stat.cctable_Pvb_step_ = cctable_Pvb_.step;
-
-        stat.cctable_v1_data_ = cctable_v1_.data;
-        stat.cctable_v1_step_ = cctable_v1_.step;
-
-        stat.cctable_v2_data_ = cctable_v2_.data;
-        stat.cctable_v2_step_ = cctable_v2_.step;
-
-        return stat;
-    }
-}
-
-class cv::gpu::FGDStatModel::Impl
-{
-public:
-    Impl(cv::gpu::GpuMat& background, cv::gpu::GpuMat& foreground, std::vector< std::vector<cv::Point> >& foreground_regions, int out_cn);
-    ~Impl();
-
-    void create(const cv::gpu::GpuMat& firstFrame, const cv::gpu::FGDStatModel::Params& params);
-    void release();
-
-    int update(const cv::gpu::GpuMat& curFrame);
-
-private:
-    Impl(const Impl&);
-    Impl& operator=(const Impl&);
-
-    int out_cn_;
-
-    cv::gpu::FGDStatModel::Params params_;
-
-    cv::gpu::GpuMat& background_;
-    cv::gpu::GpuMat& foreground_;
-    std::vector< std::vector<cv::Point> >& foreground_regions_;
-
-    cv::Mat h_foreground_;
-
-    cv::gpu::GpuMat prevFrame_;
-    cv::gpu::GpuMat Ftd_;
-    cv::gpu::GpuMat Fbd_;
-    BGPixelStat stat_;
-
-    cv::gpu::GpuMat hist_;
-    cv::gpu::GpuMat histBuf_;
-
-    cv::gpu::GpuMat countBuf_;
-
-    cv::gpu::GpuMat buf_;
-    cv::gpu::GpuMat filterBrd_;
-
-    cv::Ptr<cv::gpu::Filter> dilateFilter_;
-    cv::Ptr<cv::gpu::Filter> erodeFilter_;
-
-    CvMemStorage* storage_;
-};
-
-cv::gpu::FGDStatModel::Impl::Impl(cv::gpu::GpuMat& background, cv::gpu::GpuMat& foreground, std::vector< std::vector<cv::Point> >& foreground_regions, int out_cn) :
-    out_cn_(out_cn), background_(background), foreground_(foreground), foreground_regions_(foreground_regions)
-{
-    CV_Assert( out_cn_ == 3 || out_cn_ == 4 );
-
-    storage_ = cvCreateMemStorage();
-    CV_Assert( storage_ != 0 );
-}
-
-cv::gpu::FGDStatModel::Impl::~Impl()
-{
-    cvReleaseMemStorage(&storage_);
-}
+/////////////////////////////////////////////////////////////////////////
+// FGDParams
 
 namespace
 {
-    void copyChannels(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, int dst_cn = -1)
+    // Default parameters of foreground detection algorithm:
+    const int BGFG_FGD_LC  = 128;
+    const int BGFG_FGD_N1C = 15;
+    const int BGFG_FGD_N2C = 25;
+
+    const int BGFG_FGD_LCC   = 64;
+    const int BGFG_FGD_N1CC = 25;
+    const int BGFG_FGD_N2CC = 40;
+
+    // Background reference image update parameter:
+    const float BGFG_FGD_ALPHA_1 = 0.1f;
+
+    // stat model update parameter
+    // 0.002f ~ 1K frame(~45sec), 0.005 ~ 18sec (if 25fps and absolutely static BG)
+    const float BGFG_FGD_ALPHA_2 = 0.005f;
+
+    // start value for alpha parameter (to fast initiate statistic model)
+    const float BGFG_FGD_ALPHA_3 = 0.1f;
+
+    const float BGFG_FGD_DELTA = 2.0f;
+
+    const float BGFG_FGD_T = 0.9f;
+
+    const float BGFG_FGD_MINAREA= 15.0f;
+}
+
+cv::gpu::FGDParams::FGDParams()
+{
+    Lc      = BGFG_FGD_LC;
+    N1c     = BGFG_FGD_N1C;
+    N2c     = BGFG_FGD_N2C;
+
+    Lcc     = BGFG_FGD_LCC;
+    N1cc    = BGFG_FGD_N1CC;
+    N2cc    = BGFG_FGD_N2CC;
+
+    delta   = BGFG_FGD_DELTA;
+
+    alpha1  = BGFG_FGD_ALPHA_1;
+    alpha2  = BGFG_FGD_ALPHA_2;
+    alpha3  = BGFG_FGD_ALPHA_3;
+
+    T       = BGFG_FGD_T;
+    minArea = BGFG_FGD_MINAREA;
+
+    is_obj_without_holes = true;
+    perform_morphing     = 1;
+}
+
+/////////////////////////////////////////////////////////////////////////
+// copyChannels
+
+namespace
+{
+    void copyChannels(const GpuMat& src, GpuMat& dst, int dst_cn = -1)
     {
         const int src_cn = src.channels();
 
         if (dst_cn < 0)
             dst_cn = src_cn;
 
-        cv::gpu::ensureSizeIsEnough(src.size(), CV_MAKE_TYPE(src.depth(), dst_cn), dst);
+        gpu::ensureSizeIsEnough(src.size(), CV_MAKE_TYPE(src.depth(), dst_cn), dst);
 
         if (src_cn == dst_cn)
+        {
             src.copyTo(dst);
+        }
         else
         {
             static const int cvt_codes[4][4] =
             {
-                {-1, -1, cv::COLOR_GRAY2BGR, cv::COLOR_GRAY2BGRA},
+                {-1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA},
                 {-1, -1, -1, -1},
-                {cv::COLOR_BGR2GRAY, -1, -1, cv::COLOR_BGR2BGRA},
-                {cv::COLOR_BGRA2GRAY, -1, cv::COLOR_BGRA2BGR, -1}
+                {COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA},
+                {COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1}
             };
 
             const int cvt_code = cvt_codes[src_cn - 1][dst_cn - 1];
             CV_DbgAssert( cvt_code >= 0 );
 
-            cv::gpu::cvtColor(src, dst, cvt_code, dst_cn);
+            gpu::cvtColor(src, dst, cvt_code, dst_cn);
         }
     }
 }
 
-void cv::gpu::FGDStatModel::Impl::create(const cv::gpu::GpuMat& firstFrame, const cv::gpu::FGDStatModel::Params& params)
-{
-    CV_Assert(firstFrame.type() == CV_8UC3 || firstFrame.type() == CV_8UC4);
-
-    params_ = params;
-
-    cv::gpu::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, foreground_);
-
-    copyChannels(firstFrame, background_, out_cn_);
-
-    copyChannels(firstFrame, prevFrame_);
-
-    cv::gpu::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, Ftd_);
-    cv::gpu::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, Fbd_);
-
-    stat_.create(firstFrame.size(), params_, out_cn_);
-    bgfg::setBGPixelStat(stat_);
-
-    if (params_.perform_morphing > 0)
-    {
-        cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(1 + params_.perform_morphing * 2, 1 + params_.perform_morphing * 2));
-        cv::Point anchor(params_.perform_morphing, params_.perform_morphing);
-
-        dilateFilter_ = cv::gpu::createMorphologyFilter(cv::MORPH_DILATE, CV_8UC1, kernel, anchor);
-        erodeFilter_ = cv::gpu::createMorphologyFilter(cv::MORPH_ERODE, CV_8UC1, kernel, anchor);
-    }
-}
-
-void cv::gpu::FGDStatModel::Impl::release()
-{
-    background_.release();
-    foreground_.release();
-
-    prevFrame_.release();
-    Ftd_.release();
-    Fbd_.release();
-    stat_.release();
-
-    hist_.release();
-    histBuf_.release();
-
-    countBuf_.release();
-
-    buf_.release();
-    filterBrd_.release();
-}
-
 /////////////////////////////////////////////////////////////////////////
 // changeDetection
 
 namespace
 {
-    void calcDiffHistogram(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, cv::gpu::GpuMat& hist, cv::gpu::GpuMat& histBuf)
+    void calcDiffHistogram(const GpuMat& prevFrame, const GpuMat& curFrame, GpuMat& hist, GpuMat& histBuf)
     {
-        typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
+        typedef void (*func_t)(PtrStepSzb prevFrame, PtrStepSzb curFrame,
+                               unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
+                               unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
+                               bool cc20, cudaStream_t stream);
         static const func_t funcs[4][4] =
         {
             {0,0,0,0},
             {0,0,0,0},
-            {0,0,bgfg::calcDiffHistogram_gpu<uchar3, uchar3>,bgfg::calcDiffHistogram_gpu<uchar3, uchar4>},
-            {0,0,bgfg::calcDiffHistogram_gpu<uchar4, uchar3>,bgfg::calcDiffHistogram_gpu<uchar4, uchar4>}
+            {0,0,fgd::calcDiffHistogram_gpu<uchar3, uchar3>,fgd::calcDiffHistogram_gpu<uchar3, uchar4>},
+            {0,0,fgd::calcDiffHistogram_gpu<uchar4, uchar3>,fgd::calcDiffHistogram_gpu<uchar4, uchar4>}
         };
 
         hist.create(3, 256, CV_32SC1);
-        histBuf.create(3, bgfg::PARTIAL_HISTOGRAM_COUNT * bgfg::HISTOGRAM_BIN_COUNT, CV_32SC1);
+        histBuf.create(3, fgd::PARTIAL_HISTOGRAM_COUNT * fgd::HISTOGRAM_BIN_COUNT, CV_32SC1);
 
         funcs[prevFrame.channels() - 1][curFrame.channels() - 1](
                     prevFrame, curFrame,
                     hist.ptr<unsigned int>(0), hist.ptr<unsigned int>(1), hist.ptr<unsigned int>(2),
                     histBuf.ptr<unsigned int>(0), histBuf.ptr<unsigned int>(1), histBuf.ptr<unsigned int>(2),
-                    cv::gpu::deviceSupports(cv::gpu::FEATURE_SET_COMPUTE_20), 0);
+                    deviceSupports(FEATURE_SET_COMPUTE_20), 0);
     }
 
-    void calcRelativeVariance(unsigned int hist[3 * 256], double relativeVariance[3][bgfg::HISTOGRAM_BIN_COUNT])
+    void calcRelativeVariance(unsigned int hist[3 * 256], double relativeVariance[3][fgd::HISTOGRAM_BIN_COUNT])
     {
-        std::memset(relativeVariance, 0, 3 * bgfg::HISTOGRAM_BIN_COUNT * sizeof(double));
+        std::memset(relativeVariance, 0, 3 * fgd::HISTOGRAM_BIN_COUNT * sizeof(double));
 
-        for (int thres = bgfg::HISTOGRAM_BIN_COUNT - 2; thres >= 0; --thres)
+        for (int thres = fgd::HISTOGRAM_BIN_COUNT - 2; thres >= 0; --thres)
         {
-            cv::Vec3d sum(0.0, 0.0, 0.0);
-            cv::Vec3d sqsum(0.0, 0.0, 0.0);
-            cv::Vec3i count(0, 0, 0);
+            Vec3d sum(0.0, 0.0, 0.0);
+            Vec3d sqsum(0.0, 0.0, 0.0);
+            Vec3i count(0, 0, 0);
 
-            for (int j = thres; j < bgfg::HISTOGRAM_BIN_COUNT; ++j)
+            for (int j = thres; j < fgd::HISTOGRAM_BIN_COUNT; ++j)
             {
                 sum[0]   += static_cast<double>(j) * hist[j];
                 sqsum[0] += static_cast<double>(j * j) * hist[j];
@@ -383,7 +204,7 @@ namespace
             count[1] = std::max(count[1], 1);
             count[2] = std::max(count[2], 1);
 
-            cv::Vec3d my(
+            Vec3d my(
                 sum[0] / count[0],
                 sum[1] / count[1],
                 sum[2] / count[2]
@@ -395,37 +216,39 @@ namespace
         }
     }
 
-    void calcDiffThreshMask(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, cv::Vec3d bestThres, cv::gpu::GpuMat& changeMask)
+    void calcDiffThreshMask(const GpuMat& prevFrame, const GpuMat& curFrame, Vec3d bestThres, GpuMat& changeMask)
     {
-        typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, uchar3 bestThres, cv::gpu::PtrStepSzb changeMask, cudaStream_t stream);
+        typedef void (*func_t)(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
         static const func_t funcs[4][4] =
         {
             {0,0,0,0},
             {0,0,0,0},
-            {0,0,bgfg::calcDiffThreshMask_gpu<uchar3, uchar3>,bgfg::calcDiffThreshMask_gpu<uchar3, uchar4>},
-            {0,0,bgfg::calcDiffThreshMask_gpu<uchar4, uchar3>,bgfg::calcDiffThreshMask_gpu<uchar4, uchar4>}
+            {0,0,fgd::calcDiffThreshMask_gpu<uchar3, uchar3>,fgd::calcDiffThreshMask_gpu<uchar3, uchar4>},
+            {0,0,fgd::calcDiffThreshMask_gpu<uchar4, uchar3>,fgd::calcDiffThreshMask_gpu<uchar4, uchar4>}
         };
 
-        changeMask.setTo(cv::Scalar::all(0));
+        changeMask.setTo(Scalar::all(0));
 
-        funcs[prevFrame.channels() - 1][curFrame.channels() - 1](prevFrame, curFrame, make_uchar3((uchar)bestThres[0], (uchar)bestThres[1], (uchar)bestThres[2]), changeMask, 0);
+        funcs[prevFrame.channels() - 1][curFrame.channels() - 1](prevFrame, curFrame,
+                                                                 make_uchar3((uchar)bestThres[0], (uchar)bestThres[1], (uchar)bestThres[2]),
+                                                                 changeMask, 0);
     }
 
     // performs change detection for Foreground detection algorithm
-    void changeDetection(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, cv::gpu::GpuMat& changeMask, cv::gpu::GpuMat& hist, cv::gpu::GpuMat& histBuf)
+    void changeDetection(const GpuMat& prevFrame, const GpuMat& curFrame, GpuMat& changeMask, GpuMat& hist, GpuMat& histBuf)
     {
         calcDiffHistogram(prevFrame, curFrame, hist, histBuf);
 
         unsigned int histData[3 * 256];
-        cv::Mat h_hist(3, 256, CV_32SC1, histData);
+        Mat h_hist(3, 256, CV_32SC1, histData);
         hist.download(h_hist);
 
-        double relativeVariance[3][bgfg::HISTOGRAM_BIN_COUNT];
+        double relativeVariance[3][fgd::HISTOGRAM_BIN_COUNT];
         calcRelativeVariance(histData, relativeVariance);
 
         // Find maximum:
-        cv::Vec3d bestThres(10.0, 10.0, 10.0);
-        for (int i = 0; i < bgfg::HISTOGRAM_BIN_COUNT; ++i)
+        Vec3d bestThres(10.0, 10.0, 10.0);
+        for (int i = 0; i < fgd::HISTOGRAM_BIN_COUNT; ++i)
         {
             bestThres[0] = std::max(bestThres[0], relativeVariance[0][i]);
             bestThres[1] = std::max(bestThres[1], relativeVariance[1][i]);
@@ -441,12 +264,12 @@ namespace
 
 namespace
 {
-    int bgfgClassification(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame,
-                           const cv::gpu::GpuMat& Ftd, const cv::gpu::GpuMat& Fbd,
-                           cv::gpu::GpuMat& foreground, cv::gpu::GpuMat& countBuf,
-                           const cv::gpu::FGDStatModel::Params& params, int out_cn)
+    int bgfgClassification(const GpuMat& prevFrame, const GpuMat& curFrame,
+                           const GpuMat& Ftd, const GpuMat& Fbd,
+                           GpuMat& foreground, GpuMat& countBuf,
+                           const FGDParams& params, int out_cn)
     {
-        typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd, cv::gpu::PtrStepSzb foreground,
+        typedef void (*func_t)(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground,
                                int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
         static const func_t funcs[4][4][4] =
         {
@@ -458,24 +281,26 @@ namespace
             },
             {
                 {0,0,0,0}, {0,0,0,0},
-                {0,0,bgfg::bgfgClassification_gpu<uchar3, uchar3, uchar3>,bgfg::bgfgClassification_gpu<uchar3, uchar3, uchar4>},
-                {0,0,bgfg::bgfgClassification_gpu<uchar3, uchar4, uchar3>,bgfg::bgfgClassification_gpu<uchar3, uchar4, uchar4>}
+                {0,0,fgd::bgfgClassification_gpu<uchar3, uchar3, uchar3>,fgd::bgfgClassification_gpu<uchar3, uchar3, uchar4>},
+                {0,0,fgd::bgfgClassification_gpu<uchar3, uchar4, uchar3>,fgd::bgfgClassification_gpu<uchar3, uchar4, uchar4>}
             },
             {
                 {0,0,0,0}, {0,0,0,0},
-                {0,0,bgfg::bgfgClassification_gpu<uchar4, uchar3, uchar3>,bgfg::bgfgClassification_gpu<uchar4, uchar3, uchar4>},
-                {0,0,bgfg::bgfgClassification_gpu<uchar4, uchar4, uchar3>,bgfg::bgfgClassification_gpu<uchar4, uchar4, uchar4>}
+                {0,0,fgd::bgfgClassification_gpu<uchar4, uchar3, uchar3>,fgd::bgfgClassification_gpu<uchar4, uchar3, uchar4>},
+                {0,0,fgd::bgfgClassification_gpu<uchar4, uchar4, uchar3>,fgd::bgfgClassification_gpu<uchar4, uchar4, uchar4>}
             }
         };
 
         const int deltaC  = cvRound(params.delta * 256 / params.Lc);
         const int deltaCC = cvRound(params.delta * 256 / params.Lcc);
 
-        funcs[prevFrame.channels() - 1][curFrame.channels() - 1][out_cn - 1](prevFrame, curFrame, Ftd, Fbd, foreground, deltaC, deltaCC, params.alpha2, params.N1c, params.N1cc, 0);
+        funcs[prevFrame.channels() - 1][curFrame.channels() - 1][out_cn - 1](prevFrame, curFrame, Ftd, Fbd, foreground,
+                                                                             deltaC, deltaCC, params.alpha2,
+                                                                             params.N1c, params.N1cc, 0);
 
-        int count = cv::gpu::countNonZero(foreground, countBuf);
+        int count = gpu::countNonZero(foreground, countBuf);
 
-        cv::gpu::multiply(foreground, cv::Scalar::all(255), foreground);
+        gpu::multiply(foreground, Scalar::all(255), foreground);
 
         return count;
     }
@@ -484,22 +309,24 @@ namespace
 /////////////////////////////////////////////////////////////////////////
 // smoothForeground
 
+#ifdef HAVE_OPENCV_GPUFILTERS
+
 namespace
 {
-    void morphology(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, cv::gpu::GpuMat& filterBrd, int brd, cv::Ptr<cv::gpu::Filter>& filter, cv::Scalar brdVal)
+    void morphology(const GpuMat& src, GpuMat& dst, GpuMat& filterBrd, int brd, Ptr<gpu::Filter>& filter, Scalar brdVal)
     {
-        cv::gpu::copyMakeBorder(src, filterBrd, brd, brd, brd, brd, cv::BORDER_CONSTANT, brdVal);
-        filter->apply(filterBrd(cv::Rect(brd, brd, src.cols, src.rows)), dst);
+        gpu::copyMakeBorder(src, filterBrd, brd, brd, brd, brd, BORDER_CONSTANT, brdVal);
+        filter->apply(filterBrd(Rect(brd, brd, src.cols, src.rows)), dst);
     }
 
-    void smoothForeground(cv::gpu::GpuMat& foreground, cv::gpu::GpuMat& filterBrd, cv::gpu::GpuMat& buf,
-                          cv::Ptr<cv::gpu::Filter>& erodeFilter, cv::Ptr<cv::gpu::Filter>& dilateFilter,
-                          const cv::gpu::FGDStatModel::Params& params)
+    void smoothForeground(GpuMat& foreground, GpuMat& filterBrd, GpuMat& buf,
+                          Ptr<gpu::Filter>& erodeFilter, Ptr<gpu::Filter>& dilateFilter,
+                          const FGDParams& params)
     {
         const int brd = params.perform_morphing;
 
-        const cv::Scalar erodeBrdVal = cv::Scalar::all(UCHAR_MAX);
-        const cv::Scalar dilateBrdVal = cv::Scalar::all(0);
+        const Scalar erodeBrdVal = Scalar::all(UCHAR_MAX);
+        const Scalar dilateBrdVal = Scalar::all(0);
 
         // MORPH_OPEN
         morphology(foreground, buf, filterBrd, brd, erodeFilter, erodeBrdVal);
@@ -511,33 +338,35 @@ namespace
     }
 }
 
+#endif
+
 /////////////////////////////////////////////////////////////////////////
 // findForegroundRegions
 
 namespace
 {
-    void seqToContours(CvSeq* _ccontours, CvMemStorage* storage, cv::OutputArrayOfArrays _contours)
+    void seqToContours(CvSeq* _ccontours, CvMemStorage* storage, OutputArrayOfArrays _contours)
     {
-        cv::Seq<CvSeq*> all_contours(cvTreeToNodeSeq(_ccontours, sizeof(CvSeq), storage));
+        Seq<CvSeq*> all_contours(cvTreeToNodeSeq(_ccontours, sizeof(CvSeq), storage));
 
         size_t total = all_contours.size();
 
         _contours.create((int) total, 1, 0, -1, true);
 
-        cv::SeqIterator<CvSeq*> it = all_contours.begin();
+        SeqIterator<CvSeq*> it = all_contours.begin();
         for (size_t i = 0; i < total; ++i, ++it)
         {
             CvSeq* c = *it;
             ((CvContour*)c)->color = (int)i;
             _contours.create((int)c->total, 1, CV_32SC2, (int)i, true);
-            cv::Mat ci = _contours.getMat((int)i);
+            Mat ci = _contours.getMat((int)i);
             CV_Assert( ci.isContinuous() );
             cvCvtSeqToArray(c, ci.data);
         }
     }
 
-    int findForegroundRegions(cv::gpu::GpuMat& d_foreground, cv::Mat& h_foreground, std::vector< std::vector<cv::Point> >& foreground_regions,
-                              CvMemStorage* storage, const cv::gpu::FGDStatModel::Params& params)
+    int findForegroundRegions(GpuMat& d_foreground, Mat& h_foreground, std::vector< std::vector<Point> >& foreground_regions,
+                              CvMemStorage* storage, const FGDParams& params)
     {
         int region_count = 0;
 
@@ -581,7 +410,7 @@ namespace
         seqToContours(first_seq, storage, foreground_regions);
         h_foreground.setTo(0);
 
-        cv::drawContours(h_foreground, foreground_regions, -1, cv::Scalar::all(255), -1);
+        drawContours(h_foreground, foreground_regions, -1, Scalar::all(255), -1);
 
         d_foreground.upload(h_foreground);
 
@@ -594,12 +423,12 @@ namespace
 
 namespace
 {
-    void updateBackgroundModel(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, const cv::gpu::GpuMat& Ftd, const cv::gpu::GpuMat& Fbd,
-                               const cv::gpu::GpuMat& foreground, cv::gpu::GpuMat& background,
-                               const cv::gpu::FGDStatModel::Params& params)
+    void updateBackgroundModel(const GpuMat& prevFrame, const GpuMat& curFrame, const GpuMat& Ftd, const GpuMat& Fbd,
+                               const GpuMat& foreground, GpuMat& background,
+                               const FGDParams& params)
     {
-        typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd,
-                               cv::gpu::PtrStepSzb foreground, cv::gpu::PtrStepSzb background,
+        typedef void (*func_t)(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd,
+                               PtrStepSzb foreground, PtrStepSzb background,
                                int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
         static const func_t funcs[4][4][4] =
         {
@@ -611,13 +440,13 @@ namespace
             },
             {
                 {0,0,0,0}, {0,0,0,0},
-                {0,0,bgfg::updateBackgroundModel_gpu<uchar3, uchar3, uchar3>,bgfg::updateBackgroundModel_gpu<uchar3, uchar3, uchar4>},
-                {0,0,bgfg::updateBackgroundModel_gpu<uchar3, uchar4, uchar3>,bgfg::updateBackgroundModel_gpu<uchar3, uchar4, uchar4>}
+                {0,0,fgd::updateBackgroundModel_gpu<uchar3, uchar3, uchar3>,fgd::updateBackgroundModel_gpu<uchar3, uchar3, uchar4>},
+                {0,0,fgd::updateBackgroundModel_gpu<uchar3, uchar4, uchar3>,fgd::updateBackgroundModel_gpu<uchar3, uchar4, uchar4>}
             },
             {
                 {0,0,0,0}, {0,0,0,0},
-                {0,0,bgfg::updateBackgroundModel_gpu<uchar4, uchar3, uchar3>,bgfg::updateBackgroundModel_gpu<uchar4, uchar3, uchar4>},
-                {0,0,bgfg::updateBackgroundModel_gpu<uchar4, uchar4, uchar3>,bgfg::updateBackgroundModel_gpu<uchar4, uchar4, uchar4>}
+                {0,0,fgd::updateBackgroundModel_gpu<uchar4, uchar3, uchar3>,fgd::updateBackgroundModel_gpu<uchar4, uchar3, uchar4>},
+                {0,0,fgd::updateBackgroundModel_gpu<uchar4, uchar4, uchar3>,fgd::updateBackgroundModel_gpu<uchar4, uchar4, uchar4>}
             }
         };
 
@@ -626,126 +455,277 @@ namespace
 
         funcs[prevFrame.channels() - 1][curFrame.channels() - 1][background.channels() - 1](
                     prevFrame, curFrame, Ftd, Fbd, foreground, background,
-                    deltaC, deltaCC, params.alpha1, params.alpha2, params.alpha3, params.N1c, params.N1cc, params.N2c, params.N2cc, params.T,
+                    deltaC, deltaCC, params.alpha1, params.alpha2, params.alpha3,
+                    params.N1c, params.N1cc, params.N2c, params.N2cc, params.T,
                     0);
     }
 }
 
-/////////////////////////////////////////////////////////////////////////
-// Impl::update
-
-int cv::gpu::FGDStatModel::Impl::update(const cv::gpu::GpuMat& curFrame)
-{
-    CV_Assert(curFrame.type() == CV_8UC3 || curFrame.type() == CV_8UC4);
-    CV_Assert(curFrame.size() == prevFrame_.size());
-
-    cvClearMemStorage(storage_);
-    foreground_regions_.clear();
-    foreground_.setTo(cv::Scalar::all(0));
-
-    changeDetection(prevFrame_, curFrame, Ftd_, hist_, histBuf_);
-    changeDetection(background_, curFrame, Fbd_, hist_, histBuf_);
-
-    int FG_pixels_count = bgfgClassification(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, countBuf_, params_, out_cn_);
-
-    if (params_.perform_morphing > 0)
-        smoothForeground(foreground_, filterBrd_, buf_, erodeFilter_, dilateFilter_, params_);
-
-    int region_count = 0;
-    if (params_.minArea > 0 || params_.is_obj_without_holes)
-        region_count = findForegroundRegions(foreground_, h_foreground_, foreground_regions_, storage_, params_);
-
-    // Check ALL BG update condition:
-    const double BGFG_FGD_BG_UPDATE_TRESH = 0.5;
-    if (static_cast<double>(FG_pixels_count) / Ftd_.size().area() > BGFG_FGD_BG_UPDATE_TRESH)
-        stat_.setTrained();
-
-    updateBackgroundModel(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, background_, params_);
-
-    copyChannels(curFrame, prevFrame_);
-
-    return region_count;
-}
 
 namespace
 {
-    // Default parameters of foreground detection algorithm:
-    const int BGFG_FGD_LC  = 128;
-    const int BGFG_FGD_N1C = 15;
-    const int BGFG_FGD_N2C = 25;
+    class BGPixelStat
+    {
+    public:
+        void create(Size size, const FGDParams& params);
 
-    const int BGFG_FGD_LCC   = 64;
-    const int BGFG_FGD_N1CC = 25;
-    const int BGFG_FGD_N2CC = 40;
+        void setTrained();
 
-    // Background reference image update parameter:
-    const float BGFG_FGD_ALPHA_1 = 0.1f;
+        operator fgd::BGPixelStat();
 
-    // stat model update parameter
-    // 0.002f ~ 1K frame(~45sec), 0.005 ~ 18sec (if 25fps and absolutely static BG)
-    const float BGFG_FGD_ALPHA_2 = 0.005f;
+    private:
+        GpuMat Pbc_;
+        GpuMat Pbcc_;
+        GpuMat is_trained_st_model_;
+        GpuMat is_trained_dyn_model_;
 
-    // start value for alpha parameter (to fast initiate statistic model)
-    const float BGFG_FGD_ALPHA_3 = 0.1f;
+        GpuMat ctable_Pv_;
+        GpuMat ctable_Pvb_;
+        GpuMat ctable_v_;
 
-    const float BGFG_FGD_DELTA = 2.0f;
+        GpuMat cctable_Pv_;
+        GpuMat cctable_Pvb_;
+        GpuMat cctable_v1_;
+        GpuMat cctable_v2_;
+    };
 
-    const float BGFG_FGD_T = 0.9f;
+    void BGPixelStat::create(Size size, const FGDParams& params)
+    {
+        gpu::ensureSizeIsEnough(size, CV_32FC1, Pbc_);
+        Pbc_.setTo(Scalar::all(0));
 
-    const float BGFG_FGD_MINAREA= 15.0f;
+        gpu::ensureSizeIsEnough(size, CV_32FC1, Pbcc_);
+        Pbcc_.setTo(Scalar::all(0));
+
+        gpu::ensureSizeIsEnough(size, CV_8UC1, is_trained_st_model_);
+        is_trained_st_model_.setTo(Scalar::all(0));
+
+        gpu::ensureSizeIsEnough(size, CV_8UC1, is_trained_dyn_model_);
+        is_trained_dyn_model_.setTo(Scalar::all(0));
+
+        gpu::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_32FC1, ctable_Pv_);
+        ctable_Pv_.setTo(Scalar::all(0));
+
+        gpu::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_32FC1, ctable_Pvb_);
+        ctable_Pvb_.setTo(Scalar::all(0));
+
+        gpu::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_8UC4, ctable_v_);
+        ctable_v_.setTo(Scalar::all(0));
+
+        gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_32FC1, cctable_Pv_);
+        cctable_Pv_.setTo(Scalar::all(0));
+
+        gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_32FC1, cctable_Pvb_);
+        cctable_Pvb_.setTo(Scalar::all(0));
+
+        gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_8UC4, cctable_v1_);
+        cctable_v1_.setTo(Scalar::all(0));
+
+        gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_8UC4, cctable_v2_);
+        cctable_v2_.setTo(Scalar::all(0));
+    }
+
+    void BGPixelStat::setTrained()
+    {
+        is_trained_st_model_.setTo(Scalar::all(1));
+        is_trained_dyn_model_.setTo(Scalar::all(1));
+    }
+
+    BGPixelStat::operator fgd::BGPixelStat()
+    {
+        fgd::BGPixelStat stat;
+
+        stat.rows_ = Pbc_.rows;
+
+        stat.Pbc_data_ = Pbc_.data;
+        stat.Pbc_step_ = Pbc_.step;
+
+        stat.Pbcc_data_ = Pbcc_.data;
+        stat.Pbcc_step_ = Pbcc_.step;
+
+        stat.is_trained_st_model_data_ = is_trained_st_model_.data;
+        stat.is_trained_st_model_step_ = is_trained_st_model_.step;
+
+        stat.is_trained_dyn_model_data_ = is_trained_dyn_model_.data;
+        stat.is_trained_dyn_model_step_ = is_trained_dyn_model_.step;
+
+        stat.ctable_Pv_data_ = ctable_Pv_.data;
+        stat.ctable_Pv_step_ = ctable_Pv_.step;
+
+        stat.ctable_Pvb_data_ = ctable_Pvb_.data;
+        stat.ctable_Pvb_step_ = ctable_Pvb_.step;
+
+        stat.ctable_v_data_ = ctable_v_.data;
+        stat.ctable_v_step_ = ctable_v_.step;
+
+        stat.cctable_Pv_data_ = cctable_Pv_.data;
+        stat.cctable_Pv_step_ = cctable_Pv_.step;
+
+        stat.cctable_Pvb_data_ = cctable_Pvb_.data;
+        stat.cctable_Pvb_step_ = cctable_Pvb_.step;
+
+        stat.cctable_v1_data_ = cctable_v1_.data;
+        stat.cctable_v1_step_ = cctable_v1_.step;
+
+        stat.cctable_v2_data_ = cctable_v2_.data;
+        stat.cctable_v2_step_ = cctable_v2_.step;
+
+        return stat;
+    }
+
+    class FGDImpl : public gpu::BackgroundSubtractorFGD
+    {
+    public:
+        explicit FGDImpl(const FGDParams& params);
+        ~FGDImpl();
+
+        void apply(InputArray image, OutputArray fgmask, double learningRate=-1);
+
+        void getBackgroundImage(OutputArray backgroundImage) const;
+
+        void getForegroundRegions(OutputArrayOfArrays foreground_regions);
+
+    private:
+        void initialize(const GpuMat& firstFrame);
+
+        FGDParams params_;
+        Size frameSize_;
+
+        GpuMat background_;
+        GpuMat foreground_;
+        std::vector< std::vector<Point> > foreground_regions_;
+
+        Mat h_foreground_;
+
+        GpuMat prevFrame_;
+        GpuMat Ftd_;
+        GpuMat Fbd_;
+        BGPixelStat stat_;
+
+        GpuMat hist_;
+        GpuMat histBuf_;
+
+        GpuMat countBuf_;
+
+        GpuMat buf_;
+        GpuMat filterBrd_;
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+        Ptr<gpu::Filter> dilateFilter_;
+        Ptr<gpu::Filter> erodeFilter_;
+#endif
+
+        CvMemStorage* storage_;
+    };
+
+    FGDImpl::FGDImpl(const FGDParams& params) : params_(params), frameSize_(0, 0)
+    {
+        storage_ = cvCreateMemStorage();
+        CV_Assert( storage_ != 0 );
+    }
+
+    FGDImpl::~FGDImpl()
+    {
+        cvReleaseMemStorage(&storage_);
+    }
+
+    void FGDImpl::apply(InputArray _frame, OutputArray fgmask, double)
+    {
+        GpuMat curFrame = _frame.getGpuMat();
+
+        if (curFrame.size() != frameSize_)
+        {
+            initialize(curFrame);
+            return;
+        }
+
+        CV_Assert( curFrame.type() == CV_8UC3 || curFrame.type() == CV_8UC4 );
+        CV_Assert( curFrame.size() == prevFrame_.size() );
+
+        cvClearMemStorage(storage_);
+        foreground_regions_.clear();
+        foreground_.setTo(Scalar::all(0));
+
+        changeDetection(prevFrame_, curFrame, Ftd_, hist_, histBuf_);
+        changeDetection(background_, curFrame, Fbd_, hist_, histBuf_);
+
+        int FG_pixels_count = bgfgClassification(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, countBuf_, params_, 4);
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+        if (params_.perform_morphing > 0)
+            smoothForeground(foreground_, filterBrd_, buf_, erodeFilter_, dilateFilter_, params_);
+#endif
+
+        if (params_.minArea > 0 || params_.is_obj_without_holes)
+            findForegroundRegions(foreground_, h_foreground_, foreground_regions_, storage_, params_);
+
+        // Check ALL BG update condition:
+        const double BGFG_FGD_BG_UPDATE_TRESH = 0.5;
+        if (static_cast<double>(FG_pixels_count) / Ftd_.size().area() > BGFG_FGD_BG_UPDATE_TRESH)
+            stat_.setTrained();
+
+        updateBackgroundModel(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, background_, params_);
+
+        copyChannels(curFrame, prevFrame_, 4);
+
+        foreground_.copyTo(fgmask);
+    }
+
+    void FGDImpl::getBackgroundImage(OutputArray backgroundImage) const
+    {
+        gpu::cvtColor(background_, backgroundImage, COLOR_BGRA2BGR);
+    }
+
+    void FGDImpl::getForegroundRegions(OutputArrayOfArrays dst)
+    {
+        size_t total = foreground_regions_.size();
+
+        dst.create((int) total, 1, 0, -1, true);
+
+        for (size_t i = 0; i < total; ++i)
+        {
+            std::vector<Point>& c = foreground_regions_[i];
+
+            dst.create((int) c.size(), 1, CV_32SC2, (int) i, true);
+            Mat ci = dst.getMat((int) i);
+
+            Mat(ci.size(), ci.type(), &c[0]).copyTo(ci);
+        }
+    }
+
+    void FGDImpl::initialize(const GpuMat& firstFrame)
+    {
+        CV_Assert( firstFrame.type() == CV_8UC3 || firstFrame.type() == CV_8UC4 );
+
+        frameSize_ = firstFrame.size();
+
+        gpu::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, foreground_);
+
+        copyChannels(firstFrame, background_, 4);
+        copyChannels(firstFrame, prevFrame_, 4);
+
+        gpu::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, Ftd_);
+        gpu::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, Fbd_);
+
+        stat_.create(firstFrame.size(), params_);
+        fgd::setBGPixelStat(stat_);
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+        if (params_.perform_morphing > 0)
+        {
+            Mat kernel = getStructuringElement(MORPH_RECT, Size(1 + params_.perform_morphing * 2, 1 + params_.perform_morphing * 2));
+            Point anchor(params_.perform_morphing, params_.perform_morphing);
+
+            dilateFilter_ = gpu::createMorphologyFilter(MORPH_DILATE, CV_8UC1, kernel, anchor);
+            erodeFilter_ = gpu::createMorphologyFilter(MORPH_ERODE, CV_8UC1, kernel, anchor);
+        }
+#endif
+    }
 }
 
-cv::gpu::FGDStatModel::Params::Params()
+Ptr<gpu::BackgroundSubtractorFGD> cv::gpu::createBackgroundSubtractorFGD(const FGDParams& params)
 {
-    Lc      = BGFG_FGD_LC;
-    N1c     = BGFG_FGD_N1C;
-    N2c     = BGFG_FGD_N2C;
-
-    Lcc     = BGFG_FGD_LCC;
-    N1cc    = BGFG_FGD_N1CC;
-    N2cc    = BGFG_FGD_N2CC;
-
-    delta   = BGFG_FGD_DELTA;
-
-    alpha1  = BGFG_FGD_ALPHA_1;
-    alpha2  = BGFG_FGD_ALPHA_2;
-    alpha3  = BGFG_FGD_ALPHA_3;
-
-    T       = BGFG_FGD_T;
-    minArea = BGFG_FGD_MINAREA;
-
-    is_obj_without_holes = true;
-    perform_morphing     = 1;
-}
-
-cv::gpu::FGDStatModel::FGDStatModel(int out_cn)
-{
-    impl_.reset(new Impl(background, foreground, foreground_regions, out_cn));
-}
-
-cv::gpu::FGDStatModel::FGDStatModel(const cv::gpu::GpuMat& firstFrame, const Params& params, int out_cn)
-{
-    impl_.reset(new Impl(background, foreground, foreground_regions, out_cn));
-    create(firstFrame, params);
-}
-
-cv::gpu::FGDStatModel::~FGDStatModel()
-{
-}
-
-void cv::gpu::FGDStatModel::create(const cv::gpu::GpuMat& firstFrame, const Params& params)
-{
-    impl_->create(firstFrame, params);
-}
-
-void cv::gpu::FGDStatModel::release()
-{
-    impl_->release();
-}
-
-int cv::gpu::FGDStatModel::update(const cv::gpu::GpuMat& curFrame)
-{
-    return impl_->update(curFrame);
+    return makePtr<FGDImpl>(params);
 }
 
 #endif // HAVE_CUDA
diff --git a/modules/gpubgsegm/src/gmg.cpp b/modules/gpubgsegm/src/gmg.cpp
index b97f0836f..f8650b242 100644
--- a/modules/gpubgsegm/src/gmg.cpp
+++ b/modules/gpubgsegm/src/gmg.cpp
@@ -42,17 +42,17 @@
 
 #include "precomp.hpp"
 
+using namespace cv;
+using namespace cv::gpu;
+
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-cv::gpu::GMG_GPU::GMG_GPU() { throw_no_cuda(); }
-void cv::gpu::GMG_GPU::initialize(cv::Size, float, float) { throw_no_cuda(); }
-void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, float, cv::gpu::Stream&) { throw_no_cuda(); }
-void cv::gpu::GMG_GPU::release() {}
+Ptr<gpu::BackgroundSubtractorGMG> cv::gpu::createBackgroundSubtractorGMG(int, double) { throw_no_cuda(); return Ptr<gpu::BackgroundSubtractorGMG>(); }
 
 #else
 
 namespace cv { namespace gpu { namespace cudev {
-    namespace bgfg_gmg
+    namespace gmg
     {
         void loadConstants(int width, int height, float minVal, float maxVal, int quantizationLevels, float backgroundPrior,
                            float decisionThreshold, int maxFeatures, int numInitializationFrames);
@@ -63,103 +63,215 @@ namespace cv { namespace gpu { namespace cudev {
     }
 }}}
 
-cv::gpu::GMG_GPU::GMG_GPU()
+namespace
 {
-    maxFeatures = 64;
-    learningRate = 0.025f;
-    numInitializationFrames = 120;
-    quantizationLevels = 16;
-    backgroundPrior = 0.8f;
-    decisionThreshold = 0.8f;
-    smoothingRadius = 7;
-    updateBackgroundModel = true;
-}
-
-void cv::gpu::GMG_GPU::initialize(cv::Size frameSize, float min, float max)
-{
-    using namespace cv::gpu::cudev::bgfg_gmg;
-
-    CV_Assert(min < max);
-    CV_Assert(maxFeatures > 0);
-    CV_Assert(learningRate >= 0.0f && learningRate <= 1.0f);
-    CV_Assert(numInitializationFrames >= 1);
-    CV_Assert(quantizationLevels >= 1 && quantizationLevels <= 255);
-    CV_Assert(backgroundPrior >= 0.0f && backgroundPrior <= 1.0f);
-
-    minVal_ = min;
-    maxVal_ = max;
-
-    frameSize_ = frameSize;
-
-    frameNum_ = 0;
-
-    nfeatures_.create(frameSize_, CV_32SC1);
-    colors_.create(maxFeatures * frameSize_.height, frameSize_.width, CV_32SC1);
-    weights_.create(maxFeatures * frameSize_.height, frameSize_.width, CV_32FC1);
-
-    nfeatures_.setTo(cv::Scalar::all(0));
-
-    if (smoothingRadius > 0)
-        boxFilter_ = cv::gpu::createBoxFilter(CV_8UC1, -1, cv::Size(smoothingRadius, smoothingRadius));
-
-    loadConstants(frameSize_.width, frameSize_.height, minVal_, maxVal_, quantizationLevels, backgroundPrior, decisionThreshold, maxFeatures, numInitializationFrames);
-}
-
-void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat& fgmask, float newLearningRate, cv::gpu::Stream& stream)
-{
-    using namespace cv::gpu::cudev::bgfg_gmg;
-
-    typedef void (*func_t)(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
-                           int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
-    static const func_t funcs[6][4] =
+    class GMGImpl : public gpu::BackgroundSubtractorGMG
     {
-        {update_gpu<uchar>, 0, update_gpu<uchar3>, update_gpu<uchar4>},
-        {0,0,0,0},
-        {update_gpu<ushort>, 0, update_gpu<ushort3>, update_gpu<ushort4>},
-        {0,0,0,0},
-        {0,0,0,0},
-        {update_gpu<float>, 0, update_gpu<float3>, update_gpu<float4>}
+    public:
+        GMGImpl(int initializationFrames, double decisionThreshold);
+
+        void apply(InputArray image, OutputArray fgmask, double learningRate=-1);
+        void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream);
+
+        void getBackgroundImage(OutputArray backgroundImage) const;
+
+        int getMaxFeatures() const { return maxFeatures_; }
+        void setMaxFeatures(int maxFeatures) { maxFeatures_ = maxFeatures; }
+
+        double getDefaultLearningRate() const { return learningRate_; }
+        void setDefaultLearningRate(double lr) { learningRate_ = (float) lr; }
+
+        int getNumFrames() const { return numInitializationFrames_; }
+        void setNumFrames(int nframes) { numInitializationFrames_ = nframes; }
+
+        int getQuantizationLevels() const { return quantizationLevels_; }
+        void setQuantizationLevels(int nlevels) { quantizationLevels_ = nlevels; }
+
+        double getBackgroundPrior() const { return backgroundPrior_; }
+        void setBackgroundPrior(double bgprior) { backgroundPrior_ = (float) bgprior; }
+
+        int getSmoothingRadius() const { return smoothingRadius_; }
+        void setSmoothingRadius(int radius) { smoothingRadius_ = radius; }
+
+        double getDecisionThreshold() const { return decisionThreshold_; }
+        void setDecisionThreshold(double thresh) { decisionThreshold_ = (float) thresh; }
+
+        bool getUpdateBackgroundModel() const { return updateBackgroundModel_; }
+        void setUpdateBackgroundModel(bool update) { updateBackgroundModel_ = update; }
+
+        double getMinVal() const { return minVal_; }
+        void setMinVal(double val) { minVal_ = (float) val; }
+
+        double getMaxVal() const { return maxVal_; }
+        void setMaxVal(double val) { maxVal_ = (float) val; }
+
+    private:
+        void initialize(Size frameSize, float min, float max);
+
+        //! Total number of distinct colors to maintain in histogram.
+        int maxFeatures_;
+
+        //! Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms.
+        float learningRate_;
+
+        //! Number of frames of video to use to initialize histograms.
+        int numInitializationFrames_;
+
+        //! Number of discrete levels in each channel to be used in histograms.
+        int quantizationLevels_;
+
+        //! Prior probability that any given pixel is a background pixel. A sensitivity parameter.
+        float backgroundPrior_;
+
+        //! Smoothing radius, in pixels, for cleaning up FG image.
+        int smoothingRadius_;
+
+        //! Value above which pixel is determined to be FG.
+        float decisionThreshold_;
+
+        //! Perform background model update.
+        bool updateBackgroundModel_;
+
+        float minVal_, maxVal_;
+
+        Size frameSize_;
+        int frameNum_;
+
+        GpuMat nfeatures_;
+        GpuMat colors_;
+        GpuMat weights_;
+
+#if defined(HAVE_OPENCV_GPUFILTERS) && defined(HAVE_OPENCV_GPUARITHM)
+        Ptr<gpu::Filter> boxFilter_;
+        GpuMat buf_;
+#endif
     };
 
-    CV_Assert(frame.depth() == CV_8U || frame.depth() == CV_16U || frame.depth() == CV_32F);
-    CV_Assert(frame.channels() == 1 || frame.channels() == 3 || frame.channels() == 4);
-
-    if (newLearningRate != -1.0f)
+    GMGImpl::GMGImpl(int initializationFrames, double decisionThreshold)
     {
-        CV_Assert(newLearningRate >= 0.0f && newLearningRate <= 1.0f);
-        learningRate = newLearningRate;
+        maxFeatures_ = 64;
+        learningRate_ = 0.025f;
+        numInitializationFrames_ = initializationFrames;
+        quantizationLevels_ = 16;
+        backgroundPrior_ = 0.8f;
+        decisionThreshold_ = (float) decisionThreshold;
+        smoothingRadius_ = 7;
+        updateBackgroundModel_ = true;
+        minVal_ = maxVal_ = 0;
     }
 
-    if (frame.size() != frameSize_)
-        initialize(frame.size(), 0.0f, frame.depth() == CV_8U ? 255.0f : frame.depth() == CV_16U ? std::numeric_limits<ushort>::max() : 1.0f);
-
-    fgmask.create(frameSize_, CV_8UC1);
-    fgmask.setTo(cv::Scalar::all(0), stream);
-
-    funcs[frame.depth()][frame.channels() - 1](frame, fgmask, colors_, weights_, nfeatures_, frameNum_, learningRate, updateBackgroundModel, cv::gpu::StreamAccessor::getStream(stream));
-
-    // medianBlur
-    if (smoothingRadius > 0)
+    void GMGImpl::apply(InputArray image, OutputArray fgmask, double learningRate)
     {
-        boxFilter_->apply(fgmask, buf_, stream);
-        int minCount = (smoothingRadius * smoothingRadius + 1) / 2;
-        double thresh = 255.0 * minCount / (smoothingRadius * smoothingRadius);
-        cv::gpu::threshold(buf_, fgmask, thresh, 255.0, cv::THRESH_BINARY, stream);
+        apply(image, fgmask, learningRate, Stream::Null());
     }
 
-    // keep track of how many frames we have processed
-    ++frameNum_;
+    void GMGImpl::apply(InputArray _frame, OutputArray _fgmask, double newLearningRate, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::gmg;
+
+        typedef void (*func_t)(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
+                               int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
+        static const func_t funcs[6][4] =
+        {
+            {update_gpu<uchar>, 0, update_gpu<uchar3>, update_gpu<uchar4>},
+            {0,0,0,0},
+            {update_gpu<ushort>, 0, update_gpu<ushort3>, update_gpu<ushort4>},
+            {0,0,0,0},
+            {0,0,0,0},
+            {update_gpu<float>, 0, update_gpu<float3>, update_gpu<float4>}
+        };
+
+        GpuMat frame = _frame.getGpuMat();
+
+        CV_Assert( frame.depth() == CV_8U || frame.depth() == CV_16U || frame.depth() == CV_32F );
+        CV_Assert( frame.channels() == 1 || frame.channels() == 3 || frame.channels() == 4 );
+
+        if (newLearningRate != -1.0)
+        {
+            CV_Assert( newLearningRate >= 0.0 && newLearningRate <= 1.0 );
+            learningRate_ = (float) newLearningRate;
+        }
+
+        if (frame.size() != frameSize_)
+        {
+            double minVal = minVal_;
+            double maxVal = maxVal_;
+
+            if (minVal_ == 0 && maxVal_ == 0)
+            {
+                minVal = 0;
+                maxVal = frame.depth() == CV_8U ? 255.0 : frame.depth() == CV_16U ? std::numeric_limits<ushort>::max() : 1.0;
+            }
+
+            initialize(frame.size(), (float) minVal, (float) maxVal);
+        }
+
+        _fgmask.create(frameSize_, CV_8UC1);
+        GpuMat fgmask = _fgmask.getGpuMat();
+
+        fgmask.setTo(Scalar::all(0), stream);
+
+        funcs[frame.depth()][frame.channels() - 1](frame, fgmask, colors_, weights_, nfeatures_, frameNum_,
+                                                   learningRate_, updateBackgroundModel_, StreamAccessor::getStream(stream));
+
+#if defined(HAVE_OPENCV_GPUFILTERS) && defined(HAVE_OPENCV_GPUARITHM)
+        // medianBlur
+        if (smoothingRadius_ > 0)
+        {
+            boxFilter_->apply(fgmask, buf_, stream);
+            const int minCount = (smoothingRadius_ * smoothingRadius_ + 1) / 2;
+            const double thresh = 255.0 * minCount / (smoothingRadius_ * smoothingRadius_);
+            gpu::threshold(buf_, fgmask, thresh, 255.0, THRESH_BINARY, stream);
+        }
+#endif
+
+        // keep track of how many frames we have processed
+        ++frameNum_;
+    }
+
+    void GMGImpl::getBackgroundImage(OutputArray backgroundImage) const
+    {
+        (void) backgroundImage;
+        CV_Error(Error::StsNotImplemented, "Not implemented");
+    }
+
+    void GMGImpl::initialize(Size frameSize, float min, float max)
+    {
+        using namespace cv::gpu::cudev::gmg;
+
+        CV_Assert( maxFeatures_ > 0 );
+        CV_Assert( learningRate_ >= 0.0f && learningRate_ <= 1.0f);
+        CV_Assert( numInitializationFrames_ >= 1);
+        CV_Assert( quantizationLevels_ >= 1 && quantizationLevels_ <= 255);
+        CV_Assert( backgroundPrior_ >= 0.0f && backgroundPrior_ <= 1.0f);
+
+        minVal_ = min;
+        maxVal_ = max;
+        CV_Assert( minVal_ < maxVal_ );
+
+        frameSize_ = frameSize;
+
+        frameNum_ = 0;
+
+        nfeatures_.create(frameSize_, CV_32SC1);
+        colors_.create(maxFeatures_ * frameSize_.height, frameSize_.width, CV_32SC1);
+        weights_.create(maxFeatures_ * frameSize_.height, frameSize_.width, CV_32FC1);
+
+        nfeatures_.setTo(Scalar::all(0));
+
+#if defined(HAVE_OPENCV_GPUFILTERS) && defined(HAVE_OPENCV_GPUARITHM)
+        if (smoothingRadius_ > 0)
+            boxFilter_ = gpu::createBoxFilter(CV_8UC1, -1, Size(smoothingRadius_, smoothingRadius_));
+#endif
+
+        loadConstants(frameSize_.width, frameSize_.height, minVal_, maxVal_,
+                      quantizationLevels_, backgroundPrior_, decisionThreshold_, maxFeatures_, numInitializationFrames_);
+    }
 }
 
-void cv::gpu::GMG_GPU::release()
+Ptr<gpu::BackgroundSubtractorGMG> cv::gpu::createBackgroundSubtractorGMG(int initializationFrames, double decisionThreshold)
 {
-    frameSize_ = Size();
-
-    nfeatures_.release();
-    colors_.release();
-    weights_.release();
-    boxFilter_.release();
-    buf_.release();
+    return makePtr<GMGImpl>(initializationFrames, decisionThreshold);
 }
 
 #endif
diff --git a/modules/gpubgsegm/src/mog.cpp b/modules/gpubgsegm/src/mog.cpp
index 22b31074f..12e5b3230 100644
--- a/modules/gpubgsegm/src/mog.cpp
+++ b/modules/gpubgsegm/src/mog.cpp
@@ -42,19 +42,12 @@
 
 #include "precomp.hpp"
 
+using namespace cv;
+using namespace cv::gpu;
+
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-cv::gpu::MOG_GPU::MOG_GPU(int) { throw_no_cuda(); }
-void cv::gpu::MOG_GPU::initialize(cv::Size, int) { throw_no_cuda(); }
-void cv::gpu::MOG_GPU::operator()(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, float, Stream&) { throw_no_cuda(); }
-void cv::gpu::MOG_GPU::getBackgroundImage(GpuMat&, Stream&) const { throw_no_cuda(); }
-void cv::gpu::MOG_GPU::release() {}
-
-cv::gpu::MOG2_GPU::MOG2_GPU(int) { throw_no_cuda(); }
-void cv::gpu::MOG2_GPU::initialize(cv::Size, int) { throw_no_cuda(); }
-void cv::gpu::MOG2_GPU::operator()(const GpuMat&, GpuMat&, float, Stream&) { throw_no_cuda(); }
-void cv::gpu::MOG2_GPU::getBackgroundImage(GpuMat&, Stream&) const { throw_no_cuda(); }
-void cv::gpu::MOG2_GPU::release() {}
+Ptr<gpu::BackgroundSubtractorMOG> cv::gpu::createBackgroundSubtractorMOG(int, int, double, double)  { throw_no_cuda(); return Ptr<gpu::BackgroundSubtractorMOG>(); }
 
 #else
 
@@ -66,14 +59,10 @@ namespace cv { namespace gpu { namespace cudev
                      int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma,
                      cudaStream_t stream);
         void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream);
-
-        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal);
-        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
-        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
     }
 }}}
 
-namespace mog
+namespace
 {
     const int defaultNMixtures = 5;
     const int defaultHistory = 200;
@@ -81,199 +70,140 @@ namespace mog
     const float defaultVarThreshold = 2.5f * 2.5f;
     const float defaultNoiseSigma = 30.0f * 0.5f;
     const float defaultInitialWeight = 0.05f;
+
+    class MOGImpl : public gpu::BackgroundSubtractorMOG
+    {
+    public:
+        MOGImpl(int history, int nmixtures, double backgroundRatio, double noiseSigma);
+
+        void apply(InputArray image, OutputArray fgmask, double learningRate=-1);
+        void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream);
+
+        void getBackgroundImage(OutputArray backgroundImage) const;
+        void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const;
+
+        int getHistory() const { return history_; }
+        void setHistory(int nframes) { history_ = nframes; }
+
+        int getNMixtures() const { return nmixtures_; }
+        void setNMixtures(int nmix) { nmixtures_ = nmix; }
+
+        double getBackgroundRatio() const { return backgroundRatio_; }
+        void setBackgroundRatio(double backgroundRatio) { backgroundRatio_ = (float) backgroundRatio; }
+
+        double getNoiseSigma() const { return noiseSigma_; }
+        void setNoiseSigma(double noiseSigma) { noiseSigma_ = (float) noiseSigma; }
+
+    private:
+        //! re-initiaization method
+        void initialize(Size frameSize, int frameType);
+
+        int history_;
+        int nmixtures_;
+        float backgroundRatio_;
+        float noiseSigma_;
+
+        float varThreshold_;
+
+        Size frameSize_;
+        int frameType_;
+        int nframes_;
+
+        GpuMat weight_;
+        GpuMat sortKey_;
+        GpuMat mean_;
+        GpuMat var_;
+    };
+
+    MOGImpl::MOGImpl(int history, int nmixtures, double backgroundRatio, double noiseSigma) :
+        frameSize_(0, 0), frameType_(0), nframes_(0)
+    {
+        history_ = history > 0 ? history : defaultHistory;
+        nmixtures_ = std::min(nmixtures > 0 ? nmixtures : defaultNMixtures, 8);
+        backgroundRatio_ = backgroundRatio > 0 ? (float) backgroundRatio : defaultBackgroundRatio;
+        noiseSigma_ = noiseSigma > 0 ? (float) noiseSigma : defaultNoiseSigma;
+
+        varThreshold_ = defaultVarThreshold;
+    }
+
+    void MOGImpl::apply(InputArray image, OutputArray fgmask, double learningRate)
+    {
+        apply(image, fgmask, learningRate, Stream::Null());
+    }
+
+    void MOGImpl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::mog;
+
+        GpuMat frame = _frame.getGpuMat();
+
+        CV_Assert( frame.depth() == CV_8U );
+
+        int ch = frame.channels();
+        int work_ch = ch;
+
+        if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels())
+            initialize(frame.size(), frame.type());
+
+        _fgmask.create(frameSize_, CV_8UC1);
+        GpuMat fgmask = _fgmask.getGpuMat();
+
+        ++nframes_;
+        learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(nframes_, history_);
+        CV_Assert( learningRate >= 0 );
+
+        mog_gpu(frame, ch, fgmask, weight_, sortKey_, mean_, var_, nmixtures_,
+                varThreshold_, (float) learningRate, backgroundRatio_, noiseSigma_,
+                StreamAccessor::getStream(stream));
+    }
+
+    void MOGImpl::getBackgroundImage(OutputArray backgroundImage) const
+    {
+        getBackgroundImage(backgroundImage, Stream::Null());
+    }
+
+    void MOGImpl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const
+    {
+        using namespace cv::gpu::cudev::mog;
+
+        _backgroundImage.create(frameSize_, frameType_);
+        GpuMat backgroundImage = _backgroundImage.getGpuMat();
+
+        getBackgroundImage_gpu(backgroundImage.channels(), weight_, mean_, backgroundImage, nmixtures_, backgroundRatio_, StreamAccessor::getStream(stream));
+    }
+
+    void MOGImpl::initialize(Size frameSize, int frameType)
+    {
+        CV_Assert( frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4 );
+
+        frameSize_ = frameSize;
+        frameType_ = frameType;
+
+        int ch = CV_MAT_CN(frameType);
+        int work_ch = ch;
+
+        // for each gaussian mixture of each pixel bg model we store
+        // the mixture sort key (w/sum_of_variances), the mixture weight (w),
+        // the mean (nchannels values) and
+        // the diagonal covariance matrix (another nchannels values)
+
+        weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+        sortKey_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+        mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+        var_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+
+        weight_.setTo(cv::Scalar::all(0));
+        sortKey_.setTo(cv::Scalar::all(0));
+        mean_.setTo(cv::Scalar::all(0));
+        var_.setTo(cv::Scalar::all(0));
+
+        nframes_ = 0;
+    }
 }
 
-cv::gpu::MOG_GPU::MOG_GPU(int nmixtures) :
-    frameSize_(0, 0), frameType_(0), nframes_(0)
+Ptr<gpu::BackgroundSubtractorMOG> cv::gpu::createBackgroundSubtractorMOG(int history, int nmixtures, double backgroundRatio, double noiseSigma)
 {
-    nmixtures_ = std::min(nmixtures > 0 ? nmixtures : mog::defaultNMixtures, 8);
-    history = mog::defaultHistory;
-    varThreshold = mog::defaultVarThreshold;
-    backgroundRatio = mog::defaultBackgroundRatio;
-    noiseSigma = mog::defaultNoiseSigma;
-}
-
-void cv::gpu::MOG_GPU::initialize(cv::Size frameSize, int frameType)
-{
-    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
-
-    frameSize_ = frameSize;
-    frameType_ = frameType;
-
-    int ch = CV_MAT_CN(frameType);
-    int work_ch = ch;
-
-    // for each gaussian mixture of each pixel bg model we store
-    // the mixture sort key (w/sum_of_variances), the mixture weight (w),
-    // the mean (nchannels values) and
-    // the diagonal covariance matrix (another nchannels values)
-
-    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-    sortKey_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
-    var_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
-
-    weight_.setTo(cv::Scalar::all(0));
-    sortKey_.setTo(cv::Scalar::all(0));
-    mean_.setTo(cv::Scalar::all(0));
-    var_.setTo(cv::Scalar::all(0));
-
-    nframes_ = 0;
-}
-
-void cv::gpu::MOG_GPU::operator()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat& fgmask, float learningRate, Stream& stream)
-{
-    using namespace cv::gpu::cudev::mog;
-
-    CV_Assert(frame.depth() == CV_8U);
-
-    int ch = frame.channels();
-    int work_ch = ch;
-
-    if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels())
-        initialize(frame.size(), frame.type());
-
-    fgmask.create(frameSize_, CV_8UC1);
-
-    ++nframes_;
-    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(nframes_, history);
-    CV_Assert(learningRate >= 0.0f);
-
-    mog_gpu(frame, ch, fgmask, weight_, sortKey_, mean_, var_, nmixtures_,
-            varThreshold, learningRate, backgroundRatio, noiseSigma,
-            StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::MOG_GPU::getBackgroundImage(GpuMat& backgroundImage, Stream& stream) const
-{
-    using namespace cv::gpu::cudev::mog;
-
-    backgroundImage.create(frameSize_, frameType_);
-
-    getBackgroundImage_gpu(backgroundImage.channels(), weight_, mean_, backgroundImage, nmixtures_, backgroundRatio, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::MOG_GPU::release()
-{
-    frameSize_ = Size(0, 0);
-    frameType_ = 0;
-    nframes_ = 0;
-
-    weight_.release();
-    sortKey_.release();
-    mean_.release();
-    var_.release();
-}
-
-/////////////////////////////////////////////////////////////////
-// MOG2
-
-namespace mog2
-{
-    // default parameters of gaussian background detection algorithm
-    const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2
-    const float defaultVarThreshold = 4.0f * 4.0f;
-    const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
-    const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
-    const float defaultVarThresholdGen = 3.0f * 3.0f;
-    const float defaultVarInit = 15.0f; // initial variance for new components
-    const float defaultVarMax = 5.0f * defaultVarInit;
-    const float defaultVarMin = 4.0f;
-
-    // additional parameters
-    const float defaultfCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
-    const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
-    const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
-}
-
-cv::gpu::MOG2_GPU::MOG2_GPU(int nmixtures) :
-    frameSize_(0, 0), frameType_(0), nframes_(0)
-{
-    nmixtures_ = nmixtures > 0 ? nmixtures : mog2::defaultNMixtures;
-
-    history = mog2::defaultHistory;
-    varThreshold = mog2::defaultVarThreshold;
-    bShadowDetection = true;
-
-    backgroundRatio = mog2::defaultBackgroundRatio;
-    fVarInit = mog2::defaultVarInit;
-    fVarMax  = mog2::defaultVarMax;
-    fVarMin = mog2::defaultVarMin;
-
-    varThresholdGen = mog2::defaultVarThresholdGen;
-    fCT = mog2::defaultfCT;
-    nShadowDetection =  mog2::defaultnShadowDetection;
-    fTau = mog2::defaultfTau;
-}
-
-void cv::gpu::MOG2_GPU::initialize(cv::Size frameSize, int frameType)
-{
-    using namespace cv::gpu::cudev::mog;
-
-    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
-
-    frameSize_ = frameSize;
-    frameType_ = frameType;
-    nframes_ = 0;
-
-    int ch = CV_MAT_CN(frameType);
-    int work_ch = ch;
-
-    // for each gaussian mixture of each pixel bg model we store ...
-    // the mixture weight (w),
-    // the mean (nchannels values) and
-    // the covariance
-    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-    variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
-
-    //make the array for keeping track of the used modes per pixel - all zeros at start
-    bgmodelUsedModes_.create(frameSize_, CV_8UC1);
-    bgmodelUsedModes_.setTo(cv::Scalar::all(0));
-
-    loadConstants(nmixtures_, varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);
-}
-
-void cv::gpu::MOG2_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate, Stream& stream)
-{
-    using namespace cv::gpu::cudev::mog;
-
-    int ch = frame.channels();
-    int work_ch = ch;
-
-    if (nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.channels())
-        initialize(frame.size(), frame.type());
-
-    fgmask.create(frameSize_, CV_8UC1);
-    fgmask.setTo(cv::Scalar::all(0));
-
-    ++nframes_;
-    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(2 * nframes_, history);
-    CV_Assert(learningRate >= 0.0f);
-
-    mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::MOG2_GPU::getBackgroundImage(GpuMat& backgroundImage, Stream& stream) const
-{
-    using namespace cv::gpu::cudev::mog;
-
-    backgroundImage.create(frameSize_, frameType_);
-
-    getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::MOG2_GPU::release()
-{
-    frameSize_ = Size(0, 0);
-    frameType_ = 0;
-    nframes_ = 0;
-
-    weight_.release();
-    variance_.release();
-    mean_.release();
-
-    bgmodelUsedModes_.release();
+    return makePtr<MOGImpl>(history, nmixtures, backgroundRatio, noiseSigma);
 }
 
 #endif
diff --git a/modules/gpubgsegm/src/mog2.cpp b/modules/gpubgsegm/src/mog2.cpp
new file mode 100644
index 000000000..5bfb5a1c4
--- /dev/null
+++ b/modules/gpubgsegm/src/mog2.cpp
@@ -0,0 +1,253 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+
+Ptr<gpu::BackgroundSubtractorMOG2> cv::gpu::createBackgroundSubtractorMOG2(int, double, bool) { throw_no_cuda(); return Ptr<gpu::BackgroundSubtractorMOG2>(); }
+
+#else
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace mog2
+    {
+        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal);
+        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
+        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
+    }
+}}}
+
+namespace
+{
+    // default parameters of gaussian background detection algorithm
+    const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2
+    const float defaultVarThreshold = 4.0f * 4.0f;
+    const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
+    const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
+    const float defaultVarThresholdGen = 3.0f * 3.0f;
+    const float defaultVarInit = 15.0f; // initial variance for new components
+    const float defaultVarMax = 5.0f * defaultVarInit;
+    const float defaultVarMin = 4.0f;
+
+    // additional parameters
+    const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
+    const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+    const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation
+
+    class MOG2Impl : public gpu::BackgroundSubtractorMOG2
+    {
+    public:
+        MOG2Impl(int history, double varThreshold, bool detectShadows);
+
+        void apply(InputArray image, OutputArray fgmask, double learningRate=-1);
+        void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream);
+
+        void getBackgroundImage(OutputArray backgroundImage) const;
+        void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const;
+
+        int getHistory() const { return history_; }
+        void setHistory(int history) { history_ = history; }
+
+        int getNMixtures() const { return nmixtures_; }
+        void setNMixtures(int nmixtures) { nmixtures_ = nmixtures; }
+
+        double getBackgroundRatio() const { return backgroundRatio_; }
+        void setBackgroundRatio(double ratio) { backgroundRatio_ = (float) ratio; }
+
+        double getVarThreshold() const { return varThreshold_; }
+        void setVarThreshold(double varThreshold) { varThreshold_ = (float) varThreshold; }
+
+        double getVarThresholdGen() const { return varThresholdGen_; }
+        void setVarThresholdGen(double varThresholdGen) { varThresholdGen_ = (float) varThresholdGen; }
+
+        double getVarInit() const { return varInit_; }
+        void setVarInit(double varInit) { varInit_ = (float) varInit; }
+
+        double getVarMin() const { return varMin_; }
+        void setVarMin(double varMin) { varMin_ = (float) varMin; }
+
+        double getVarMax() const { return varMax_; }
+        void setVarMax(double varMax) { varMax_ = (float) varMax; }
+
+        double getComplexityReductionThreshold() const { return ct_; }
+        void setComplexityReductionThreshold(double ct) { ct_ = (float) ct; }
+
+        bool getDetectShadows() const { return detectShadows_; }
+        void setDetectShadows(bool detectShadows) { detectShadows_ = detectShadows; }
+
+        int getShadowValue() const { return shadowValue_; }
+        void setShadowValue(int value) { shadowValue_ = (uchar) value; }
+
+        double getShadowThreshold() const { return shadowThreshold_; }
+        void setShadowThreshold(double threshold) { shadowThreshold_ = (float) threshold; }
+
+    private:
+        void initialize(Size frameSize, int frameType);
+
+        int history_;
+        int nmixtures_;
+        float backgroundRatio_;
+        float varThreshold_;
+        float varThresholdGen_;
+        float varInit_;
+        float varMin_;
+        float varMax_;
+        float ct_;
+        bool detectShadows_;
+        uchar shadowValue_;
+        float shadowThreshold_;
+
+        Size frameSize_;
+        int frameType_;
+        int nframes_;
+
+        GpuMat weight_;
+        GpuMat variance_;
+        GpuMat mean_;
+
+        //keep track of number of modes per pixel
+        GpuMat bgmodelUsedModes_;
+    };
+
+    MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) :
+        frameSize_(0, 0), frameType_(0), nframes_(0)
+    {
+        history_ = history > 0 ? history : defaultHistory;
+        varThreshold_ = varThreshold > 0 ? (float) varThreshold : defaultVarThreshold;
+        detectShadows_ = detectShadows;
+
+        nmixtures_ = defaultNMixtures;
+        backgroundRatio_ = defaultBackgroundRatio;
+        varInit_ = defaultVarInit;
+        varMax_ = defaultVarMax;
+        varMin_ = defaultVarMin;
+        varThresholdGen_ = defaultVarThresholdGen;
+        ct_ = defaultCT;
+        shadowValue_ =  defaultShadowValue;
+        shadowThreshold_ = defaultShadowThreshold;
+    }
+
+    void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate)
+    {
+        apply(image, fgmask, learningRate, Stream::Null());
+    }
+
+    void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::mog2;
+
+        GpuMat frame = _frame.getGpuMat();
+
+        int ch = frame.channels();
+        int work_ch = ch;
+
+        if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels())
+            initialize(frame.size(), frame.type());
+
+        _fgmask.create(frameSize_, CV_8UC1);
+        GpuMat fgmask = _fgmask.getGpuMat();
+
+        fgmask.setTo(Scalar::all(0), stream);
+
+        ++nframes_;
+        learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(2 * nframes_, history_);
+        CV_Assert( learningRate >= 0 );
+
+        mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_,
+                 (float) learningRate, static_cast<float>(-learningRate * ct_), detectShadows_, StreamAccessor::getStream(stream));
+    }
+
+    void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const
+    {
+        getBackgroundImage(backgroundImage, Stream::Null());
+    }
+
+    void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const
+    {
+        using namespace cv::gpu::cudev::mog2;
+
+        _backgroundImage.create(frameSize_, frameType_);
+        GpuMat backgroundImage = _backgroundImage.getGpuMat();
+
+        getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream));
+    }
+
+    void MOG2Impl::initialize(cv::Size frameSize, int frameType)
+    {
+        using namespace cv::gpu::cudev::mog2;
+
+        CV_Assert( frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4 );
+
+        frameSize_ = frameSize;
+        frameType_ = frameType;
+        nframes_ = 0;
+
+        int ch = CV_MAT_CN(frameType);
+        int work_ch = ch;
+
+        // for each gaussian mixture of each pixel bg model we store ...
+        // the mixture weight (w),
+        // the mean (nchannels values) and
+        // the covariance
+        weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+        variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+        mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+
+        //make the array for keeping track of the used modes per pixel - all zeros at start
+        bgmodelUsedModes_.create(frameSize_, CV_8UC1);
+        bgmodelUsedModes_.setTo(Scalar::all(0));
+
+        loadConstants(nmixtures_, varThreshold_, backgroundRatio_, varThresholdGen_, varInit_, varMin_, varMax_, shadowThreshold_, shadowValue_);
+    }
+}
+
+Ptr<gpu::BackgroundSubtractorMOG2> cv::gpu::createBackgroundSubtractorMOG2(int history, double varThreshold, bool detectShadows)
+{
+    return makePtr<MOG2Impl>(history, varThreshold, detectShadows);
+}
+
+#endif
diff --git a/modules/gpubgsegm/src/precomp.cpp b/modules/gpubgsegm/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpubgsegm/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpubgsegm/src/precomp.hpp b/modules/gpubgsegm/src/precomp.hpp
index 5f120961b..c61cc08bb 100644
--- a/modules/gpubgsegm/src/precomp.hpp
+++ b/modules/gpubgsegm/src/precomp.hpp
@@ -46,10 +46,21 @@
 #include <limits>
 
 #include "opencv2/gpubgsegm.hpp"
-#include "opencv2/gpuarithm.hpp"
-#include "opencv2/gpufilters.hpp"
-#include "opencv2/gpuimgproc.hpp"
 
 #include "opencv2/core/private.gpu.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUARITHM
+#  include "opencv2/gpuarithm.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+#  include "opencv2/gpufilters.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUIMGPROC
+#  include "opencv2/gpuimgproc.hpp"
+#endif
+
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpubgsegm/test/test_bgsegm.cpp b/modules/gpubgsegm/test/test_bgsegm.cpp
index a5d187b04..3a287d1bb 100644
--- a/modules/gpubgsegm/test/test_bgsegm.cpp
+++ b/modules/gpubgsegm/test/test_bgsegm.cpp
@@ -41,7 +41,10 @@
 //M*/
 
 #include "test_precomp.hpp"
-#include "opencv2/legacy.hpp"
+
+#ifdef HAVE_OPENCV_LEGACY
+#  include "opencv2/legacy.hpp"
+#endif
 
 #ifdef HAVE_CUDA
 
@@ -50,6 +53,7 @@ using namespace cvtest;
 #if defined(HAVE_XINE)         || \
     defined(HAVE_GSTREAMER)    || \
     defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
     defined(HAVE_AVFOUNDATION) || \
     defined(HAVE_FFMPEG)       || \
     defined(WIN32) /* assume that we have ffmpeg */
@@ -62,21 +66,20 @@ using namespace cvtest;
 //////////////////////////////////////////////////////
 // FGDStatModel
 
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT && defined(HAVE_OPENCV_LEGACY)
 
 namespace cv
 {
-    template<> void Ptr<CvBGStatModel>::delete_obj()
+    template<> void DefaultDeleter<CvBGStatModel>::operator ()(CvBGStatModel* obj) const
     {
         cvReleaseBGStatModel(&obj);
     }
 }
 
-PARAM_TEST_CASE(FGDStatModel, cv::gpu::DeviceInfo, std::string, Channels)
+PARAM_TEST_CASE(FGDStatModel, cv::gpu::DeviceInfo, std::string)
 {
     cv::gpu::DeviceInfo devInfo;
     std::string inputFile;
-    int out_cn;
 
     virtual void SetUp()
     {
@@ -84,8 +87,6 @@ PARAM_TEST_CASE(FGDStatModel, cv::gpu::DeviceInfo, std::string, Channels)
         cv::gpu::setDevice(devInfo.deviceID());
 
         inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
-
-        out_cn = GET_PARAM(2);
     }
 };
 
@@ -102,15 +103,10 @@ GPU_TEST_P(FGDStatModel, Update)
     cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
 
     cv::gpu::GpuMat d_frame(frame);
-    cv::gpu::FGDStatModel d_model(out_cn);
-    d_model.create(d_frame);
-
-    cv::Mat h_background;
-    cv::Mat h_foreground;
-    cv::Mat h_background3;
-
-    cv::Mat backgroundDiff;
-    cv::Mat foregroundDiff;
+    cv::Ptr<cv::gpu::BackgroundSubtractorFGD> d_fgd = cv::gpu::createBackgroundSubtractorFGD();
+    cv::gpu::GpuMat d_foreground, d_background;
+    std::vector< std::vector<cv::Point> > foreground_regions;
+    d_fgd->apply(d_frame, d_foreground);
 
     for (int i = 0; i < 5; ++i)
     {
@@ -121,32 +117,23 @@ GPU_TEST_P(FGDStatModel, Update)
         int gold_count = cvUpdateBGStatModel(&ipl_frame, model);
 
         d_frame.upload(frame);
-
-        int count = d_model.update(d_frame);
-
-        ASSERT_EQ(gold_count, count);
+        d_fgd->apply(d_frame, d_foreground);
+        d_fgd->getBackgroundImage(d_background);
+        d_fgd->getForegroundRegions(foreground_regions);
+        int count = (int) foreground_regions.size();
 
         cv::Mat gold_background = cv::cvarrToMat(model->background);
         cv::Mat gold_foreground = cv::cvarrToMat(model->foreground);
 
-        if (out_cn == 3)
-            d_model.background.download(h_background3);
-        else
-        {
-            d_model.background.download(h_background);
-            cv::cvtColor(h_background, h_background3, cv::COLOR_BGRA2BGR);
-        }
-        d_model.foreground.download(h_foreground);
-
-        ASSERT_MAT_NEAR(gold_background, h_background3, 1.0);
-        ASSERT_MAT_NEAR(gold_foreground, h_foreground, 0.0);
+        ASSERT_MAT_NEAR(gold_background, d_background, 1.0);
+        ASSERT_MAT_NEAR(gold_foreground, d_foreground, 0.0);
+        ASSERT_EQ(gold_count, count);
     }
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_BgSegm, FGDStatModel, testing::Combine(
     ALL_DEVICES,
-    testing::Values(std::string("768x576.avi")),
-    testing::Values(Channels(3), Channels(4))));
+    testing::Values(std::string("768x576.avi"))));
 
 #endif
 
@@ -193,7 +180,7 @@ GPU_TEST_P(MOG, Update)
     cap >> frame;
     ASSERT_FALSE(frame.empty());
 
-    cv::gpu::MOG_GPU mog;
+    cv::Ptr<cv::BackgroundSubtractorMOG> mog = cv::gpu::createBackgroundSubtractorMOG();
     cv::gpu::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
 
     cv::Ptr<cv::BackgroundSubtractorMOG> mog_gold = cv::createBackgroundSubtractorMOG();
@@ -211,7 +198,7 @@ GPU_TEST_P(MOG, Update)
             cv::swap(temp, frame);
         }
 
-        mog(loadMat(frame, useRoi), foreground, (float)learningRate);
+        mog->apply(loadMat(frame, useRoi), foreground, learningRate);
 
         mog_gold->apply(frame, foreground_gold, learningRate);
 
@@ -267,8 +254,8 @@ GPU_TEST_P(MOG2, Update)
     cap >> frame;
     ASSERT_FALSE(frame.empty());
 
-    cv::gpu::MOG2_GPU mog2;
-    mog2.bShadowDetection = detectShadow;
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = cv::gpu::createBackgroundSubtractorMOG2();
+    mog2->setDetectShadows(detectShadow);
     cv::gpu::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
 
     cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = cv::createBackgroundSubtractorMOG2();
@@ -287,7 +274,7 @@ GPU_TEST_P(MOG2, Update)
             cv::swap(temp, frame);
         }
 
-        mog2(loadMat(frame, useRoi), foreground);
+        mog2->apply(loadMat(frame, useRoi), foreground);
 
         mog2_gold->apply(frame, foreground_gold);
 
@@ -312,8 +299,8 @@ GPU_TEST_P(MOG2, getBackgroundImage)
 
     cv::Mat frame;
 
-    cv::gpu::MOG2_GPU mog2;
-    mog2.bShadowDetection = detectShadow;
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = cv::gpu::createBackgroundSubtractorMOG2();
+    mog2->setDetectShadows(detectShadow);
     cv::gpu::GpuMat foreground;
 
     cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = cv::createBackgroundSubtractorMOG2();
@@ -325,13 +312,13 @@ GPU_TEST_P(MOG2, getBackgroundImage)
         cap >> frame;
         ASSERT_FALSE(frame.empty());
 
-        mog2(loadMat(frame, useRoi), foreground);
+        mog2->apply(loadMat(frame, useRoi), foreground);
 
         mog2_gold->apply(frame, foreground_gold);
     }
 
     cv::gpu::GpuMat background = createMat(frame.size(), frame.type(), useRoi);
-    mog2.getBackgroundImage(background);
+    mog2->getBackgroundImage(background);
 
     cv::Mat background_gold;
     mog2_gold->getBackgroundImage(background_gold);
@@ -372,16 +359,15 @@ GPU_TEST_P(GMG, Accuracy)
     cv::Mat frame = randomMat(size, type, 0, 100);
     cv::gpu::GpuMat d_frame = loadMat(frame, useRoi);
 
-    cv::gpu::GMG_GPU gmg;
-    gmg.numInitializationFrames = 5;
-    gmg.smoothingRadius = 0;
-    gmg.initialize(d_frame.size(), 0, 255);
+    cv::Ptr<cv::BackgroundSubtractorGMG> gmg = cv::gpu::createBackgroundSubtractorGMG();
+    gmg->setNumFrames(5);
+    gmg->setSmoothingRadius(0);
 
     cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi);
 
-    for (int i = 0; i < gmg.numInitializationFrames; ++i)
+    for (int i = 0; i < gmg->getNumFrames(); ++i)
     {
-        gmg(d_frame, d_fgmask);
+        gmg->apply(d_frame, d_fgmask);
 
         // fgmask should be entirely background during training
         ASSERT_MAT_NEAR(zeros, d_fgmask, 0);
@@ -389,7 +375,7 @@ GPU_TEST_P(GMG, Accuracy)
 
     frame = randomMat(size, type, 160, 255);
     d_frame = loadMat(frame, useRoi);
-    gmg(d_frame, d_fgmask);
+    gmg->apply(d_frame, d_fgmask);
 
     // now fgmask should be entirely foreground
     ASSERT_MAT_NEAR(fullfg, d_fgmask, 0);
diff --git a/modules/gpubgsegm/test/test_precomp.hpp b/modules/gpubgsegm/test/test_precomp.hpp
index 0249f5be9..783e9e64c 100644
--- a/modules/gpubgsegm/test/test_precomp.hpp
+++ b/modules/gpubgsegm/test/test_precomp.hpp
@@ -59,4 +59,7 @@
 #include "opencv2/gpubgsegm.hpp"
 #include "opencv2/video.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+#include "cvconfig.h"
+
 #endif
diff --git a/modules/gpucodec/CMakeLists.txt b/modules/gpucodec/CMakeLists.txt
index b1e0daeaa..7c3be5c0e 100644
--- a/modules/gpucodec/CMakeLists.txt
+++ b/modules/gpucodec/CMakeLists.txt
@@ -11,7 +11,7 @@ ocv_add_module(gpucodec opencv_highgui)
 ocv_module_include_directories()
 ocv_glob_module_sources()
 
-set(extra_libs ${HIGHGUI_LIBRARIES})
+set(extra_libs "")
 
 if(HAVE_NVCUVID)
   list(APPEND extra_libs ${CUDA_CUDA_LIBRARY} ${CUDA_nvcuvid_LIBRARY})
diff --git a/modules/gpucodec/doc/videodec.rst b/modules/gpucodec/doc/videodec.rst
index e2da30559..dbf895121 100644
--- a/modules/gpucodec/doc/videodec.rst
+++ b/modules/gpucodec/doc/videodec.rst
@@ -11,6 +11,9 @@ Video reader interface.
 
 .. ocv:class:: gpucodec::VideoReader
 
+.. note::
+
+   * An example on how to use the videoReader class can be found at opencv_source_code/samples/gpu/video_reader.cpp
 
 
 gpucodec::VideoReader::nextFrame
diff --git a/modules/gpucodec/doc/videoenc.rst b/modules/gpucodec/doc/videoenc.rst
index 739ec0d70..d4a6814a0 100644
--- a/modules/gpucodec/doc/videoenc.rst
+++ b/modules/gpucodec/doc/videoenc.rst
@@ -15,6 +15,9 @@ The implementation uses H264 video codec.
 
 .. note:: Currently only Windows platform is supported.
 
+.. note::
+
+   * An example on how to use the videoWriter class can be found at opencv_source_code/samples/gpu/video_writer.cpp
 
 
 gpucodec::VideoWriter::write
diff --git a/modules/gpucodec/perf/perf_main.cpp b/modules/gpucodec/perf/perf_main.cpp
index 2f4110b87..783965357 100644
--- a/modules/gpucodec/perf/perf_main.cpp
+++ b/modules/gpucodec/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpucodec, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpucodec)
diff --git a/modules/gpucodec/perf/perf_precomp.cpp b/modules/gpucodec/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpucodec/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpucodec/perf/perf_video.cpp b/modules/gpucodec/perf/perf_video.cpp
index f389605d0..c4ab227c8 100644
--- a/modules/gpucodec/perf/perf_video.cpp
+++ b/modules/gpucodec/perf/perf_video.cpp
@@ -50,6 +50,7 @@ using namespace perf;
 #if defined(HAVE_XINE)         || \
     defined(HAVE_GSTREAMER)    || \
     defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
     defined(HAVE_AVFOUNDATION) || \
     defined(HAVE_FFMPEG)       || \
     defined(WIN32) /* assume that we have ffmpeg */
diff --git a/modules/gpucodec/src/ffmpeg_video_source.cpp b/modules/gpucodec/src/ffmpeg_video_source.cpp
index b5a73875b..86d1b5931 100644
--- a/modules/gpucodec/src/ffmpeg_video_source.cpp
+++ b/modules/gpucodec/src/ffmpeg_video_source.cpp
@@ -45,10 +45,6 @@
 
 #ifdef HAVE_NVCUVID
 
-#if defined(HAVE_FFMPEG) && defined(BUILD_SHARED_LIBS) && !defined(WIN32)
-    #include "../src/cap_ffmpeg_impl.hpp"
-#endif
-
 using namespace cv;
 using namespace cv::gpucodec;
 using namespace cv::gpucodec::detail;
diff --git a/modules/gpucodec/src/precomp.cpp b/modules/gpucodec/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpucodec/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpucodec/src/thread.cpp b/modules/gpucodec/src/thread.cpp
index b936d8e21..6784a1c8e 100644
--- a/modules/gpucodec/src/thread.cpp
+++ b/modules/gpucodec/src/thread.cpp
@@ -167,9 +167,4 @@ void cv::gpucodec::detail::Thread::sleep(int ms)
 #endif
 }
 
-template <> void cv::Ptr<cv::gpucodec::detail::Thread::Impl>::delete_obj()
-{
-    if (obj) delete obj;
-}
-
 #endif // HAVE_NVCUVID
diff --git a/modules/gpucodec/src/thread.hpp b/modules/gpucodec/src/thread.hpp
index ccda5b5c7..c69367f96 100644
--- a/modules/gpucodec/src/thread.hpp
+++ b/modules/gpucodec/src/thread.hpp
@@ -67,8 +67,4 @@ private:
 
 }}}
 
-namespace cv {
-    template <> void Ptr<cv::gpucodec::detail::Thread::Impl>::delete_obj();
-}
-
 #endif // __THREAD_WRAPPERS_HPP__
diff --git a/modules/gpucodec/src/video_reader.cpp b/modules/gpucodec/src/video_reader.cpp
index 67e9cd107..3e758087a 100644
--- a/modules/gpucodec/src/video_reader.cpp
+++ b/modules/gpucodec/src/video_reader.cpp
@@ -58,12 +58,14 @@ namespace cv { namespace gpu { namespace cudev
     void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream = 0);
 }}}
 
+using namespace cv::gpucodec::detail;
+
 namespace
 {
     class VideoReaderImpl : public VideoReader
     {
     public:
-        explicit VideoReaderImpl(const Ptr<detail::VideoSource>& source);
+        explicit VideoReaderImpl(const Ptr<VideoSource>& source);
         ~VideoReaderImpl();
 
         bool nextFrame(OutputArray frame);
@@ -71,11 +73,11 @@ namespace
         FormatInfo format() const;
 
     private:
-        Ptr<detail::VideoSource> videoSource_;
+        Ptr<VideoSource> videoSource_;
 
-        Ptr<detail::FrameQueue> frameQueue_;
-        Ptr<detail::VideoDecoder> videoDecoder_;
-        Ptr<detail::VideoParser> videoParser_;
+        Ptr<FrameQueue> frameQueue_;
+        Ptr<VideoDecoder> videoDecoder_;
+        Ptr<VideoParser> videoParser_;
 
         CUvideoctxlock lock_;
 
@@ -87,7 +89,7 @@ namespace
         return videoSource_->format();
     }
 
-    VideoReaderImpl::VideoReaderImpl(const Ptr<detail::VideoSource>& source) :
+    VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source) :
         videoSource_(source),
         lock_(0)
     {
@@ -99,9 +101,9 @@ namespace
         cuSafeCall( cuCtxGetCurrent(&ctx) );
         cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) );
 
-        frameQueue_ = new detail::FrameQueue;
-        videoDecoder_ = new detail::VideoDecoder(videoSource_->format(), lock_);
-        videoParser_ = new detail::VideoParser(videoDecoder_, frameQueue_);
+        frameQueue_.reset(new FrameQueue);
+        videoDecoder_.reset(new VideoDecoder(videoSource_->format(), lock_));
+        videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_));
 
         videoSource_->setVideoParser(videoParser_);
         videoSource_->start();
@@ -159,7 +161,7 @@ namespace
                     return false;
 
                 // Wait a bit
-                detail::Thread::sleep(1);
+                Thread::sleep(1);
             }
 
             bool isProgressive = displayInfo.progressive_frame != 0;
@@ -212,25 +214,25 @@ Ptr<VideoReader> cv::gpucodec::createVideoReader(const String& filename)
 {
     CV_Assert( !filename.empty() );
 
-    Ptr<detail::VideoSource> videoSource;
+    Ptr<VideoSource> videoSource;
 
     try
     {
-        videoSource = new detail::CuvidVideoSource(filename);
+        videoSource.reset(new CuvidVideoSource(filename));
     }
     catch (...)
     {
-        Ptr<RawVideoSource> source(new detail::FFmpegVideoSource(filename));
-        videoSource = new detail::RawVideoSourceWrapper(source);
+        Ptr<RawVideoSource> source(new FFmpegVideoSource(filename));
+        videoSource.reset(new RawVideoSourceWrapper(source));
     }
 
-    return new VideoReaderImpl(videoSource);
+    return makePtr<VideoReaderImpl>(videoSource);
 }
 
 Ptr<VideoReader> cv::gpucodec::createVideoReader(const Ptr<RawVideoSource>& source)
 {
-    Ptr<detail::VideoSource> videoSource(new detail::RawVideoSourceWrapper(source));
-    return new VideoReaderImpl(videoSource);
+    Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source));
+    return makePtr<VideoReaderImpl>(videoSource);
 }
 
 #endif // HAVE_NVCUVID
diff --git a/modules/gpucodec/src/video_source.cpp b/modules/gpucodec/src/video_source.cpp
index ce6a1bd8c..e4241cf2d 100644
--- a/modules/gpucodec/src/video_source.cpp
+++ b/modules/gpucodec/src/video_source.cpp
@@ -69,7 +69,7 @@ void cv::gpucodec::detail::RawVideoSourceWrapper::start()
 {
     stop_ = false;
     hasError_ = false;
-    thread_ = new Thread(readLoop, this);
+    thread_.reset(new Thread(readLoop, this));
 }
 
 void cv::gpucodec::detail::RawVideoSourceWrapper::stop()
diff --git a/modules/gpucodec/src/video_writer.cpp b/modules/gpucodec/src/video_writer.cpp
index 6ffb7c12d..a1484c2b2 100644
--- a/modules/gpucodec/src/video_writer.cpp
+++ b/modules/gpucodec/src/video_writer.cpp
@@ -908,12 +908,12 @@ Ptr<VideoWriter> cv::gpucodec::createVideoWriter(const String& fileName, Size fr
 
 Ptr<VideoWriter> cv::gpucodec::createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, SurfaceFormat format)
 {
-    return new VideoWriterImpl(encoderCallback, frameSize, fps, format);
+    return makePtr<VideoWriterImpl>(encoderCallback, frameSize, fps, format);
 }
 
 Ptr<VideoWriter> cv::gpucodec::createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format)
 {
-    return new VideoWriterImpl(encoderCallback, frameSize, fps, params, format);
+    return makePtr<VideoWriterImpl>(encoderCallback, frameSize, fps, params, format);
 }
 
 #endif // !defined HAVE_CUDA || !defined WIN32
diff --git a/modules/gpucodec/test/test_precomp.cpp b/modules/gpucodec/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpucodec/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpucodec/test/test_precomp.hpp b/modules/gpucodec/test/test_precomp.hpp
index 0dc79935d..95e70d46d 100644
--- a/modules/gpucodec/test/test_precomp.hpp
+++ b/modules/gpucodec/test/test_precomp.hpp
@@ -57,4 +57,6 @@
 #include "opencv2/gpucodec.hpp"
 #include "opencv2/highgui.hpp"
 
+#include "cvconfig.h"
+
 #endif
diff --git a/modules/gpufeatures2d/perf/perf_features2d.cpp b/modules/gpufeatures2d/perf/perf_features2d.cpp
index 9396ba290..fd2852633 100644
--- a/modules/gpufeatures2d/perf/perf_features2d.cpp
+++ b/modules/gpufeatures2d/perf/perf_features2d.cpp
@@ -123,7 +123,7 @@ PERF_TEST_P(Image_NFeatures, ORB,
 
         sortKeyPoints(gpu_keypoints, gpu_descriptors);
 
-        SANITY_CHECK_KEYPOINTS(gpu_keypoints);
+        SANITY_CHECK_KEYPOINTS(gpu_keypoints, 1e-4);
         SANITY_CHECK(gpu_descriptors);
     }
     else
diff --git a/modules/gpufeatures2d/perf/perf_main.cpp b/modules/gpufeatures2d/perf/perf_main.cpp
index 0fd79fde3..5e7fb1b8b 100644
--- a/modules/gpufeatures2d/perf/perf_main.cpp
+++ b/modules/gpufeatures2d/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpufeatures2d, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpufeatures2d)
diff --git a/modules/gpufeatures2d/perf/perf_precomp.cpp b/modules/gpufeatures2d/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpufeatures2d/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpufeatures2d/src/cuda/orb.cu b/modules/gpufeatures2d/src/cuda/orb.cu
index 1e8864801..571ca12bd 100644
--- a/modules/gpufeatures2d/src/cuda/orb.cu
+++ b/modules/gpufeatures2d/src/cuda/orb.cu
@@ -197,8 +197,8 @@ namespace cv { namespace gpu { namespace cudev
                 if (threadIdx.x == 0)
                 {
                     float kp_dir = ::atan2f((float)m_01, (float)m_10);
-                    kp_dir += (kp_dir < 0) * (2.0f * CV_PI);
-                    kp_dir *= 180.0f / CV_PI;
+                    kp_dir += (kp_dir < 0) * (2.0f * CV_PI_F);
+                    kp_dir *= 180.0f / CV_PI_F;
 
                     angle[ptidx] = kp_dir;
                 }
@@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace cudev
             if (ptidx < npoints && descidx < dsize)
             {
                 float angle = angle_[ptidx];
-                angle *= (float)(CV_PI / 180.f);
+                angle *= (float)(CV_PI_F / 180.f);
 
                 float sina, cosa;
                 ::sincosf(angle, &sina, &cosa);
diff --git a/modules/gpufeatures2d/src/precomp.cpp b/modules/gpufeatures2d/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpufeatures2d/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpufeatures2d/test/test_precomp.cpp b/modules/gpufeatures2d/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpufeatures2d/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpufeatures2d/test/test_precomp.hpp b/modules/gpufeatures2d/test/test_precomp.hpp
index 7725d3f3d..4ce8f7849 100644
--- a/modules/gpufeatures2d/test/test_precomp.hpp
+++ b/modules/gpufeatures2d/test/test_precomp.hpp
@@ -57,4 +57,6 @@
 #include "opencv2/gpufeatures2d.hpp"
 #include "opencv2/features2d.hpp"
 
+#include "cvconfig.h"
+
 #endif
diff --git a/modules/gpufilters/doc/filtering.rst b/modules/gpufilters/doc/filtering.rst
index 925b05f2c..cb824bf2a 100644
--- a/modules/gpufilters/doc/filtering.rst
+++ b/modules/gpufilters/doc/filtering.rst
@@ -5,7 +5,9 @@ Image Filtering
 
 Functions and classes described in this section are used to perform various linear or non-linear filtering operations on 2D images.
 
+.. note::
 
+   * An example containing all basic morphology operators like erode and dilate can be found at opencv_source_code/samples/gpu/morphology.cpp
 
 gpu::Filter
 -----------
diff --git a/modules/gpufilters/perf/perf_main.cpp b/modules/gpufilters/perf/perf_main.cpp
index b5a3eda40..44b3129e1 100644
--- a/modules/gpufilters/perf/perf_main.cpp
+++ b/modules/gpufilters/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpufilters, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpufilters)
diff --git a/modules/gpufilters/perf/perf_precomp.cpp b/modules/gpufilters/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpufilters/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp
index 7f02bdac5..0043174cb 100644
--- a/modules/gpufilters/src/filtering.cpp
+++ b/modules/gpufilters/src/filtering.cpp
@@ -169,7 +169,7 @@ Ptr<Filter> cv::gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point
 
     dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType));
 
-    return new NPPBoxFilter(srcType, dstType, ksize, anchor, borderMode, borderVal);
+    return makePtr<NPPBoxFilter>(srcType, dstType, ksize, anchor, borderMode, borderVal);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -230,22 +230,22 @@ namespace
         switch (srcType)
         {
         case CV_8UC1:
-            func_ = cudev::filter2D<uchar, uchar>;
+            func_ = cv::gpu::cudev::filter2D<uchar, uchar>;
             break;
         case CV_8UC4:
-            func_ = cudev::filter2D<uchar4, uchar4>;
+            func_ = cv::gpu::cudev::filter2D<uchar4, uchar4>;
             break;
         case CV_16UC1:
-            func_ = cudev::filter2D<ushort, ushort>;
+            func_ = cv::gpu::cudev::filter2D<ushort, ushort>;
             break;
         case CV_16UC4:
-            func_ = cudev::filter2D<ushort4, ushort4>;
+            func_ = cv::gpu::cudev::filter2D<ushort4, ushort4>;
             break;
         case CV_32FC1:
-            func_ = cudev::filter2D<float, float>;
+            func_ = cv::gpu::cudev::filter2D<float, float>;
             break;
         case CV_32FC4:
-            func_ = cudev::filter2D<float4, float4>;
+            func_ = cv::gpu::cudev::filter2D<float4, float4>;
             break;
         }
     }
@@ -277,7 +277,7 @@ Ptr<Filter> cv::gpu::createLinearFilter(int srcType, int dstType, InputArray ker
 
     dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType));
 
-    return new LinearFilter(srcType, dstType, kernel, anchor, borderMode, borderVal);
+    return makePtr<LinearFilter>(srcType, dstType, kernel, anchor, borderMode, borderVal);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -409,7 +409,7 @@ namespace
         ensureSizeIsEnough(src.size(), bufType_, buf_);
 
         DeviceInfo devInfo;
-        const int cc = devInfo.major() * 10 + devInfo.minor();
+        const int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
 
         cudaStream_t stream = StreamAccessor::getStream(_stream);
 
@@ -428,7 +428,7 @@ Ptr<Filter> cv::gpu::createSeparableLinearFilter(int srcType, int dstType, Input
     if (columnBorderMode < 0)
         columnBorderMode = rowBorderMode;
 
-    return new SeparableLinearFilter(srcType, dstType, rowKernel, columnKernel, anchor, rowBorderMode, columnBorderMode);
+    return makePtr<SeparableLinearFilter>(srcType, dstType, rowKernel, columnKernel, anchor, rowBorderMode, columnBorderMode);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -748,27 +748,27 @@ Ptr<Filter> cv::gpu::createMorphologyFilter(int op, int srcType, InputArray kern
     {
     case MORPH_ERODE:
     case MORPH_DILATE:
-        return new MorphologyFilter(op, srcType, kernel, anchor, iterations);
+        return makePtr<MorphologyFilter>(op, srcType, kernel, anchor, iterations);
         break;
 
     case MORPH_OPEN:
-        return new MorphologyOpenFilter(srcType, kernel, anchor, iterations);
+        return makePtr<MorphologyOpenFilter>(srcType, kernel, anchor, iterations);
         break;
 
     case MORPH_CLOSE:
-        return new MorphologyCloseFilter(srcType, kernel, anchor, iterations);
+        return makePtr<MorphologyCloseFilter>(srcType, kernel, anchor, iterations);
         break;
 
     case MORPH_GRADIENT:
-        return new MorphologyGradientFilter(srcType, kernel, anchor, iterations);
+        return makePtr<MorphologyGradientFilter>(srcType, kernel, anchor, iterations);
         break;
 
     case MORPH_TOPHAT:
-        return new MorphologyTophatFilter(srcType, kernel, anchor, iterations);
+        return makePtr<MorphologyTophatFilter>(srcType, kernel, anchor, iterations);
         break;
 
     case MORPH_BLACKHAT:
-        return new MorphologyBlackhatFilter(srcType, kernel, anchor, iterations);
+        return makePtr<MorphologyBlackhatFilter>(srcType, kernel, anchor, iterations);
         break;
 
     default:
@@ -782,7 +782,7 @@ Ptr<Filter> cv::gpu::createMorphologyFilter(int op, int srcType, InputArray kern
 
 namespace
 {
-    enum
+    enum RankType
     {
         RANK_MAX,
         RANK_MIN
@@ -862,12 +862,12 @@ namespace
 
 Ptr<Filter> cv::gpu::createBoxMaxFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
 {
-    return new NPPRankFilter(RANK_MAX, srcType, ksize, anchor, borderMode, borderVal);
+    return makePtr<NPPRankFilter>(RANK_MAX, srcType, ksize, anchor, borderMode, borderVal);
 }
 
 Ptr<Filter> cv::gpu::createBoxMinFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
 {
-    return new NPPRankFilter(RANK_MIN, srcType, ksize, anchor, borderMode, borderVal);
+    return makePtr<NPPRankFilter>(RANK_MIN, srcType, ksize, anchor, borderMode, borderVal);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -931,7 +931,7 @@ namespace
 
 Ptr<Filter> cv::gpu::createRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
 {
-    return new NppRowSumFilter(srcType, dstType, ksize, anchor, borderMode, borderVal);
+    return makePtr<NppRowSumFilter>(srcType, dstType, ksize, anchor, borderMode, borderVal);
 }
 
 namespace
@@ -992,7 +992,7 @@ namespace
 
 Ptr<Filter> cv::gpu::createColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
 {
-    return new NppColumnSumFilter(srcType, dstType, ksize, anchor, borderMode, borderVal);
+    return makePtr<NppColumnSumFilter>(srcType, dstType, ksize, anchor, borderMode, borderVal);
 }
 
 #endif
diff --git a/modules/gpufilters/src/precomp.cpp b/modules/gpufilters/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpufilters/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpufilters/test/test_precomp.cpp b/modules/gpufilters/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpufilters/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpufilters/test/test_precomp.hpp b/modules/gpufilters/test/test_precomp.hpp
index 95984929f..7dfe713d3 100644
--- a/modules/gpufilters/test/test_precomp.hpp
+++ b/modules/gpufilters/test/test_precomp.hpp
@@ -57,4 +57,6 @@
 #include "opencv2/gpufilters.hpp"
 #include "opencv2/imgproc.hpp"
 
+#include "cvconfig.h"
+
 #endif
diff --git a/modules/gpuimgproc/CMakeLists.txt b/modules/gpuimgproc/CMakeLists.txt
index 3b9bd0725..dfecf1d21 100644
--- a/modules/gpuimgproc/CMakeLists.txt
+++ b/modules/gpuimgproc/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Image Processing")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 /wd4515 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
-ocv_define_module(gpuimgproc opencv_imgproc opencv_gpufilters OPTIONAL opencv_gpuarithm)
+ocv_define_module(gpuimgproc opencv_imgproc OPTIONAL opencv_gpuarithm opencv_gpufilters)
diff --git a/modules/gpuimgproc/doc/color.rst b/modules/gpuimgproc/doc/color.rst
index 70de236ea..b1e8e03be 100644
--- a/modules/gpuimgproc/doc/color.rst
+++ b/modules/gpuimgproc/doc/color.rst
@@ -6,16 +6,16 @@ Color space processing
 
 
 gpu::cvtColor
------------------
+-------------
 Converts an image from one color space to another.
 
-.. ocv:function:: void gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::cvtColor(InputArray src, OutputArray dst, int code, int dcn = 0, Stream& stream = Stream::Null())
 
     :param src: Source image with  ``CV_8U`` , ``CV_16U`` , or  ``CV_32F`` depth and 1, 3, or 4 channels.
 
-    :param dst: Destination image with the same size and depth as  ``src`` .
+    :param dst: Destination image.
 
-    :param code: Color space conversion code. For details, see  :ocv:func:`cvtColor` . Conversion to/from Luv and Bayer color spaces is not supported.
+    :param code: Color space conversion code. For details, see  :ocv:func:`cvtColor` .
 
     :param dcn: Number of channels in the destination image. If the parameter is 0, the number of the channels is derived automatically from  ``src`` and the  ``code`` .
 
@@ -27,11 +27,45 @@ Converts an image from one color space to another.
 
 
 
+gpu::demosaicing
+----------------
+Converts an image from Bayer pattern to RGB or grayscale.
+
+.. ocv:function:: void gpu::demosaicing(InputArray src, OutputArray dst, int code, int dcn = -1, Stream& stream = Stream::Null())
+
+    :param src: Source image (8-bit or 16-bit single channel).
+
+    :param dst: Destination image.
+
+    :param code: Color space conversion code (see the description below).
+
+    :param dcn: Number of channels in the destination image. If the parameter is 0, the number of the channels is derived automatically from  ``src`` and the  ``code`` .
+
+    :param stream: Stream for the asynchronous version.
+
+The function can do the following transformations:
+
+* Demosaicing using bilinear interpolation
+
+    * ``COLOR_BayerBG2GRAY`` , ``COLOR_BayerGB2GRAY`` , ``COLOR_BayerRG2GRAY`` , ``COLOR_BayerGR2GRAY``
+
+    * ``COLOR_BayerBG2BGR`` , ``COLOR_BayerGB2BGR`` , ``COLOR_BayerRG2BGR`` , ``COLOR_BayerGR2BGR``
+
+* Demosaicing using Malvar-He-Cutler algorithm ([MHT2011]_)
+
+    * ``COLOR_BayerBG2GRAY_MHT`` , ``COLOR_BayerGB2GRAY_MHT`` , ``COLOR_BayerRG2GRAY_MHT`` , ``COLOR_BayerGR2GRAY_MHT``
+
+    * ``COLOR_BayerBG2BGR_MHT`` , ``COLOR_BayerGB2BGR_MHT`` , ``COLOR_BayerRG2BGR_MHT`` , ``COLOR_BayerGR2BGR_MHT``
+
+.. seealso:: :ocv:func:`cvtColor`
+
+
+
 gpu::swapChannels
 -----------------
 Exchanges the color channels of an image in-place.
 
-.. ocv:function:: void gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::swapChannels(InputOutputArray image, const int dstOrder[4], Stream& stream = Stream::Null())
 
     :param image: Source image. Supports only ``CV_8UC4`` type.
 
@@ -43,11 +77,27 @@ The methods support arbitrary permutations of the original channels, including r
 
 
 
+gpu::gammaCorrection
+--------------------
+Routines for correcting image color gamma.
+
+.. ocv:function:: void gpu::gammaCorrection(InputArray src, OutputArray dst, bool forward = true, Stream& stream = Stream::Null())
+
+    :param src: Source image (3- or 4-channel 8 bit).
+
+    :param dst: Destination image.
+
+    :param forward: ``true`` for forward gamma correction or ``false`` for inverse gamma correction.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
 gpu::alphaComp
--------------------
+--------------
 Composites two images using alpha opacity values contained in each image.
 
-.. ocv:function:: void gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::alphaComp(InputArray img1, InputArray img2, OutputArray dst, int alpha_op, Stream& stream = Stream::Null())
 
     :param img1: First image. Supports ``CV_8UC4`` , ``CV_16UC4`` , ``CV_32SC4`` and ``CV_32FC4`` types.
 
@@ -72,3 +122,10 @@ Composites two images using alpha opacity values contained in each image.
             * **ALPHA_PREMUL**
 
     :param stream: Stream for the asynchronous version.
+
+.. note::
+
+   * An example demonstrating the use of alphaComp can be found at opencv_source_code/samples/gpu/alpha_comp.cpp
+
+
+.. [MHT2011] Pascal Getreuer, Malvar-He-Cutler Linear Image Demosaicking, Image Processing On Line, 2011
diff --git a/modules/gpuimgproc/doc/feature_detection.rst b/modules/gpuimgproc/doc/feature_detection.rst
index c38b8c200..fc5e592cf 100644
--- a/modules/gpuimgproc/doc/feature_detection.rst
+++ b/modules/gpuimgproc/doc/feature_detection.rst
@@ -5,15 +5,41 @@ Feature Detection
 
 
 
-gpu::cornerHarris
----------------------
-Computes the Harris cornerness criteria at each image pixel.
+gpu::CornernessCriteria
+-----------------------
+.. ocv:class:: gpu::CornernessCriteria : public Algorithm
 
-.. ocv:function:: void gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101)
+Base class for Cornerness Criteria computation. ::
 
-    :param src: Source image. Only  ``CV_8UC1`` and  ``CV_32FC1`` images are supported for now.
+    class CV_EXPORTS CornernessCriteria : public Algorithm
+    {
+    public:
+        virtual void compute(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0;
+    };
 
-    :param dst: Destination image containing cornerness values. It has the same size as ``src`` and ``CV_32FC1`` type.
+
+
+gpu::CornernessCriteria::compute
+--------------------------------
+Computes the cornerness criteria at each image pixel.
+
+.. ocv:function:: void gpu::CornernessCriteria::compute(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source image.
+
+    :param dst: Destination image containing cornerness values. It will have the same size as ``src`` and ``CV_32FC1`` type.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::createHarrisCorner
+-----------------------
+Creates implementation for Harris cornerness criteria.
+
+.. ocv:function:: Ptr<CornernessCriteria> gpu::createHarrisCorner(int srcType, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101)
+
+    :param srcType: Input source type. Only  ``CV_8UC1`` and  ``CV_32FC1`` are supported for now.
 
     :param blockSize: Neighborhood size.
 
@@ -27,55 +53,70 @@ Computes the Harris cornerness criteria at each image pixel.
 
 
 
-gpu::cornerMinEigenVal
---------------------------
-Computes the minimum eigen value of a 2x2 derivative covariation matrix at each pixel (the cornerness criteria).
+gpu::createMinEigenValCorner
+----------------------------
+Creates implementation for the minimum eigen value of a 2x2 derivative covariation matrix (the cornerness criteria).
 
-.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101)
+.. ocv:function:: Ptr<CornernessCriteria> gpu::createMinEigenValCorner(int srcType, int blockSize, int ksize, int borderType = BORDER_REFLECT101)
 
-.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101)
-
-.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null())
-
-    :param src: Source image. Only  ``CV_8UC1`` and  ``CV_32FC1`` images are supported for now.
-
-    :param dst: Destination image containing cornerness values. The size is the same. The type is  ``CV_32FC1`` .
+    :param srcType: Input source type. Only  ``CV_8UC1`` and  ``CV_32FC1`` are supported for now.
 
     :param blockSize: Neighborhood size.
 
     :param ksize: Aperture parameter for the Sobel operator.
 
-    :param borderType: Pixel extrapolation method. Only ``BORDER_REFLECT101`` and ``BORDER_REPLICATE`` are supported for now.
+    :param borderType: Pixel extrapolation method. Only  ``BORDER_REFLECT101`` and  ``BORDER_REPLICATE`` are supported for now.
 
 .. seealso:: :ocv:func:`cornerMinEigenVal`
 
 
 
-gpu::GoodFeaturesToTrackDetector_GPU
-------------------------------------
-.. ocv:class:: gpu::GoodFeaturesToTrackDetector_GPU
+gpu::CornersDetector
+--------------------
+.. ocv:class:: gpu::CornersDetector : public Algorithm
 
-Class used for strong corners detection on an image. ::
+Base class for Corners Detector. ::
 
-    class GoodFeaturesToTrackDetector_GPU
+    class CV_EXPORTS CornersDetector : public Algorithm
     {
     public:
-        explicit GoodFeaturesToTrackDetector_GPU(int maxCorners_ = 1000, double qualityLevel_ = 0.01, double minDistance_ = 0.0,
-            int blockSize_ = 3, bool useHarrisDetector_ = false, double harrisK_ = 0.04);
-
-        void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat());
-
-        int maxCorners;
-        double qualityLevel;
-        double minDistance;
-
-        int blockSize;
-        bool useHarrisDetector;
-        double harrisK;
-
-        void releaseMemory();
+        virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray()) = 0;
     };
 
-The class finds the most prominent corners in the image.
+
+
+gpu::CornersDetector::detect
+----------------------------
+Determines strong corners on an image.
+
+.. ocv:function:: void gpu::CornersDetector::detect(InputArray image, OutputArray corners, InputArray mask = noArray())
+
+    :param image: Input 8-bit or floating-point 32-bit, single-channel image.
+
+    :param corners: Output vector of detected corners (1-row matrix with CV_32FC2 type with corners positions).
+
+    :param mask: Optional region of interest. If the image is not empty (it needs to have the type  ``CV_8UC1``  and the same size as  ``image`` ), it  specifies the region in which the corners are detected.
+
+
+
+gpu::createGoodFeaturesToTrackDetector
+--------------------------------------
+Creates implementation for :ocv:class:`gpu::CornersDetector` .
+
+.. ocv:function:: Ptr<CornersDetector> gpu::createGoodFeaturesToTrackDetector(int srcType, int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04)
+
+    :param srcType: Input source type. Only  ``CV_8UC1`` and  ``CV_32FC1`` are supported for now.
+
+    :param maxCorners: Maximum number of corners to return. If there are more corners than are found, the strongest of them is returned.
+
+    :param qualityLevel: Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue (see  :ocv:func:`cornerMinEigenVal` ) or the Harris function response (see  :ocv:func:`cornerHarris` ). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the  ``qualityLevel=0.01`` , then all the corners with the quality measure less than 15 are rejected.
+
+    :param minDistance: Minimum possible Euclidean distance between the returned corners.
+
+    :param blockSize: Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See  :ocv:func:`cornerEigenValsAndVecs` .
+
+    :param useHarrisDetector: Parameter indicating whether to use a Harris detector (see :ocv:func:`cornerHarris`) or :ocv:func:`cornerMinEigenVal`.
+
+    :param harrisK: Free parameter of the Harris detector.
 
 .. seealso:: :ocv:func:`goodFeaturesToTrack`
diff --git a/modules/gpuimgproc/doc/histogram.rst b/modules/gpuimgproc/doc/histogram.rst
index 7b29de6ba..dfdf32286 100644
--- a/modules/gpuimgproc/doc/histogram.rst
+++ b/modules/gpuimgproc/doc/histogram.rst
@@ -5,11 +5,89 @@ Histogram Calculation
 
 
 
+gpu::calcHist
+-------------
+Calculates histogram for one channel 8-bit image.
+
+.. ocv:function:: void gpu::calcHist(InputArray src, OutputArray hist, Stream& stream = Stream::Null())
+
+    :param src: Source image with ``CV_8UC1`` type.
+
+    :param hist: Destination histogram with one row, 256 columns, and the  ``CV_32SC1`` type.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::equalizeHist
+-----------------
+Equalizes the histogram of a grayscale image.
+
+.. ocv:function:: void gpu::equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::equalizeHist(InputArray src, OutputArray dst, InputOutputArray buf, Stream& stream = Stream::Null())
+
+    :param src: Source image with ``CV_8UC1`` type.
+
+    :param dst: Destination image.
+
+    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`equalizeHist`
+
+
+
+gpu::CLAHE
+----------
+.. ocv:class:: gpu::CLAHE : public cv::CLAHE
+
+Base class for Contrast Limited Adaptive Histogram Equalization. ::
+
+    class CV_EXPORTS CLAHE : public cv::CLAHE
+    {
+    public:
+        using cv::CLAHE::apply;
+        virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0;
+    };
+
+
+
+gpu::CLAHE::apply
+-----------------
+Equalizes the histogram of a grayscale image using Contrast Limited Adaptive Histogram Equalization.
+
+.. ocv:function:: void gpu::CLAHE::apply(InputArray src, OutputArray dst)
+
+.. ocv:function:: void gpu::CLAHE::apply(InputArray src, OutputArray dst, Stream& stream)
+
+    :param src: Source image with ``CV_8UC1`` type.
+
+    :param dst: Destination image.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::createCLAHE
+----------------
+Creates implementation for :ocv:class:`gpu::CLAHE` .
+
+.. ocv:function:: Ptr<gpu::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8))
+
+    :param clipLimit: Threshold for contrast limiting.
+
+    :param tileGridSize: Size of grid for histogram equalization. Input image will be divided into equally sized rectangular tiles. ``tileGridSize`` defines the number of tiles in row and column.
+
+
+
+
 gpu::evenLevels
--------------------
+---------------
 Computes levels with even distribution.
 
-.. ocv:function:: void gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel)
+.. ocv:function:: void gpu::evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel)
 
     :param levels: Destination array.  ``levels`` has 1 row, ``nLevels`` columns, and the ``CV_32SC1`` type.
 
@@ -22,16 +100,16 @@ Computes levels with even distribution.
 
 
 gpu::histEven
------------------
+-------------
 Calculates a histogram with evenly distributed bins.
 
-.. ocv:function:: void gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::histEven(InputArray src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::histEven( const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::histEven( const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null())
 
     :param src: Source image. ``CV_8U``, ``CV_16U``, or ``CV_16S`` depth and 1 or 4 channels are supported. For a four-channel image, all channels are processed separately.
 
@@ -50,12 +128,16 @@ Calculates a histogram with evenly distributed bins.
 
 
 gpu::histRange
-------------------
+--------------
 Calculates a histogram with bins determined by the ``levels`` array.
 
-.. ocv:function:: void gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::histRange(InputArray src, OutputArray hist, InputArray levels, InputOutputArray buf, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream = Stream::Null())
 
     :param src: Source image. ``CV_8U`` , ``CV_16U`` , or  ``CV_16S`` depth and 1 or 4 channels are supported. For a four-channel image, all channels are processed separately.
 
@@ -66,39 +148,3 @@ Calculates a histogram with bins determined by the ``levels`` array.
     :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
 
     :param stream: Stream for the asynchronous version.
-
-
-
-gpu::calcHist
-------------------
-Calculates histogram for one channel 8-bit image.
-
-.. ocv:function:: void gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null())
-
-    :param src: Source image.
-
-    :param hist: Destination histogram with one row, 256 columns, and the  ``CV_32SC1`` type.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::equalizeHist
-------------------
-Equalizes the histogram of a grayscale image.
-
-.. ocv:function:: void gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
-
-.. ocv:function:: void gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null())
-
-    :param src: Source image.
-
-    :param dst: Destination image.
-
-    :param hist: Destination histogram with one row, 256 columns, and the  ``CV_32SC1`` type.
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`equalizeHist`
diff --git a/modules/gpuimgproc/doc/hough.rst b/modules/gpuimgproc/doc/hough.rst
index 33afabbb6..ec7117198 100644
--- a/modules/gpuimgproc/doc/hough.rst
+++ b/modules/gpuimgproc/doc/hough.rst
@@ -5,18 +5,70 @@ Hough Transform
 
 
 
-gpu::HoughLines
----------------
+gpu::HoughLinesDetector
+-----------------------
+.. ocv:class:: gpu::HoughLinesDetector : public Algorithm
+
+Base class for lines detector algorithm. ::
+
+    class CV_EXPORTS HoughLinesDetector : public Algorithm
+    {
+    public:
+        virtual void detect(InputArray src, OutputArray lines) = 0;
+        virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) = 0;
+
+        virtual void setRho(float rho) = 0;
+        virtual float getRho() const = 0;
+
+        virtual void setTheta(float theta) = 0;
+        virtual float getTheta() const = 0;
+
+        virtual void setThreshold(int threshold) = 0;
+        virtual int getThreshold() const = 0;
+
+        virtual void setDoSort(bool doSort) = 0;
+        virtual bool getDoSort() const = 0;
+
+        virtual void setMaxLines(int maxLines) = 0;
+        virtual int getMaxLines() const = 0;
+    };
+
+
+
+gpu::HoughLinesDetector::detect
+-------------------------------
 Finds lines in a binary image using the classical Hough transform.
 
-.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096)
-
-.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096)
+.. ocv:function:: void gpu::HoughLinesDetector::detect(InputArray src, OutputArray lines)
 
     :param src: 8-bit, single-channel binary source image.
 
     :param lines: Output vector of lines. Each line is represented by a two-element vector  :math:`(\rho, \theta)` .  :math:`\rho`  is the distance from the coordinate origin  :math:`(0,0)`  (top-left corner of the image).  :math:`\theta`  is the line rotation angle in radians ( :math:`0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}` ).
 
+.. seealso:: :ocv:func:`HoughLines`
+
+
+
+gpu::HoughLinesDetector::downloadResults
+----------------------------------------
+Downloads results from :ocv:func:`gpu::HoughLinesDetector::detect` to host memory.
+
+.. ocv:function:: void gpu::HoughLinesDetector::downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray())
+
+    :param d_lines: Result of :ocv:func:`gpu::HoughLinesDetector::detect` .
+
+    :param h_lines: Output host array.
+
+    :param h_votes: Optional output array for line's votes.
+
+
+
+gpu::createHoughLinesDetector
+-----------------------------
+Creates implementation for :ocv:class:`gpu::HoughLinesDetector` .
+
+.. ocv:function:: Ptr<HoughLinesDetector> gpu::createHoughLinesDetector(float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096)
+
     :param rho: Distance resolution of the accumulator in pixels.
 
     :param theta: Angle resolution of the accumulator in radians.
@@ -27,47 +79,132 @@ Finds lines in a binary image using the classical Hough transform.
 
     :param maxLines: Maximum number of output lines.
 
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
 
-.. seealso:: :ocv:func:`HoughLines`
+
+gpu::HoughSegmentDetector
+-------------------------
+.. ocv:class:: gpu::HoughSegmentDetector : public Algorithm
+
+Base class for line segments detector algorithm. ::
+
+    class CV_EXPORTS HoughSegmentDetector : public Algorithm
+    {
+    public:
+        virtual void detect(InputArray src, OutputArray lines) = 0;
+
+        virtual void setRho(float rho) = 0;
+        virtual float getRho() const = 0;
+
+        virtual void setTheta(float theta) = 0;
+        virtual float getTheta() const = 0;
+
+        virtual void setMinLineLength(int minLineLength) = 0;
+        virtual int getMinLineLength() const = 0;
+
+        virtual void setMaxLineGap(int maxLineGap) = 0;
+        virtual int getMaxLineGap() const = 0;
+
+        virtual void setMaxLines(int maxLines) = 0;
+        virtual int getMaxLines() const = 0;
+    };
+
+.. note::
+
+   * An example using the Hough segment detector can be found at opencv_source_code/samples/gpu/houghlines.cpp
+
+
+gpu::HoughSegmentDetector::detect
+---------------------------------
+Finds line segments in a binary image using the probabilistic Hough transform.
+
+.. ocv:function:: void gpu::HoughSegmentDetector::detect(InputArray src, OutputArray lines)
+
+    :param src: 8-bit, single-channel binary source image.
+
+    :param lines: Output vector of lines. Each line is represented by a 4-element vector  :math:`(x_1, y_1, x_2, y_2)` , where  :math:`(x_1,y_1)`  and  :math:`(x_2, y_2)`  are the ending points of each detected line segment.
+
+.. seealso:: :ocv:func:`HoughLinesP`
 
 
 
-gpu::HoughLinesDownload
------------------------
-Downloads results from :ocv:func:`gpu::HoughLines` to host memory.
+gpu::createHoughSegmentDetector
+-------------------------------
+Creates implementation for :ocv:class:`gpu::HoughSegmentDetector` .
 
-.. ocv:function:: void gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray())
+.. ocv:function:: Ptr<HoughSegmentDetector> gpu::createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096)
 
-    :param d_lines: Result of :ocv:func:`gpu::HoughLines` .
+    :param rho: Distance resolution of the accumulator in pixels.
 
-    :param h_lines: Output host array.
+    :param theta: Angle resolution of the accumulator in radians.
 
-    :param h_votes: Optional output array for line's votes.
+    :param minLineLength: Minimum line length. Line segments shorter than that are rejected.
 
-.. seealso:: :ocv:func:`gpu::HoughLines`
+    :param maxLineGap: Maximum allowed gap between points on the same line to link them.
+
+    :param maxLines: Maximum number of output lines.
 
 
 
-gpu::HoughCircles
------------------
+gpu::HoughCirclesDetector
+-------------------------
+.. ocv:class:: gpu::HoughCirclesDetector : public Algorithm
+
+Base class for circles detector algorithm. ::
+
+    class CV_EXPORTS HoughCirclesDetector : public Algorithm
+    {
+    public:
+        virtual void detect(InputArray src, OutputArray circles) = 0;
+
+        virtual void setDp(float dp) = 0;
+        virtual float getDp() const = 0;
+
+        virtual void setMinDist(float minDist) = 0;
+        virtual float getMinDist() const = 0;
+
+        virtual void setCannyThreshold(int cannyThreshold) = 0;
+        virtual int getCannyThreshold() const = 0;
+
+        virtual void setVotesThreshold(int votesThreshold) = 0;
+        virtual int getVotesThreshold() const = 0;
+
+        virtual void setMinRadius(int minRadius) = 0;
+        virtual int getMinRadius() const = 0;
+
+        virtual void setMaxRadius(int maxRadius) = 0;
+        virtual int getMaxRadius() const = 0;
+
+        virtual void setMaxCircles(int maxCircles) = 0;
+        virtual int getMaxCircles() const = 0;
+    };
+
+
+
+gpu::HoughCirclesDetector::detect
+---------------------------------
 Finds circles in a grayscale image using the Hough transform.
 
-.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
-
-.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
+.. ocv:function:: void gpu::HoughCirclesDetector::detect(InputArray src, OutputArray circles)
 
     :param src: 8-bit, single-channel grayscale input image.
 
     :param circles: Output vector of found circles. Each vector is encoded as a 3-element floating-point vector  :math:`(x, y, radius)` .
 
-    :param method: Detection method to use. Currently, the only implemented method is  ``CV_HOUGH_GRADIENT`` , which is basically  *21HT* , described in  [Yuen90]_.
+.. seealso:: :ocv:func:`HoughCircles`
+
+
+
+gpu::createHoughCirclesDetector
+-------------------------------
+Creates implementation for :ocv:class:`gpu::HoughCirclesDetector` .
+
+.. ocv:function:: Ptr<HoughCirclesDetector> gpu::createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
 
     :param dp: Inverse ratio of the accumulator resolution to the image resolution. For example, if  ``dp=1`` , the accumulator has the same resolution as the input image. If  ``dp=2`` , the accumulator has half as big width and height.
 
     :param minDist: Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed.
 
-    :param cannyThreshold: The higher threshold of the two passed to  the :ocv:func:`gpu::Canny`  edge detector (the lower one is twice smaller).
+    :param cannyThreshold: The higher threshold of the two passed to Canny edge detector (the lower one is twice smaller).
 
     :param votesThreshold: The accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected.
 
@@ -77,20 +214,23 @@ Finds circles in a grayscale image using the Hough transform.
 
     :param maxCircles: Maximum number of output circles.
 
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
 
-.. seealso:: :ocv:func:`HoughCircles`
+
+gpu::createGeneralizedHoughBallard
+----------------------------------
+Creates implementation for generalized hough transform from [Ballard1981]_ .
+
+.. ocv:function:: Ptr<GeneralizedHoughBallard> gpu::createGeneralizedHoughBallard()
 
 
 
-gpu::HoughCirclesDownload
--------------------------
-Downloads results from :ocv:func:`gpu::HoughCircles` to host memory.
+gpu::createGeneralizedHoughGuil
+-------------------------------
+Creates implementation for generalized hough transform from [Guil1999]_ .
 
-.. ocv:function:: void gpu::HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles)
+.. ocv:function:: Ptr<GeneralizedHoughGuil> gpu::createGeneralizedHoughGuil()
 
-    :param d_circles: Result of :ocv:func:`gpu::HoughCircles` .
 
-    :param h_circles: Output host array.
 
-.. seealso:: :ocv:func:`gpu::HoughCircles`
+.. [Ballard1981] Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
+.. [Guil1999] Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
diff --git a/modules/gpuimgproc/doc/imgproc.rst b/modules/gpuimgproc/doc/imgproc.rst
index cd91afecf..776c339a5 100644
--- a/modules/gpuimgproc/doc/imgproc.rst
+++ b/modules/gpuimgproc/doc/imgproc.rst
@@ -5,11 +5,72 @@ Image Processing
 
 
 
+gpu::CannyEdgeDetector
+----------------------
+.. ocv:class:: gpu::CannyEdgeDetector : public Algorithm
+
+Base class for Canny Edge Detector. ::
+
+    class CV_EXPORTS CannyEdgeDetector : public Algorithm
+    {
+    public:
+        virtual void detect(InputArray image, OutputArray edges) = 0;
+        virtual void detect(InputArray dx, InputArray dy, OutputArray edges) = 0;
+
+        virtual void setLowThreshold(double low_thresh) = 0;
+        virtual double getLowThreshold() const = 0;
+
+        virtual void setHighThreshold(double high_thresh) = 0;
+        virtual double getHighThreshold() const = 0;
+
+        virtual void setAppertureSize(int apperture_size) = 0;
+        virtual int getAppertureSize() const = 0;
+
+        virtual void setL2Gradient(bool L2gradient) = 0;
+        virtual bool getL2Gradient() const = 0;
+    };
+
+
+
+gpu::CannyEdgeDetector::detect
+------------------------------
+Finds edges in an image using the [Canny86]_ algorithm.
+
+.. ocv:function:: void gpu::CannyEdgeDetector::detect(InputArray image, OutputArray edges)
+
+.. ocv:function:: void gpu::CannyEdgeDetector::detect(InputArray dx, InputArray dy, OutputArray edges)
+
+    :param image: Single-channel 8-bit input image.
+
+    :param dx: First derivative of image in the vertical direction. Support only ``CV_32S`` type.
+
+    :param dy: First derivative of image in the horizontal direction. Support only ``CV_32S`` type.
+
+    :param edges: Output edge map. It has the same size and type as  ``image`` .
+
+
+
+gpu::createCannyEdgeDetector
+----------------------------
+Creates implementation for :ocv:class:`gpu::CannyEdgeDetector` .
+
+.. ocv:function:: Ptr<CannyEdgeDetector> gpu::createCannyEdgeDetector(double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
+
+    :param low_thresh: First threshold for the hysteresis procedure.
+
+    :param high_thresh: Second threshold for the hysteresis procedure.
+
+    :param apperture_size: Aperture size for the  :ocv:func:`Sobel`  operator.
+
+    :param L2gradient: Flag indicating whether a more accurate  :math:`L_2`  norm  :math:`=\sqrt{(dI/dx)^2 + (dI/dy)^2}`  should be used to compute the image gradient magnitude ( ``L2gradient=true`` ), or a faster default  :math:`L_1`  norm  :math:`=|dI/dx|+|dI/dy|`  is enough ( ``L2gradient=false`` ).
+
+
+
 gpu::meanShiftFiltering
----------------------------
+-----------------------
 Performs mean-shift filtering for each point of the source image.
 
-.. ocv:function:: void gpu::meanShiftFiltering( const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria=TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::meanShiftFiltering(InputArray src, OutputArray dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null())
 
     :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
 
@@ -26,10 +87,10 @@ It maps each point of the source image into another point. As a result, you have
 
 
 gpu::meanShiftProc
-----------------------
+------------------
 Performs a mean-shift procedure and stores information about processed points (their colors and positions) in two images.
 
-.. ocv:function:: void gpu::meanShiftProc( const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria=TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::meanShiftProc(InputArray src, OutputArray dstr, OutputArray dstsp, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null())
 
     :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
 
@@ -48,14 +109,14 @@ Performs a mean-shift procedure and stores information about processed points (t
 
 
 gpu::meanShiftSegmentation
-------------------------------
+--------------------------
 Performs a mean-shift segmentation of the source image and eliminates small segments.
 
-.. ocv:function:: void gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
+.. ocv:function:: void gpu::meanShiftSegmentation(InputArray src, OutputArray dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
 
     :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
 
-    :param dst: Segmented image with the same size and type as  ``src`` .
+    :param dst: Segmented image with the same size and type as  ``src`` (host memory).
 
     :param sp: Spatial window radius.
 
@@ -67,46 +128,49 @@ Performs a mean-shift segmentation of the source image and eliminates small segm
 
 
 
-gpu::MatchTemplateBuf
+gpu::TemplateMatching
 ---------------------
-.. ocv:struct:: gpu::MatchTemplateBuf
+.. ocv:class:: gpu::TemplateMatching : public Algorithm
 
-Class providing memory buffers for :ocv:func:`gpu::matchTemplate` function, plus it allows to adjust some specific parameters. ::
+Base class for Template Matching. ::
 
-    struct CV_EXPORTS MatchTemplateBuf
+    class CV_EXPORTS TemplateMatching : public Algorithm
     {
-        Size user_block_size;
-        GpuMat imagef, templf;
-        std::vector<GpuMat> images;
-        std::vector<GpuMat> image_sums;
-        std::vector<GpuMat> image_sqsums;
+    public:
+        virtual void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null()) = 0;
     };
 
-You can use field `user_block_size` to set specific block size for :ocv:func:`gpu::matchTemplate` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
 
 
-
-gpu::matchTemplate
-----------------------
+gpu::TemplateMatching::match
+----------------------------
 Computes a proximity map for a raster template and an image where the template is searched for.
 
-.. ocv:function:: void gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null())
+.. ocv:function:: void gpu::TemplateMatching::match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null())
-
-    :param image: Source image.  ``CV_32F`` and  ``CV_8U`` depth images (1..4 channels) are supported for now.
+    :param image: Source image.
 
     :param templ: Template image with the size and type the same as  ``image`` .
 
     :param result: Map containing comparison results ( ``CV_32FC1`` ). If  ``image`` is  *W x H*  and ``templ`` is  *w x h*, then  ``result`` must be *W-w+1 x H-h+1*.
 
-    :param method: Specifies the way to compare the template with the image.
-
-    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::MatchTemplateBuf`.
-
     :param stream: Stream for the asynchronous version.
 
-    The following methods are supported for the ``CV_8U`` depth images for now:
+
+
+gpu::createTemplateMatching
+---------------------------
+Creates implementation for :ocv:class:`gpu::TemplateMatching` .
+
+.. ocv:function:: Ptr<TemplateMatching> gpu::createTemplateMatching(int srcType, int method, Size user_block_size = Size())
+
+    :param srcType: Input source type. ``CV_32F`` and  ``CV_8U`` depth images (1..4 channels) are supported for now.
+
+    :param method: Specifies the way to compare the template with the image.
+
+    :param user_block_size: You can use field `user_block_size` to set specific block size. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
+
+The following methods are supported for the ``CV_8U`` depth images for now:
 
     * ``CV_TM_SQDIFF``
     * ``CV_TM_SQDIFF_NORMED``
@@ -115,7 +179,7 @@ Computes a proximity map for a raster template and an image where the template i
     * ``CV_TM_CCOEFF``
     * ``CV_TM_CCOEFF_NORMED``
 
-    The following methods are supported for the ``CV_32F`` images for now:
+The following methods are supported for the ``CV_32F`` images for now:
 
     * ``CV_TM_SQDIFF``
     * ``CV_TM_CCORR``
@@ -124,45 +188,11 @@ Computes a proximity map for a raster template and an image where the template i
 
 
 
-gpu::Canny
--------------------
-Finds edges in an image using the [Canny86]_ algorithm.
-
-.. ocv:function:: void gpu::Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
-
-.. ocv:function:: void gpu::Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
-
-.. ocv:function:: void gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
-
-.. ocv:function:: void gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
-
-    :param image: Single-channel 8-bit input image.
-
-    :param dx: First derivative of image in the vertical direction. Support only ``CV_32S`` type.
-
-    :param dy: First derivative of image in the horizontal direction. Support only ``CV_32S`` type.
-
-    :param edges: Output edge map. It has the same size and type as  ``image`` .
-
-    :param low_thresh: First threshold for the hysteresis procedure.
-
-    :param high_thresh: Second threshold for the hysteresis procedure.
-
-    :param apperture_size: Aperture size for the  :ocv:func:`Sobel`  operator.
-
-    :param L2gradient: Flag indicating whether a more accurate  :math:`L_2`  norm  :math:`=\sqrt{(dI/dx)^2 + (dI/dy)^2}`  should be used to compute the image gradient magnitude ( ``L2gradient=true`` ), or a faster default  :math:`L_1`  norm  :math:`=|dI/dx|+|dI/dy|`  is enough ( ``L2gradient=false`` ).
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-.. seealso:: :ocv:func:`Canny`
-
-
-
 gpu::bilateralFilter
 --------------------
 Performs bilateral filtering of passed image
 
-.. ocv:function:: void gpu::bilateralFilter( const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode=BORDER_DEFAULT, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::bilateralFilter(InputArray src, OutputArray dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode=BORDER_DEFAULT, Stream& stream=Stream::Null())
 
     :param src: Source image. Supports only (channles != 2 && depth() != CV_8S && depth() != CV_32S && depth() != CV_64F).
 
@@ -178,9 +208,7 @@ Performs bilateral filtering of passed image
 
     :param stream: Stream for the asynchronous version.
 
-.. seealso::
-
-    :ocv:func:`bilateralFilter`
+.. seealso:: :ocv:func:`bilateralFilter`
 
 
 
@@ -188,7 +216,7 @@ gpu::blendLinear
 -------------------
 Performs linear blending of two images.
 
-.. ocv:function:: void gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, GpuMat& result, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::blendLinear(InputArray img1, InputArray img2, InputArray weights1, InputArray weights2, OutputArray result, Stream& stream = Stream::Null())
 
     :param img1: First image. Supports only ``CV_8U`` and ``CV_32F`` depth.
 
diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
index 3fe9f82f4..f0a0f1260 100644
--- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
+++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
@@ -48,16 +48,14 @@
 #endif
 
 #include "opencv2/core/gpu.hpp"
-#include "opencv2/core/base.hpp"
 #include "opencv2/imgproc.hpp"
-#include "opencv2/gpufilters.hpp"
 
 namespace cv { namespace gpu {
 
 /////////////////////////// Color Processing ///////////////////////////
 
 //! converts image from one color space to another
-CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
+CV_EXPORTS void cvtColor(InputArray src, OutputArray dst, int code, int dcn = 0, Stream& stream = Stream::Null());
 
 enum
 {
@@ -77,67 +75,39 @@ enum
     COLOR_BayerRG2GRAY_MHT = 262,
     COLOR_BayerGR2GRAY_MHT = 263
 };
-CV_EXPORTS void demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn = -1, Stream& stream = Stream::Null());
+CV_EXPORTS void demosaicing(InputArray src, OutputArray dst, int code, int dcn = -1, Stream& stream = Stream::Null());
 
 //! swap channels
 //! dstOrder - Integer array describing how channel values are permutated. The n-th entry
 //!            of the array contains the number of the channel that is stored in the n-th channel of
 //!            the output image. E.g. Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR
 //!            channel order.
-CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null());
+CV_EXPORTS void swapChannels(InputOutputArray image, const int dstOrder[4], Stream& stream = Stream::Null());
 
 //! Routines for correcting image color gamma
-CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null());
+CV_EXPORTS void gammaCorrection(InputArray src, OutputArray dst, bool forward = true, Stream& stream = Stream::Null());
 
 enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
        ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
 
 //! Composite two images using alpha opacity values contained in each image
 //! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
-CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
+CV_EXPORTS void alphaComp(InputArray img1, InputArray img2, OutputArray dst, int alpha_op, Stream& stream = Stream::Null());
 
 ////////////////////////////// Histogram ///////////////////////////////
 
-//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
-CV_EXPORTS void evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel);
-
-//! Calculates histogram with evenly distributed bins for signle channel source.
-//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types.
-//! Output hist will have one row and histSize cols and CV_32SC1 type.
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
-
-//! Calculates histogram with evenly distributed bins for four-channel source.
-//! All channels of source are processed separately.
-//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types.
-//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type.
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
-
-//! Calculates histogram with bins determined by levels array.
-//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
-//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types.
-//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type.
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null());
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null());
-
-//! Calculates histogram with bins determined by levels array.
-//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
-//! All channels of source are processed separately.
-//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types.
-//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type.
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream = Stream::Null());
-
 //! Calculates histogram for 8u one channel image
 //! Output hist will have one row, 256 cols and CV32SC1 type.
-CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null());
-CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
+CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stream::Null());
 
 //! normalizes the grayscale image brightness and contrast by normalizing its histogram
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null());
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
+CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, InputOutputArray buf, Stream& stream = Stream::Null());
+
+static inline void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
+{
+    GpuMat buf;
+    gpu::equalizeHist(src, dst, buf, stream);
+}
 
 class CV_EXPORTS CLAHE : public cv::CLAHE
 {
@@ -145,194 +115,247 @@ public:
     using cv::CLAHE::apply;
     virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0;
 };
-CV_EXPORTS Ptr<cv::gpu::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+CV_EXPORTS Ptr<gpu::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+
+//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
+CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel);
+
+//! Calculates histogram with evenly distributed bins for signle channel source.
+//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types.
+//! Output hist will have one row and histSize cols and CV_32SC1 type.
+CV_EXPORTS void histEven(InputArray src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
+
+static inline void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
+{
+    GpuMat buf;
+    gpu::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
+}
+
+//! Calculates histogram with evenly distributed bins for four-channel source.
+//! All channels of source are processed separately.
+//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types.
+//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type.
+CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
+
+static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null())
+{
+    GpuMat buf;
+    gpu::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
+}
+
+//! Calculates histogram with bins determined by levels array.
+//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
+//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types.
+//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type.
+CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, InputOutputArray buf, Stream& stream = Stream::Null());
+
+static inline void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null())
+{
+    GpuMat buf;
+    gpu::histRange(src, hist, levels, buf, stream);
+}
+
+//! Calculates histogram with bins determined by levels array.
+//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
+//! All channels of source are processed separately.
+//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types.
+//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type.
+CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream = Stream::Null());
+
+static inline void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null())
+{
+    GpuMat buf;
+    gpu::histRange(src, hist, levels, buf, stream);
+}
 
 //////////////////////////////// Canny ////////////////////////////////
 
-struct CV_EXPORTS CannyBuf
+class CV_EXPORTS CannyEdgeDetector : public Algorithm
 {
-    void create(const Size& image_size, int apperture_size = 3);
-    void release();
+public:
+    virtual void detect(InputArray image, OutputArray edges) = 0;
+    virtual void detect(InputArray dx, InputArray dy, OutputArray edges) = 0;
 
-    GpuMat dx, dy;
-    GpuMat mag;
-    GpuMat map;
-    GpuMat st1, st2;
-    Ptr<Filter> filterDX, filterDY;
+    virtual void setLowThreshold(double low_thresh) = 0;
+    virtual double getLowThreshold() const = 0;
+
+    virtual void setHighThreshold(double high_thresh) = 0;
+    virtual double getHighThreshold() const = 0;
+
+    virtual void setAppertureSize(int apperture_size) = 0;
+    virtual int getAppertureSize() const = 0;
+
+    virtual void setL2Gradient(bool L2gradient) = 0;
+    virtual bool getL2Gradient() const = 0;
 };
 
-CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
-CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
+CV_EXPORTS Ptr<CannyEdgeDetector> createCannyEdgeDetector(double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 
 /////////////////////////// Hough Transform ////////////////////////////
 
-struct HoughLinesBuf
-{
-    GpuMat accum;
-    GpuMat list;
-};
+//////////////////////////////////////
+// HoughLines
 
-CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
-CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
-CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
-
-//! finds line segments in the black-n-white image using probabalistic Hough transform
-CV_EXPORTS void HoughLinesP(const GpuMat& image, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096);
-
-struct HoughCirclesBuf
-{
-    GpuMat edges;
-    GpuMat accum;
-    GpuMat list;
-    CannyBuf cannyBuf;
-};
-
-CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
-CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
-CV_EXPORTS void HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles);
-
-//! finds arbitrary template in the grayscale image using Generalized Hough Transform
-//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
-//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
-class CV_EXPORTS GeneralizedHough_GPU : public cv::Algorithm
+class CV_EXPORTS HoughLinesDetector : public Algorithm
 {
 public:
-    static Ptr<GeneralizedHough_GPU> create(int method);
+    virtual void detect(InputArray src, OutputArray lines) = 0;
+    virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) = 0;
 
-    virtual ~GeneralizedHough_GPU();
+    virtual void setRho(float rho) = 0;
+    virtual float getRho() const = 0;
 
-    //! set template to search
-    void setTemplate(const GpuMat& templ, int cannyThreshold = 100, Point templCenter = Point(-1, -1));
-    void setTemplate(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter = Point(-1, -1));
+    virtual void setTheta(float theta) = 0;
+    virtual float getTheta() const = 0;
 
-    //! find template on image
-    void detect(const GpuMat& image, GpuMat& positions, int cannyThreshold = 100);
-    void detect(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions);
+    virtual void setThreshold(int threshold) = 0;
+    virtual int getThreshold() const = 0;
 
-    void download(const GpuMat& d_positions, OutputArray h_positions, OutputArray h_votes = noArray());
+    virtual void setDoSort(bool doSort) = 0;
+    virtual bool getDoSort() const = 0;
 
-    void release();
-
-protected:
-    virtual void setTemplateImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter) = 0;
-    virtual void detectImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions) = 0;
-    virtual void releaseImpl() = 0;
-
-private:
-    GpuMat edges_;
-    CannyBuf cannyBuf_;
+    virtual void setMaxLines(int maxLines) = 0;
+    virtual int getMaxLines() const = 0;
 };
 
+CV_EXPORTS Ptr<HoughLinesDetector> createHoughLinesDetector(float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
+
+
+//////////////////////////////////////
+// HoughLinesP
+
+//! finds line segments in the black-n-white image using probabalistic Hough transform
+class CV_EXPORTS HoughSegmentDetector : public Algorithm
+{
+public:
+    virtual void detect(InputArray src, OutputArray lines) = 0;
+
+    virtual void setRho(float rho) = 0;
+    virtual float getRho() const = 0;
+
+    virtual void setTheta(float theta) = 0;
+    virtual float getTheta() const = 0;
+
+    virtual void setMinLineLength(int minLineLength) = 0;
+    virtual int getMinLineLength() const = 0;
+
+    virtual void setMaxLineGap(int maxLineGap) = 0;
+    virtual int getMaxLineGap() const = 0;
+
+    virtual void setMaxLines(int maxLines) = 0;
+    virtual int getMaxLines() const = 0;
+};
+
+CV_EXPORTS Ptr<HoughSegmentDetector> createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096);
+
+//////////////////////////////////////
+// HoughCircles
+
+class CV_EXPORTS HoughCirclesDetector : public Algorithm
+{
+public:
+    virtual void detect(InputArray src, OutputArray circles) = 0;
+
+    virtual void setDp(float dp) = 0;
+    virtual float getDp() const = 0;
+
+    virtual void setMinDist(float minDist) = 0;
+    virtual float getMinDist() const = 0;
+
+    virtual void setCannyThreshold(int cannyThreshold) = 0;
+    virtual int getCannyThreshold() const = 0;
+
+    virtual void setVotesThreshold(int votesThreshold) = 0;
+    virtual int getVotesThreshold() const = 0;
+
+    virtual void setMinRadius(int minRadius) = 0;
+    virtual int getMinRadius() const = 0;
+
+    virtual void setMaxRadius(int maxRadius) = 0;
+    virtual int getMaxRadius() const = 0;
+
+    virtual void setMaxCircles(int maxCircles) = 0;
+    virtual int getMaxCircles() const = 0;
+};
+
+CV_EXPORTS Ptr<HoughCirclesDetector> createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+
+//////////////////////////////////////
+// GeneralizedHough
+
+//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
+//! Detects position only without traslation and rotation
+CV_EXPORTS Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
+
+//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
+//! Detects position, traslation and rotation
+CV_EXPORTS Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
+
 ////////////////////////// Corners Detection ///////////////////////////
 
-//! computes Harris cornerness criteria at each image pixel
-CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
-CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
-CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
-                             int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
-
-//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
-CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
-CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
-CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize,
-    int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null());
-
-////////////////////////// Feature Detection ///////////////////////////
-
-class CV_EXPORTS GoodFeaturesToTrackDetector_GPU
+class CV_EXPORTS CornernessCriteria : public Algorithm
 {
 public:
-    explicit GoodFeaturesToTrackDetector_GPU(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
-        int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
-
-    //! return 1 rows matrix with CV_32FC2 type
-    void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat());
-
-    int maxCorners;
-    double qualityLevel;
-    double minDistance;
-
-    int blockSize;
-    bool useHarrisDetector;
-    double harrisK;
-
-    void releaseMemory()
-    {
-        Dx_.release();
-        Dy_.release();
-        buf_.release();
-        eig_.release();
-        minMaxbuf_.release();
-        tmpCorners_.release();
-    }
-
-private:
-    GpuMat Dx_;
-    GpuMat Dy_;
-    GpuMat buf_;
-    GpuMat eig_;
-    GpuMat minMaxbuf_;
-    GpuMat tmpCorners_;
+    virtual void compute(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0;
 };
 
-inline GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxCorners_, double qualityLevel_, double minDistance_,
-        int blockSize_, bool useHarrisDetector_, double harrisK_)
+//! computes Harris cornerness criteria at each image pixel
+CV_EXPORTS Ptr<CornernessCriteria> createHarrisCorner(int srcType, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
+
+//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
+CV_EXPORTS Ptr<CornernessCriteria> createMinEigenValCorner(int srcType, int blockSize, int ksize, int borderType = BORDER_REFLECT101);
+
+////////////////////////// Corners Detection ///////////////////////////
+
+class CV_EXPORTS CornersDetector : public Algorithm
 {
-    maxCorners = maxCorners_;
-    qualityLevel = qualityLevel_;
-    minDistance = minDistance_;
-    blockSize = blockSize_;
-    useHarrisDetector = useHarrisDetector_;
-    harrisK = harrisK_;
-}
+public:
+    //! return 1 rows matrix with CV_32FC2 type
+    virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray()) = 0;
+};
+
+CV_EXPORTS Ptr<CornersDetector> createGoodFeaturesToTrackDetector(int srcType, int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+                                                                  int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
 
 ///////////////////////////// Mean Shift //////////////////////////////
 
 //! Does mean shift filtering on GPU.
-CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
+CV_EXPORTS void meanShiftFiltering(InputArray src, OutputArray dst, int sp, int sr,
                                    TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
                                    Stream& stream = Stream::Null());
 
 //! Does mean shift procedure on GPU.
-CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
+CV_EXPORTS void meanShiftProc(InputArray src, OutputArray dstr, OutputArray dstsp, int sp, int sr,
                               TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
                               Stream& stream = Stream::Null());
 
 //! Does mean shift segmentation with elimination of small regions.
-CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
+CV_EXPORTS void meanShiftSegmentation(InputArray src, OutputArray dst, int sp, int sr, int minsize,
                                       TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 
 /////////////////////////// Match Template ////////////////////////////
 
-struct CV_EXPORTS MatchTemplateBuf
+//! computes the proximity map for the raster template and the image where the template is searched for
+class CV_EXPORTS TemplateMatching : public Algorithm
 {
-    Size user_block_size;
-    GpuMat imagef, templf;
-    std::vector<GpuMat> images;
-    std::vector<GpuMat> image_sums;
-    std::vector<GpuMat> image_sqsums;
+public:
+    virtual void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null()) = 0;
 };
 
-//! computes the proximity map for the raster template and the image where the template is searched for
-CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null());
-
-//! computes the proximity map for the raster template and the image where the template is searched for
-CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null());
+CV_EXPORTS Ptr<TemplateMatching> createTemplateMatching(int srcType, int method, Size user_block_size = Size());
 
 ////////////////////////// Bilateral Filter ///////////////////////////
 
 //! Performa bilateral filtering of passsed image
-CV_EXPORTS void bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial,
+CV_EXPORTS void bilateralFilter(InputArray src, OutputArray dst, int kernel_size, float sigma_color, float sigma_spatial,
                                 int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null());
 
 ///////////////////////////// Blending ////////////////////////////////
 
 //! performs linear blending of two images
 //! to avoid accuracy errors sum of weigths shouldn't be very close to zero
-CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
-                            GpuMat& result, Stream& stream = Stream::Null());
+CV_EXPORTS void blendLinear(InputArray img1, InputArray img2, InputArray weights1, InputArray weights2,
+                            OutputArray result, Stream& stream = Stream::Null());
 
 }} // namespace cv { namespace gpu {
 
diff --git a/modules/gpuimgproc/perf/perf_canny.cpp b/modules/gpuimgproc/perf/perf_canny.cpp
index ce6db2bb3..2bbf70a49 100644
--- a/modules/gpuimgproc/perf/perf_canny.cpp
+++ b/modules/gpuimgproc/perf/perf_canny.cpp
@@ -70,9 +70,10 @@ PERF_TEST_P(Image_AppertureSz_L2gradient, Canny,
     {
         const cv::gpu::GpuMat d_image(image);
         cv::gpu::GpuMat dst;
-        cv::gpu::CannyBuf d_buf;
 
-        TEST_CYCLE() cv::gpu::Canny(d_image, d_buf, dst, low_thresh, high_thresh, apperture_size, useL2gradient);
+        cv::Ptr<cv::gpu::CannyEdgeDetector> canny = cv::gpu::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
+
+        TEST_CYCLE() canny->detect(d_image, dst);
 
         GPU_SANITY_CHECK(dst);
     }
diff --git a/modules/gpuimgproc/perf/perf_corners.cpp b/modules/gpuimgproc/perf/perf_corners.cpp
index 28e8806e5..a0c1f8d30 100644
--- a/modules/gpuimgproc/perf/perf_corners.cpp
+++ b/modules/gpuimgproc/perf/perf_corners.cpp
@@ -75,11 +75,10 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerHarris,
     {
         const cv::gpu::GpuMat d_img(img);
         cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_Dx;
-        cv::gpu::GpuMat d_Dy;
-        cv::gpu::GpuMat d_buf;
 
-        TEST_CYCLE() cv::gpu::cornerHarris(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, k, borderMode);
+        cv::Ptr<cv::gpu::CornernessCriteria> harris = cv::gpu::createHarrisCorner(img.type(), blockSize, apertureSize, k, borderMode);
+
+        TEST_CYCLE() harris->compute(d_img, dst);
 
         GPU_SANITY_CHECK(dst, 1e-4);
     }
@@ -118,11 +117,10 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerMinEigenVal,
     {
         const cv::gpu::GpuMat d_img(img);
         cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_Dx;
-        cv::gpu::GpuMat d_Dy;
-        cv::gpu::GpuMat d_buf;
 
-        TEST_CYCLE() cv::gpu::cornerMinEigenVal(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, borderMode);
+        cv::Ptr<cv::gpu::CornernessCriteria> minEigenVal = cv::gpu::createMinEigenValCorner(img.type(), blockSize, apertureSize, borderMode);
+
+        TEST_CYCLE() minEigenVal->compute(d_img, dst);
 
         GPU_SANITY_CHECK(dst, 1e-4);
     }
diff --git a/modules/gpuimgproc/perf/perf_gftt.cpp b/modules/gpuimgproc/perf/perf_gftt.cpp
index 982182d17..ed8d6ac16 100644
--- a/modules/gpuimgproc/perf/perf_gftt.cpp
+++ b/modules/gpuimgproc/perf/perf_gftt.cpp
@@ -66,12 +66,12 @@ PERF_TEST_P(Image_MinDistance, GoodFeaturesToTrack,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance);
+        cv::Ptr<cv::gpu::CornersDetector> d_detector = cv::gpu::createGoodFeaturesToTrackDetector(image.type(), maxCorners, qualityLevel, minDistance);
 
         const cv::gpu::GpuMat d_image(image);
         cv::gpu::GpuMat pts;
 
-        TEST_CYCLE() d_detector(d_image, pts);
+        TEST_CYCLE() d_detector->detect(d_image, pts);
 
         GPU_SANITY_CHECK(pts);
     }
diff --git a/modules/gpuimgproc/perf/perf_histogram.cpp b/modules/gpuimgproc/perf/perf_histogram.cpp
index 51f7416f9..d8def54ff 100644
--- a/modules/gpuimgproc/perf/perf_histogram.cpp
+++ b/modules/gpuimgproc/perf/perf_histogram.cpp
@@ -167,10 +167,9 @@ PERF_TEST_P(Sz, EqualizeHist,
     {
         const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_hist;
         cv::gpu::GpuMat d_buf;
 
-        TEST_CYCLE() cv::gpu::equalizeHist(d_src, dst, d_hist, d_buf);
+        TEST_CYCLE() cv::gpu::equalizeHist(d_src, dst, d_buf);
 
         GPU_SANITY_CHECK(dst);
     }
diff --git a/modules/gpuimgproc/perf/perf_hough.cpp b/modules/gpuimgproc/perf/perf_hough.cpp
index a4aac0d02..cce8e7432 100644
--- a/modules/gpuimgproc/perf/perf_hough.cpp
+++ b/modules/gpuimgproc/perf/perf_hough.cpp
@@ -103,9 +103,10 @@ PERF_TEST_P(Sz, HoughLines,
     {
         const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_lines;
-        cv::gpu::HoughLinesBuf d_buf;
 
-        TEST_CYCLE() cv::gpu::HoughLines(d_src, d_lines, d_buf, rho, theta, threshold);
+        cv::Ptr<cv::gpu::HoughLinesDetector> hough = cv::gpu::createHoughLinesDetector(rho, theta, threshold);
+
+        TEST_CYCLE() hough->detect(d_src, d_lines);
 
         cv::Mat gpu_lines(d_lines.row(0));
         cv::Vec2f* begin = gpu_lines.ptr<cv::Vec2f>(0);
@@ -151,9 +152,10 @@ PERF_TEST_P(Image, HoughLinesP,
     {
         const cv::gpu::GpuMat d_mask(mask);
         cv::gpu::GpuMat d_lines;
-        cv::gpu::HoughLinesBuf d_buf;
 
-        TEST_CYCLE() cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
+        cv::Ptr<cv::gpu::HoughSegmentDetector> hough = cv::gpu::createHoughSegmentDetector(rho, theta, minLineLenght, maxLineGap);
+
+        TEST_CYCLE() hough->detect(d_mask, d_lines);
 
         cv::Mat gpu_lines(d_lines);
         cv::Vec4i* begin = gpu_lines.ptr<cv::Vec4i>();
@@ -201,9 +203,10 @@ PERF_TEST_P(Sz_Dp_MinDist, HoughCircles,
     {
         const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_circles;
-        cv::gpu::HoughCirclesBuf d_buf;
 
-        TEST_CYCLE() cv::gpu::HoughCircles(d_src, d_circles, d_buf, cv::HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
+        cv::Ptr<cv::gpu::HoughCirclesDetector> houghCircles = cv::gpu::createHoughCirclesDetector(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
+
+        TEST_CYCLE() houghCircles->detect(d_src, d_circles);
 
         cv::Mat gpu_circles(d_circles);
         cv::Vec3f* begin = gpu_circles.ptr<cv::Vec3f>(0);
@@ -224,23 +227,59 @@ PERF_TEST_P(Sz_Dp_MinDist, HoughCircles,
 //////////////////////////////////////////////////////////////////////
 // GeneralizedHough
 
-enum { GHT_POSITION = cv::GeneralizedHough::GHT_POSITION,
-       GHT_SCALE    = cv::GeneralizedHough::GHT_SCALE,
-       GHT_ROTATION = cv::GeneralizedHough::GHT_ROTATION
-     };
-
-CV_FLAGS(GHMethod, GHT_POSITION, GHT_SCALE, GHT_ROTATION);
-
-DEF_PARAM_TEST(Method_Sz, GHMethod, cv::Size);
-
-PERF_TEST_P(Method_Sz, GeneralizedHough,
-            Combine(Values(GHMethod(GHT_POSITION), GHMethod(GHT_POSITION | GHT_SCALE), GHMethod(GHT_POSITION | GHT_ROTATION), GHMethod(GHT_POSITION | GHT_SCALE | GHT_ROTATION)),
-                    GPU_TYPICAL_MAT_SIZES))
+PERF_TEST_P(Sz, GeneralizedHoughBallard, GPU_TYPICAL_MAT_SIZES)
 {
     declare.time(10);
 
-    const int method = GET_PARAM(0);
-    const cv::Size imageSize = GET_PARAM(1);
+    const cv::Size imageSize = GetParam();
+
+    const cv::Mat templ = readImage("cv/shared/templ.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(templ.empty());
+
+    cv::Mat image(imageSize, CV_8UC1, cv::Scalar::all(0));
+    templ.copyTo(image(cv::Rect(50, 50, templ.cols, templ.rows)));
+
+    cv::Mat edges;
+    cv::Canny(image, edges, 50, 100);
+
+    cv::Mat dx, dy;
+    cv::Sobel(image, dx, CV_32F, 1, 0);
+    cv::Sobel(image, dy, CV_32F, 0, 1);
+
+    if (PERF_RUN_GPU())
+    {
+        cv::Ptr<cv::GeneralizedHoughBallard> alg = cv::gpu::createGeneralizedHoughBallard();
+
+        const cv::gpu::GpuMat d_edges(edges);
+        const cv::gpu::GpuMat d_dx(dx);
+        const cv::gpu::GpuMat d_dy(dy);
+        cv::gpu::GpuMat positions;
+
+        alg->setTemplate(cv::gpu::GpuMat(templ));
+
+        TEST_CYCLE() alg->detect(d_edges, d_dx, d_dy, positions);
+
+        GPU_SANITY_CHECK(positions);
+    }
+    else
+    {
+        cv::Ptr<cv::GeneralizedHoughBallard> alg = cv::createGeneralizedHoughBallard();
+
+        cv::Mat positions;
+
+        alg->setTemplate(templ);
+
+        TEST_CYCLE() alg->detect(edges, dx, dy, positions);
+
+        CPU_SANITY_CHECK(positions);
+    }
+}
+
+PERF_TEST_P(Sz, GeneralizedHoughGuil, GPU_TYPICAL_MAT_SIZES)
+{
+    declare.time(10);
+
+    const cv::Size imageSize = GetParam();
 
     const cv::Mat templ = readImage("cv/shared/templ.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(templ.empty());
@@ -278,39 +317,32 @@ PERF_TEST_P(Method_Sz, GeneralizedHough,
 
     if (PERF_RUN_GPU())
     {
+        cv::Ptr<cv::GeneralizedHoughGuil> alg = cv::gpu::createGeneralizedHoughGuil();
+        alg->setMaxAngle(90.0);
+        alg->setAngleStep(2.0);
+
         const cv::gpu::GpuMat d_edges(edges);
         const cv::gpu::GpuMat d_dx(dx);
         const cv::gpu::GpuMat d_dy(dy);
-        cv::gpu::GpuMat posAndVotes;
+        cv::gpu::GpuMat positions;
 
-        cv::Ptr<cv::gpu::GeneralizedHough_GPU> d_hough = cv::gpu::GeneralizedHough_GPU::create(method);
-        if (method & GHT_ROTATION)
-        {
-            d_hough->set("maxAngle", 90.0);
-            d_hough->set("angleStep", 2.0);
-        }
+        alg->setTemplate(cv::gpu::GpuMat(templ));
 
-        d_hough->setTemplate(cv::gpu::GpuMat(templ));
+        TEST_CYCLE() alg->detect(d_edges, d_dx, d_dy, positions);
 
-        TEST_CYCLE() d_hough->detect(d_edges, d_dx, d_dy, posAndVotes);
-
-        const cv::gpu::GpuMat positions(1, posAndVotes.cols, CV_32FC4, posAndVotes.data);
         GPU_SANITY_CHECK(positions);
     }
     else
     {
+        cv::Ptr<cv::GeneralizedHoughGuil> alg = cv::createGeneralizedHoughGuil();
+        alg->setMaxAngle(90.0);
+        alg->setAngleStep(2.0);
+
         cv::Mat positions;
 
-        cv::Ptr<cv::GeneralizedHough> hough = cv::GeneralizedHough::create(method);
-        if (method & GHT_ROTATION)
-        {
-            hough->set("maxAngle", 90.0);
-            hough->set("angleStep", 2.0);
-        }
+        alg->setTemplate(templ);
 
-        hough->setTemplate(templ);
-
-        TEST_CYCLE() hough->detect(edges, dx, dy, positions);
+        TEST_CYCLE() alg->detect(edges, dx, dy, positions);
 
         CPU_SANITY_CHECK(positions);
     }
diff --git a/modules/gpuimgproc/perf/perf_main.cpp b/modules/gpuimgproc/perf/perf_main.cpp
index 6b3bec5f8..8eb27a27b 100644
--- a/modules/gpuimgproc/perf/perf_main.cpp
+++ b/modules/gpuimgproc/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpuimgproc, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpuimgproc)
diff --git a/modules/gpuimgproc/perf/perf_match_template.cpp b/modules/gpuimgproc/perf/perf_match_template.cpp
index f3af14914..35f36596c 100644
--- a/modules/gpuimgproc/perf/perf_match_template.cpp
+++ b/modules/gpuimgproc/perf/perf_match_template.cpp
@@ -76,7 +76,9 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate8U,
         const cv::gpu::GpuMat d_templ(templ);
         cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method);
+        cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+
+        TEST_CYCLE() alg->match(d_image, d_templ, dst);
 
         GPU_SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
     }
@@ -116,7 +118,9 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate32F,
         const cv::gpu::GpuMat d_templ(templ);
         cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method);
+        cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+
+        TEST_CYCLE() alg->match(d_image, d_templ, dst);
 
         GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
diff --git a/modules/gpuimgproc/perf/perf_precomp.cpp b/modules/gpuimgproc/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpuimgproc/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpuimgproc/src/bilateral_filter.cpp b/modules/gpuimgproc/src/bilateral_filter.cpp
index c95dbe4f5..b9d0b811e 100644
--- a/modules/gpuimgproc/src/bilateral_filter.cpp
+++ b/modules/gpuimgproc/src/bilateral_filter.cpp
@@ -47,7 +47,7 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::bilateralFilter(const GpuMat&, GpuMat&, int, float, float, int, Stream&) { throw_no_cuda(); }
+void cv::gpu::bilateralFilter(InputArray, OutputArray, int, float, float, int, Stream&) { throw_no_cuda(); }
 
 #else
 
@@ -60,7 +60,7 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& s)
+void cv::gpu::bilateralFilter(InputArray _src, OutputArray _dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& stream)
 {
     using cv::gpu::cudev::imgproc::bilateral_filter_gpu;
 
@@ -79,18 +79,21 @@ void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, f
     sigma_color = (sigma_color <= 0 ) ? 1 : sigma_color;
     sigma_spatial = (sigma_spatial <= 0 ) ? 1 : sigma_spatial;
 
-
     int radius = (kernel_size <= 0) ? cvRound(sigma_spatial*1.5) : kernel_size/2;
     kernel_size = std::max(radius, 1)*2 + 1;
 
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
+    GpuMat src = _src.getGpuMat();
+
+    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
+    CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
+
     const func_t func = funcs[src.depth()][src.channels() - 1];
-    CV_Assert(func != 0);
+    CV_Assert( func != 0 );
 
-    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
 
-    dst.create(src.size(), src.type());
-    func(src, dst, kernel_size, sigma_spatial, sigma_color, borderMode, StreamAccessor::getStream(s));
+    func(src, dst, kernel_size, sigma_spatial, sigma_color, borderMode, StreamAccessor::getStream(stream));
 }
 
 #endif
diff --git a/modules/gpuimgproc/src/blend.cpp b/modules/gpuimgproc/src/blend.cpp
index e92e37945..71c72a715 100644
--- a/modules/gpuimgproc/src/blend.cpp
+++ b/modules/gpuimgproc/src/blend.cpp
@@ -47,7 +47,7 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::blendLinear(InputArray, InputArray, InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
 #else
 
@@ -67,21 +67,28 @@ namespace cv { namespace gpu { namespace cudev
 
 using namespace ::cv::gpu::cudev::blend;
 
-void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
-                          GpuMat& result, Stream& stream)
+void cv::gpu::blendLinear(InputArray _img1, InputArray _img2, InputArray _weights1, InputArray _weights2,
+                          OutputArray _result, Stream& stream)
 {
-    CV_Assert(img1.size() == img2.size());
-    CV_Assert(img1.type() == img2.type());
-    CV_Assert(weights1.size() == img1.size());
-    CV_Assert(weights2.size() == img2.size());
-    CV_Assert(weights1.type() == CV_32F);
-    CV_Assert(weights2.type() == CV_32F);
+    GpuMat img1 = _img1.getGpuMat();
+    GpuMat img2 = _img2.getGpuMat();
+
+    GpuMat weights1 = _weights1.getGpuMat();
+    GpuMat weights2 = _weights2.getGpuMat();
+
+    CV_Assert( img1.size() == img2.size() );
+    CV_Assert( img1.type() == img2.type() );
+    CV_Assert( weights1.size() == img1.size() );
+    CV_Assert( weights2.size() == img2.size() );
+    CV_Assert( weights1.type() == CV_32FC1 );
+    CV_Assert( weights2.type() == CV_32FC1 );
 
     const Size size = img1.size();
     const int depth = img1.depth();
     const int cn = img1.channels();
 
-    result.create(size, CV_MAKE_TYPE(depth, cn));
+    _result.create(size, CV_MAKE_TYPE(depth, cn));
+    GpuMat result = _result.getGpuMat();
 
     switch (depth)
     {
diff --git a/modules/gpuimgproc/src/canny.cpp b/modules/gpuimgproc/src/canny.cpp
index 9a3357564..17f03950f 100644
--- a/modules/gpuimgproc/src/canny.cpp
+++ b/modules/gpuimgproc/src/canny.cpp
@@ -47,46 +47,10 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_no_cuda(); }
-void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_no_cuda(); }
-void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_no_cuda(); }
-void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_no_cuda(); }
-void cv::gpu::CannyBuf::create(const Size&, int) { throw_no_cuda(); }
-void cv::gpu::CannyBuf::release() { throw_no_cuda(); }
+Ptr<CannyEdgeDetector> cv::gpu::createCannyEdgeDetector(double, double, int, bool) { throw_no_cuda(); return Ptr<CannyEdgeDetector>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)
-{
-    if (apperture_size > 0)
-    {
-        ensureSizeIsEnough(image_size, CV_32SC1, dx);
-        ensureSizeIsEnough(image_size, CV_32SC1, dy);
-
-        if (apperture_size != 3)
-        {
-            filterDX = createDerivFilter(CV_8UC1, CV_32S, 1, 0, apperture_size, false, 1, BORDER_REPLICATE);
-            filterDY = createDerivFilter(CV_8UC1, CV_32S, 0, 1, apperture_size, false, 1, BORDER_REPLICATE);
-        }
-    }
-
-    ensureSizeIsEnough(image_size, CV_32FC1, mag);
-    ensureSizeIsEnough(image_size, CV_32SC1, map);
-
-    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st1);
-    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st2);
-}
-
-void cv::gpu::CannyBuf::release()
-{
-    dx.release();
-    dy.release();
-    mag.release();
-    map.release();
-    st1.release();
-    st2.release();
-}
-
 namespace canny
 {
     void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
@@ -103,84 +67,168 @@ namespace canny
 
 namespace
 {
-    void CannyCaller(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
+    class CannyImpl : public CannyEdgeDetector
     {
-        using namespace canny;
+    public:
+        CannyImpl(double low_thresh, double high_thresh, int apperture_size, bool L2gradient) :
+            low_thresh_(low_thresh), high_thresh_(high_thresh), apperture_size_(apperture_size), L2gradient_(L2gradient)
+        {
+            old_apperture_size_ = -1;
+        }
 
-        buf.map.setTo(Scalar::all(0));
-        calcMap(dx, dy, buf.mag, buf.map, low_thresh, high_thresh);
+        void detect(InputArray image, OutputArray edges);
+        void detect(InputArray dx, InputArray dy, OutputArray edges);
 
-        edgesHysteresisLocal(buf.map, buf.st1.ptr<ushort2>());
+        void setLowThreshold(double low_thresh) { low_thresh_ = low_thresh; }
+        double getLowThreshold() const { return low_thresh_; }
 
-        edgesHysteresisGlobal(buf.map, buf.st1.ptr<ushort2>(), buf.st2.ptr<ushort2>());
+        void setHighThreshold(double high_thresh) { high_thresh_ = high_thresh; }
+        double getHighThreshold() const { return high_thresh_; }
 
-        getEdges(buf.map, dst);
+        void setAppertureSize(int apperture_size) { apperture_size_ = apperture_size; }
+        int getAppertureSize() const { return apperture_size_; }
+
+        void setL2Gradient(bool L2gradient) { L2gradient_ = L2gradient; }
+        bool getL2Gradient() const { return L2gradient_; }
+
+        void write(FileStorage& fs) const
+        {
+            fs << "name" << "Canny_GPU"
+            << "low_thresh" << low_thresh_
+            << "high_thresh" << high_thresh_
+            << "apperture_size" << apperture_size_
+            << "L2gradient" << L2gradient_;
+        }
+
+        void read(const FileNode& fn)
+        {
+            CV_Assert( String(fn["name"]) == "Canny_GPU" );
+            low_thresh_ = (double)fn["low_thresh"];
+            high_thresh_ = (double)fn["high_thresh"];
+            apperture_size_ = (int)fn["apperture_size"];
+            L2gradient_ = (int)fn["L2gradient"] != 0;
+        }
+
+    private:
+        void createBuf(Size image_size);
+        void CannyCaller(GpuMat& edges);
+
+        double low_thresh_;
+        double high_thresh_;
+        int apperture_size_;
+        bool L2gradient_;
+
+        GpuMat dx_, dy_;
+        GpuMat mag_;
+        GpuMat map_;
+        GpuMat st1_, st2_;
+#ifdef HAVE_OPENCV_GPUFILTERS
+        Ptr<Filter> filterDX_, filterDY_;
+#endif
+        int old_apperture_size_;
+    };
+
+    void CannyImpl::detect(InputArray _image, OutputArray _edges)
+    {
+        GpuMat image = _image.getGpuMat();
+
+        CV_Assert( image.type() == CV_8UC1 );
+        CV_Assert( deviceSupports(SHARED_ATOMICS) );
+
+        if (low_thresh_ > high_thresh_)
+            std::swap(low_thresh_, high_thresh_);
+
+        createBuf(image.size());
+
+        _edges.create(image.size(), CV_8UC1);
+        GpuMat edges = _edges.getGpuMat();
+
+        if (apperture_size_ == 3)
+        {
+            Size wholeSize;
+            Point ofs;
+            image.locateROI(wholeSize, ofs);
+            GpuMat srcWhole(wholeSize, image.type(), image.datastart, image.step);
+
+            canny::calcMagnitude(srcWhole, ofs.x, ofs.y, dx_, dy_, mag_, L2gradient_);
+        }
+        else
+        {
+#ifndef HAVE_OPENCV_GPUFILTERS
+            throw_no_cuda();
+#else
+            filterDX_->apply(image, dx_);
+            filterDY_->apply(image, dy_);
+
+            canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
+#endif
+        }
+
+        CannyCaller(edges);
+    }
+
+    void CannyImpl::detect(InputArray _dx, InputArray _dy, OutputArray _edges)
+    {
+        GpuMat dx = _dx.getGpuMat();
+        GpuMat dy = _dy.getGpuMat();
+
+        CV_Assert( dx.type() == CV_32SC1 );
+        CV_Assert( dy.type() == dx.type() && dy.size() == dx.size() );
+        CV_Assert( deviceSupports(SHARED_ATOMICS) );
+
+        dx.copyTo(dx_);
+        dy.copyTo(dy_);
+
+        if (low_thresh_ > high_thresh_)
+            std::swap(low_thresh_, high_thresh_);
+
+        createBuf(dx.size());
+
+        _edges.create(dx.size(), CV_8UC1);
+        GpuMat edges = _edges.getGpuMat();
+
+        canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
+
+        CannyCaller(edges);
+    }
+
+    void CannyImpl::createBuf(Size image_size)
+    {
+        ensureSizeIsEnough(image_size, CV_32SC1, dx_);
+        ensureSizeIsEnough(image_size, CV_32SC1, dy_);
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+        if (apperture_size_ != 3 && apperture_size_ != old_apperture_size_)
+        {
+            filterDX_ = gpu::createDerivFilter(CV_8UC1, CV_32S, 1, 0, apperture_size_, false, 1, BORDER_REPLICATE);
+            filterDY_ = gpu::createDerivFilter(CV_8UC1, CV_32S, 0, 1, apperture_size_, false, 1, BORDER_REPLICATE);
+            old_apperture_size_ = apperture_size_;
+        }
+#endif
+
+        ensureSizeIsEnough(image_size, CV_32FC1, mag_);
+        ensureSizeIsEnough(image_size, CV_32SC1, map_);
+
+        ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st1_);
+        ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st2_);
+    }
+
+    void CannyImpl::CannyCaller(GpuMat& edges)
+    {
+        map_.setTo(Scalar::all(0));
+        canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_));
+
+        canny::edgesHysteresisLocal(map_, st1_.ptr<ushort2>());
+
+        canny::edgesHysteresisGlobal(map_, st1_.ptr<ushort2>(), st2_.ptr<ushort2>());
+
+        canny::getEdges(map_, edges);
     }
 }
 
-void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
+Ptr<CannyEdgeDetector> cv::gpu::createCannyEdgeDetector(double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
 {
-    CannyBuf buf;
-    Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
-}
-
-void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
-{
-    using namespace canny;
-
-    CV_Assert(src.type() == CV_8UC1);
-
-    if (!deviceSupports(SHARED_ATOMICS))
-        CV_Error(cv::Error::StsNotImplemented, "The device doesn't support shared atomics");
-
-    if( low_thresh > high_thresh )
-        std::swap( low_thresh, high_thresh);
-
-    dst.create(src.size(), CV_8U);
-    buf.create(src.size(), apperture_size);
-
-    if (apperture_size == 3)
-    {
-        Size wholeSize;
-        Point ofs;
-        src.locateROI(wholeSize, ofs);
-        GpuMat srcWhole(wholeSize, src.type(), src.datastart, src.step);
-
-        calcMagnitude(srcWhole, ofs.x, ofs.y, buf.dx, buf.dy, buf.mag, L2gradient);
-    }
-    else
-    {
-        buf.filterDX->apply(src, buf.dx);
-        buf.filterDY->apply(src, buf.dy);
-
-        calcMagnitude(buf.dx, buf.dy, buf.mag, L2gradient);
-    }
-
-    CannyCaller(buf.dx, buf.dy, buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
-}
-
-void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
-{
-    CannyBuf buf;
-    Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
-}
-
-void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
-{
-    using namespace canny;
-
-    CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
-    CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
-
-    if( low_thresh > high_thresh )
-        std::swap( low_thresh, high_thresh);
-
-    dst.create(dx.size(), CV_8U);
-    buf.create(dx.size(), -1);
-
-    calcMagnitude(dx, dy, buf.mag, L2gradient);
-
-    CannyCaller(dx, dy, buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
+    return makePtr<CannyImpl>(low_thresh, high_thresh, apperture_size, L2gradient);
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/color.cpp b/modules/gpuimgproc/src/color.cpp
index c1af7ce75..3d714b628 100644
--- a/modules/gpuimgproc/src/color.cpp
+++ b/modules/gpuimgproc/src/color.cpp
@@ -47,15 +47,16 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
+void cv::gpu::cvtColor(InputArray, OutputArray, int, int, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::demosaicing(const GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
+void cv::gpu::demosaicing(InputArray, OutputArray, int, int, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::swapChannels(GpuMat&, const int[], Stream&) { throw_no_cuda(); }
+void cv::gpu::swapChannels(InputOutputArray, const int[], Stream&) { throw_no_cuda(); }
 
-void cv::gpu::gammaCorrection(const GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
+void cv::gpu::gammaCorrection(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::alphaComp(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::alphaComp(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -80,363 +81,459 @@ namespace
 {
     typedef void (*gpu_func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 
-    void bgr_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {bgr_to_rgb_8u, 0, bgr_to_rgb_16u, 0, 0, bgr_to_rgb_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 3));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 3));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {bgr_to_bgra_8u, 0, bgr_to_bgra_16u, 0, 0, bgr_to_bgra_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 4));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 4));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {bgr_to_rgba_8u, 0, bgr_to_rgba_16u, 0, 0, bgr_to_rgba_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 4));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 4));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgra_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgra_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {bgra_to_bgr_8u, 0, bgra_to_bgr_16u, 0, 0, bgra_to_bgr_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 3));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 3));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgra_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgra_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {bgra_to_rgb_8u, 0, bgra_to_rgb_16u, 0, 0, bgra_to_rgb_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 3));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 3));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgra_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgra_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {bgra_to_rgba_8u, 0, bgra_to_rgba_16u, 0, 0, bgra_to_rgba_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 4));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 4));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 3);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 3 );
 
-        cudev::bgr_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 3);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 3 );
 
-        cudev::bgr_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void rgb_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 3);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 3 );
 
-        cudev::rgb_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::rgb_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void rgb_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 3);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 3 );
 
-        cudev::rgb_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::rgb_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgra_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgra_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 4 );
 
-        cudev::bgra_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgra_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgra_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgra_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 4 );
 
-        cudev::bgra_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgra_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgba_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void rgba_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 4 );
 
-        cudev::rgba_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::rgba_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgba_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void rgba_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 4 );
 
-        cudev::rgba_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::rgba_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr555_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr555_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC3);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr555_to_rgb(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC3);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr555_to_rgb(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr565_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr565_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC3);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr565_to_rgb(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC3);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr565_to_rgb(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr555_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr555_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC3);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr555_to_bgr(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC3);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr555_to_bgr(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr565_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr565_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC3);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr565_to_bgr(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC3);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr565_to_bgr(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr555_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr555_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC4);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr555_to_rgba(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC4);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr555_to_rgba(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr565_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr565_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC4);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr565_to_rgba(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC4);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr565_to_rgba(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr555_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr555_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC4);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr555_to_bgra(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC4);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr555_to_bgra(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr565_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr565_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC4);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr565_to_bgra(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC4);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr565_to_bgra(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void gray_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void gray_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {gray_to_bgr_8u, 0, gray_to_bgr_16u, 0, 0, gray_to_bgr_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 1);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 3));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 1 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 3));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void gray_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void gray_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {gray_to_bgra_8u, 0, gray_to_bgra_16u, 0, 0, gray_to_bgra_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 1);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 4));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 1 );
+
+        _dst.create(src.size(), CV_MAKETYPE(src.depth(), 4));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void gray_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void gray_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 1);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 1 );
 
-        cudev::gray_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::gray_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void gray_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void gray_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 1);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC2);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 1 );
 
-        cudev::gray_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC2);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::gray_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr555_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr555_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC1);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr555_to_gray(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC1);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr555_to_gray(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr565_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr565_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        CV_Assert(src.depth() == CV_8U);
-        CV_Assert(src.channels() == 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_8UC1);
+        CV_Assert( src.depth() == CV_8U );
+        CV_Assert( src.channels() == 2 );
 
-        cudev::bgr565_to_gray(src, dst, StreamAccessor::getStream(stream));
+        _dst.create(src.size(), CV_8UC1);
+        GpuMat dst = _dst.getGpuMat();
+
+        cv::gpu::cudev::bgr565_to_gray(src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void rgb_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {rgb_to_gray_8u, 0, rgb_to_gray_16u, 0, 0, rgb_to_gray_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 1));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 1));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgr_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {bgr_to_gray_8u, 0, bgr_to_gray_16u, 0, 0, bgr_to_gray_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 1));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 1));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgba_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void rgba_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {rgba_to_gray_8u, 0, rgba_to_gray_16u, 0, 0, rgba_to_gray_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 1));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 1));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgra_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream)
+    void bgra_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[] = {bgra_to_gray_8u, 0, bgra_to_gray_16u, 0, 0, bgra_to_gray_32f};
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 1));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 1));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_yuv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void rgb_to_yuv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -453,16 +550,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_yuv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bgr_to_yuv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -479,16 +579,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void yuv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void yuv_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -505,16 +608,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void yuv_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void yuv_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -531,16 +637,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_YCrCb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void rgb_to_YCrCb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -557,16 +666,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_YCrCb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bgr_to_YCrCb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -583,16 +695,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void YCrCb_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void YCrCb_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -609,16 +724,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void YCrCb_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void YCrCb_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -635,16 +753,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_xyz(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void rgb_to_xyz(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -661,16 +782,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_xyz(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bgr_to_xyz(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -687,16 +811,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void xyz_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void xyz_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -713,16 +840,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void xyz_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void xyz_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -739,16 +869,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_hsv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void rgb_to_hsv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -765,16 +898,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_hsv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bgr_to_hsv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -791,16 +927,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void hsv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void hsv_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -817,16 +956,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void hsv_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void hsv_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -843,16 +985,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_hls(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void rgb_to_hls(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -869,16 +1014,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_hls(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bgr_to_hls(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -895,16 +1043,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void hls_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void hls_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -921,16 +1072,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void hls_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void hls_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -947,16 +1101,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_hsv_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void rgb_to_hsv_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -973,16 +1130,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_hsv_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bgr_to_hsv_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -999,16 +1159,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void hsv_to_rgb_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void hsv_to_rgb_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -1025,16 +1188,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void hsv_to_bgr_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void hsv_to_bgr_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -1051,16 +1217,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_hls_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void rgb_to_hls_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -1077,16 +1246,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_hls_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bgr_to_hls_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -1103,16 +1275,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void hls_to_rgb_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void hls_to_rgb_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -1129,16 +1304,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void hls_to_bgr_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void hls_to_bgr_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][6] =
@@ -1155,16 +1333,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bgr_to_lab(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1181,16 +1362,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void rgb_to_lab(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1207,16 +1391,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void lbgr_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void lbgr_to_lab(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1233,16 +1420,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void lrgb_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void lrgb_to_lab(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1259,16 +1449,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void lab_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void lab_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1285,16 +1478,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void lab_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void lab_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1311,16 +1507,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void lab_to_lbgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void lab_to_lbgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1337,16 +1536,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void lab_to_lrgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void lab_to_lrgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1363,16 +1565,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void bgr_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bgr_to_luv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1389,16 +1594,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgb_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void rgb_to_luv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1415,16 +1623,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void lbgr_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void lbgr_to_luv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1441,16 +1652,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void lrgb_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void lrgb_to_luv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1467,16 +1681,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void luv_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void luv_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1493,16 +1710,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void luv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void luv_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1519,16 +1739,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void luv_to_lbgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void luv_to_lbgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1545,16 +1768,19 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void luv_to_lrgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void luv_to_lrgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
         using namespace cv::gpu::cudev;
         static const gpu_func_t funcs[2][2][2] =
@@ -1571,28 +1797,34 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F);
-        CV_Assert(src.channels() == 3 || src.channels() == 4);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        CV_Assert( src.depth() == CV_8U || src.depth() == CV_32F );
+        CV_Assert( src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream));
     }
 
-    void rgba_to_mbgra(const GpuMat& src, GpuMat& dst, int, Stream& st)
+    void rgba_to_mbgra(InputArray _src, OutputArray _dst, int, Stream& _stream)
     {
     #if (CUDA_VERSION < 5000)
-        (void)src;
-        (void)dst;
-        (void)st;
+        (void) _src;
+        (void) _dst;
+        (void) _stream;
         CV_Error( Error::StsBadFlag, "Unknown/unsupported color conversion code" );
     #else
-        CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), src.type());
+        CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 );
 
-        cudaStream_t stream = StreamAccessor::getStream(st);
+        _dst.create(src.size(), src.type());
+        GpuMat dst = _dst.getGpuMat();
+
+        cudaStream_t stream = StreamAccessor::getStream(_stream);
         NppStreamHandler h(stream);
 
         NppiSize oSizeROI;
@@ -1609,7 +1841,7 @@ namespace
     #endif
     }
 
-    void bayer_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, bool blue_last, bool start_with_green, Stream& stream)
+    void bayer_to_bgr(InputArray _src, OutputArray _dst, int dcn, bool blue_last, bool start_with_green, Stream& stream)
     {
         typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
         static const func_t funcs[3][4] =
@@ -1621,32 +1853,35 @@ namespace
 
         if (dcn <= 0) dcn = 3;
 
-        CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1);
-        CV_Assert(src.rows > 2 && src.cols > 2);
-        CV_Assert(dcn == 3 || dcn == 4);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn));
+        CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 );
+        CV_Assert( src.rows > 2 && src.cols > 2 );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()][dcn - 1](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream));
     }
-    void bayerBG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bayerBG_to_bgr(InputArray src, OutputArray dst, int dcn, Stream& stream)
     {
         bayer_to_bgr(src, dst, dcn, false, false, stream);
     }
-    void bayerGB_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bayerGB_to_bgr(InputArray src, OutputArray dst, int dcn, Stream& stream)
     {
         bayer_to_bgr(src, dst, dcn, false, true, stream);
     }
-    void bayerRG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bayerRG_to_bgr(InputArray src, OutputArray dst, int dcn, Stream& stream)
     {
         bayer_to_bgr(src, dst, dcn, true, false, stream);
     }
-    void bayerGR_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
+    void bayerGR_to_bgr(InputArray src, OutputArray dst, int dcn, Stream& stream)
     {
         bayer_to_bgr(src, dst, dcn, true, true, stream);
     }
 
-    void bayer_to_gray(const GpuMat& src, GpuMat& dst, bool blue_last, bool start_with_green, Stream& stream)
+    void bayer_to_gray(InputArray _src, OutputArray _dst, bool blue_last, bool start_with_green, Stream& stream)
     {
         typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
         static const func_t funcs[3] =
@@ -1656,26 +1891,29 @@ namespace
             Bayer2BGR_16u_gpu<1>,
         };
 
-        CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1);
-        CV_Assert(src.rows > 2 && src.cols > 2);
+        GpuMat src = _src.getGpuMat();
 
-        dst.create(src.size(), CV_MAKETYPE(src.depth(), 1));
+        CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 );
+        CV_Assert( src.rows > 2 && src.cols > 2 );
+
+        _dst.create(src.size(), CV_MAKE_TYPE(src.depth(), 1));
+        GpuMat dst = _dst.getGpuMat();
 
         funcs[src.depth()](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream));
     }
-    void bayerBG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
+    void bayerBG_to_gray(InputArray src, OutputArray dst, int /*dcn*/, Stream& stream)
     {
         bayer_to_gray(src, dst, false, false, stream);
     }
-    void bayerGB_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
+    void bayerGB_to_gray(InputArray src, OutputArray dst, int /*dcn*/, Stream& stream)
     {
         bayer_to_gray(src, dst, false, true, stream);
     }
-    void bayerRG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
+    void bayerRG_to_gray(InputArray src, OutputArray dst, int /*dcn*/, Stream& stream)
     {
         bayer_to_gray(src, dst, true, false, stream);
     }
-    void bayerGR_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
+    void bayerGR_to_gray(InputArray src, OutputArray dst, int /*dcn*/, Stream& stream)
     {
         bayer_to_gray(src, dst, true, true, stream);
     }
@@ -1684,9 +1922,9 @@ namespace
 ////////////////////////////////////////////////////////////////////////
 // cvtColor
 
-void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
+void cv::gpu::cvtColor(InputArray src, OutputArray dst, int code, int dcn, Stream& stream)
 {
-    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream);
+    typedef void (*func_t)(InputArray src, OutputArray dst, int dcn, Stream& stream);
     static const func_t funcs[] =
     {
         bgr_to_bgra,            // CV_BGR2BGRA    =0
@@ -1857,12 +2095,12 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
         0,                      // CV_COLORCVT_MAX  = 127
     };
 
-    CV_Assert(code < 128);
+    CV_Assert( code < 128 );
 
     func_t func = funcs[code];
 
     if (func == 0)
-        CV_Error( cv::Error::StsBadFlag, "Unknown/unsupported color conversion code" );
+        CV_Error(Error::StsBadFlag, "Unknown/unsupported color conversion code");
 
     func(src, dst, dcn, stream);
 }
@@ -1870,32 +2108,33 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
 ////////////////////////////////////////////////////////////////////////
 // demosaicing
 
-void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
+void cv::gpu::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Stream& stream)
 {
-    const int depth = src.depth();
-
-    CV_Assert( src.channels() == 1 );
-
     switch (code)
     {
     case cv::COLOR_BayerBG2GRAY: case cv::COLOR_BayerGB2GRAY: case cv::COLOR_BayerRG2GRAY: case cv::COLOR_BayerGR2GRAY:
-        bayer_to_gray(src, dst, code == cv::COLOR_BayerBG2GRAY || code == cv::COLOR_BayerGB2GRAY, code == cv::COLOR_BayerGB2GRAY || code == cv::COLOR_BayerGR2GRAY, stream);
+        bayer_to_gray(_src, _dst, code == cv::COLOR_BayerBG2GRAY || code == cv::COLOR_BayerGB2GRAY, code == cv::COLOR_BayerGB2GRAY || code == cv::COLOR_BayerGR2GRAY, stream);
         break;
 
     case cv::COLOR_BayerBG2BGR: case cv::COLOR_BayerGB2BGR: case cv::COLOR_BayerRG2BGR: case cv::COLOR_BayerGR2BGR:
-        bayer_to_bgr(src, dst, dcn, code == cv::COLOR_BayerBG2BGR || code == cv::COLOR_BayerGB2BGR, code == cv::COLOR_BayerGB2BGR || code == cv::COLOR_BayerGR2BGR, stream);
+        bayer_to_bgr(_src, _dst, dcn, code == cv::COLOR_BayerBG2BGR || code == cv::COLOR_BayerGB2BGR, code == cv::COLOR_BayerGB2BGR || code == cv::COLOR_BayerGR2BGR, stream);
         break;
 
     case COLOR_BayerBG2BGR_MHT: case COLOR_BayerGB2BGR_MHT: case COLOR_BayerRG2BGR_MHT: case COLOR_BayerGR2BGR_MHT:
     {
-        if (dcn <= 0)
-            dcn = 3;
+        if (dcn <= 0) dcn = 3;
+
+        GpuMat src = _src.getGpuMat();
+        const int depth = _src.depth();
 
         CV_Assert( depth == CV_8U );
+        CV_Assert( src.channels() == 1 );
         CV_Assert( dcn == 3 || dcn == 4 );
 
-        dst.create(src.size(), CV_MAKETYPE(depth, dcn));
-        dst.setTo(Scalar::all(0));
+        _dst.create(_src.size(), CV_MAKE_TYPE(depth, dcn));
+        GpuMat dst = _dst.getGpuMat();
+
+        dst.setTo(Scalar::all(0), stream);
 
         Size wholeSize;
         Point ofs;
@@ -1906,19 +2145,24 @@ void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Str
                                         code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1);
 
         if (dcn == 3)
-            cudev::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
+            cv::gpu::cudev::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
         else
-            cudev::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
+            cv::gpu::cudev::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
 
         break;
     }
 
     case COLOR_BayerBG2GRAY_MHT: case COLOR_BayerGB2GRAY_MHT: case COLOR_BayerRG2GRAY_MHT: case COLOR_BayerGR2GRAY_MHT:
     {
+        GpuMat src = _src.getGpuMat();
+        const int depth = _src.depth();
+
         CV_Assert( depth == CV_8U );
 
-        dst.create(src.size(), CV_MAKETYPE(depth, 1));
-        dst.setTo(Scalar::all(0));
+        _dst.create(_src.size(), CV_MAKE_TYPE(depth, 1));
+        GpuMat dst = _dst.getGpuMat();
+
+        dst.setTo(Scalar::all(0), stream);
 
         Size wholeSize;
         Point ofs;
@@ -1928,25 +2172,26 @@ void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Str
         const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1,
                                         code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1);
 
-        cudev::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
+        cv::gpu::cudev::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
 
         break;
     }
 
     default:
-        CV_Error( cv::Error::StsBadFlag, "Unknown / unsupported color conversion code" );
+        CV_Error(Error::StsBadFlag, "Unknown / unsupported color conversion code");
     }
 }
 
 ////////////////////////////////////////////////////////////////////////
 // swapChannels
 
-void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
+void cv::gpu::swapChannels(InputOutputArray _image, const int dstOrder[4], Stream& _stream)
 {
-    CV_Assert(image.type() == CV_8UC4);
+    GpuMat image = _image.getGpuMat();
 
-    cudaStream_t stream = StreamAccessor::getStream(s);
+    CV_Assert( image.type() == CV_8UC4 );
 
+    cudaStream_t stream = StreamAccessor::getStream(_stream);
     NppStreamHandler h(stream);
 
     NppiSize sz;
@@ -1962,14 +2207,14 @@ void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
 ////////////////////////////////////////////////////////////////////////
 // gammaCorrection
 
-void cv::gpu::gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward, Stream& stream)
+void cv::gpu::gammaCorrection(InputArray _src, OutputArray _dst, bool forward, Stream& stream)
 {
 #if (CUDA_VERSION < 5000)
-    (void)src;
-    (void)dst;
-    (void)forward;
-    (void)stream;
-    CV_Error( cv::Error::StsNotImplemented, "This function works only with CUDA 5.0 or higher" );
+    (void) _src;
+    (void) _dst;
+    (void) forward;
+    (void) stream;
+    CV_Error(Error::StsNotImplemented, "This function works only with CUDA 5.0 or higher");
 #else
     typedef NppStatus (*func_t)(const Npp8u* pSrc, int nSrcStep, Npp8u* pDst, int nDstStep, NppiSize oSizeROI);
     typedef NppStatus (*func_inplace_t)(Npp8u* pSrcDst, int nSrcDstStep, NppiSize oSizeROI);
@@ -1985,9 +2230,12 @@ void cv::gpu::gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward, Stre
         {0, 0, 0, nppiGammaFwd_8u_C3IR, nppiGammaFwd_8u_AC4IR}
     };
 
-    CV_Assert(src.type() == CV_8UC3 || src.type() == CV_8UC4);
+    GpuMat src = _src.getGpuMat();
 
-    dst.create(src.size(), src.type());
+    CV_Assert( src.type() == CV_8UC3 || src.type() == CV_8UC4 );
+
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
 
     NppStreamHandler h(StreamAccessor::getStream(stream));
 
@@ -2036,7 +2284,7 @@ namespace
     };
 }
 
-void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream)
+void cv::gpu::alphaComp(InputArray _img1, InputArray _img2, OutputArray _dst, int alpha_op, Stream& stream)
 {
     static const NppiAlphaOp npp_alpha_ops[] = {
         NPPI_OP_ALPHA_OVER,
@@ -2055,7 +2303,6 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
     };
 
     typedef void (*func_t)(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream);
-
     static const func_t funcs[] =
     {
         NppAlphaComp<CV_8U, nppiAlphaComp_8u_AC4R>::call,
@@ -2066,10 +2313,14 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
         NppAlphaComp<CV_32F, nppiAlphaComp_32f_AC4R>::call
     };
 
+    GpuMat img1 = _img1.getGpuMat();
+    GpuMat img2 = _img2.getGpuMat();
+
     CV_Assert( img1.type() == CV_8UC4 || img1.type() == CV_16UC4 || img1.type() == CV_32SC4 || img1.type() == CV_32FC4 );
     CV_Assert( img1.size() == img2.size() && img1.type() == img2.type() );
 
-    dst.create(img1.size(), img1.type());
+    _dst.create(img1.size(), img1.type());
+    GpuMat dst = _dst.getGpuMat();
 
     const func_t func = funcs[img1.depth()];
 
diff --git a/modules/gpuimgproc/src/corners.cpp b/modules/gpuimgproc/src/corners.cpp
index 824a3308e..6b53e6f85 100644
--- a/modules/gpuimgproc/src/corners.cpp
+++ b/modules/gpuimgproc/src/corners.cpp
@@ -45,15 +45,10 @@
 using namespace cv;
 using namespace cv::gpu;
 
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUFILTERS)
 
-void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_no_cuda(); }
-void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int) { throw_no_cuda(); }
-void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int, Stream&) { throw_no_cuda(); }
-
-void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_no_cuda(); }
-void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int) { throw_no_cuda(); }
-void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
+Ptr<gpu::CornernessCriteria> cv::gpu::createHarrisCorner(int, int, int, double, int) { throw_no_cuda(); return Ptr<gpu::CornernessCriteria>(); }
+Ptr<gpu::CornernessCriteria> cv::gpu::createMinEigenValCorner(int, int, int, int) { throw_no_cuda(); return Ptr<gpu::CornernessCriteria>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -68,89 +63,127 @@ namespace cv { namespace gpu { namespace cudev
 
 namespace
 {
-    void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
+    class CornerBase : public CornernessCriteria
     {
-        (void) buf;
+    protected:
+        CornerBase(int srcType, int blockSize, int ksize, int borderType);
 
-        double scale = static_cast<double>(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
+        void extractCovData(const GpuMat& src, Stream& stream);
 
-        if (ksize < 0)
+        int srcType_;
+        int blockSize_;
+        int ksize_;
+        int borderType_;
+        GpuMat Dx_, Dy_;
+
+    private:
+        Ptr<gpu::Filter> filterDx_, filterDy_;
+    };
+
+    CornerBase::CornerBase(int srcType, int blockSize, int ksize, int borderType) :
+        srcType_(srcType), blockSize_(blockSize), ksize_(ksize), borderType_(borderType)
+    {
+        CV_Assert( borderType_ == BORDER_REFLECT101 || borderType_ == BORDER_REPLICATE || borderType_ == BORDER_REFLECT );
+
+        const int sdepth = CV_MAT_DEPTH(srcType_);
+        const int cn = CV_MAT_CN(srcType_);
+
+        CV_Assert( cn == 1 );
+
+        double scale = static_cast<double>(1 << ((ksize_ > 0 ? ksize_ : 3) - 1)) * blockSize_;
+
+        if (ksize_ < 0)
             scale *= 2.;
 
-        if (src.depth() == CV_8U)
+        if (sdepth == CV_8U)
             scale *= 255.;
 
         scale = 1./scale;
 
-        Dx.create(src.size(), CV_32F);
-        Dy.create(src.size(), CV_32F);
-
-        Ptr<gpu::Filter> filterDx, filterDy;
-
-        if (ksize > 0)
+        if (ksize_ > 0)
         {
-            filterDx = gpu::createSobelFilter(src.type(), CV_32F, 1, 0, ksize, scale, borderType);
-            filterDy = gpu::createSobelFilter(src.type(), CV_32F, 0, 1, ksize, scale, borderType);
+            filterDx_ = gpu::createSobelFilter(srcType, CV_32F, 1, 0, ksize_, scale, borderType_);
+            filterDy_ = gpu::createSobelFilter(srcType, CV_32F, 0, 1, ksize_, scale, borderType_);
         }
         else
         {
-            filterDx = gpu::createScharrFilter(src.type(), CV_32F, 1, 0, scale, borderType);
-            filterDy = gpu::createScharrFilter(src.type(), CV_32F, 0, 1, scale, borderType);
+            filterDx_ = gpu::createScharrFilter(srcType, CV_32F, 1, 0, scale, borderType_);
+            filterDy_ = gpu::createScharrFilter(srcType, CV_32F, 0, 1, scale, borderType_);
+        }
+    }
+
+    void CornerBase::extractCovData(const GpuMat& src, Stream& stream)
+    {
+        CV_Assert( src.type() == srcType_ );
+        filterDx_->apply(src, Dx_, stream);
+        filterDy_->apply(src, Dy_, stream);
+    }
+
+    class Harris : public CornerBase
+    {
+    public:
+        Harris(int srcType, int blockSize, int ksize, double k, int borderType) :
+            CornerBase(srcType, blockSize, ksize, borderType), k_(static_cast<float>(k))
+        {
         }
 
-        filterDx->apply(src, Dx);
-        filterDy->apply(src, Dy);
+        void compute(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
+
+    private:
+        float k_;
+    };
+
+    void Harris::compute(InputArray _src, OutputArray _dst, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::imgproc;
+
+        GpuMat src = _src.getGpuMat();
+
+        extractCovData(src, stream);
+
+        _dst.create(src.size(), CV_32FC1);
+        GpuMat dst = _dst.getGpuMat();
+
+        cornerHarris_gpu(blockSize_, k_, Dx_, Dy_, dst, borderType_, StreamAccessor::getStream(stream));
+    }
+
+    class MinEigenVal : public CornerBase
+    {
+    public:
+        MinEigenVal(int srcType, int blockSize, int ksize, int borderType) :
+            CornerBase(srcType, blockSize, ksize, borderType)
+        {
+        }
+
+        void compute(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
+
+    private:
+        float k_;
+    };
+
+    void MinEigenVal::compute(InputArray _src, OutputArray _dst, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::imgproc;
+
+        GpuMat src = _src.getGpuMat();
+
+        extractCovData(src, stream);
+
+        _dst.create(src.size(), CV_32FC1);
+        GpuMat dst = _dst.getGpuMat();
+
+        cornerMinEigenVal_gpu(blockSize_, Dx_, Dy_, dst, borderType_, StreamAccessor::getStream(stream));
     }
 }
 
-void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)
+Ptr<gpu::CornernessCriteria> cv::gpu::createHarrisCorner(int srcType, int blockSize, int ksize, double k, int borderType)
 {
-    GpuMat Dx, Dy;
-    cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k, borderType);
+    return makePtr<Harris>(srcType, blockSize, ksize, k, borderType);
 }
 
-void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType)
+Ptr<gpu::CornernessCriteria> cv::gpu::createMinEigenValCorner(int srcType, int blockSize, int ksize, int borderType)
 {
-    GpuMat buf;
-    cornerHarris(src, dst, Dx, Dy, buf, blockSize, ksize, k, borderType);
-}
-
-void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, int borderType, Stream& stream)
-{
-    using namespace cv::gpu::cudev::imgproc;
-
-    CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
-
-    extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
-
-    dst.create(src.size(), CV_32F);
-
-    cornerHarris_gpu(blockSize, static_cast<float>(k), Dx, Dy, dst, borderType, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)
-{
-    GpuMat Dx, Dy;
-    cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize, borderType);
-}
-
-void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)
-{
-    GpuMat buf;
-    cornerMinEigenVal(src, dst, Dx, Dy, buf, blockSize, ksize, borderType);
-}
-
-void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
-{
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
-
-    extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
-
-    dst.create(src.size(), CV_32F);
-
-    cornerMinEigenVal_gpu(blockSize, Dx, Dy, dst, borderType, StreamAccessor::getStream(stream));
+    return makePtr<MinEigenVal>(srcType, blockSize, ksize, borderType);
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/cuda/bilateral_filter.cu b/modules/gpuimgproc/src/cuda/bilateral_filter.cu
index 6aa5df27a..3192f649b 100644
--- a/modules/gpuimgproc/src/cuda/bilateral_filter.cu
+++ b/modules/gpuimgproc/src/cuda/bilateral_filter.cu
@@ -133,7 +133,7 @@ namespace cv { namespace gpu { namespace cudev
             B<T> b(src.rows, src.cols);
 
             float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
-             float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
+            float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
 
             cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
             bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
diff --git a/modules/gpuimgproc/src/cuda/build_point_list.cu b/modules/gpuimgproc/src/cuda/build_point_list.cu
new file mode 100644
index 000000000..c5f2b23f6
--- /dev/null
+++ b/modules/gpuimgproc/src/cuda/build_point_list.cu
@@ -0,0 +1,138 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough
+    {
+        __device__ int g_counter;
+
+        template <int PIXELS_PER_THREAD>
+        __global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
+        {
+            __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
+            __shared__ int s_qsize[4];
+            __shared__ int s_globStart[4];
+
+            const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (threadIdx.x == 0)
+                s_qsize[threadIdx.y] = 0;
+            __syncthreads();
+
+            if (y < src.rows)
+            {
+                // fill the queue
+                const uchar* srcRow = src.ptr(y);
+                for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
+                {
+                    if (srcRow[xx])
+                    {
+                        const unsigned int val = (y << 16) | xx;
+                        const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
+                        s_queues[threadIdx.y][qidx] = val;
+                    }
+                }
+            }
+
+            __syncthreads();
+
+            // let one thread reserve the space required in the global list
+            if (threadIdx.x == 0 && threadIdx.y == 0)
+            {
+                // find how many items are stored in each list
+                int totalSize = 0;
+                for (int i = 0; i < blockDim.y; ++i)
+                {
+                    s_globStart[i] = totalSize;
+                    totalSize += s_qsize[i];
+                }
+
+                // calculate the offset in the global list
+                const int globalOffset = atomicAdd(&g_counter, totalSize);
+                for (int i = 0; i < blockDim.y; ++i)
+                    s_globStart[i] += globalOffset;
+            }
+
+            __syncthreads();
+
+            // copy local queues to global queue
+            const int qsize = s_qsize[threadIdx.y];
+            int gidx = s_globStart[threadIdx.y] + threadIdx.x;
+            for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
+                list[gidx] = s_queues[threadIdx.y][i];
+        }
+
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
+        {
+            const int PIXELS_PER_THREAD = 16;
+
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 4);
+            const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
+
+            buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            return totalCount;
+        }
+    }
+}}}
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/canny.cu b/modules/gpuimgproc/src/cuda/canny.cu
index 177d14692..271fffbc7 100644
--- a/modules/gpuimgproc/src/cuda/canny.cu
+++ b/modules/gpuimgproc/src/cuda/canny.cu
@@ -43,7 +43,7 @@
 #if !defined CUDA_DISABLER
 
 #include <utility>
-#include <algorithm>//std::swap
+#include <algorithm>
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/emulation.hpp"
 #include "opencv2/core/cuda/transform.hpp"
diff --git a/modules/gpuimgproc/src/cuda/corners.cu b/modules/gpuimgproc/src/cuda/corners.cu
index 39e7cdc5d..aa65ac8f8 100644
--- a/modules/gpuimgproc/src/cuda/corners.cu
+++ b/modules/gpuimgproc/src/cuda/corners.cu
@@ -48,6 +48,10 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+
 namespace cv { namespace gpu { namespace cudev
 {
     namespace imgproc
@@ -271,4 +275,6 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-#endif
+#endif // HAVE_OPENCV_GPUFILTERS
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuimgproc/src/cuda/generalized_hough.cu b/modules/gpuimgproc/src/cuda/generalized_hough.cu
new file mode 100644
index 000000000..fdf691ff4
--- /dev/null
+++ b/modules/gpuimgproc/src/cuda/generalized_hough.cu
@@ -0,0 +1,824 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include <thrust/device_ptr.h>
+#include <thrust/transform.h>
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUARITHM
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace ght
+    {
+        __device__ int g_counter;
+
+        template <typename T, int PIXELS_PER_THREAD>
+        __global__ void buildEdgePointList(const PtrStepSzb edges, const PtrStep<T> dx, const PtrStep<T> dy,
+                                           unsigned int* coordList, float* thetaList)
+        {
+            __shared__ unsigned int s_coordLists[4][32 * PIXELS_PER_THREAD];
+            __shared__ float s_thetaLists[4][32 * PIXELS_PER_THREAD];
+            __shared__ int s_sizes[4];
+            __shared__ int s_globStart[4];
+
+            const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (threadIdx.x == 0)
+                s_sizes[threadIdx.y] = 0;
+            __syncthreads();
+
+            if (y < edges.rows)
+            {
+                // fill the queue
+                const uchar* edgesRow = edges.ptr(y);
+                const T* dxRow = dx.ptr(y);
+                const T* dyRow = dy.ptr(y);
+
+                for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < edges.cols; ++i, xx += blockDim.x)
+                {
+                    const T dxVal = dxRow[xx];
+                    const T dyVal = dyRow[xx];
+
+                    if (edgesRow[xx] && (dxVal != 0 || dyVal != 0))
+                    {
+                        const unsigned int coord = (y << 16) | xx;
+
+                        float theta = ::atan2f(dyVal, dxVal);
+                        if (theta < 0)
+                            theta += 2.0f * CV_PI_F;
+
+                        const int qidx = Emulation::smem::atomicAdd(&s_sizes[threadIdx.y], 1);
+
+                        s_coordLists[threadIdx.y][qidx] = coord;
+                        s_thetaLists[threadIdx.y][qidx] = theta;
+                    }
+                }
+            }
+
+            __syncthreads();
+
+            // let one thread reserve the space required in the global list
+            if (threadIdx.x == 0 && threadIdx.y == 0)
+            {
+                // find how many items are stored in each list
+                int totalSize = 0;
+                for (int i = 0; i < blockDim.y; ++i)
+                {
+                    s_globStart[i] = totalSize;
+                    totalSize += s_sizes[i];
+                }
+
+                // calculate the offset in the global list
+                const int globalOffset = atomicAdd(&g_counter, totalSize);
+                for (int i = 0; i < blockDim.y; ++i)
+                    s_globStart[i] += globalOffset;
+            }
+
+            __syncthreads();
+
+            // copy local queues to global queue
+            const int qsize = s_sizes[threadIdx.y];
+            int gidx = s_globStart[threadIdx.y] + threadIdx.x;
+            for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
+            {
+                coordList[gidx] = s_coordLists[threadIdx.y][i];
+                thetaList[gidx] = s_thetaLists[threadIdx.y][i];
+            }
+        }
+
+        template <typename T>
+        int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList)
+        {
+            const int PIXELS_PER_THREAD = 8;
+
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 4);
+            const dim3 grid(divUp(edges.cols, block.x * PIXELS_PER_THREAD), divUp(edges.rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
+
+            buildEdgePointList<T, PIXELS_PER_THREAD><<<grid, block>>>(edges, (PtrStepSz<T>) dx, (PtrStepSz<T>) dy, coordList, thetaList);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            return totalCount;
+        }
+
+        template int buildEdgePointList_gpu<short>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
+        template int buildEdgePointList_gpu<int>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
+        template int buildEdgePointList_gpu<float>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
+
+        __global__ void buildRTable(const unsigned int* coordList, const float* thetaList, const int pointsCount,
+                                    PtrStep<short2> r_table, int* r_sizes, int maxSize,
+                                    const short2 templCenter, const float thetaScale)
+        {
+            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+            if (tid >= pointsCount)
+                return;
+
+            const unsigned int coord = coordList[tid];
+            short2 p;
+            p.x = (coord & 0xFFFF);
+            p.y = (coord >> 16) & 0xFFFF;
+
+            const float theta = thetaList[tid];
+            const int n = __float2int_rn(theta * thetaScale);
+
+            const int ind = ::atomicAdd(r_sizes + n, 1);
+            if (ind < maxSize)
+                r_table(n, ind) = saturate_cast<short2>(p - templCenter);
+        }
+
+        void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                             PtrStepSz<short2> r_table, int* r_sizes,
+                             short2 templCenter, int levels)
+        {
+            const dim3 block(256);
+            const dim3 grid(divUp(pointsCount, block.x));
+
+            const float thetaScale = levels / (2.0f * CV_PI_F);
+
+            buildRTable<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, r_table.cols, templCenter, thetaScale);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        ////////////////////////////////////////////////////////////////////////
+        // Ballard_Pos
+
+        __global__ void Ballard_Pos_calcHist(const unsigned int* coordList, const float* thetaList, const int pointsCount,
+                                             const PtrStep<short2> r_table, const int* r_sizes,
+                                             PtrStepSzi hist,
+                                             const float idp, const float thetaScale)
+        {
+            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+            if (tid >= pointsCount)
+                return;
+
+            const unsigned int coord = coordList[tid];
+            short2 p;
+            p.x = (coord & 0xFFFF);
+            p.y = (coord >> 16) & 0xFFFF;
+
+            const float theta = thetaList[tid];
+            const int n = __float2int_rn(theta * thetaScale);
+
+            const short2* r_row = r_table.ptr(n);
+            const int r_row_size = r_sizes[n];
+
+            for (int j = 0; j < r_row_size; ++j)
+            {
+                short2 c = saturate_cast<short2>(p - r_row[j]);
+
+                c.x = __float2int_rn(c.x * idp);
+                c.y = __float2int_rn(c.y * idp);
+
+                if (c.x >= 0 && c.x < hist.cols - 2 && c.y >= 0 && c.y < hist.rows - 2)
+                    ::atomicAdd(hist.ptr(c.y + 1) + c.x + 1, 1);
+            }
+        }
+
+        void Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                      PtrStepSz<short2> r_table, const int* r_sizes,
+                                      PtrStepSzi hist,
+                                      float dp, int levels)
+        {
+            const dim3 block(256);
+            const dim3 grid(divUp(pointsCount, block.x));
+
+            const float idp = 1.0f / dp;
+            const float thetaScale = levels / (2.0f * CV_PI_F);
+
+            Ballard_Pos_calcHist<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        __global__ void Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes,
+                                                  const int maxSize, const float dp, const int threshold)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= hist.cols - 2 || y >= hist.rows - 2)
+                return;
+
+            const int curVotes = hist(y + 1, x + 1);
+
+            if (curVotes > threshold &&
+                curVotes >  hist(y + 1, x) &&
+                curVotes >= hist(y + 1, x + 2) &&
+                curVotes >  hist(y, x + 1) &&
+                curVotes >= hist(y + 2, x + 1))
+            {
+                const int ind = ::atomicAdd(&g_counter, 1);
+
+                if (ind < maxSize)
+                {
+                    out[ind] = make_float4(x * dp, y * dp, 1.0f, 0.0f);
+                    votes[ind] = make_int3(curVotes, 0, 0);
+                }
+            }
+        }
+
+        int Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
+
+            Ballard_Pos_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, dp, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxSize);
+
+            return totalCount;
+        }
+
+        ////////////////////////////////////////////////////////////////////////
+        // Guil_Full
+
+        struct FeatureTable
+        {
+            uchar* p1_pos_data;
+            size_t p1_pos_step;
+
+            uchar* p1_theta_data;
+            size_t p1_theta_step;
+
+            uchar* p2_pos_data;
+            size_t p2_pos_step;
+
+            uchar* d12_data;
+            size_t d12_step;
+
+            uchar* r1_data;
+            size_t r1_step;
+
+            uchar* r2_data;
+            size_t r2_step;
+        };
+
+        __constant__ FeatureTable c_templFeatures;
+        __constant__ FeatureTable c_imageFeatures;
+
+        void Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
+        {
+            FeatureTable tbl;
+
+            tbl.p1_pos_data = p1_pos.data;
+            tbl.p1_pos_step = p1_pos.step;
+
+            tbl.p1_theta_data = p1_theta.data;
+            tbl.p1_theta_step = p1_theta.step;
+
+            tbl.p2_pos_data = p2_pos.data;
+            tbl.p2_pos_step = p2_pos.step;
+
+            tbl.d12_data = d12.data;
+            tbl.d12_step = d12.step;
+
+            tbl.r1_data = r1.data;
+            tbl.r1_step = r1.step;
+
+            tbl.r2_data = r2.data;
+            tbl.r2_step = r2.step;
+
+            cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
+        }
+        void Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
+        {
+            FeatureTable tbl;
+
+            tbl.p1_pos_data = p1_pos.data;
+            tbl.p1_pos_step = p1_pos.step;
+
+            tbl.p1_theta_data = p1_theta.data;
+            tbl.p1_theta_step = p1_theta.step;
+
+            tbl.p2_pos_data = p2_pos.data;
+            tbl.p2_pos_step = p2_pos.step;
+
+            tbl.d12_data = d12.data;
+            tbl.d12_step = d12.step;
+
+            tbl.r1_data = r1.data;
+            tbl.r1_step = r1.step;
+
+            tbl.r2_data = r2.data;
+            tbl.r2_step = r2.step;
+
+            cudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
+        }
+
+        struct TemplFeatureTable
+        {
+            static __device__ float2* p1_pos(int n)
+            {
+                return (float2*)(c_templFeatures.p1_pos_data + n * c_templFeatures.p1_pos_step);
+            }
+            static __device__ float* p1_theta(int n)
+            {
+                return (float*)(c_templFeatures.p1_theta_data + n * c_templFeatures.p1_theta_step);
+            }
+            static __device__ float2* p2_pos(int n)
+            {
+                return (float2*)(c_templFeatures.p2_pos_data + n * c_templFeatures.p2_pos_step);
+            }
+
+            static __device__ float* d12(int n)
+            {
+                return (float*)(c_templFeatures.d12_data + n * c_templFeatures.d12_step);
+            }
+
+            static __device__ float2* r1(int n)
+            {
+                return (float2*)(c_templFeatures.r1_data + n * c_templFeatures.r1_step);
+            }
+            static __device__ float2* r2(int n)
+            {
+                return (float2*)(c_templFeatures.r2_data + n * c_templFeatures.r2_step);
+            }
+        };
+        struct ImageFeatureTable
+        {
+            static __device__ float2* p1_pos(int n)
+            {
+                return (float2*)(c_imageFeatures.p1_pos_data + n * c_imageFeatures.p1_pos_step);
+            }
+            static __device__ float* p1_theta(int n)
+            {
+                return (float*)(c_imageFeatures.p1_theta_data + n * c_imageFeatures.p1_theta_step);
+            }
+            static __device__ float2* p2_pos(int n)
+            {
+                return (float2*)(c_imageFeatures.p2_pos_data + n * c_imageFeatures.p2_pos_step);
+            }
+
+            static __device__ float* d12(int n)
+            {
+                return (float*)(c_imageFeatures.d12_data + n * c_imageFeatures.d12_step);
+            }
+
+            static __device__ float2* r1(int n)
+            {
+                return (float2*)(c_imageFeatures.r1_data + n * c_imageFeatures.r1_step);
+            }
+            static __device__ float2* r2(int n)
+            {
+                return (float2*)(c_imageFeatures.r2_data + n * c_imageFeatures.r2_step);
+            }
+        };
+
+        __device__ float clampAngle(float a)
+        {
+            float res = a;
+
+            while (res > 2.0f * CV_PI_F)
+                res -= 2.0f * CV_PI_F;
+            while (res < 0.0f)
+                res += 2.0f * CV_PI_F;
+
+            return res;
+        }
+
+        __device__ bool angleEq(float a, float b, float eps)
+        {
+            return (::fabs(clampAngle(a - b)) <= eps);
+        }
+
+        template <class FT, bool isTempl>
+        __global__ void Guil_Full_buildFeatureList(const unsigned int* coordList, const float* thetaList, const int pointsCount,
+                                                   int* sizes, const int maxSize,
+                                                   const float xi, const float angleEpsilon, const float alphaScale,
+                                                   const float2 center, const float maxDist)
+        {
+            const float p1_theta = thetaList[blockIdx.x];
+            const unsigned int coord1 = coordList[blockIdx.x];
+            float2 p1_pos;
+            p1_pos.x = (coord1 & 0xFFFF);
+            p1_pos.y = (coord1 >> 16) & 0xFFFF;
+
+            for (int i = threadIdx.x; i < pointsCount; i += blockDim.x)
+            {
+                const float p2_theta = thetaList[i];
+                const unsigned int coord2 = coordList[i];
+                float2 p2_pos;
+                p2_pos.x = (coord2 & 0xFFFF);
+                p2_pos.y = (coord2 >> 16) & 0xFFFF;
+
+                if (angleEq(p1_theta - p2_theta, xi, angleEpsilon))
+                {
+                    const float2 d = p1_pos - p2_pos;
+
+                    float alpha12 = clampAngle(::atan2(d.y, d.x) - p1_theta);
+                    float d12 = ::sqrtf(d.x * d.x + d.y * d.y);
+
+                    if (d12 > maxDist)
+                        continue;
+
+                    float2 r1 = p1_pos - center;
+                    float2 r2 = p2_pos - center;
+
+                    const int n = __float2int_rn(alpha12 * alphaScale);
+
+                    const int ind = ::atomicAdd(sizes + n, 1);
+
+                    if (ind < maxSize)
+                    {
+                        if (!isTempl)
+                        {
+                            FT::p1_pos(n)[ind] = p1_pos;
+                            FT::p2_pos(n)[ind] = p2_pos;
+                        }
+
+                        FT::p1_theta(n)[ind] = p1_theta;
+
+                        FT::d12(n)[ind] = d12;
+
+                        if (isTempl)
+                        {
+                            FT::r1(n)[ind] = r1;
+                            FT::r2(n)[ind] = r2;
+                        }
+                    }
+                }
+            }
+        }
+
+        template <class FT, bool isTempl>
+        void Guil_Full_buildFeatureList_caller(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                               int* sizes, int maxSize,
+                                               float xi, float angleEpsilon, int levels,
+                                               float2 center, float maxDist)
+        {
+            const dim3 block(256);
+            const dim3 grid(pointsCount);
+
+            const float alphaScale = levels / (2.0f * CV_PI_F);
+
+            Guil_Full_buildFeatureList<FT, isTempl><<<grid, block>>>(coordList, thetaList, pointsCount,
+                                                                     sizes, maxSize,
+                                                                     xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale,
+                                                                     center, maxDist);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            thrust::device_ptr<int> sizesPtr(sizes);
+            thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum<int>(), maxSize));
+        }
+
+        void Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                                 int* sizes, int maxSize,
+                                                 float xi, float angleEpsilon, int levels,
+                                                 float2 center, float maxDist)
+        {
+            Guil_Full_buildFeatureList_caller<TemplFeatureTable, true>(coordList, thetaList, pointsCount,
+                                                                       sizes, maxSize,
+                                                                       xi, angleEpsilon, levels,
+                                                                       center, maxDist);
+        }
+        void Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                                 int* sizes, int maxSize,
+                                                 float xi, float angleEpsilon, int levels,
+                                                 float2 center, float maxDist)
+        {
+            Guil_Full_buildFeatureList_caller<ImageFeatureTable, false>(coordList, thetaList, pointsCount,
+                                                                        sizes, maxSize,
+                                                                        xi, angleEpsilon, levels,
+                                                                        center, maxDist);
+        }
+
+        __global__ void Guil_Full_calcOHist(const int* templSizes, const int* imageSizes, int* OHist,
+                                            const float minAngle, const float maxAngle, const float iAngleStep, const int angleRange)
+        {
+            extern __shared__ int s_OHist[];
+            for (int i = threadIdx.x; i <= angleRange; i += blockDim.x)
+                s_OHist[i] = 0;
+            __syncthreads();
+
+            const int tIdx = blockIdx.x;
+            const int level = blockIdx.y;
+
+            const int tSize = templSizes[level];
+
+            if (tIdx < tSize)
+            {
+                const int imSize = imageSizes[level];
+
+                const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx];
+
+                for (int i = threadIdx.x; i < imSize; i += blockDim.x)
+                {
+                    const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
+
+                    const float angle = clampAngle(im_p1_theta - t_p1_theta);
+
+                    if (angle >= minAngle && angle <= maxAngle)
+                    {
+                        const int n = __float2int_rn((angle - minAngle) * iAngleStep);
+                        Emulation::smem::atomicAdd(&s_OHist[n], 1);
+                    }
+                }
+            }
+            __syncthreads();
+
+            for (int i = threadIdx.x; i <= angleRange; i += blockDim.x)
+                ::atomicAdd(OHist + i, s_OHist[i]);
+        }
+
+        void Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
+                                     float minAngle, float maxAngle, float angleStep, int angleRange,
+                                     int levels, int tMaxSize)
+        {
+            const dim3 block(256);
+            const dim3 grid(tMaxSize, levels + 1);
+
+            minAngle *= (CV_PI_F / 180.0f);
+            maxAngle *= (CV_PI_F / 180.0f);
+            angleStep *= (CV_PI_F / 180.0f);
+
+            const size_t smemSize = (angleRange + 1) * sizeof(float);
+
+            Guil_Full_calcOHist<<<grid, block, smemSize>>>(templSizes, imageSizes, OHist,
+                                                           minAngle, maxAngle, 1.0f / angleStep, angleRange);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        __global__ void Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist,
+                                            const float angle, const float angleEpsilon,
+                                            const float minScale, const float maxScale, const float iScaleStep, const int scaleRange)
+        {
+            extern __shared__ int s_SHist[];
+            for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x)
+                s_SHist[i] = 0;
+            __syncthreads();
+
+            const int tIdx = blockIdx.x;
+            const int level = blockIdx.y;
+
+            const int tSize = templSizes[level];
+
+            if (tIdx < tSize)
+            {
+                const int imSize = imageSizes[level];
+
+                const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle;
+                const float t_d12 = TemplFeatureTable::d12(level)[tIdx] + angle;
+
+                for (int i = threadIdx.x; i < imSize; i += blockDim.x)
+                {
+                    const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
+                    const float im_d12 = ImageFeatureTable::d12(level)[i];
+
+                    if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon))
+                    {
+                        const float scale = im_d12 / t_d12;
+
+                        if (scale >= minScale && scale <= maxScale)
+                        {
+                            const int s = __float2int_rn((scale - minScale) * iScaleStep);
+                            Emulation::smem::atomicAdd(&s_SHist[s], 1);
+                        }
+                    }
+                }
+            }
+            __syncthreads();
+
+            for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x)
+                ::atomicAdd(SHist + i, s_SHist[i]);
+        }
+
+        void Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
+                                     float angle, float angleEpsilon,
+                                     float minScale, float maxScale, float iScaleStep, int scaleRange,
+                                     int levels, int tMaxSize)
+        {
+            const dim3 block(256);
+            const dim3 grid(tMaxSize, levels + 1);
+
+            angle *= (CV_PI_F / 180.0f);
+            angleEpsilon *= (CV_PI_F / 180.0f);
+
+            const size_t smemSize = (scaleRange + 1) * sizeof(float);
+
+            Guil_Full_calcSHist<<<grid, block, smemSize>>>(templSizes, imageSizes, SHist,
+                                                           angle, angleEpsilon,
+                                                           minScale, maxScale, iScaleStep, scaleRange);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        __global__ void Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
+                                            const float angle, const float sinVal, const float cosVal, const float angleEpsilon, const float scale,
+                                            const float idp)
+        {
+            const int tIdx = blockIdx.x;
+            const int level = blockIdx.y;
+
+            const int tSize = templSizes[level];
+
+            if (tIdx < tSize)
+            {
+                const int imSize = imageSizes[level];
+
+                const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle;
+
+                float2 r1 = TemplFeatureTable::r1(level)[tIdx];
+                float2 r2 = TemplFeatureTable::r2(level)[tIdx];
+
+                r1 = r1 * scale;
+                r2 = r2 * scale;
+
+                r1 = make_float2(cosVal * r1.x - sinVal * r1.y, sinVal * r1.x + cosVal * r1.y);
+                r2 = make_float2(cosVal * r2.x - sinVal * r2.y, sinVal * r2.x + cosVal * r2.y);
+
+                for (int i = threadIdx.x; i < imSize; i += blockDim.x)
+                {
+                    const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
+
+                    const float2 im_p1_pos = ImageFeatureTable::p1_pos(level)[i];
+                    const float2 im_p2_pos = ImageFeatureTable::p2_pos(level)[i];
+
+                    if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon))
+                    {
+                        float2 c1, c2;
+
+                        c1 = im_p1_pos - r1;
+                        c1 = c1 * idp;
+
+                        c2 = im_p2_pos - r2;
+                        c2 = c2 * idp;
+
+                        if (::fabs(c1.x - c2.x) > 1 || ::fabs(c1.y - c2.y) > 1)
+                            continue;
+
+                        if (c1.y >= 0 && c1.y < PHist.rows - 2 && c1.x >= 0 && c1.x < PHist.cols - 2)
+                            ::atomicAdd(PHist.ptr(__float2int_rn(c1.y) + 1) + __float2int_rn(c1.x) + 1, 1);
+                    }
+                }
+            }
+        }
+
+        void Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
+                                     float angle, float angleEpsilon, float scale,
+                                     float dp,
+                                     int levels, int tMaxSize)
+        {
+            const dim3 block(256);
+            const dim3 grid(tMaxSize, levels + 1);
+
+            angle *= (CV_PI_F / 180.0f);
+            angleEpsilon *= (CV_PI_F / 180.0f);
+
+            const float sinVal = ::sinf(angle);
+            const float cosVal = ::cosf(angle);
+
+            cudaSafeCall( cudaFuncSetCacheConfig(Guil_Full_calcPHist, cudaFuncCachePreferL1) );
+
+            Guil_Full_calcPHist<<<grid, block>>>(templSizes, imageSizes, PHist,
+                                                 angle, sinVal, cosVal, angleEpsilon, scale,
+                                                 1.0f / dp);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        __global__ void Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize,
+                                                const float angle, const int angleVotes, const float scale, const int scaleVotes,
+                                                const float dp, const int threshold)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= hist.cols - 2 || y >= hist.rows - 2)
+                return;
+
+            const int curVotes = hist(y + 1, x + 1);
+
+            if (curVotes > threshold &&
+                curVotes >  hist(y + 1, x) &&
+                curVotes >= hist(y + 1, x + 2) &&
+                curVotes >  hist(y, x + 1) &&
+                curVotes >= hist(y + 2, x + 1))
+            {
+                const int ind = ::atomicAdd(&g_counter, 1);
+
+                if (ind < maxSize)
+                {
+                    out[ind] = make_float4(x * dp, y * dp, scale, angle);
+                    votes[ind] = make_int3(curVotes, scaleVotes, angleVotes);
+                }
+            }
+        }
+
+        int Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
+                                        float angle, int angleVotes, float scale, int scaleVotes,
+                                        float dp, int threshold)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
+
+            Guil_Full_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize,
+                                                     angle, angleVotes, scale, scaleVotes,
+                                                     dp, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxSize);
+
+            return totalCount;
+        }
+    }
+}}}
+
+#endif // HAVE_OPENCV_GPUARITHM
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/hist.cu b/modules/gpuimgproc/src/cuda/hist.cu
index 474c27cf7..51931d7ce 100644
--- a/modules/gpuimgproc/src/cuda/hist.cu
+++ b/modules/gpuimgproc/src/cuda/hist.cu
@@ -109,6 +109,86 @@ namespace hist
 
 /////////////////////////////////////////////////////////////////////////
 
+namespace hist
+{
+    __device__ __forceinline__ void histEvenInc(int* shist, uint data, int binSize, int lowerLevel, int upperLevel)
+    {
+        if (data >= lowerLevel && data <= upperLevel)
+        {
+            const uint ind = (data - lowerLevel) / binSize;
+            Emulation::smem::atomicAdd(shist + ind, 1);
+        }
+    }
+
+    __global__ void histEven8u(const uchar* src, const size_t step, const int rows, const int cols,
+                               int* hist, const int binCount, const int binSize, const int lowerLevel, const int upperLevel)
+    {
+        extern __shared__ int shist[];
+
+        const int y = blockIdx.x * blockDim.y + threadIdx.y;
+        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+
+        if (tid < binCount)
+            shist[tid] = 0;
+
+        __syncthreads();
+
+        if (y < rows)
+        {
+            const uchar* rowPtr = src + y * step;
+            const uint* rowPtr4 = (uint*) rowPtr;
+
+            const int cols_4 = cols / 4;
+            for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
+            {
+                const uint data = rowPtr4[x];
+
+                histEvenInc(shist, (data >>  0) & 0xFFU, binSize, lowerLevel, upperLevel);
+                histEvenInc(shist, (data >>  8) & 0xFFU, binSize, lowerLevel, upperLevel);
+                histEvenInc(shist, (data >> 16) & 0xFFU, binSize, lowerLevel, upperLevel);
+                histEvenInc(shist, (data >> 24) & 0xFFU, binSize, lowerLevel, upperLevel);
+            }
+
+            if (cols % 4 != 0 && threadIdx.x == 0)
+            {
+                for (int x = cols_4 * 4; x < cols; ++x)
+                {
+                    const uchar data = rowPtr[x];
+                    histEvenInc(shist, data, binSize, lowerLevel, upperLevel);
+                }
+            }
+        }
+
+        __syncthreads();
+
+        if (tid < binCount)
+        {
+            const int histVal = shist[tid];
+
+            if (histVal > 0)
+                ::atomicAdd(hist + tid, histVal);
+        }
+    }
+
+    void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(src.rows, block.y));
+
+        const int binSize = divUp(upperLevel - lowerLevel, binCount);
+
+        const size_t smem_size = binCount * sizeof(int);
+
+        histEven8u<<<grid, block, smem_size, stream>>>(src.data, src.step, src.rows, src.cols, hist, binCount, binSize, lowerLevel, upperLevel);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+}
+
+/////////////////////////////////////////////////////////////////////////
+
 namespace hist
 {
     __constant__ int c_lut[256];
diff --git a/modules/gpuimgproc/src/cuda/hough.cu b/modules/gpuimgproc/src/cuda/hough.cu
deleted file mode 100644
index 696ed3845..000000000
--- a/modules/gpuimgproc/src/cuda/hough.cu
+++ /dev/null
@@ -1,1710 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if !defined CUDA_DISABLER
-
-#include <thrust/device_ptr.h>
-#include <thrust/sort.h>
-
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/emulation.hpp"
-#include "opencv2/core/cuda/vec_math.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/limits.hpp"
-#include "opencv2/core/cuda/dynamic_smem.hpp"
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace hough
-    {
-        __device__ int g_counter;
-
-        ////////////////////////////////////////////////////////////////////////
-        // buildPointList
-
-        template <int PIXELS_PER_THREAD>
-        __global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
-        {
-            __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
-            __shared__ int s_qsize[4];
-            __shared__ int s_globStart[4];
-
-            const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (threadIdx.x == 0)
-                s_qsize[threadIdx.y] = 0;
-            __syncthreads();
-
-            if (y < src.rows)
-            {
-                // fill the queue
-                const uchar* srcRow = src.ptr(y);
-                for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
-                {
-                    if (srcRow[xx])
-                    {
-                        const unsigned int val = (y << 16) | xx;
-                        const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
-                        s_queues[threadIdx.y][qidx] = val;
-                    }
-                }
-            }
-
-            __syncthreads();
-
-            // let one thread reserve the space required in the global list
-            if (threadIdx.x == 0 && threadIdx.y == 0)
-            {
-                // find how many items are stored in each list
-                int totalSize = 0;
-                for (int i = 0; i < blockDim.y; ++i)
-                {
-                    s_globStart[i] = totalSize;
-                    totalSize += s_qsize[i];
-                }
-
-                // calculate the offset in the global list
-                const int globalOffset = atomicAdd(&g_counter, totalSize);
-                for (int i = 0; i < blockDim.y; ++i)
-                    s_globStart[i] += globalOffset;
-            }
-
-            __syncthreads();
-
-            // copy local queues to global queue
-            const int qsize = s_qsize[threadIdx.y];
-            int gidx = s_globStart[threadIdx.y] + threadIdx.x;
-            for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
-                list[gidx] = s_queues[threadIdx.y][i];
-        }
-
-        int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
-        {
-            const int PIXELS_PER_THREAD = 16;
-
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 4);
-            const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
-
-            buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // linesAccum
-
-        __global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
-        {
-            const int n = blockIdx.x;
-            const float ang = n * theta;
-
-            float sinVal;
-            float cosVal;
-            sincosf(ang, &sinVal, &cosVal);
-            sinVal *= irho;
-            cosVal *= irho;
-
-            const int shift = (numrho - 1) / 2;
-
-            int* accumRow = accum.ptr(n + 1);
-            for (int i = threadIdx.x; i < count; i += blockDim.x)
-            {
-                const unsigned int val = list[i];
-
-                const int x = (val & 0xFFFF);
-                const int y = (val >> 16) & 0xFFFF;
-
-                int r = __float2int_rn(x * cosVal + y * sinVal);
-                r += shift;
-
-                ::atomicAdd(accumRow + r + 1, 1);
-            }
-        }
-
-        __global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
-        {
-            int* smem = DynamicSharedMem<int>();
-
-            for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
-                smem[i] = 0;
-
-            __syncthreads();
-
-            const int n = blockIdx.x;
-            const float ang = n * theta;
-
-            float sinVal;
-            float cosVal;
-            sincosf(ang, &sinVal, &cosVal);
-            sinVal *= irho;
-            cosVal *= irho;
-
-            const int shift = (numrho - 1) / 2;
-
-            for (int i = threadIdx.x; i < count; i += blockDim.x)
-            {
-                const unsigned int val = list[i];
-
-                const int x = (val & 0xFFFF);
-                const int y = (val >> 16) & 0xFFFF;
-
-                int r = __float2int_rn(x * cosVal + y * sinVal);
-                r += shift;
-
-                Emulation::smem::atomicAdd(&smem[r + 1], 1);
-            }
-
-            __syncthreads();
-
-            int* accumRow = accum.ptr(n + 1);
-            for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
-                accumRow[i] = smem[i];
-        }
-
-        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
-        {
-            const dim3 block(has20 ? 1024 : 512);
-            const dim3 grid(accum.rows - 2);
-
-            size_t smemSize = (accum.cols - 1) * sizeof(int);
-
-            if (smemSize < sharedMemPerBlock - 1000)
-                linesAccumShared<<<grid, block, smemSize>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
-            else
-                linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
-
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // linesGetResult
-
-        __global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
-        {
-            const int r = blockIdx.x * blockDim.x + threadIdx.x;
-            const int n = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (r >= accum.cols - 2 || n >= accum.rows - 2)
-                return;
-
-            const int curVotes = accum(n + 1, r + 1);
-
-            if (curVotes > threshold &&
-                curVotes >  accum(n + 1, r) &&
-                curVotes >= accum(n + 1, r + 2) &&
-                curVotes >  accum(n, r + 1) &&
-                curVotes >= accum(n + 2, r + 1))
-            {
-                const float radius = (r - (numrho - 1) * 0.5f) * rho;
-                const float angle = n * theta;
-
-                const int ind = ::atomicAdd(&g_counter, 1);
-                if (ind < maxSize)
-                {
-                    out[ind] = make_float2(radius, angle);
-                    votes[ind] = curVotes;
-                }
-            }
-        }
-
-        int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
-
-            linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxSize);
-
-            if (doSort && totalCount > 0)
-            {
-                thrust::device_ptr<float2> outPtr(out);
-                thrust::device_ptr<int> votesPtr(votes);
-                thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>());
-            }
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // houghLinesProbabilistic
-
-        texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp);
-
-        __global__ void houghLinesProbabilistic(const PtrStepSzi accum,
-                                                int4* out, const int maxSize,
-                                                const float rho, const float theta,
-                                                const int lineGap, const int lineLength,
-                                                const int rows, const int cols)
-        {
-            const int r = blockIdx.x * blockDim.x + threadIdx.x;
-            const int n = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (r >= accum.cols - 2 || n >= accum.rows - 2)
-                return;
-
-            const int curVotes = accum(n + 1, r + 1);
-
-            if (curVotes >= lineLength &&
-                curVotes > accum(n, r) &&
-                curVotes > accum(n, r + 1) &&
-                curVotes > accum(n, r + 2) &&
-                curVotes > accum(n + 1, r) &&
-                curVotes > accum(n + 1, r + 2) &&
-                curVotes > accum(n + 2, r) &&
-                curVotes > accum(n + 2, r + 1) &&
-                curVotes > accum(n + 2, r + 2))
-            {
-                const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho;
-                const float angle = n * theta;
-
-                float cosa;
-                float sina;
-                sincosf(angle, &sina, &cosa);
-
-                float2 p0 = make_float2(cosa * radius, sina * radius);
-                float2 dir = make_float2(-sina, cosa);
-
-                float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)};
-                float a;
-
-                if (dir.x != 0)
-                {
-                    a = -p0.x / dir.x;
-                    pb[0].x = 0;
-                    pb[0].y = p0.y + a * dir.y;
-
-                    a = (cols - 1 - p0.x) / dir.x;
-                    pb[1].x = cols - 1;
-                    pb[1].y = p0.y + a * dir.y;
-                }
-                if (dir.y != 0)
-                {
-                    a = -p0.y / dir.y;
-                    pb[2].x = p0.x + a * dir.x;
-                    pb[2].y = 0;
-
-                    a = (rows - 1 - p0.y) / dir.y;
-                    pb[3].x = p0.x + a * dir.x;
-                    pb[3].y = rows - 1;
-                }
-
-                if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows))
-                {
-                    p0 = pb[0];
-                    if (dir.x < 0)
-                        dir = -dir;
-                }
-                else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows))
-                {
-                    p0 = pb[1];
-                    if (dir.x > 0)
-                        dir = -dir;
-                }
-                else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols))
-                {
-                    p0 = pb[2];
-                    if (dir.y < 0)
-                        dir = -dir;
-                }
-                else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols))
-                {
-                    p0 = pb[3];
-                    if (dir.y > 0)
-                        dir = -dir;
-                }
-
-                float2 d;
-                if (::fabsf(dir.x) > ::fabsf(dir.y))
-                {
-                    d.x = dir.x > 0 ? 1 : -1;
-                    d.y = dir.y / ::fabsf(dir.x);
-                }
-                else
-                {
-                    d.x = dir.x / ::fabsf(dir.y);
-                    d.y = dir.y > 0 ? 1 : -1;
-                }
-
-                float2 line_end[2];
-                int gap;
-                bool inLine = false;
-
-                float2 p1 = p0;
-                if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
-                    return;
-
-                for (;;)
-                {
-                    if (tex2D(tex_mask, p1.x, p1.y))
-                    {
-                        gap = 0;
-
-                        if (!inLine)
-                        {
-                            line_end[0] = p1;
-                            line_end[1] = p1;
-                            inLine = true;
-                        }
-                        else
-                        {
-                            line_end[1] = p1;
-                        }
-                    }
-                    else if (inLine)
-                    {
-                        if (++gap > lineGap)
-                        {
-                            bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
-                                             ::abs(line_end[1].y - line_end[0].y) >= lineLength;
-
-                            if (good_line)
-                            {
-                                const int ind = ::atomicAdd(&g_counter, 1);
-                                if (ind < maxSize)
-                                    out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
-                            }
-
-                            gap = 0;
-                            inLine = false;
-                        }
-                    }
-
-                    p1 = p1 + d;
-                    if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
-                    {
-                        if (inLine)
-                        {
-                            bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
-                                             ::abs(line_end[1].y - line_end[0].y) >= lineLength;
-
-                            if (good_line)
-                            {
-                                const int ind = ::atomicAdd(&g_counter, 1);
-                                if (ind < maxSize)
-                                    out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
-                            }
-
-                        }
-                        break;
-                    }
-                }
-            }
-        }
-
-        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
-
-            bindTexture(&tex_mask, mask);
-
-            houghLinesProbabilistic<<<grid, block>>>(accum,
-                                                     out, maxSize,
-                                                     rho, theta,
-                                                     lineGap, lineLength,
-                                                     mask.rows, mask.cols);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxSize);
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // circlesAccumCenters
-
-        __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
-                                            PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
-        {
-            const int SHIFT = 10;
-            const int ONE = 1 << SHIFT;
-
-            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-
-            if (tid >= count)
-                return;
-
-            const unsigned int val = list[tid];
-
-            const int x = (val & 0xFFFF);
-            const int y = (val >> 16) & 0xFFFF;
-
-            const int vx = dx(y, x);
-            const int vy = dy(y, x);
-
-            if (vx == 0 && vy == 0)
-                return;
-
-            const float mag = ::sqrtf(vx * vx + vy * vy);
-
-            const int x0 = __float2int_rn((x * idp) * ONE);
-            const int y0 = __float2int_rn((y * idp) * ONE);
-
-            int sx = __float2int_rn((vx * idp) * ONE / mag);
-            int sy = __float2int_rn((vy * idp) * ONE / mag);
-
-            // Step from minRadius to maxRadius in both directions of the gradient
-            for (int k1 = 0; k1 < 2; ++k1)
-            {
-                int x1 = x0 + minRadius * sx;
-                int y1 = y0 + minRadius * sy;
-
-                for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
-                {
-                    const int x2 = x1 >> SHIFT;
-                    const int y2 = y1 >> SHIFT;
-
-                    if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
-                        break;
-
-                    ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
-                }
-
-                sx = -sx;
-                sy = -sy;
-            }
-        }
-
-        void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
-        {
-            const dim3 block(256);
-            const dim3 grid(divUp(count, block.x));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
-
-            circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // buildCentersList
-
-        __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x < accum.cols - 2 && y < accum.rows - 2)
-            {
-                const int top = accum(y, x + 1);
-
-                const int left = accum(y + 1, x);
-                const int cur = accum(y + 1, x + 1);
-                const int right = accum(y + 1, x + 2);
-
-                const int bottom = accum(y + 2, x + 1);
-
-                if (cur > threshold && cur > top && cur >= bottom && cur >  left && cur >= right)
-                {
-                    const unsigned int val = (y << 16) | x;
-                    const int idx = ::atomicAdd(&g_counter, 1);
-                    centers[idx] = val;
-                }
-            }
-        }
-
-        int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
-
-            buildCentersList<<<grid, block>>>(accum, centers, threshold);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // circlesAccumRadius
-
-        __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count,
-                                           float3* circles, const int maxCircles, const float dp,
-                                           const int minRadius, const int maxRadius, const int histSize, const int threshold)
-        {
-            int* smem = DynamicSharedMem<int>();
-
-            for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x)
-                smem[i] = 0;
-            __syncthreads();
-
-            unsigned int val = centers[blockIdx.x];
-
-            float cx = (val & 0xFFFF);
-            float cy = (val >> 16) & 0xFFFF;
-
-            cx = (cx + 0.5f) * dp;
-            cy = (cy + 0.5f) * dp;
-
-            for (int i = threadIdx.x; i < count; i += blockDim.x)
-            {
-                val = list[i];
-
-                const int x = (val & 0xFFFF);
-                const int y = (val >> 16) & 0xFFFF;
-
-                const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y));
-                if (rad >= minRadius && rad <= maxRadius)
-                {
-                    const int r = __float2int_rn(rad - minRadius);
-
-                    Emulation::smem::atomicAdd(&smem[r + 1], 1);
-                }
-            }
-
-            __syncthreads();
-
-            for (int i = threadIdx.x; i < histSize; i += blockDim.x)
-            {
-                const int curVotes = smem[i + 1];
-
-                if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
-                {
-                    const int ind = ::atomicAdd(&g_counter, 1);
-                    if (ind < maxCircles)
-                        circles[ind] = make_float3(cx, cy, i + minRadius);
-                }
-            }
-        }
-
-        int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
-                                   float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(has20 ? 1024 : 512);
-            const dim3 grid(centersCount);
-
-            const int histSize = maxRadius - minRadius + 1;
-            size_t smemSize = (histSize + 2) * sizeof(int);
-
-            circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxCircles);
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // Generalized Hough
-
-        template <typename T, int PIXELS_PER_THREAD>
-        __global__ void buildEdgePointList(const PtrStepSzb edges, const PtrStep<T> dx, const PtrStep<T> dy, unsigned int* coordList, float* thetaList)
-        {
-            __shared__ unsigned int s_coordLists[4][32 * PIXELS_PER_THREAD];
-            __shared__ float s_thetaLists[4][32 * PIXELS_PER_THREAD];
-            __shared__ int s_sizes[4];
-            __shared__ int s_globStart[4];
-
-            const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (threadIdx.x == 0)
-                s_sizes[threadIdx.y] = 0;
-            __syncthreads();
-
-            if (y < edges.rows)
-            {
-                // fill the queue
-                const uchar* edgesRow = edges.ptr(y);
-                const T* dxRow = dx.ptr(y);
-                const T* dyRow = dy.ptr(y);
-
-                for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < edges.cols; ++i, xx += blockDim.x)
-                {
-                    const T dxVal = dxRow[xx];
-                    const T dyVal = dyRow[xx];
-
-                    if (edgesRow[xx] && (dxVal != 0 || dyVal != 0))
-                    {
-                        const unsigned int coord = (y << 16) | xx;
-
-                        float theta = ::atan2f(dyVal, dxVal);
-                        if (theta < 0)
-                            theta += 2.0f * CV_PI_F;
-
-                        const int qidx = Emulation::smem::atomicAdd(&s_sizes[threadIdx.y], 1);
-
-                        s_coordLists[threadIdx.y][qidx] = coord;
-                        s_thetaLists[threadIdx.y][qidx] = theta;
-                    }
-                }
-            }
-
-            __syncthreads();
-
-            // let one thread reserve the space required in the global list
-            if (threadIdx.x == 0 && threadIdx.y == 0)
-            {
-                // find how many items are stored in each list
-                int totalSize = 0;
-                for (int i = 0; i < blockDim.y; ++i)
-                {
-                    s_globStart[i] = totalSize;
-                    totalSize += s_sizes[i];
-                }
-
-                // calculate the offset in the global list
-                const int globalOffset = atomicAdd(&g_counter, totalSize);
-                for (int i = 0; i < blockDim.y; ++i)
-                    s_globStart[i] += globalOffset;
-            }
-
-            __syncthreads();
-
-            // copy local queues to global queue
-            const int qsize = s_sizes[threadIdx.y];
-            int gidx = s_globStart[threadIdx.y] + threadIdx.x;
-            for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
-            {
-                coordList[gidx] = s_coordLists[threadIdx.y][i];
-                thetaList[gidx] = s_thetaLists[threadIdx.y][i];
-            }
-        }
-
-        template <typename T>
-        int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList)
-        {
-            const int PIXELS_PER_THREAD = 8;
-
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 4);
-            const dim3 grid(divUp(edges.cols, block.x * PIXELS_PER_THREAD), divUp(edges.rows, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
-
-            buildEdgePointList<T, PIXELS_PER_THREAD><<<grid, block>>>(edges, (PtrStepSz<T>) dx, (PtrStepSz<T>) dy, coordList, thetaList);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            return totalCount;
-        }
-
-        template int buildEdgePointList_gpu<short>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
-        template int buildEdgePointList_gpu<int>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
-        template int buildEdgePointList_gpu<float>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
-
-        __global__ void buildRTable(const unsigned int* coordList, const float* thetaList, const int pointsCount,
-                                    PtrStep<short2> r_table, int* r_sizes, int maxSize,
-                                    const short2 templCenter, const float thetaScale)
-        {
-            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-
-            if (tid >= pointsCount)
-                return;
-
-            const unsigned int coord = coordList[tid];
-            short2 p;
-            p.x = (coord & 0xFFFF);
-            p.y = (coord >> 16) & 0xFFFF;
-
-            const float theta = thetaList[tid];
-            const int n = __float2int_rn(theta * thetaScale);
-
-            const int ind = ::atomicAdd(r_sizes + n, 1);
-            if (ind < maxSize)
-                r_table(n, ind) = saturate_cast<short2>(p - templCenter);
-        }
-
-        void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                             PtrStepSz<short2> r_table, int* r_sizes,
-                             short2 templCenter, int levels)
-        {
-            const dim3 block(256);
-            const dim3 grid(divUp(pointsCount, block.x));
-
-            const float thetaScale = levels / (2.0f * CV_PI_F);
-
-            buildRTable<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, r_table.cols, templCenter, thetaScale);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // GHT_Ballard_Pos
-
-        __global__ void GHT_Ballard_Pos_calcHist(const unsigned int* coordList, const float* thetaList, const int pointsCount,
-                                                 const PtrStep<short2> r_table, const int* r_sizes,
-                                                 PtrStepSzi hist,
-                                                 const float idp, const float thetaScale)
-        {
-            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-
-            if (tid >= pointsCount)
-                return;
-
-            const unsigned int coord = coordList[tid];
-            short2 p;
-            p.x = (coord & 0xFFFF);
-            p.y = (coord >> 16) & 0xFFFF;
-
-            const float theta = thetaList[tid];
-            const int n = __float2int_rn(theta * thetaScale);
-
-            const short2* r_row = r_table.ptr(n);
-            const int r_row_size = r_sizes[n];
-
-            for (int j = 0; j < r_row_size; ++j)
-            {
-                int2 c = p - r_row[j];
-
-                c.x = __float2int_rn(c.x * idp);
-                c.y = __float2int_rn(c.y * idp);
-
-                if (c.x >= 0 && c.x < hist.cols - 2 && c.y >= 0 && c.y < hist.rows - 2)
-                    ::atomicAdd(hist.ptr(c.y + 1) + c.x + 1, 1);
-            }
-        }
-
-        void GHT_Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                          PtrStepSz<short2> r_table, const int* r_sizes,
-                                          PtrStepSzi hist,
-                                          float dp, int levels)
-        {
-            const dim3 block(256);
-            const dim3 grid(divUp(pointsCount, block.x));
-
-            const float idp = 1.0f / dp;
-            const float thetaScale = levels / (2.0f * CV_PI_F);
-
-            GHT_Ballard_Pos_calcHist<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        __global__ void GHT_Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, const float dp, const int threshold)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= hist.cols - 2 || y >= hist.rows - 2)
-                return;
-
-            const int curVotes = hist(y + 1, x + 1);
-
-            if (curVotes > threshold &&
-                curVotes >  hist(y + 1, x) &&
-                curVotes >= hist(y + 1, x + 2) &&
-                curVotes >  hist(y, x + 1) &&
-                curVotes >= hist(y + 2, x + 1))
-            {
-                const int ind = ::atomicAdd(&g_counter, 1);
-
-                if (ind < maxSize)
-                {
-                    out[ind] = make_float4(x * dp, y * dp, 1.0f, 0.0f);
-                    votes[ind] = make_int3(curVotes, 0, 0);
-                }
-            }
-        }
-
-        int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
-
-            GHT_Ballard_Pos_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, dp, threshold);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxSize);
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // GHT_Ballard_PosScale
-
-        __global__ void GHT_Ballard_PosScale_calcHist(const unsigned int* coordList, const float* thetaList,
-                                                      PtrStep<short2> r_table, const int* r_sizes,
-                                                      PtrStepi hist, const int rows, const int cols,
-                                                      const float minScale, const float scaleStep, const int scaleRange,
-                                                      const float idp, const float thetaScale)
-        {
-            const unsigned int coord = coordList[blockIdx.x];
-            float2 p;
-            p.x = (coord & 0xFFFF);
-            p.y = (coord >> 16) & 0xFFFF;
-
-            const float theta = thetaList[blockIdx.x];
-            const int n = __float2int_rn(theta * thetaScale);
-
-            const short2* r_row = r_table.ptr(n);
-            const int r_row_size = r_sizes[n];
-
-            for (int j = 0; j < r_row_size; ++j)
-            {
-                const float2 d = saturate_cast<float2>(r_row[j]);
-
-                for (int s = threadIdx.x; s < scaleRange; s += blockDim.x)
-                {
-                    const float scale = minScale + s * scaleStep;
-
-                    float2 c = p - scale * d;
-
-                    c.x *= idp;
-                    c.y *= idp;
-
-                    if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows)
-                        ::atomicAdd(hist.ptr((s + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1);
-                }
-            }
-        }
-
-        void GHT_Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                               PtrStepSz<short2> r_table, const int* r_sizes,
-                                               PtrStepi hist, int rows, int cols,
-                                               float minScale, float scaleStep, int scaleRange,
-                                               float dp, int levels)
-        {
-            const dim3 block(256);
-            const dim3 grid(pointsCount);
-
-            const float idp = 1.0f / dp;
-            const float thetaScale = levels / (2.0f * CV_PI_F);
-
-            GHT_Ballard_PosScale_calcHist<<<grid, block>>>(coordList, thetaList,
-                                                           r_table, r_sizes,
-                                                           hist, rows, cols,
-                                                           minScale, scaleStep, scaleRange,
-                                                           idp, thetaScale);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        __global__ void GHT_Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange,
-                                                           float4* out, int3* votes, const int maxSize,
-                                                           const float minScale, const float scaleStep, const float dp, const int threshold)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= cols || y >= rows)
-                return;
-
-            for (int s = 0; s < scaleRange; ++s)
-            {
-                const float scale = minScale + s * scaleStep;
-
-                const int prevScaleIdx = (s) * (rows + 2);
-                const int curScaleIdx = (s + 1) * (rows + 2);
-                const int nextScaleIdx = (s + 2) * (rows + 2);
-
-                const int curVotes = hist(curScaleIdx + y + 1, x + 1);
-
-                if (curVotes > threshold &&
-                    curVotes >  hist(curScaleIdx + y + 1, x) &&
-                    curVotes >= hist(curScaleIdx + y + 1, x + 2) &&
-                    curVotes >  hist(curScaleIdx + y, x + 1) &&
-                    curVotes >= hist(curScaleIdx + y + 2, x + 1) &&
-                    curVotes >  hist(prevScaleIdx + y + 1, x + 1) &&
-                    curVotes >= hist(nextScaleIdx + y + 1, x + 1))
-                {
-                    const int ind = ::atomicAdd(&g_counter, 1);
-
-                    if (ind < maxSize)
-                    {
-                        out[ind] = make_float4(x * dp, y * dp, scale, 0.0f);
-                        votes[ind] = make_int3(curVotes, curVotes, 0);
-                    }
-                }
-            }
-        }
-
-        int GHT_Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize,
-                                                   float minScale, float scaleStep, float dp, int threshold)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
-
-            GHT_Ballard_PosScale_findPosInHist<<<grid, block>>>(hist, rows, cols, scaleRange, out, votes, maxSize, minScale, scaleStep, dp, threshold);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxSize);
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // GHT_Ballard_PosRotation
-
-        __global__ void GHT_Ballard_PosRotation_calcHist(const unsigned int* coordList, const float* thetaList,
-                                                         PtrStep<short2> r_table, const int* r_sizes,
-                                                         PtrStepi hist, const int rows, const int cols,
-                                                         const float minAngle, const float angleStep, const int angleRange,
-                                                         const float idp, const float thetaScale)
-        {
-            const unsigned int coord = coordList[blockIdx.x];
-            float2 p;
-            p.x = (coord & 0xFFFF);
-            p.y = (coord >> 16) & 0xFFFF;
-
-            const float thetaVal = thetaList[blockIdx.x];
-
-            for (int a = threadIdx.x; a < angleRange; a += blockDim.x)
-            {
-                const float angle = (minAngle + a * angleStep) * (CV_PI_F / 180.0f);
-                float sinA, cosA;
-                sincosf(angle, &sinA, &cosA);
-
-                float theta = thetaVal - angle;
-                if (theta < 0)
-                    theta += 2.0f * CV_PI_F;
-
-                const int n = __float2int_rn(theta * thetaScale);
-
-                const short2* r_row = r_table.ptr(n);
-                const int r_row_size = r_sizes[n];
-
-                for (int j = 0; j < r_row_size; ++j)
-                {
-                    const float2 d = saturate_cast<float2>(r_row[j]);
-
-                    const float2 dr = make_float2(d.x * cosA - d.y * sinA, d.x * sinA + d.y * cosA);
-
-                    float2 c = make_float2(p.x - dr.x, p.y - dr.y);
-                    c.x *= idp;
-                    c.y *= idp;
-
-                    if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows)
-                        ::atomicAdd(hist.ptr((a + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1);
-                }
-            }
-        }
-
-        void GHT_Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                  PtrStepSz<short2> r_table, const int* r_sizes,
-                                                  PtrStepi hist, int rows, int cols,
-                                                  float minAngle, float angleStep, int angleRange,
-                                                  float dp, int levels)
-        {
-            const dim3 block(256);
-            const dim3 grid(pointsCount);
-
-            const float idp = 1.0f / dp;
-            const float thetaScale = levels / (2.0f * CV_PI_F);
-
-            GHT_Ballard_PosRotation_calcHist<<<grid, block>>>(coordList, thetaList,
-                                                              r_table, r_sizes,
-                                                              hist, rows, cols,
-                                                              minAngle, angleStep, angleRange,
-                                                              idp, thetaScale);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        __global__ void GHT_Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange,
-                                                              float4* out, int3* votes, const int maxSize,
-                                                              const float minAngle, const float angleStep, const float dp, const int threshold)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= cols || y >= rows)
-                return;
-
-            for (int a = 0; a < angleRange; ++a)
-            {
-                const float angle = minAngle + a * angleStep;
-
-                const int prevAngleIdx = (a) * (rows + 2);
-                const int curAngleIdx = (a + 1) * (rows + 2);
-                const int nextAngleIdx = (a + 2) * (rows + 2);
-
-                const int curVotes = hist(curAngleIdx + y + 1, x + 1);
-
-                if (curVotes > threshold &&
-                    curVotes >  hist(curAngleIdx + y + 1, x) &&
-                    curVotes >= hist(curAngleIdx + y + 1, x + 2) &&
-                    curVotes >  hist(curAngleIdx + y, x + 1) &&
-                    curVotes >= hist(curAngleIdx + y + 2, x + 1) &&
-                    curVotes >  hist(prevAngleIdx + y + 1, x + 1) &&
-                    curVotes >= hist(nextAngleIdx + y + 1, x + 1))
-                {
-                    const int ind = ::atomicAdd(&g_counter, 1);
-
-                    if (ind < maxSize)
-                    {
-                        out[ind] = make_float4(x * dp, y * dp, 1.0f, angle);
-                        votes[ind] = make_int3(curVotes, 0, curVotes);
-                    }
-                }
-            }
-        }
-
-        int GHT_Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize,
-                                                      float minAngle, float angleStep, float dp, int threshold)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
-
-            GHT_Ballard_PosRotation_findPosInHist<<<grid, block>>>(hist, rows, cols, angleRange, out, votes, maxSize, minAngle, angleStep, dp, threshold);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxSize);
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // GHT_Guil_Full
-
-        struct FeatureTable
-        {
-            uchar* p1_pos_data;
-            size_t p1_pos_step;
-
-            uchar* p1_theta_data;
-            size_t p1_theta_step;
-
-            uchar* p2_pos_data;
-            size_t p2_pos_step;
-
-            uchar* d12_data;
-            size_t d12_step;
-
-            uchar* r1_data;
-            size_t r1_step;
-
-            uchar* r2_data;
-            size_t r2_step;
-        };
-
-        __constant__ FeatureTable c_templFeatures;
-        __constant__ FeatureTable c_imageFeatures;
-
-        void GHT_Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
-        {
-            FeatureTable tbl;
-
-            tbl.p1_pos_data = p1_pos.data;
-            tbl.p1_pos_step = p1_pos.step;
-
-            tbl.p1_theta_data = p1_theta.data;
-            tbl.p1_theta_step = p1_theta.step;
-
-            tbl.p2_pos_data = p2_pos.data;
-            tbl.p2_pos_step = p2_pos.step;
-
-            tbl.d12_data = d12.data;
-            tbl.d12_step = d12.step;
-
-            tbl.r1_data = r1.data;
-            tbl.r1_step = r1.step;
-
-            tbl.r2_data = r2.data;
-            tbl.r2_step = r2.step;
-
-            cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
-        }
-        void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
-        {
-            FeatureTable tbl;
-
-            tbl.p1_pos_data = p1_pos.data;
-            tbl.p1_pos_step = p1_pos.step;
-
-            tbl.p1_theta_data = p1_theta.data;
-            tbl.p1_theta_step = p1_theta.step;
-
-            tbl.p2_pos_data = p2_pos.data;
-            tbl.p2_pos_step = p2_pos.step;
-
-            tbl.d12_data = d12.data;
-            tbl.d12_step = d12.step;
-
-            tbl.r1_data = r1.data;
-            tbl.r1_step = r1.step;
-
-            tbl.r2_data = r2.data;
-            tbl.r2_step = r2.step;
-
-            cudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
-        }
-
-        struct TemplFeatureTable
-        {
-            static __device__ float2* p1_pos(int n)
-            {
-                return (float2*)(c_templFeatures.p1_pos_data + n * c_templFeatures.p1_pos_step);
-            }
-            static __device__ float* p1_theta(int n)
-            {
-                return (float*)(c_templFeatures.p1_theta_data + n * c_templFeatures.p1_theta_step);
-            }
-            static __device__ float2* p2_pos(int n)
-            {
-                return (float2*)(c_templFeatures.p2_pos_data + n * c_templFeatures.p2_pos_step);
-            }
-
-            static __device__ float* d12(int n)
-            {
-                return (float*)(c_templFeatures.d12_data + n * c_templFeatures.d12_step);
-            }
-
-            static __device__ float2* r1(int n)
-            {
-                return (float2*)(c_templFeatures.r1_data + n * c_templFeatures.r1_step);
-            }
-            static __device__ float2* r2(int n)
-            {
-                return (float2*)(c_templFeatures.r2_data + n * c_templFeatures.r2_step);
-            }
-        };
-        struct ImageFeatureTable
-        {
-            static __device__ float2* p1_pos(int n)
-            {
-                return (float2*)(c_imageFeatures.p1_pos_data + n * c_imageFeatures.p1_pos_step);
-            }
-            static __device__ float* p1_theta(int n)
-            {
-                return (float*)(c_imageFeatures.p1_theta_data + n * c_imageFeatures.p1_theta_step);
-            }
-            static __device__ float2* p2_pos(int n)
-            {
-                return (float2*)(c_imageFeatures.p2_pos_data + n * c_imageFeatures.p2_pos_step);
-            }
-
-            static __device__ float* d12(int n)
-            {
-                return (float*)(c_imageFeatures.d12_data + n * c_imageFeatures.d12_step);
-            }
-
-            static __device__ float2* r1(int n)
-            {
-                return (float2*)(c_imageFeatures.r1_data + n * c_imageFeatures.r1_step);
-            }
-            static __device__ float2* r2(int n)
-            {
-                return (float2*)(c_imageFeatures.r2_data + n * c_imageFeatures.r2_step);
-            }
-        };
-
-        __device__ float clampAngle(float a)
-        {
-            float res = a;
-
-            while (res > 2.0f * CV_PI_F)
-                res -= 2.0f * CV_PI_F;
-            while (res < 0.0f)
-                res += 2.0f * CV_PI_F;
-
-            return res;
-        }
-
-        __device__ bool angleEq(float a, float b, float eps)
-        {
-            return (::fabs(clampAngle(a - b)) <= eps);
-        }
-
-        template <class FT, bool isTempl>
-        __global__ void GHT_Guil_Full_buildFeatureList(const unsigned int* coordList, const float* thetaList, const int pointsCount,
-                                                       int* sizes, const int maxSize,
-                                                       const float xi, const float angleEpsilon, const float alphaScale,
-                                                       const float2 center, const float maxDist)
-        {
-            const float p1_theta = thetaList[blockIdx.x];
-            const unsigned int coord1 = coordList[blockIdx.x];
-            float2 p1_pos;
-            p1_pos.x = (coord1 & 0xFFFF);
-            p1_pos.y = (coord1 >> 16) & 0xFFFF;
-
-            for (int i = threadIdx.x; i < pointsCount; i += blockDim.x)
-            {
-                const float p2_theta = thetaList[i];
-                const unsigned int coord2 = coordList[i];
-                float2 p2_pos;
-                p2_pos.x = (coord2 & 0xFFFF);
-                p2_pos.y = (coord2 >> 16) & 0xFFFF;
-
-                if (angleEq(p1_theta - p2_theta, xi, angleEpsilon))
-                {
-                    const float2 d = p1_pos - p2_pos;
-
-                    float alpha12 = clampAngle(::atan2(d.y, d.x) - p1_theta);
-                    float d12 = ::sqrtf(d.x * d.x + d.y * d.y);
-
-                    if (d12 > maxDist)
-                        continue;
-
-                    float2 r1 = p1_pos - center;
-                    float2 r2 = p2_pos - center;
-
-                    const int n = __float2int_rn(alpha12 * alphaScale);
-
-                    const int ind = ::atomicAdd(sizes + n, 1);
-
-                    if (ind < maxSize)
-                    {
-                        if (!isTempl)
-                        {
-                            FT::p1_pos(n)[ind] = p1_pos;
-                            FT::p2_pos(n)[ind] = p2_pos;
-                        }
-
-                        FT::p1_theta(n)[ind] = p1_theta;
-
-                        FT::d12(n)[ind] = d12;
-
-                        if (isTempl)
-                        {
-                            FT::r1(n)[ind] = r1;
-                            FT::r2(n)[ind] = r2;
-                        }
-                    }
-                }
-            }
-        }
-
-        template <class FT, bool isTempl>
-        void GHT_Guil_Full_buildFeatureList_caller(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                   int* sizes, int maxSize,
-                                                   float xi, float angleEpsilon, int levels,
-                                                   float2 center, float maxDist)
-        {
-            const dim3 block(256);
-            const dim3 grid(pointsCount);
-
-            const float alphaScale = levels / (2.0f * CV_PI_F);
-
-            GHT_Guil_Full_buildFeatureList<FT, isTempl><<<grid, block>>>(coordList, thetaList, pointsCount,
-                                                                         sizes, maxSize,
-                                                                         xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale,
-                                                                         center, maxDist);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            thrust::device_ptr<int> sizesPtr(sizes);
-            thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum<int>(), maxSize));
-        }
-
-        void GHT_Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                     int* sizes, int maxSize,
-                                                     float xi, float angleEpsilon, int levels,
-                                                     float2 center, float maxDist)
-        {
-            GHT_Guil_Full_buildFeatureList_caller<TemplFeatureTable, true>(coordList, thetaList, pointsCount,
-                                                                           sizes, maxSize,
-                                                                           xi, angleEpsilon, levels,
-                                                                           center, maxDist);
-        }
-        void GHT_Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                     int* sizes, int maxSize,
-                                                     float xi, float angleEpsilon, int levels,
-                                                     float2 center, float maxDist)
-        {
-            GHT_Guil_Full_buildFeatureList_caller<ImageFeatureTable, false>(coordList, thetaList, pointsCount,
-                                                                            sizes, maxSize,
-                                                                            xi, angleEpsilon, levels,
-                                                                            center, maxDist);
-        }
-
-        __global__ void GHT_Guil_Full_calcOHist(const int* templSizes, const int* imageSizes, int* OHist,
-                                                const float minAngle, const float maxAngle, const float iAngleStep, const int angleRange)
-        {
-            extern __shared__ int s_OHist[];
-            for (int i = threadIdx.x; i <= angleRange; i += blockDim.x)
-                s_OHist[i] = 0;
-            __syncthreads();
-
-            const int tIdx = blockIdx.x;
-            const int level = blockIdx.y;
-
-            const int tSize = templSizes[level];
-
-            if (tIdx < tSize)
-            {
-                const int imSize = imageSizes[level];
-
-                const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx];
-
-                for (int i = threadIdx.x; i < imSize; i += blockDim.x)
-                {
-                    const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
-
-                    const float angle = clampAngle(im_p1_theta - t_p1_theta);
-
-                    if (angle >= minAngle && angle <= maxAngle)
-                    {
-                        const int n = __float2int_rn((angle - minAngle) * iAngleStep);
-                        Emulation::smem::atomicAdd(&s_OHist[n], 1);
-                    }
-                }
-            }
-            __syncthreads();
-
-            for (int i = threadIdx.x; i <= angleRange; i += blockDim.x)
-                ::atomicAdd(OHist + i, s_OHist[i]);
-        }
-
-        void GHT_Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
-                                         float minAngle, float maxAngle, float angleStep, int angleRange,
-                                         int levels, int tMaxSize)
-        {
-            const dim3 block(256);
-            const dim3 grid(tMaxSize, levels + 1);
-
-            minAngle *= (CV_PI_F / 180.0f);
-            maxAngle *= (CV_PI_F / 180.0f);
-            angleStep *= (CV_PI_F / 180.0f);
-
-            const size_t smemSize = (angleRange + 1) * sizeof(float);
-
-            GHT_Guil_Full_calcOHist<<<grid, block, smemSize>>>(templSizes, imageSizes, OHist,
-                                                               minAngle, maxAngle, 1.0f / angleStep, angleRange);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        __global__ void GHT_Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist,
-                                                const float angle, const float angleEpsilon,
-                                                const float minScale, const float maxScale, const float iScaleStep, const int scaleRange)
-        {
-            extern __shared__ int s_SHist[];
-            for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x)
-                s_SHist[i] = 0;
-            __syncthreads();
-
-            const int tIdx = blockIdx.x;
-            const int level = blockIdx.y;
-
-            const int tSize = templSizes[level];
-
-            if (tIdx < tSize)
-            {
-                const int imSize = imageSizes[level];
-
-                const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle;
-                const float t_d12 = TemplFeatureTable::d12(level)[tIdx] + angle;
-
-                for (int i = threadIdx.x; i < imSize; i += blockDim.x)
-                {
-                    const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
-                    const float im_d12 = ImageFeatureTable::d12(level)[i];
-
-                    if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon))
-                    {
-                        const float scale = im_d12 / t_d12;
-
-                        if (scale >= minScale && scale <= maxScale)
-                        {
-                            const int s = __float2int_rn((scale - minScale) * iScaleStep);
-                            Emulation::smem::atomicAdd(&s_SHist[s], 1);
-                        }
-                    }
-                }
-            }
-            __syncthreads();
-
-            for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x)
-                ::atomicAdd(SHist + i, s_SHist[i]);
-        }
-
-        void GHT_Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
-                                         float angle, float angleEpsilon,
-                                         float minScale, float maxScale, float iScaleStep, int scaleRange,
-                                         int levels, int tMaxSize)
-        {
-            const dim3 block(256);
-            const dim3 grid(tMaxSize, levels + 1);
-
-            angle *= (CV_PI_F / 180.0f);
-            angleEpsilon *= (CV_PI_F / 180.0f);
-
-            const size_t smemSize = (scaleRange + 1) * sizeof(float);
-
-            GHT_Guil_Full_calcSHist<<<grid, block, smemSize>>>(templSizes, imageSizes, SHist,
-                                                               angle, angleEpsilon,
-                                                               minScale, maxScale, iScaleStep, scaleRange);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        __global__ void GHT_Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
-                                                const float angle, const float sinVal, const float cosVal, const float angleEpsilon, const float scale,
-                                                const float idp)
-        {
-            const int tIdx = blockIdx.x;
-            const int level = blockIdx.y;
-
-            const int tSize = templSizes[level];
-
-            if (tIdx < tSize)
-            {
-                const int imSize = imageSizes[level];
-
-                const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle;
-
-                float2 r1 = TemplFeatureTable::r1(level)[tIdx];
-                float2 r2 = TemplFeatureTable::r2(level)[tIdx];
-
-                r1 = r1 * scale;
-                r2 = r2 * scale;
-
-                r1 = make_float2(cosVal * r1.x - sinVal * r1.y, sinVal * r1.x + cosVal * r1.y);
-                r2 = make_float2(cosVal * r2.x - sinVal * r2.y, sinVal * r2.x + cosVal * r2.y);
-
-                for (int i = threadIdx.x; i < imSize; i += blockDim.x)
-                {
-                    const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
-
-                    const float2 im_p1_pos = ImageFeatureTable::p1_pos(level)[i];
-                    const float2 im_p2_pos = ImageFeatureTable::p2_pos(level)[i];
-
-                    if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon))
-                    {
-                        float2 c1, c2;
-
-                        c1 = im_p1_pos - r1;
-                        c1 = c1 * idp;
-
-                        c2 = im_p2_pos - r2;
-                        c2 = c2 * idp;
-
-                        if (::fabs(c1.x - c2.x) > 1 || ::fabs(c1.y - c2.y) > 1)
-                            continue;
-
-                        if (c1.y >= 0 && c1.y < PHist.rows - 2 && c1.x >= 0 && c1.x < PHist.cols - 2)
-                            ::atomicAdd(PHist.ptr(__float2int_rn(c1.y) + 1) + __float2int_rn(c1.x) + 1, 1);
-                    }
-                }
-            }
-        }
-
-        void GHT_Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
-                                         float angle, float angleEpsilon, float scale,
-                                         float dp,
-                                         int levels, int tMaxSize)
-        {
-            const dim3 block(256);
-            const dim3 grid(tMaxSize, levels + 1);
-
-            angle *= (CV_PI_F / 180.0f);
-            angleEpsilon *= (CV_PI_F / 180.0f);
-
-            const float sinVal = ::sinf(angle);
-            const float cosVal = ::cosf(angle);
-
-            cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) );
-
-            GHT_Guil_Full_calcPHist<<<grid, block>>>(templSizes, imageSizes, PHist,
-                                                     angle, sinVal, cosVal, angleEpsilon, scale,
-                                                     1.0f / dp);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        __global__ void GHT_Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize,
-                                                    const float angle, const int angleVotes, const float scale, const int scaleVotes,
-                                                    const float dp, const int threshold)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= hist.cols - 2 || y >= hist.rows - 2)
-                return;
-
-            const int curVotes = hist(y + 1, x + 1);
-
-            if (curVotes > threshold &&
-                curVotes >  hist(y + 1, x) &&
-                curVotes >= hist(y + 1, x + 2) &&
-                curVotes >  hist(y, x + 1) &&
-                curVotes >= hist(y + 2, x + 1))
-            {
-                const int ind = ::atomicAdd(&g_counter, 1);
-
-                if (ind < maxSize)
-                {
-                    out[ind] = make_float4(x * dp, y * dp, scale, angle);
-                    votes[ind] = make_int3(curVotes, scaleVotes, angleVotes);
-                }
-            }
-        }
-
-        int GHT_Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
-                                             float angle, int angleVotes, float scale, int scaleVotes,
-                                             float dp, int threshold)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
-
-            GHT_Guil_Full_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize,
-                                                         angle, angleVotes, scale, scaleVotes,
-                                                         dp, threshold);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxSize);
-
-            return totalCount;
-        }
-    }
-}}}
-
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/hough_circles.cu b/modules/gpuimgproc/src/cuda/hough_circles.cu
new file mode 100644
index 000000000..6757e430b
--- /dev/null
+++ b/modules/gpuimgproc/src/cuda/hough_circles.cu
@@ -0,0 +1,260 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+#include "opencv2/core/cuda/dynamic_smem.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough_circles
+    {
+        __device__ int g_counter;
+
+        ////////////////////////////////////////////////////////////////////////
+        // circlesAccumCenters
+
+        __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
+                                            PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
+        {
+            const int SHIFT = 10;
+            const int ONE = 1 << SHIFT;
+
+            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+            if (tid >= count)
+                return;
+
+            const unsigned int val = list[tid];
+
+            const int x = (val & 0xFFFF);
+            const int y = (val >> 16) & 0xFFFF;
+
+            const int vx = dx(y, x);
+            const int vy = dy(y, x);
+
+            if (vx == 0 && vy == 0)
+                return;
+
+            const float mag = ::sqrtf(vx * vx + vy * vy);
+
+            const int x0 = __float2int_rn((x * idp) * ONE);
+            const int y0 = __float2int_rn((y * idp) * ONE);
+
+            int sx = __float2int_rn((vx * idp) * ONE / mag);
+            int sy = __float2int_rn((vy * idp) * ONE / mag);
+
+            // Step from minRadius to maxRadius in both directions of the gradient
+            for (int k1 = 0; k1 < 2; ++k1)
+            {
+                int x1 = x0 + minRadius * sx;
+                int y1 = y0 + minRadius * sy;
+
+                for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
+                {
+                    const int x2 = x1 >> SHIFT;
+                    const int y2 = y1 >> SHIFT;
+
+                    if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
+                        break;
+
+                    ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
+                }
+
+                sx = -sx;
+                sy = -sy;
+            }
+        }
+
+        void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
+        {
+            const dim3 block(256);
+            const dim3 grid(divUp(count, block.x));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
+
+            circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        ////////////////////////////////////////////////////////////////////////
+        // buildCentersList
+
+        __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x < accum.cols - 2 && y < accum.rows - 2)
+            {
+                const int top = accum(y, x + 1);
+
+                const int left = accum(y + 1, x);
+                const int cur = accum(y + 1, x + 1);
+                const int right = accum(y + 1, x + 2);
+
+                const int bottom = accum(y + 2, x + 1);
+
+                if (cur > threshold && cur > top && cur >= bottom && cur >  left && cur >= right)
+                {
+                    const unsigned int val = (y << 16) | x;
+                    const int idx = ::atomicAdd(&g_counter, 1);
+                    centers[idx] = val;
+                }
+            }
+        }
+
+        int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
+
+            buildCentersList<<<grid, block>>>(accum, centers, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            return totalCount;
+        }
+
+        ////////////////////////////////////////////////////////////////////////
+        // circlesAccumRadius
+
+        __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count,
+                                           float3* circles, const int maxCircles, const float dp,
+                                           const int minRadius, const int maxRadius, const int histSize, const int threshold)
+        {
+            int* smem = DynamicSharedMem<int>();
+
+            for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x)
+                smem[i] = 0;
+            __syncthreads();
+
+            unsigned int val = centers[blockIdx.x];
+
+            float cx = (val & 0xFFFF);
+            float cy = (val >> 16) & 0xFFFF;
+
+            cx = (cx + 0.5f) * dp;
+            cy = (cy + 0.5f) * dp;
+
+            for (int i = threadIdx.x; i < count; i += blockDim.x)
+            {
+                val = list[i];
+
+                const int x = (val & 0xFFFF);
+                const int y = (val >> 16) & 0xFFFF;
+
+                const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y));
+                if (rad >= minRadius && rad <= maxRadius)
+                {
+                    const int r = __float2int_rn(rad - minRadius);
+
+                    Emulation::smem::atomicAdd(&smem[r + 1], 1);
+                }
+            }
+
+            __syncthreads();
+
+            for (int i = threadIdx.x; i < histSize; i += blockDim.x)
+            {
+                const int curVotes = smem[i + 1];
+
+                if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
+                {
+                    const int ind = ::atomicAdd(&g_counter, 1);
+                    if (ind < maxCircles)
+                        circles[ind] = make_float3(cx, cy, i + minRadius);
+                }
+            }
+        }
+
+        int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
+                                   float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(has20 ? 1024 : 512);
+            const dim3 grid(centersCount);
+
+            const int histSize = maxRadius - minRadius + 1;
+            size_t smemSize = (histSize + 2) * sizeof(int);
+
+            circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxCircles);
+
+            return totalCount;
+        }
+    }
+}}}
+
+#endif // HAVE_OPENCV_GPUFILTERS
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/hough_lines.cu b/modules/gpuimgproc/src/cuda/hough_lines.cu
new file mode 100644
index 000000000..0cee0a43d
--- /dev/null
+++ b/modules/gpuimgproc/src/cuda/hough_lines.cu
@@ -0,0 +1,212 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include <thrust/device_ptr.h>
+#include <thrust/sort.h>
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+#include "opencv2/core/cuda/dynamic_smem.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough_lines
+    {
+        __device__ int g_counter;
+
+        ////////////////////////////////////////////////////////////////////////
+        // linesAccum
+
+        __global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
+        {
+            const int n = blockIdx.x;
+            const float ang = n * theta;
+
+            float sinVal;
+            float cosVal;
+            sincosf(ang, &sinVal, &cosVal);
+            sinVal *= irho;
+            cosVal *= irho;
+
+            const int shift = (numrho - 1) / 2;
+
+            int* accumRow = accum.ptr(n + 1);
+            for (int i = threadIdx.x; i < count; i += blockDim.x)
+            {
+                const unsigned int val = list[i];
+
+                const int x = (val & 0xFFFF);
+                const int y = (val >> 16) & 0xFFFF;
+
+                int r = __float2int_rn(x * cosVal + y * sinVal);
+                r += shift;
+
+                ::atomicAdd(accumRow + r + 1, 1);
+            }
+        }
+
+        __global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
+        {
+            int* smem = DynamicSharedMem<int>();
+
+            for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
+                smem[i] = 0;
+
+            __syncthreads();
+
+            const int n = blockIdx.x;
+            const float ang = n * theta;
+
+            float sinVal;
+            float cosVal;
+            sincosf(ang, &sinVal, &cosVal);
+            sinVal *= irho;
+            cosVal *= irho;
+
+            const int shift = (numrho - 1) / 2;
+
+            for (int i = threadIdx.x; i < count; i += blockDim.x)
+            {
+                const unsigned int val = list[i];
+
+                const int x = (val & 0xFFFF);
+                const int y = (val >> 16) & 0xFFFF;
+
+                int r = __float2int_rn(x * cosVal + y * sinVal);
+                r += shift;
+
+                Emulation::smem::atomicAdd(&smem[r + 1], 1);
+            }
+
+            __syncthreads();
+
+            int* accumRow = accum.ptr(n + 1);
+            for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
+                accumRow[i] = smem[i];
+        }
+
+        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
+        {
+            const dim3 block(has20 ? 1024 : 512);
+            const dim3 grid(accum.rows - 2);
+
+            size_t smemSize = (accum.cols - 1) * sizeof(int);
+
+            if (smemSize < sharedMemPerBlock - 1000)
+                linesAccumShared<<<grid, block, smemSize>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
+            else
+                linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
+
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        ////////////////////////////////////////////////////////////////////////
+        // linesGetResult
+
+        __global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
+        {
+            const int r = blockIdx.x * blockDim.x + threadIdx.x;
+            const int n = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (r >= accum.cols - 2 || n >= accum.rows - 2)
+                return;
+
+            const int curVotes = accum(n + 1, r + 1);
+
+            if (curVotes > threshold &&
+                curVotes >  accum(n + 1, r) &&
+                curVotes >= accum(n + 1, r + 2) &&
+                curVotes >  accum(n, r + 1) &&
+                curVotes >= accum(n + 2, r + 1))
+            {
+                const float radius = (r - (numrho - 1) * 0.5f) * rho;
+                const float angle = n * theta;
+
+                const int ind = ::atomicAdd(&g_counter, 1);
+                if (ind < maxSize)
+                {
+                    out[ind] = make_float2(radius, angle);
+                    votes[ind] = curVotes;
+                }
+            }
+        }
+
+        int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
+
+            linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxSize);
+
+            if (doSort && totalCount > 0)
+            {
+                thrust::device_ptr<float2> outPtr(out);
+                thrust::device_ptr<int> votesPtr(votes);
+                thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>());
+            }
+
+            return totalCount;
+        }
+    }
+}}}
+
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/hough_segments.cu b/modules/gpuimgproc/src/cuda/hough_segments.cu
new file mode 100644
index 000000000..e420449fa
--- /dev/null
+++ b/modules/gpuimgproc/src/cuda/hough_segments.cu
@@ -0,0 +1,249 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough_segments
+    {
+        __device__ int g_counter;
+
+        texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp);
+
+        __global__ void houghLinesProbabilistic(const PtrStepSzi accum,
+                                                int4* out, const int maxSize,
+                                                const float rho, const float theta,
+                                                const int lineGap, const int lineLength,
+                                                const int rows, const int cols)
+        {
+            const int r = blockIdx.x * blockDim.x + threadIdx.x;
+            const int n = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (r >= accum.cols - 2 || n >= accum.rows - 2)
+                return;
+
+            const int curVotes = accum(n + 1, r + 1);
+
+            if (curVotes >= lineLength &&
+                curVotes > accum(n, r) &&
+                curVotes > accum(n, r + 1) &&
+                curVotes > accum(n, r + 2) &&
+                curVotes > accum(n + 1, r) &&
+                curVotes > accum(n + 1, r + 2) &&
+                curVotes > accum(n + 2, r) &&
+                curVotes > accum(n + 2, r + 1) &&
+                curVotes > accum(n + 2, r + 2))
+            {
+                const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho;
+                const float angle = n * theta;
+
+                float cosa;
+                float sina;
+                sincosf(angle, &sina, &cosa);
+
+                float2 p0 = make_float2(cosa * radius, sina * radius);
+                float2 dir = make_float2(-sina, cosa);
+
+                float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)};
+                float a;
+
+                if (dir.x != 0)
+                {
+                    a = -p0.x / dir.x;
+                    pb[0].x = 0;
+                    pb[0].y = p0.y + a * dir.y;
+
+                    a = (cols - 1 - p0.x) / dir.x;
+                    pb[1].x = cols - 1;
+                    pb[1].y = p0.y + a * dir.y;
+                }
+                if (dir.y != 0)
+                {
+                    a = -p0.y / dir.y;
+                    pb[2].x = p0.x + a * dir.x;
+                    pb[2].y = 0;
+
+                    a = (rows - 1 - p0.y) / dir.y;
+                    pb[3].x = p0.x + a * dir.x;
+                    pb[3].y = rows - 1;
+                }
+
+                if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows))
+                {
+                    p0 = pb[0];
+                    if (dir.x < 0)
+                        dir = -dir;
+                }
+                else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows))
+                {
+                    p0 = pb[1];
+                    if (dir.x > 0)
+                        dir = -dir;
+                }
+                else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols))
+                {
+                    p0 = pb[2];
+                    if (dir.y < 0)
+                        dir = -dir;
+                }
+                else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols))
+                {
+                    p0 = pb[3];
+                    if (dir.y > 0)
+                        dir = -dir;
+                }
+
+                float2 d;
+                if (::fabsf(dir.x) > ::fabsf(dir.y))
+                {
+                    d.x = dir.x > 0 ? 1 : -1;
+                    d.y = dir.y / ::fabsf(dir.x);
+                }
+                else
+                {
+                    d.x = dir.x / ::fabsf(dir.y);
+                    d.y = dir.y > 0 ? 1 : -1;
+                }
+
+                float2 line_end[2];
+                int gap;
+                bool inLine = false;
+
+                float2 p1 = p0;
+                if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
+                    return;
+
+                for (;;)
+                {
+                    if (tex2D(tex_mask, p1.x, p1.y))
+                    {
+                        gap = 0;
+
+                        if (!inLine)
+                        {
+                            line_end[0] = p1;
+                            line_end[1] = p1;
+                            inLine = true;
+                        }
+                        else
+                        {
+                            line_end[1] = p1;
+                        }
+                    }
+                    else if (inLine)
+                    {
+                        if (++gap > lineGap)
+                        {
+                            bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
+                                             ::abs(line_end[1].y - line_end[0].y) >= lineLength;
+
+                            if (good_line)
+                            {
+                                const int ind = ::atomicAdd(&g_counter, 1);
+                                if (ind < maxSize)
+                                    out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
+                            }
+
+                            gap = 0;
+                            inLine = false;
+                        }
+                    }
+
+                    p1 = p1 + d;
+                    if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
+                    {
+                        if (inLine)
+                        {
+                            bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
+                                             ::abs(line_end[1].y - line_end[0].y) >= lineLength;
+
+                            if (good_line)
+                            {
+                                const int ind = ::atomicAdd(&g_counter, 1);
+                                if (ind < maxSize)
+                                    out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
+                            }
+
+                        }
+                        break;
+                    }
+                }
+            }
+        }
+
+        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
+
+            bindTexture(&tex_mask, mask);
+
+            houghLinesProbabilistic<<<grid, block>>>(accum,
+                                                     out, maxSize,
+                                                     rho, theta,
+                                                     lineGap, lineLength,
+                                                     mask.rows, mask.cols);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxSize);
+
+            return totalCount;
+        }
+    }
+}}}
+
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/generalized_hough.cpp b/modules/gpuimgproc/src/generalized_hough.cpp
new file mode 100644
index 000000000..abcd3ef49
--- /dev/null
+++ b/modules/gpuimgproc/src/generalized_hough.cpp
@@ -0,0 +1,906 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUARITHM)
+
+Ptr<GeneralizedHoughBallard> cv::gpu::createGeneralizedHoughBallard() { throw_no_cuda(); return Ptr<GeneralizedHoughBallard>(); }
+
+Ptr<GeneralizedHoughGuil> cv::gpu::createGeneralizedHoughGuil() { throw_no_cuda(); return Ptr<GeneralizedHoughGuil>(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace ght
+    {
+        template <typename T>
+        int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
+        void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                             PtrStepSz<short2> r_table, int* r_sizes,
+                             short2 templCenter, int levels);
+
+        void Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                      PtrStepSz<short2> r_table, const int* r_sizes,
+                                      PtrStepSzi hist,
+                                      float dp, int levels);
+        int Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold);
+
+        void Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
+        void Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
+        void Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                                 int* sizes, int maxSize,
+                                                 float xi, float angleEpsilon, int levels,
+                                                 float2 center, float maxDist);
+        void Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                                 int* sizes, int maxSize,
+                                                 float xi, float angleEpsilon, int levels,
+                                                 float2 center, float maxDist);
+        void Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
+                                     float minAngle, float maxAngle, float angleStep, int angleRange,
+                                     int levels, int tMaxSize);
+        void Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
+                                     float angle, float angleEpsilon,
+                                     float minScale, float maxScale, float iScaleStep, int scaleRange,
+                                     int levels, int tMaxSize);
+        void Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
+                                     float angle, float angleEpsilon, float scale,
+                                     float dp,
+                                     int levels, int tMaxSize);
+        int Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
+                                        float angle, int angleVotes, float scale, int scaleVotes,
+                                        float dp, int threshold);
+    }
+}}}
+
+// common
+
+namespace
+{
+    class GeneralizedHoughBase
+    {
+    protected:
+        GeneralizedHoughBase();
+        virtual ~GeneralizedHoughBase() {}
+
+        void setTemplateImpl(InputArray templ, Point templCenter);
+        void setTemplateImpl(InputArray edges, InputArray dx, InputArray dy, Point templCenter);
+
+        void detectImpl(InputArray image, OutputArray positions, OutputArray votes);
+        void detectImpl(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes);
+
+        void buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy);
+
+        virtual void processTempl() = 0;
+        virtual void processImage() = 0;
+
+        int cannyLowThresh_;
+        int cannyHighThresh_;
+        double minDist_;
+        double dp_;
+        int maxBufferSize_;
+
+        Size templSize_;
+        Point templCenter_;
+        GpuMat templEdges_;
+        GpuMat templDx_;
+        GpuMat templDy_;
+
+        Size imageSize_;
+        GpuMat imageEdges_;
+        GpuMat imageDx_;
+        GpuMat imageDy_;
+
+        GpuMat edgePointList_;
+
+        GpuMat outBuf_;
+        int posCount_;
+
+    private:
+#ifdef HAVE_OPENCV_GPUFILTERS
+        void calcEdges(InputArray src, GpuMat& edges, GpuMat& dx, GpuMat& dy);
+#endif
+
+        void filterMinDist();
+        void convertTo(OutputArray positions, OutputArray votes);
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+        Ptr<gpu::CannyEdgeDetector> canny_;
+        Ptr<gpu::Filter> filterDx_;
+        Ptr<gpu::Filter> filterDy_;
+#endif
+
+        std::vector<float4> oldPosBuf_;
+        std::vector<int3> oldVoteBuf_;
+        std::vector<float4> newPosBuf_;
+        std::vector<int3> newVoteBuf_;
+        std::vector<int> indexies_;
+    };
+
+    GeneralizedHoughBase::GeneralizedHoughBase()
+    {
+        cannyLowThresh_ = 50;
+        cannyHighThresh_ = 100;
+        minDist_ = 1.0;
+        dp_ = 1.0;
+
+        maxBufferSize_ = 10000;
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+        canny_ = gpu::createCannyEdgeDetector(cannyLowThresh_, cannyHighThresh_);
+        filterDx_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
+        filterDy_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
+#endif
+    }
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+    void GeneralizedHoughBase::calcEdges(InputArray _src, GpuMat& edges, GpuMat& dx, GpuMat& dy)
+    {
+        GpuMat src = _src.getGpuMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+        CV_Assert( cannyLowThresh_ > 0 && cannyLowThresh_ < cannyHighThresh_ );
+
+        ensureSizeIsEnough(src.size(), CV_32SC1, dx);
+        ensureSizeIsEnough(src.size(), CV_32SC1, dy);
+
+        filterDx_->apply(src, dx);
+        filterDy_->apply(src, dy);
+
+        ensureSizeIsEnough(src.size(), CV_8UC1, edges);
+
+        canny_->setLowThreshold(cannyLowThresh_);
+        canny_->setHighThreshold(cannyHighThresh_);
+        canny_->detect(dx, dy, edges);
+    }
+#endif
+
+    void GeneralizedHoughBase::setTemplateImpl(InputArray templ, Point templCenter)
+    {
+#ifndef HAVE_OPENCV_GPUFILTERS
+        (void) templ;
+        (void) templCenter;
+        throw_no_cuda();
+#else
+        calcEdges(templ, templEdges_, templDx_, templDy_);
+
+        if (templCenter == Point(-1, -1))
+            templCenter = Point(templEdges_.cols / 2, templEdges_.rows / 2);
+
+        templSize_ = templEdges_.size();
+        templCenter_ = templCenter;
+
+        processTempl();
+#endif
+    }
+
+    void GeneralizedHoughBase::setTemplateImpl(InputArray edges, InputArray dx, InputArray dy, Point templCenter)
+    {
+        edges.getGpuMat().copyTo(templEdges_);
+        dx.getGpuMat().copyTo(templDx_);
+        dy.getGpuMat().copyTo(templDy_);
+
+        CV_Assert( templEdges_.type() == CV_8UC1 );
+        CV_Assert( templDx_.type() == CV_32FC1 && templDx_.size() == templEdges_.size() );
+        CV_Assert( templDy_.type() == templDx_.type() && templDy_.size() == templEdges_.size() );
+
+        if (templCenter == Point(-1, -1))
+            templCenter = Point(templEdges_.cols / 2, templEdges_.rows / 2);
+
+        templSize_ = templEdges_.size();
+        templCenter_ = templCenter;
+
+        processTempl();
+    }
+
+    void GeneralizedHoughBase::detectImpl(InputArray image, OutputArray positions, OutputArray votes)
+    {
+#ifndef HAVE_OPENCV_GPUFILTERS
+        (void) templ;
+        (void) templCenter;
+        throw_no_cuda();
+#else
+        calcEdges(image, imageEdges_, imageDx_, imageDy_);
+
+        imageSize_ = imageEdges_.size();
+
+        posCount_ = 0;
+
+        processImage();
+
+        if (posCount_ == 0)
+        {
+            positions.release();
+            if (votes.needed())
+                votes.release();
+        }
+        else
+        {
+            if (minDist_ > 1)
+                filterMinDist();
+            convertTo(positions, votes);
+        }
+#endif
+    }
+
+    void GeneralizedHoughBase::detectImpl(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes)
+    {
+        edges.getGpuMat().copyTo(imageEdges_);
+        dx.getGpuMat().copyTo(imageDx_);
+        dy.getGpuMat().copyTo(imageDy_);
+
+        CV_Assert( imageEdges_.type() == CV_8UC1 );
+        CV_Assert( imageDx_.type() == CV_32FC1 && imageDx_.size() == imageEdges_.size() );
+        CV_Assert( imageDy_.type() == imageDx_.type() && imageDy_.size() == imageEdges_.size() );
+
+        imageSize_ = imageEdges_.size();
+
+        posCount_ = 0;
+
+        processImage();
+
+        if (posCount_ == 0)
+        {
+            positions.release();
+            if (votes.needed())
+                votes.release();
+        }
+        else
+        {
+            if (minDist_ > 1)
+                filterMinDist();
+            convertTo(positions, votes);
+        }
+    }
+
+    void GeneralizedHoughBase::buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy)
+    {
+        using namespace cv::gpu::cudev::ght;
+
+        typedef int (*func_t)(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
+        static const func_t funcs[] =
+        {
+            0,
+            0,
+            0,
+            buildEdgePointList_gpu<short>,
+            buildEdgePointList_gpu<int>,
+            buildEdgePointList_gpu<float>,
+            0
+        };
+
+        CV_Assert( edges.type() == CV_8UC1 );
+        CV_Assert( dx.size() == edges.size() );
+        CV_Assert( dy.type() == dx.type() && dy.size() == edges.size() );
+
+        const func_t func = funcs[dx.depth()];
+        CV_Assert( func != 0 );
+
+        edgePointList_.cols = (int) (edgePointList_.step / sizeof(int));
+        ensureSizeIsEnough(2, edges.size().area(), CV_32SC1, edgePointList_);
+
+        edgePointList_.cols = func(edges, dx, dy, edgePointList_.ptr<unsigned int>(0), edgePointList_.ptr<float>(1));
+    }
+
+    struct IndexCmp
+    {
+        const int3* aux;
+
+        explicit IndexCmp(const int3* _aux) : aux(_aux) {}
+
+        bool operator ()(int l1, int l2) const
+        {
+            return aux[l1].x > aux[l2].x;
+        }
+    };
+
+    void GeneralizedHoughBase::filterMinDist()
+    {
+        oldPosBuf_.resize(posCount_);
+        oldVoteBuf_.resize(posCount_);
+
+        cudaSafeCall( cudaMemcpy(&oldPosBuf_[0], outBuf_.ptr(0), posCount_ * sizeof(float4), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpy(&oldVoteBuf_[0], outBuf_.ptr(1), posCount_ * sizeof(int3), cudaMemcpyDeviceToHost) );
+
+        indexies_.resize(posCount_);
+        for (int i = 0; i < posCount_; ++i)
+            indexies_[i] = i;
+        std::sort(indexies_.begin(), indexies_.end(), IndexCmp(&oldVoteBuf_[0]));
+
+        newPosBuf_.clear();
+        newVoteBuf_.clear();
+        newPosBuf_.reserve(posCount_);
+        newVoteBuf_.reserve(posCount_);
+
+        const int cellSize = cvRound(minDist_);
+        const int gridWidth = (imageSize_.width + cellSize - 1) / cellSize;
+        const int gridHeight = (imageSize_.height + cellSize - 1) / cellSize;
+
+        std::vector< std::vector<Point2f> > grid(gridWidth * gridHeight);
+
+        const double minDist2 = minDist_ * minDist_;
+
+        for (int i = 0; i < posCount_; ++i)
+        {
+            const int ind = indexies_[i];
+
+            Point2f p(oldPosBuf_[ind].x, oldPosBuf_[ind].y);
+
+            bool good = true;
+
+            const int xCell = static_cast<int>(p.x / cellSize);
+            const int yCell = static_cast<int>(p.y / cellSize);
+
+            int x1 = xCell - 1;
+            int y1 = yCell - 1;
+            int x2 = xCell + 1;
+            int y2 = yCell + 1;
+
+            // boundary check
+            x1 = std::max(0, x1);
+            y1 = std::max(0, y1);
+            x2 = std::min(gridWidth - 1, x2);
+            y2 = std::min(gridHeight - 1, y2);
+
+            for (int yy = y1; yy <= y2; ++yy)
+            {
+                for (int xx = x1; xx <= x2; ++xx)
+                {
+                    const std::vector<Point2f>& m = grid[yy * gridWidth + xx];
+
+                    for(size_t j = 0; j < m.size(); ++j)
+                    {
+                        const Point2f d = p - m[j];
+
+                        if (d.ddot(d) < minDist2)
+                        {
+                            good = false;
+                            goto break_out;
+                        }
+                    }
+                }
+            }
+
+            break_out:
+
+            if(good)
+            {
+                grid[yCell * gridWidth + xCell].push_back(p);
+
+                newPosBuf_.push_back(oldPosBuf_[ind]);
+                newVoteBuf_.push_back(oldVoteBuf_[ind]);
+            }
+        }
+
+        posCount_ = static_cast<int>(newPosBuf_.size());
+        cudaSafeCall( cudaMemcpy(outBuf_.ptr(0), &newPosBuf_[0], posCount_ * sizeof(float4), cudaMemcpyHostToDevice) );
+        cudaSafeCall( cudaMemcpy(outBuf_.ptr(1), &newVoteBuf_[0], posCount_ * sizeof(int3), cudaMemcpyHostToDevice) );
+    }
+
+    void GeneralizedHoughBase::convertTo(OutputArray positions, OutputArray votes)
+    {
+        ensureSizeIsEnough(1, posCount_, CV_32FC4, positions);
+        GpuMat(1, posCount_, CV_32FC4, outBuf_.ptr(0), outBuf_.step).copyTo(positions);
+
+        if (votes.needed())
+        {
+            ensureSizeIsEnough(1, posCount_, CV_32FC3, votes);
+            GpuMat(1, posCount_, CV_32FC4, outBuf_.ptr(1), outBuf_.step).copyTo(votes);
+        }
+    }
+}
+
+// GeneralizedHoughBallard
+
+namespace
+{
+    class GeneralizedHoughBallardImpl : public GeneralizedHoughBallard, private GeneralizedHoughBase
+    {
+    public:
+        GeneralizedHoughBallardImpl();
+
+        void setTemplate(InputArray templ, Point templCenter) { setTemplateImpl(templ, templCenter); }
+        void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter) { setTemplateImpl(edges, dx, dy, templCenter); }
+
+        void detect(InputArray image, OutputArray positions, OutputArray votes) { detectImpl(image, positions, votes); }
+        void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes) { detectImpl(edges, dx, dy, positions, votes); }
+
+        void setCannyLowThresh(int cannyLowThresh) { cannyLowThresh_ = cannyLowThresh; }
+        int getCannyLowThresh() const { return cannyLowThresh_; }
+
+        void setCannyHighThresh(int cannyHighThresh) { cannyHighThresh_ = cannyHighThresh; }
+        int getCannyHighThresh() const { return cannyHighThresh_; }
+
+        void setMinDist(double minDist) { minDist_ = minDist; }
+        double getMinDist() const { return minDist_; }
+
+        void setDp(double dp) { dp_ = dp; }
+        double getDp() const { return dp_; }
+
+        void setMaxBufferSize(int maxBufferSize) { maxBufferSize_ = maxBufferSize; }
+        int getMaxBufferSize() const { return maxBufferSize_; }
+
+        void setLevels(int levels) { levels_ = levels; }
+        int getLevels() const { return levels_; }
+
+        void setVotesThreshold(int votesThreshold) { votesThreshold_ = votesThreshold; }
+        int getVotesThreshold() const { return votesThreshold_; }
+
+    private:
+        void processTempl();
+        void processImage();
+
+        void calcHist();
+        void findPosInHist();
+
+        int levels_;
+        int votesThreshold_;
+
+        GpuMat r_table_;
+        GpuMat r_sizes_;
+
+        GpuMat hist_;
+    };
+
+    GeneralizedHoughBallardImpl::GeneralizedHoughBallardImpl()
+    {
+        levels_ = 360;
+        votesThreshold_ = 100;
+    }
+
+    void GeneralizedHoughBallardImpl::processTempl()
+    {
+        using namespace cv::gpu::cudev::ght;
+
+        CV_Assert( levels_ > 0 );
+
+        buildEdgePointList(templEdges_, templDx_, templDy_);
+
+        ensureSizeIsEnough(levels_ + 1, maxBufferSize_, CV_16SC2, r_table_);
+        ensureSizeIsEnough(1, levels_ + 1, CV_32SC1, r_sizes_);
+        r_sizes_.setTo(Scalar::all(0));
+
+        if (edgePointList_.cols > 0)
+        {
+            buildRTable_gpu(edgePointList_.ptr<unsigned int>(0), edgePointList_.ptr<float>(1), edgePointList_.cols,
+                            r_table_, r_sizes_.ptr<int>(), make_short2(templCenter_.x, templCenter_.y), levels_);
+            gpu::min(r_sizes_, maxBufferSize_, r_sizes_);
+        }
+    }
+
+    void GeneralizedHoughBallardImpl::processImage()
+    {
+        calcHist();
+        findPosInHist();
+    }
+
+    void GeneralizedHoughBallardImpl::calcHist()
+    {
+        using namespace cv::gpu::cudev::ght;
+
+        CV_Assert( levels_ > 0 && r_table_.rows == (levels_ + 1) && r_sizes_.cols == (levels_ + 1) );
+        CV_Assert( dp_ > 0.0);
+
+        const double idp = 1.0 / dp_;
+
+        buildEdgePointList(imageEdges_, imageDx_, imageDy_);
+
+        ensureSizeIsEnough(cvCeil(imageSize_.height * idp) + 2, cvCeil(imageSize_.width * idp) + 2, CV_32SC1, hist_);
+        hist_.setTo(Scalar::all(0));
+
+        if (edgePointList_.cols > 0)
+        {
+            Ballard_Pos_calcHist_gpu(edgePointList_.ptr<unsigned int>(0), edgePointList_.ptr<float>(1), edgePointList_.cols,
+                                     r_table_, r_sizes_.ptr<int>(),
+                                     hist_,
+                                     (float)dp_, levels_);
+        }
+    }
+
+    void GeneralizedHoughBallardImpl::findPosInHist()
+    {
+        using namespace cv::gpu::cudev::ght;
+
+        CV_Assert( votesThreshold_ > 0 );
+
+        ensureSizeIsEnough(2, maxBufferSize_, CV_32FC4, outBuf_);
+
+        posCount_ = Ballard_Pos_findPosInHist_gpu(hist_, outBuf_.ptr<float4>(0), outBuf_.ptr<int3>(1), maxBufferSize_, (float)dp_, votesThreshold_);
+    }
+}
+
+Ptr<GeneralizedHoughBallard> cv::gpu::createGeneralizedHoughBallard()
+{
+    return makePtr<GeneralizedHoughBallardImpl>();
+}
+
+// GeneralizedHoughGuil
+
+namespace
+{
+    class GeneralizedHoughGuilImpl : public GeneralizedHoughGuil, private GeneralizedHoughBase
+    {
+    public:
+        GeneralizedHoughGuilImpl();
+
+        void setTemplate(InputArray templ, Point templCenter) { setTemplateImpl(templ, templCenter); }
+        void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter) { setTemplateImpl(edges, dx, dy, templCenter); }
+
+        void detect(InputArray image, OutputArray positions, OutputArray votes) { detectImpl(image, positions, votes); }
+        void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes) { detectImpl(edges, dx, dy, positions, votes); }
+
+        void setCannyLowThresh(int cannyLowThresh) { cannyLowThresh_ = cannyLowThresh; }
+        int getCannyLowThresh() const { return cannyLowThresh_; }
+
+        void setCannyHighThresh(int cannyHighThresh) { cannyHighThresh_ = cannyHighThresh; }
+        int getCannyHighThresh() const { return cannyHighThresh_; }
+
+        void setMinDist(double minDist) { minDist_ = minDist; }
+        double getMinDist() const { return minDist_; }
+
+        void setDp(double dp) { dp_ = dp; }
+        double getDp() const { return dp_; }
+
+        void setMaxBufferSize(int maxBufferSize) { maxBufferSize_ = maxBufferSize; }
+        int getMaxBufferSize() const { return maxBufferSize_; }
+
+        void setXi(double xi) { xi_ = xi; }
+        double getXi() const { return xi_; }
+
+        void setLevels(int levels) { levels_ = levels; }
+        int getLevels() const { return levels_; }
+
+        void setAngleEpsilon(double angleEpsilon) { angleEpsilon_ = angleEpsilon; }
+        double getAngleEpsilon() const { return angleEpsilon_; }
+
+        void setMinAngle(double minAngle) { minAngle_ = minAngle; }
+        double getMinAngle() const { return minAngle_; }
+
+        void setMaxAngle(double maxAngle) { maxAngle_ = maxAngle; }
+        double getMaxAngle() const { return maxAngle_; }
+
+        void setAngleStep(double angleStep) { angleStep_ = angleStep; }
+        double getAngleStep() const { return angleStep_; }
+
+        void setAngleThresh(int angleThresh) { angleThresh_ = angleThresh; }
+        int getAngleThresh() const { return angleThresh_; }
+
+        void setMinScale(double minScale) { minScale_ = minScale; }
+        double getMinScale() const { return minScale_; }
+
+        void setMaxScale(double maxScale) { maxScale_ = maxScale; }
+        double getMaxScale() const { return maxScale_; }
+
+        void setScaleStep(double scaleStep) { scaleStep_ = scaleStep; }
+        double getScaleStep() const { return scaleStep_; }
+
+        void setScaleThresh(int scaleThresh) { scaleThresh_ = scaleThresh; }
+        int getScaleThresh() const { return scaleThresh_; }
+
+        void setPosThresh(int posThresh) { posThresh_ = posThresh; }
+        int getPosThresh() const { return posThresh_; }
+
+    private:
+        void processTempl();
+        void processImage();
+
+        double xi_;
+        int levels_;
+        double angleEpsilon_;
+
+        double minAngle_;
+        double maxAngle_;
+        double angleStep_;
+        int angleThresh_;
+
+        double minScale_;
+        double maxScale_;
+        double scaleStep_;
+        int scaleThresh_;
+
+        int posThresh_;
+
+        struct Feature
+        {
+            GpuMat p1_pos;
+            GpuMat p1_theta;
+            GpuMat p2_pos;
+
+            GpuMat d12;
+
+            GpuMat r1;
+            GpuMat r2;
+
+            GpuMat sizes;
+            int maxSize;
+
+            void create(int levels, int maxCapacity, bool isTempl);
+        };
+
+        typedef void (*set_func_t)(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
+        typedef void (*build_func_t)(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                     int* sizes, int maxSize,
+                                     float xi, float angleEpsilon, int levels,
+                                     float2 center, float maxDist);
+
+        void buildFeatureList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Feature& features,
+                              set_func_t set_func, build_func_t build_func, bool isTempl, Point2d center = Point2d());
+
+        void calcOrientation();
+        void calcScale(double angle);
+        void calcPosition(double angle, int angleVotes, double scale, int scaleVotes);
+
+        Feature templFeatures_;
+        Feature imageFeatures_;
+
+        std::vector< std::pair<double, int> > angles_;
+        std::vector< std::pair<double, int> > scales_;
+
+        GpuMat hist_;
+        std::vector<int> h_buf_;
+    };
+
+    double toRad(double a)
+    {
+        return a * CV_PI / 180.0;
+    }
+
+    double clampAngle(double a)
+    {
+        double res = a;
+
+        while (res > 360.0)
+            res -= 360.0;
+        while (res < 0)
+            res += 360.0;
+
+        return res;
+    }
+
+    bool angleEq(double a, double b, double eps = 1.0)
+    {
+        return (fabs(clampAngle(a - b)) <= eps);
+    }
+
+    GeneralizedHoughGuilImpl::GeneralizedHoughGuilImpl()
+    {
+        maxBufferSize_ = 1000;
+
+        xi_ = 90.0;
+        levels_ = 360;
+        angleEpsilon_ = 1.0;
+
+        minAngle_ = 0.0;
+        maxAngle_ = 360.0;
+        angleStep_ = 1.0;
+        angleThresh_ = 15000;
+
+        minScale_ = 0.5;
+        maxScale_ = 2.0;
+        scaleStep_ = 0.05;
+        scaleThresh_ = 1000;
+
+        posThresh_ = 100;
+    }
+
+    void GeneralizedHoughGuilImpl::processTempl()
+    {
+        using namespace cv::gpu::cudev::ght;
+
+        buildFeatureList(templEdges_, templDx_, templDy_, templFeatures_,
+            Guil_Full_setTemplFeatures, Guil_Full_buildTemplFeatureList_gpu,
+            true, templCenter_);
+
+        h_buf_.resize(templFeatures_.sizes.cols);
+        cudaSafeCall( cudaMemcpy(&h_buf_[0], templFeatures_.sizes.data, h_buf_.size() * sizeof(int), cudaMemcpyDeviceToHost) );
+        templFeatures_.maxSize = *std::max_element(h_buf_.begin(), h_buf_.end());
+    }
+
+    void GeneralizedHoughGuilImpl::processImage()
+    {
+        using namespace cv::gpu::cudev::ght;
+
+        CV_Assert( levels_ > 0 );
+        CV_Assert( templFeatures_.sizes.cols == levels_ + 1 );
+        CV_Assert( minAngle_ >= 0.0 && minAngle_ < maxAngle_ && maxAngle_ <= 360.0 );
+        CV_Assert( angleStep_ > 0.0 && angleStep_ < 360.0 );
+        CV_Assert( angleThresh_ > 0 );
+        CV_Assert( minScale_ > 0.0 && minScale_ < maxScale_ );
+        CV_Assert( scaleStep_ > 0.0 );
+        CV_Assert( scaleThresh_ > 0 );
+        CV_Assert( dp_ > 0.0 );
+        CV_Assert( posThresh_ > 0 );
+
+        const double iAngleStep = 1.0 / angleStep_;
+        const int angleRange = cvCeil((maxAngle_ - minAngle_) * iAngleStep);
+
+        const double iScaleStep = 1.0 / scaleStep_;
+        const int scaleRange = cvCeil((maxScale_ - minScale_) * iScaleStep);
+
+        const double idp = 1.0 / dp_;
+        const int histRows = cvCeil(imageSize_.height * idp);
+        const int histCols = cvCeil(imageSize_.width * idp);
+
+        ensureSizeIsEnough(histRows + 2, std::max(angleRange + 1, std::max(scaleRange + 1, histCols + 2)), CV_32SC1, hist_);
+        h_buf_.resize(std::max(angleRange + 1, scaleRange + 1));
+
+        ensureSizeIsEnough(2, maxBufferSize_, CV_32FC4, outBuf_);
+
+        buildFeatureList(imageEdges_, imageDx_, imageDy_, imageFeatures_,
+            Guil_Full_setImageFeatures, Guil_Full_buildImageFeatureList_gpu,
+            false);
+
+        calcOrientation();
+
+        for (size_t i = 0; i < angles_.size(); ++i)
+        {
+            const double angle = angles_[i].first;
+            const int angleVotes = angles_[i].second;
+
+            calcScale(angle);
+
+            for (size_t j = 0; j < scales_.size(); ++j)
+            {
+                const double scale = scales_[j].first;
+                const int scaleVotes = scales_[j].second;
+
+                calcPosition(angle, angleVotes, scale, scaleVotes);
+            }
+        }
+    }
+
+    void GeneralizedHoughGuilImpl::Feature::create(int levels, int maxCapacity, bool isTempl)
+    {
+        if (!isTempl)
+        {
+            ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, p1_pos);
+            ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, p2_pos);
+        }
+
+        ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC1, p1_theta);
+
+        ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC1, d12);
+
+        if (isTempl)
+        {
+            ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, r1);
+            ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, r2);
+        }
+
+        ensureSizeIsEnough(1, levels + 1, CV_32SC1, sizes);
+        sizes.setTo(Scalar::all(0));
+
+        maxSize = 0;
+    }
+
+    void GeneralizedHoughGuilImpl::buildFeatureList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Feature& features,
+                                                    set_func_t set_func, build_func_t build_func, bool isTempl, Point2d center)
+    {
+        CV_Assert( levels_ > 0 );
+
+        const double maxDist = sqrt((double) templSize_.width * templSize_.width + templSize_.height * templSize_.height) * maxScale_;
+
+        features.create(levels_, maxBufferSize_, isTempl);
+        set_func(features.p1_pos, features.p1_theta, features.p2_pos, features.d12, features.r1, features.r2);
+
+        buildEdgePointList(edges, dx, dy);
+
+        if (edgePointList_.cols > 0)
+        {
+            build_func(edgePointList_.ptr<unsigned int>(0), edgePointList_.ptr<float>(1), edgePointList_.cols,
+                features.sizes.ptr<int>(), maxBufferSize_, (float)xi_, (float)angleEpsilon_, levels_, make_float2((float)center.x, (float)center.y), (float)maxDist);
+        }
+    }
+
+    void GeneralizedHoughGuilImpl::calcOrientation()
+    {
+        using namespace cv::gpu::cudev::ght;
+
+        const double iAngleStep = 1.0 / angleStep_;
+        const int angleRange = cvCeil((maxAngle_ - minAngle_) * iAngleStep);
+
+        hist_.setTo(Scalar::all(0));
+        Guil_Full_calcOHist_gpu(templFeatures_.sizes.ptr<int>(), imageFeatures_.sizes.ptr<int>(0), hist_.ptr<int>(),
+                                (float)minAngle_, (float)maxAngle_, (float)angleStep_, angleRange, levels_, templFeatures_.maxSize);
+        cudaSafeCall( cudaMemcpy(&h_buf_[0], hist_.data, h_buf_.size() * sizeof(int), cudaMemcpyDeviceToHost) );
+
+        angles_.clear();
+
+        for (int n = 0; n < angleRange; ++n)
+        {
+            if (h_buf_[n] >= angleThresh_)
+            {
+                const double angle = minAngle_ + n * angleStep_;
+                angles_.push_back(std::make_pair(angle, h_buf_[n]));
+            }
+        }
+    }
+
+    void GeneralizedHoughGuilImpl::calcScale(double angle)
+    {
+        using namespace cv::gpu::cudev::ght;
+
+        const double iScaleStep = 1.0 / scaleStep_;
+        const int scaleRange = cvCeil((maxScale_ - minScale_) * iScaleStep);
+
+        hist_.setTo(Scalar::all(0));
+        Guil_Full_calcSHist_gpu(templFeatures_.sizes.ptr<int>(), imageFeatures_.sizes.ptr<int>(0), hist_.ptr<int>(),
+                                (float)angle, (float)angleEpsilon_, (float)minScale_, (float)maxScale_,
+                                (float)iScaleStep, scaleRange, levels_, templFeatures_.maxSize);
+        cudaSafeCall( cudaMemcpy(&h_buf_[0], hist_.data, h_buf_.size() * sizeof(int), cudaMemcpyDeviceToHost) );
+
+        scales_.clear();
+
+        for (int s = 0; s < scaleRange; ++s)
+        {
+            if (h_buf_[s] >= scaleThresh_)
+            {
+                const double scale = minScale_ + s * scaleStep_;
+                scales_.push_back(std::make_pair(scale, h_buf_[s]));
+            }
+        }
+    }
+
+    void GeneralizedHoughGuilImpl::calcPosition(double angle, int angleVotes, double scale, int scaleVotes)
+    {
+        using namespace cv::gpu::cudev::ght;
+
+        hist_.setTo(Scalar::all(0));
+        Guil_Full_calcPHist_gpu(templFeatures_.sizes.ptr<int>(), imageFeatures_.sizes.ptr<int>(0), hist_,
+                                (float)angle, (float)angleEpsilon_, (float)scale, (float)dp_, levels_, templFeatures_.maxSize);
+
+        posCount_ = Guil_Full_findPosInHist_gpu(hist_, outBuf_.ptr<float4>(0), outBuf_.ptr<int3>(1),
+                                                posCount_, maxBufferSize_, (float)angle, angleVotes,
+                                                (float)scale, scaleVotes, (float)dp_, posThresh_);
+    }
+}
+
+Ptr<GeneralizedHoughGuil> cv::gpu::createGeneralizedHoughGuil()
+{
+    return makePtr<GeneralizedHoughGuilImpl>();
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/gftt.cpp b/modules/gpuimgproc/src/gftt.cpp
index cca1df444..0446b6cdb 100644
--- a/modules/gpuimgproc/src/gftt.cpp
+++ b/modules/gpuimgproc/src/gftt.cpp
@@ -45,9 +45,9 @@
 using namespace cv;
 using namespace cv::gpu;
 
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUARITHM)
 
-void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat&, GpuMat&, const GpuMat&) { throw_no_cuda(); }
+Ptr<gpu::CornersDetector> cv::gpu::createGoodFeaturesToTrackDetector(int, int, double, double, int, bool, double) { throw_no_cuda(); return Ptr<gpu::CornersDetector>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -60,117 +60,157 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask)
+namespace
 {
-#ifndef HAVE_OPENCV_GPUARITHM
-    (void) image;
-    (void) corners;
-    (void) mask;
-    throw_no_cuda();
-#else
-    using namespace cv::gpu::cudev::gfft;
-
-    CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
-    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
-
-    ensureSizeIsEnough(image.size(), CV_32F, eig_);
-
-    if (useHarrisDetector)
-        cornerHarris(image, eig_, Dx_, Dy_, buf_, blockSize, 3, harrisK);
-    else
-        cornerMinEigenVal(image, eig_, Dx_, Dy_, buf_, blockSize, 3);
-
-    double maxVal = 0;
-    gpu::minMax(eig_, 0, &maxVal, GpuMat(), minMaxbuf_);
-
-    ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
-
-    int total = findCorners_gpu(eig_, static_cast<float>(maxVal * qualityLevel), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols);
-
-    if (total == 0)
+    class GoodFeaturesToTrackDetector : public CornersDetector
     {
-        corners.release();
-        return;
+    public:
+        GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
+                                    int blockSize, bool useHarrisDetector, double harrisK);
+
+        void detect(InputArray image, OutputArray corners, InputArray mask = noArray());
+
+    private:
+        int maxCorners_;
+        double qualityLevel_;
+        double minDistance_;
+
+        Ptr<gpu::CornernessCriteria> cornerCriteria_;
+
+        GpuMat Dx_;
+        GpuMat Dy_;
+        GpuMat buf_;
+        GpuMat eig_;
+        GpuMat minMaxbuf_;
+        GpuMat tmpCorners_;
+    };
+
+    GoodFeaturesToTrackDetector::GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
+                                                             int blockSize, bool useHarrisDetector, double harrisK) :
+        maxCorners_(maxCorners), qualityLevel_(qualityLevel), minDistance_(minDistance)
+    {
+        CV_Assert( qualityLevel_ > 0 && minDistance_ >= 0 && maxCorners_ >= 0 );
+
+        cornerCriteria_ = useHarrisDetector ?
+                    gpu::createHarrisCorner(srcType, blockSize, 3, harrisK) :
+                    gpu::createMinEigenValCorner(srcType, blockSize, 3);
     }
 
-    sortCorners_gpu(eig_, tmpCorners_.ptr<float2>(), total);
-
-    if (minDistance < 1)
-        tmpCorners_.colRange(0, maxCorners > 0 ? std::min(maxCorners, total) : total).copyTo(corners);
-    else
+    void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask)
     {
-        std::vector<Point2f> tmp(total);
-        Mat tmpMat(1, total, CV_32FC2, (void*)&tmp[0]);
-        tmpCorners_.colRange(0, total).download(tmpMat);
+        using namespace cv::gpu::cudev::gfft;
 
-        std::vector<Point2f> tmp2;
-        tmp2.reserve(total);
+        GpuMat image = _image.getGpuMat();
+        GpuMat mask = _mask.getGpuMat();
 
-        const int cell_size = cvRound(minDistance);
-        const int grid_width = (image.cols + cell_size - 1) / cell_size;
-        const int grid_height = (image.rows + cell_size - 1) / cell_size;
+        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );
 
-        std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
+        ensureSizeIsEnough(image.size(), CV_32FC1, eig_);
+        cornerCriteria_->compute(image, eig_);
 
-        for (int i = 0; i < total; ++i)
+        double maxVal = 0;
+        gpu::minMax(eig_, 0, &maxVal, noArray(), minMaxbuf_);
+
+        ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
+
+        int total = findCorners_gpu(eig_, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols);
+
+        if (total == 0)
         {
-            Point2f p = tmp[i];
+            _corners.release();
+            return;
+        }
 
-            bool good = true;
+        sortCorners_gpu(eig_, tmpCorners_.ptr<float2>(), total);
 
-            int x_cell = static_cast<int>(p.x / cell_size);
-            int y_cell = static_cast<int>(p.y / cell_size);
+        if (minDistance_ < 1)
+        {
+            tmpCorners_.colRange(0, maxCorners_ > 0 ? std::min(maxCorners_, total) : total).copyTo(_corners);
+        }
+        else
+        {
+            std::vector<Point2f> tmp(total);
+            Mat tmpMat(1, total, CV_32FC2, (void*)&tmp[0]);
+            tmpCorners_.colRange(0, total).download(tmpMat);
 
-            int x1 = x_cell - 1;
-            int y1 = y_cell - 1;
-            int x2 = x_cell + 1;
-            int y2 = y_cell + 1;
+            std::vector<Point2f> tmp2;
+            tmp2.reserve(total);
 
-            // boundary check
-            x1 = std::max(0, x1);
-            y1 = std::max(0, y1);
-            x2 = std::min(grid_width - 1, x2);
-            y2 = std::min(grid_height - 1, y2);
+            const int cell_size = cvRound(minDistance_);
+            const int grid_width = (image.cols + cell_size - 1) / cell_size;
+            const int grid_height = (image.rows + cell_size - 1) / cell_size;
 
-            for (int yy = y1; yy <= y2; yy++)
+            std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
+
+            for (int i = 0; i < total; ++i)
             {
-                for (int xx = x1; xx <= x2; xx++)
+                Point2f p = tmp[i];
+
+                bool good = true;
+
+                int x_cell = static_cast<int>(p.x / cell_size);
+                int y_cell = static_cast<int>(p.y / cell_size);
+
+                int x1 = x_cell - 1;
+                int y1 = y_cell - 1;
+                int x2 = x_cell + 1;
+                int y2 = y_cell + 1;
+
+                // boundary check
+                x1 = std::max(0, x1);
+                y1 = std::max(0, y1);
+                x2 = std::min(grid_width - 1, x2);
+                y2 = std::min(grid_height - 1, y2);
+
+                for (int yy = y1; yy <= y2; yy++)
                 {
-                    std::vector<Point2f>& m = grid[yy * grid_width + xx];
-
-                    if (!m.empty())
+                    for (int xx = x1; xx <= x2; xx++)
                     {
-                        for(size_t j = 0; j < m.size(); j++)
-                        {
-                            float dx = p.x - m[j].x;
-                            float dy = p.y - m[j].y;
+                        std::vector<Point2f>& m = grid[yy * grid_width + xx];
 
-                            if (dx * dx + dy * dy < minDistance * minDistance)
+                        if (!m.empty())
+                        {
+                            for(size_t j = 0; j < m.size(); j++)
                             {
-                                good = false;
-                                goto break_out;
+                                float dx = p.x - m[j].x;
+                                float dy = p.y - m[j].y;
+
+                                if (dx * dx + dy * dy < minDistance_ * minDistance_)
+                                {
+                                    good = false;
+                                    goto break_out;
+                                }
                             }
                         }
                     }
                 }
+
+                break_out:
+
+                if(good)
+                {
+                    grid[y_cell * grid_width + x_cell].push_back(p);
+
+                    tmp2.push_back(p);
+
+                    if (maxCorners_ > 0 && tmp2.size() == static_cast<size_t>(maxCorners_))
+                        break;
+                }
             }
 
-            break_out:
+            _corners.create(1, static_cast<int>(tmp2.size()), CV_32FC2);
+            GpuMat corners = _corners.getGpuMat();
 
-            if(good)
-            {
-                grid[y_cell * grid_width + x_cell].push_back(p);
-
-                tmp2.push_back(p);
-
-                if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
-                    break;
-            }
+            corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
         }
-
-        corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
     }
-#endif
+}
+
+Ptr<gpu::CornersDetector> cv::gpu::createGoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
+                                                                     int blockSize, bool useHarrisDetector, double harrisK)
+{
+    return Ptr<gpu::CornersDetector>(
+        new GoodFeaturesToTrackDetector(srcType, maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, harrisK));
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/histogram.cpp b/modules/gpuimgproc/src/histogram.cpp
index 3227dac6c..eb78ee0de 100644
--- a/modules/gpuimgproc/src/histogram.cpp
+++ b/modules/gpuimgproc/src/histogram.cpp
@@ -47,319 +47,22 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_no_cuda(); }
+void cv::gpu::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*, Stream&) { throw_no_cuda(); }
-void cv::gpu::histEven(const GpuMat&, GpuMat*, GpuMat&, int*, int*, int*, Stream&) { throw_no_cuda(); }
-
-void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, Stream&) { throw_no_cuda(); }
-void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, GpuMat&, Stream&) { throw_no_cuda(); }
-
-void cv::gpu::calcHist(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-
-void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::equalizeHist(InputArray, OutputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
 
 cv::Ptr<cv::gpu::CLAHE> cv::gpu::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::gpu::CLAHE>(); }
 
+void cv::gpu::evenLevels(OutputArray, int, int, int) { throw_no_cuda(); }
+
+void cv::gpu::histEven(InputArray, OutputArray, InputOutputArray, int, int, int, Stream&) { throw_no_cuda(); }
+void cv::gpu::histEven(InputArray, GpuMat*, InputOutputArray, int*, int*, int*, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::histRange(InputArray, OutputArray, InputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
+void cv::gpu::histRange(InputArray, GpuMat*, const GpuMat*, InputOutputArray, Stream&) { throw_no_cuda(); }
+
 #else /* !defined (HAVE_CUDA) */
 
-////////////////////////////////////////////////////////////////////////
-// NPP Histogram
-
-namespace
-{
-    typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);
-    typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);
-
-    template<int SDEPTH> struct NppHistogramEvenFuncC1
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-    typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,
-            int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);
-    };
-    template<int SDEPTH> struct NppHistogramEvenFuncC4
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-
-        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,
-            Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);
-    };
-
-    template<int SDEPTH, typename NppHistogramEvenFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
-    struct NppHistogramEvenC1
-    {
-        typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;
-
-        static void hist(const GpuMat& src, GpuMat& hist, GpuMat& buffer, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
-        {
-            int levels = histSize + 1;
-            hist.create(1, histSize, CV_32S);
-
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            int buf_size;
-            get_buf_size(sz, levels, &buf_size);
-
-            ensureSizeIsEnough(1, buf_size, CV_8U, buffer);
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,
-                lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-    template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
-    struct NppHistogramEvenC4
-    {
-        typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;
-
-        static void hist(const GpuMat& src, GpuMat hist[4], GpuMat& buffer, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)
-        {
-            int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};
-            hist[0].create(1, histSize[0], CV_32S);
-            hist[1].create(1, histSize[1], CV_32S);
-            hist[2].create(1, histSize[2], CV_32S);
-            hist[3].create(1, histSize[3], CV_32S);
-
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};
-
-            int buf_size;
-            get_buf_size(sz, levels, &buf_size);
-
-            ensureSizeIsEnough(1, buf_size, CV_8U, buffer);
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-
-    template<int SDEPTH> struct NppHistogramRangeFuncC1
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-        typedef Npp32s level_t;
-        enum {LEVEL_TYPE_CODE=CV_32SC1};
-
-        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
-            const Npp32s* pLevels, int nLevels, Npp8u* pBuffer);
-    };
-    template<> struct NppHistogramRangeFuncC1<CV_32F>
-    {
-        typedef Npp32f src_t;
-        typedef Npp32f level_t;
-        enum {LEVEL_TYPE_CODE=CV_32FC1};
-
-        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
-            const Npp32f* pLevels, int nLevels, Npp8u* pBuffer);
-    };
-    template<int SDEPTH> struct NppHistogramRangeFuncC4
-    {
-        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
-        typedef Npp32s level_t;
-        enum {LEVEL_TYPE_CODE=CV_32SC1};
-
-        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
-            const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer);
-    };
-    template<> struct NppHistogramRangeFuncC4<CV_32F>
-    {
-        typedef Npp32f src_t;
-        typedef Npp32f level_t;
-        enum {LEVEL_TYPE_CODE=CV_32FC1};
-
-        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
-            const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer);
-    };
-
-    template<int SDEPTH, typename NppHistogramRangeFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
-    struct NppHistogramRangeC1
-    {
-        typedef typename NppHistogramRangeFuncC1<SDEPTH>::src_t src_t;
-        typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
-        enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
-
-        static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buffer, cudaStream_t stream)
-        {
-            CV_Assert(levels.type() == LEVEL_TYPE_CODE && levels.rows == 1);
-
-            hist.create(1, levels.cols - 1, CV_32S);
-
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            int buf_size;
-            get_buf_size(sz, levels.cols, &buf_size);
-
-            ensureSizeIsEnough(1, buf_size, CV_8U, buffer);
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buffer.ptr<Npp8u>()) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-    template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
-    struct NppHistogramRangeC4
-    {
-        typedef typename NppHistogramRangeFuncC4<SDEPTH>::src_t src_t;
-        typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
-        enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
-
-        static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buffer, cudaStream_t stream)
-        {
-            CV_Assert(levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1);
-            CV_Assert(levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1);
-            CV_Assert(levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1);
-            CV_Assert(levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1);
-
-            hist[0].create(1, levels[0].cols - 1, CV_32S);
-            hist[1].create(1, levels[1].cols - 1, CV_32S);
-            hist[2].create(1, levels[2].cols - 1, CV_32S);
-            hist[3].create(1, levels[3].cols - 1, CV_32S);
-
-            Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};
-            int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols};
-            const level_t* pLevels[] = {levels[0].ptr<level_t>(), levels[1].ptr<level_t>(), levels[2].ptr<level_t>(), levels[3].ptr<level_t>()};
-
-            NppiSize sz;
-            sz.width = src.cols;
-            sz.height = src.rows;
-
-            int buf_size;
-            get_buf_size(sz, nLevels, &buf_size);
-
-            ensureSizeIsEnough(1, buf_size, CV_8U, buffer);
-
-            NppStreamHandler h(stream);
-
-            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buffer.ptr<Npp8u>()) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-}
-
-void cv::gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel)
-{
-    Mat host_levels(1, nLevels, CV_32SC1);
-    nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );
-    levels.upload(host_levels);
-}
-
-void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
-{
-    GpuMat buf;
-    histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
-}
-
-void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
-{
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );
-
-    typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, GpuMat& buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);
-    static const hist_t hist_callers[] =
-    {
-        NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
-        0,
-        NppHistogramEvenC1<CV_16U, nppiHistogramEven_16u_C1R, nppiHistogramEvenGetBufferSize_16u_C1R>::hist,
-        NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist
-    };
-
-    hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
-{
-    GpuMat buf;
-    histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
-}
-
-void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
-{
-    CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );
-
-    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);
-    static const hist_t hist_callers[] =
-    {
-        NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
-        0,
-        NppHistogramEvenC4<CV_16U, nppiHistogramEven_16u_C4R, nppiHistogramEvenGetBufferSize_16u_C4R>::hist,
-        NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist
-    };
-
-    hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream)
-{
-    GpuMat buf;
-    histRange(src, hist, levels, buf, stream);
-}
-
-void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream)
-{
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1);
-
-    typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, cudaStream_t stream);
-    static const hist_t hist_callers[] =
-    {
-        NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
-        0,
-        NppHistogramRangeC1<CV_16U, nppiHistogramRange_16u_C1R, nppiHistogramRangeGetBufferSize_16u_C1R>::hist,
-        NppHistogramRangeC1<CV_16S, nppiHistogramRange_16s_C1R, nppiHistogramRangeGetBufferSize_16s_C1R>::hist,
-        0,
-        NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist
-    };
-
-    hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
-{
-    GpuMat buf;
-    histRange(src, hist, levels, buf, stream);
-}
-
-void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream)
-{
-    CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4);
-
-    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, cudaStream_t stream);
-    static const hist_t hist_callers[] =
-    {
-        NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,
-        0,
-        NppHistogramRangeC4<CV_16U, nppiHistogramRange_16u_C4R, nppiHistogramRangeGetBufferSize_16u_C4R>::hist,
-        NppHistogramRangeC4<CV_16S, nppiHistogramRange_16s_C4R, nppiHistogramRangeGetBufferSize_16s_C4R>::hist,
-        0,
-        NppHistogramRangeC4<CV_32F, nppiHistogramRange_32f_C4R, nppiHistogramRangeGetBufferSize_32f_C4R>::hist
-    };
-
-    hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
-}
-
 ////////////////////////////////////////////////////////////////////////
 // calcHist
 
@@ -368,12 +71,16 @@ namespace hist
     void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
 }
 
-void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
+void cv::gpu::calcHist(InputArray _src, OutputArray _hist, Stream& stream)
 {
-    CV_Assert(src.type() == CV_8UC1);
+    GpuMat src = _src.getGpuMat();
 
-    hist.create(1, 256, CV_32SC1);
-    hist.setTo(Scalar::all(0));
+    CV_Assert( src.type() == CV_8UC1 );
+
+    _hist.create(1, 256, CV_32SC1);
+    GpuMat hist = _hist.getGpuMat();
+
+    hist.setTo(Scalar::all(0), stream);
 
     hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
 }
@@ -386,31 +93,30 @@ namespace hist
     void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
 }
 
-void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream)
+void cv::gpu::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream)
 {
-    GpuMat hist;
-    GpuMat buf;
-    equalizeHist(src, dst, hist, buf, stream);
-}
+    GpuMat src = _src.getGpuMat();
 
-void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)
-{
-    CV_Assert(src.type() == CV_8UC1);
+    CV_Assert( src.type() == CV_8UC1 );
 
-    dst.create(src.size(), src.type());
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
 
     int intBufSize;
     nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );
 
-    ensureSizeIsEnough(1, intBufSize + 256 * sizeof(int), CV_8UC1, buf);
+    size_t bufSize = intBufSize + 2 * 256 * sizeof(int);
 
-    GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr());
-    GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize);
+    ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, _buf);
+    GpuMat buf = _buf.getGpuMat();
 
-    calcHist(src, hist, s);
+    GpuMat hist(1, 256, CV_32SC1, buf.data);
+    GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int));
+    GpuMat intBuf(1, intBufSize, CV_8UC1, buf.data + 2 * 256 * sizeof(int));
 
-    cudaStream_t stream = StreamAccessor::getStream(s);
+    gpu::calcHist(src, hist, _stream);
 
+    cudaStream_t stream = StreamAccessor::getStream(_stream);
     NppStreamHandler h(stream);
 
     nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );
@@ -551,7 +257,323 @@ namespace
 
 cv::Ptr<cv::gpu::CLAHE> cv::gpu::createCLAHE(double clipLimit, cv::Size tileGridSize)
 {
-    return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
+    return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height);
+}
+
+////////////////////////////////////////////////////////////////////////
+// NPP Histogram
+
+namespace
+{
+    typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);
+    typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);
+
+    template<int SDEPTH> struct NppHistogramEvenFuncC1
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+    typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,
+            int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);
+    };
+    template<int SDEPTH> struct NppHistogramEvenFuncC4
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,
+            Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);
+    };
+
+    template<int SDEPTH, typename NppHistogramEvenFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
+    struct NppHistogramEvenC1
+    {
+        typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;
+
+        static void hist(const GpuMat& src, OutputArray _hist, InputOutputArray _buf, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
+        {
+            const int levels = histSize + 1;
+
+            _hist.create(1, histSize, CV_32S);
+            GpuMat hist = _hist.getGpuMat();
+
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+
+            int buf_size;
+            get_buf_size(sz, levels, &buf_size);
+
+            ensureSizeIsEnough(1, buf_size, CV_8UC1, _buf);
+            GpuMat buf = _buf.getGpuMat();
+
+            NppStreamHandler h(stream);
+
+            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,
+                lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
+    template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
+    struct NppHistogramEvenC4
+    {
+        typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;
+
+        static void hist(const GpuMat& src, GpuMat hist[4],InputOutputArray _buf, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)
+        {
+            int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};
+            hist[0].create(1, histSize[0], CV_32S);
+            hist[1].create(1, histSize[1], CV_32S);
+            hist[2].create(1, histSize[2], CV_32S);
+            hist[3].create(1, histSize[3], CV_32S);
+
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+
+            Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};
+
+            int buf_size;
+            get_buf_size(sz, levels, &buf_size);
+
+            ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
+            GpuMat buf = _buf.getGpuMat();
+
+            NppStreamHandler h(stream);
+
+            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
+
+    template<int SDEPTH> struct NppHistogramRangeFuncC1
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+        typedef Npp32s level_t;
+        enum {LEVEL_TYPE_CODE=CV_32SC1};
+
+        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
+            const Npp32s* pLevels, int nLevels, Npp8u* pBuffer);
+    };
+    template<> struct NppHistogramRangeFuncC1<CV_32F>
+    {
+        typedef Npp32f src_t;
+        typedef Npp32f level_t;
+        enum {LEVEL_TYPE_CODE=CV_32FC1};
+
+        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
+            const Npp32f* pLevels, int nLevels, Npp8u* pBuffer);
+    };
+    template<int SDEPTH> struct NppHistogramRangeFuncC4
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+        typedef Npp32s level_t;
+        enum {LEVEL_TYPE_CODE=CV_32SC1};
+
+        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
+            const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer);
+    };
+    template<> struct NppHistogramRangeFuncC4<CV_32F>
+    {
+        typedef Npp32f src_t;
+        typedef Npp32f level_t;
+        enum {LEVEL_TYPE_CODE=CV_32FC1};
+
+        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
+            const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer);
+    };
+
+    template<int SDEPTH, typename NppHistogramRangeFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
+    struct NppHistogramRangeC1
+    {
+        typedef typename NppHistogramRangeFuncC1<SDEPTH>::src_t src_t;
+        typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
+        enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
+
+        static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, InputOutputArray _buf, cudaStream_t stream)
+        {
+            CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 );
+
+            _hist.create(1, levels.cols - 1, CV_32S);
+            GpuMat hist = _hist.getGpuMat();
+
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+
+            int buf_size;
+            get_buf_size(sz, levels.cols, &buf_size);
+
+            ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
+            GpuMat buf = _buf.getGpuMat();
+
+            NppStreamHandler h(stream);
+
+            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buf.ptr<Npp8u>()) );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
+    template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
+    struct NppHistogramRangeC4
+    {
+        typedef typename NppHistogramRangeFuncC4<SDEPTH>::src_t src_t;
+        typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
+        enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
+
+        static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4],InputOutputArray _buf, cudaStream_t stream)
+        {
+            CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 );
+            CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 );
+            CV_Assert( levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1 );
+            CV_Assert( levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1 );
+
+            hist[0].create(1, levels[0].cols - 1, CV_32S);
+            hist[1].create(1, levels[1].cols - 1, CV_32S);
+            hist[2].create(1, levels[2].cols - 1, CV_32S);
+            hist[3].create(1, levels[3].cols - 1, CV_32S);
+
+            Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};
+            int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols};
+            const level_t* pLevels[] = {levels[0].ptr<level_t>(), levels[1].ptr<level_t>(), levels[2].ptr<level_t>(), levels[3].ptr<level_t>()};
+
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+
+            int buf_size;
+            get_buf_size(sz, nLevels, &buf_size);
+
+            ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
+            GpuMat buf = _buf.getGpuMat();
+
+            NppStreamHandler h(stream);
+
+            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buf.ptr<Npp8u>()) );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
+}
+
+void cv::gpu::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel)
+{
+    const int kind = _levels.kind();
+
+    _levels.create(1, nLevels, CV_32SC1);
+
+    Mat host_levels;
+    if (kind == _InputArray::GPU_MAT)
+        host_levels.create(1, nLevels, CV_32SC1);
+    else
+        host_levels = _levels.getMat();
+
+    nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );
+
+    if (kind == _InputArray::GPU_MAT)
+        _levels.getGpuMatRef().upload(host_levels);
+}
+
+namespace hist
+{
+    void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream);
+}
+
+namespace
+{
+    void histEven8u(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
+    {
+        hist.create(1, histSize, CV_32S);
+        cudaSafeCall( cudaMemsetAsync(hist.data, 0, histSize * sizeof(int), stream) );
+        hist::histEven8u(src, hist.ptr<int>(), histSize, lowerLevel, upperLevel, stream);
+    }
+}
+
+void cv::gpu::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
+{
+    typedef void (*hist_t)(const GpuMat& src, OutputArray hist, InputOutputArray buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);
+    static const hist_t hist_callers[] =
+    {
+        NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
+        0,
+        NppHistogramEvenC1<CV_16U, nppiHistogramEven_16u_C1R, nppiHistogramEvenGetBufferSize_16u_C1R>::hist,
+        NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist
+    };
+
+    GpuMat src = _src.getGpuMat();
+
+    if (src.depth() == CV_8U && deviceSupports(FEATURE_SET_COMPUTE_30))
+    {
+        histEven8u(src, hist.getGpuMatRef(), histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
+        return;
+    }
+
+    CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );
+
+    hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
+{
+    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], InputOutputArray buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);
+    static const hist_t hist_callers[] =
+    {
+        NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
+        0,
+        NppHistogramEvenC4<CV_16U, nppiHistogramEven_16u_C4R, nppiHistogramEvenGetBufferSize_16u_C4R>::hist,
+        NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist
+    };
+
+    GpuMat src = _src.getGpuMat();
+
+    CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );
+
+    hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream)
+{
+    typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, InputOutputArray buf, cudaStream_t stream);
+    static const hist_t hist_callers[] =
+    {
+        NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
+        0,
+        NppHistogramRangeC1<CV_16U, nppiHistogramRange_16u_C1R, nppiHistogramRangeGetBufferSize_16u_C1R>::hist,
+        NppHistogramRangeC1<CV_16S, nppiHistogramRange_16s_C1R, nppiHistogramRangeGetBufferSize_16s_C1R>::hist,
+        0,
+        NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist
+    };
+
+    GpuMat src = _src.getGpuMat();
+    GpuMat levels = _levels.getGpuMat();
+
+    CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 );
+
+    hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream)
+{
+    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, cudaStream_t stream);
+    static const hist_t hist_callers[] =
+    {
+        NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,
+        0,
+        NppHistogramRangeC4<CV_16U, nppiHistogramRange_16u_C4R, nppiHistogramRangeGetBufferSize_16u_C4R>::hist,
+        NppHistogramRangeC4<CV_16S, nppiHistogramRange_16s_C4R, nppiHistogramRangeGetBufferSize_16s_C4R>::hist,
+        0,
+        NppHistogramRangeC4<CV_32F, nppiHistogramRange_32f_C4R, nppiHistogramRangeGetBufferSize_32f_C4R>::hist
+    };
+
+    GpuMat src = _src.getGpuMat();
+
+    CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 );
+
+    hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/hough.cpp b/modules/gpuimgproc/src/hough.cpp
deleted file mode 100644
index 15e529762..000000000
--- a/modules/gpuimgproc/src/hough.cpp
+++ /dev/null
@@ -1,1432 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::gpu;
-
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
-
-void cv::gpu::HoughLines(const GpuMat&, GpuMat&, float, float, int, bool, int) { throw_no_cuda(); }
-void cv::gpu::HoughLines(const GpuMat&, GpuMat&, HoughLinesBuf&, float, float, int, bool, int) { throw_no_cuda(); }
-void cv::gpu::HoughLinesDownload(const GpuMat&, OutputArray, OutputArray) { throw_no_cuda(); }
-
-void cv::gpu::HoughLinesP(const GpuMat&, GpuMat&, HoughLinesBuf&, float, float, int, int, int) { throw_no_cuda(); }
-
-void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, int, float, float, int, int, int, int, int) { throw_no_cuda(); }
-void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, HoughCirclesBuf&, int, float, float, int, int, int, int, int) { throw_no_cuda(); }
-void cv::gpu::HoughCirclesDownload(const GpuMat&, OutputArray) { throw_no_cuda(); }
-
-Ptr<GeneralizedHough_GPU> cv::gpu::GeneralizedHough_GPU::create(int) { throw_no_cuda(); return Ptr<GeneralizedHough_GPU>(); }
-cv::gpu::GeneralizedHough_GPU::~GeneralizedHough_GPU() {}
-void cv::gpu::GeneralizedHough_GPU::setTemplate(const GpuMat&, int, Point) { throw_no_cuda(); }
-void cv::gpu::GeneralizedHough_GPU::setTemplate(const GpuMat&, const GpuMat&, const GpuMat&, Point) { throw_no_cuda(); }
-void cv::gpu::GeneralizedHough_GPU::detect(const GpuMat&, GpuMat&, int) { throw_no_cuda(); }
-void cv::gpu::GeneralizedHough_GPU::detect(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::GeneralizedHough_GPU::download(const GpuMat&, OutputArray, OutputArray) { throw_no_cuda(); }
-void cv::gpu::GeneralizedHough_GPU::release() {}
-
-#else /* !defined (HAVE_CUDA) */
-
-#include "opencv2/core/utility.hpp"
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace hough
-    {
-        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
-    }
-}}}
-
-//////////////////////////////////////////////////////////
-// HoughLines
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace hough
-    {
-        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
-        int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
-    }
-}}}
-
-void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
-{
-    HoughLinesBuf buf;
-    HoughLines(src, lines, buf, rho, theta, threshold, doSort, maxLines);
-}
-
-void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort, int maxLines)
-{
-    using namespace cv::gpu::cudev::hough;
-
-    CV_Assert(src.type() == CV_8UC1);
-    CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
-    CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
-
-    ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.list);
-    unsigned int* srcPoints = buf.list.ptr<unsigned int>();
-
-    const int pointsCount = buildPointList_gpu(src, srcPoints);
-    if (pointsCount == 0)
-    {
-        lines.release();
-        return;
-    }
-
-    const int numangle = cvRound(CV_PI / theta);
-    const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
-    CV_Assert(numangle > 0 && numrho > 0);
-
-    ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum);
-    buf.accum.setTo(Scalar::all(0));
-
-    DeviceInfo devInfo;
-    linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
-
-    ensureSizeIsEnough(2, maxLines, CV_32FC2, lines);
-
-    int linesCount = linesGetResult_gpu(buf.accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
-    if (linesCount > 0)
-        lines.cols = linesCount;
-    else
-        lines.release();
-}
-
-void cv::gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines_, OutputArray h_votes_)
-{
-    if (d_lines.empty())
-    {
-        h_lines_.release();
-        if (h_votes_.needed())
-            h_votes_.release();
-        return;
-    }
-
-    CV_Assert(d_lines.rows == 2 && d_lines.type() == CV_32FC2);
-
-    h_lines_.create(1, d_lines.cols, CV_32FC2);
-    Mat h_lines = h_lines_.getMat();
-    d_lines.row(0).download(h_lines);
-
-    if (h_votes_.needed())
-    {
-        h_votes_.create(1, d_lines.cols, CV_32SC1);
-        Mat h_votes = h_votes_.getMat();
-        GpuMat d_votes(1, d_lines.cols, CV_32SC1, const_cast<int*>(d_lines.ptr<int>(1)));
-        d_votes.download(h_votes);
-    }
-}
-
-//////////////////////////////////////////////////////////
-// HoughLinesP
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace hough
-    {
-        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength);
-    }
-}}}
-
-void cv::gpu::HoughLinesP(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
-{
-    using namespace cv::gpu::cudev::hough;
-
-    CV_Assert( src.type() == CV_8UC1 );
-    CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
-    CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
-
-    ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.list);
-    unsigned int* srcPoints = buf.list.ptr<unsigned int>();
-
-    const int pointsCount = buildPointList_gpu(src, srcPoints);
-    if (pointsCount == 0)
-    {
-        lines.release();
-        return;
-    }
-
-    const int numangle = cvRound(CV_PI / theta);
-    const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
-    CV_Assert( numangle > 0 && numrho > 0 );
-
-    ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum);
-    buf.accum.setTo(Scalar::all(0));
-
-    DeviceInfo devInfo;
-    linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
-
-    ensureSizeIsEnough(1, maxLines, CV_32SC4, lines);
-
-    int linesCount = houghLinesProbabilistic_gpu(src, buf.accum, lines.ptr<int4>(), maxLines, rho, theta, maxLineGap, minLineLength);
-
-    if (linesCount > 0)
-        lines.cols = linesCount;
-    else
-        lines.release();
-}
-
-//////////////////////////////////////////////////////////
-// HoughCircles
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace hough
-    {
-        void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
-        int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
-        int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
-                                   float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
-    }
-}}}
-
-void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
-{
-    HoughCirclesBuf buf;
-    HoughCircles(src, circles, buf, method, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
-}
-
-void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method,
-                           float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
-{
-    using namespace cv::gpu::cudev::hough;
-
-    CV_Assert(src.type() == CV_8UC1);
-    CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
-    CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
-    CV_Assert(method == cv::HOUGH_GRADIENT);
-    CV_Assert(dp > 0);
-    CV_Assert(minRadius > 0 && maxRadius > minRadius);
-    CV_Assert(cannyThreshold > 0);
-    CV_Assert(votesThreshold > 0);
-    CV_Assert(maxCircles > 0);
-
-    const float idp = 1.0f / dp;
-
-    cv::gpu::Canny(src, buf.cannyBuf, buf.edges, std::max(cannyThreshold / 2, 1), cannyThreshold);
-
-    ensureSizeIsEnough(2, src.size().area(), CV_32SC1, buf.list);
-    unsigned int* srcPoints = buf.list.ptr<unsigned int>(0);
-    unsigned int* centers = buf.list.ptr<unsigned int>(1);
-
-    const int pointsCount = buildPointList_gpu(buf.edges, srcPoints);
-    if (pointsCount == 0)
-    {
-        circles.release();
-        return;
-    }
-
-    ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum);
-    buf.accum.setTo(Scalar::all(0));
-
-    circlesAccumCenters_gpu(srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp);
-
-    int centersCount = buildCentersList_gpu(buf.accum, centers, votesThreshold);
-    if (centersCount == 0)
-    {
-        circles.release();
-        return;
-    }
-
-    if (minDist > 1)
-    {
-        cv::AutoBuffer<ushort2> oldBuf_(centersCount);
-        cv::AutoBuffer<ushort2> newBuf_(centersCount);
-        int newCount = 0;
-
-        ushort2* oldBuf = oldBuf_;
-        ushort2* newBuf = newBuf_;
-
-        cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
-
-        const int cellSize = cvRound(minDist);
-        const int gridWidth = (src.cols + cellSize - 1) / cellSize;
-        const int gridHeight = (src.rows + cellSize - 1) / cellSize;
-
-        std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
-
-        const float minDist2 = minDist * minDist;
-
-        for (int i = 0; i < centersCount; ++i)
-        {
-            ushort2 p = oldBuf[i];
-
-            bool good = true;
-
-            int xCell = static_cast<int>(p.x / cellSize);
-            int yCell = static_cast<int>(p.y / cellSize);
-
-            int x1 = xCell - 1;
-            int y1 = yCell - 1;
-            int x2 = xCell + 1;
-            int y2 = yCell + 1;
-
-            // boundary check
-            x1 = std::max(0, x1);
-            y1 = std::max(0, y1);
-            x2 = std::min(gridWidth - 1, x2);
-            y2 = std::min(gridHeight - 1, y2);
-
-            for (int yy = y1; yy <= y2; ++yy)
-            {
-                for (int xx = x1; xx <= x2; ++xx)
-                {
-                    std::vector<ushort2>& m = grid[yy * gridWidth + xx];
-
-                    for(size_t j = 0; j < m.size(); ++j)
-                    {
-                        float dx = (float)(p.x - m[j].x);
-                        float dy = (float)(p.y - m[j].y);
-
-                        if (dx * dx + dy * dy < minDist2)
-                        {
-                            good = false;
-                            goto break_out;
-                        }
-                    }
-                }
-            }
-
-            break_out:
-
-            if(good)
-            {
-                grid[yCell * gridWidth + xCell].push_back(p);
-
-                newBuf[newCount++] = p;
-            }
-        }
-
-        cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
-        centersCount = newCount;
-    }
-
-    ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles);
-
-    const int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, circles.ptr<float3>(), maxCircles,
-                                                    dp, minRadius, maxRadius, votesThreshold, deviceSupports(FEATURE_SET_COMPUTE_20));
-
-    if (circlesCount > 0)
-        circles.cols = circlesCount;
-    else
-        circles.release();
-}
-
-void cv::gpu::HoughCirclesDownload(const GpuMat& d_circles, cv::OutputArray h_circles_)
-{
-    if (d_circles.empty())
-    {
-        h_circles_.release();
-        return;
-    }
-
-    CV_Assert(d_circles.rows == 1 && d_circles.type() == CV_32FC3);
-
-    h_circles_.create(1, d_circles.cols, CV_32FC3);
-    Mat h_circles = h_circles_.getMat();
-    d_circles.download(h_circles);
-}
-
-//////////////////////////////////////////////////////////
-// GeneralizedHough
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace hough
-    {
-        template <typename T>
-        int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
-        void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                             PtrStepSz<short2> r_table, int* r_sizes,
-                             short2 templCenter, int levels);
-
-        void GHT_Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                          PtrStepSz<short2> r_table, const int* r_sizes,
-                                          PtrStepSzi hist,
-                                          float dp, int levels);
-        int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold);
-
-        void GHT_Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                               PtrStepSz<short2> r_table, const int* r_sizes,
-                                               PtrStepi hist, int rows, int cols,
-                                               float minScale, float scaleStep, int scaleRange,
-                                               float dp, int levels);
-        int GHT_Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize,
-                                                   float minScale, float scaleStep, float dp, int threshold);
-
-        void GHT_Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                  PtrStepSz<short2> r_table, const int* r_sizes,
-                                                  PtrStepi hist, int rows, int cols,
-                                                  float minAngle, float angleStep, int angleRange,
-                                                  float dp, int levels);
-        int GHT_Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize,
-                                                      float minAngle, float angleStep, float dp, int threshold);
-
-        void GHT_Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
-        void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
-        void GHT_Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                     int* sizes, int maxSize,
-                                                     float xi, float angleEpsilon, int levels,
-                                                     float2 center, float maxDist);
-        void GHT_Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                     int* sizes, int maxSize,
-                                                     float xi, float angleEpsilon, int levels,
-                                                     float2 center, float maxDist);
-        void GHT_Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
-                                         float minAngle, float maxAngle, float angleStep, int angleRange,
-                                         int levels, int tMaxSize);
-        void GHT_Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
-                                         float angle, float angleEpsilon,
-                                         float minScale, float maxScale, float iScaleStep, int scaleRange,
-                                         int levels, int tMaxSize);
-        void GHT_Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
-                                         float angle, float angleEpsilon, float scale,
-                                         float dp,
-                                         int levels, int tMaxSize);
-        int GHT_Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
-                                             float angle, int angleVotes, float scale, int scaleVotes,
-                                             float dp, int threshold);
-    }
-}}}
-
-namespace
-{
-    /////////////////////////////////////
-    // Common
-
-    template <typename T, class A> void releaseVector(std::vector<T, A>& v)
-    {
-        std::vector<T, A> empty;
-        empty.swap(v);
-    }
-
-    class GHT_Pos : public GeneralizedHough_GPU
-    {
-    public:
-        GHT_Pos();
-
-    protected:
-        void setTemplateImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter);
-        void detectImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions);
-        void releaseImpl();
-
-        virtual void processTempl() = 0;
-        virtual void processImage() = 0;
-
-        void buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy);
-        void filterMinDist();
-        void convertTo(GpuMat& positions);
-
-        int maxSize;
-        double minDist;
-
-        Size templSize;
-        Point templCenter;
-        GpuMat templEdges;
-        GpuMat templDx;
-        GpuMat templDy;
-
-        Size imageSize;
-        GpuMat imageEdges;
-        GpuMat imageDx;
-        GpuMat imageDy;
-
-        GpuMat edgePointList;
-
-        GpuMat outBuf;
-        int posCount;
-
-        std::vector<float4> oldPosBuf;
-        std::vector<int3> oldVoteBuf;
-        std::vector<float4> newPosBuf;
-        std::vector<int3> newVoteBuf;
-        std::vector<int> indexies;
-    };
-
-    GHT_Pos::GHT_Pos()
-    {
-        maxSize = 10000;
-        minDist = 1.0;
-    }
-
-    void GHT_Pos::setTemplateImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter_)
-    {
-        templSize = edges.size();
-        templCenter = templCenter_;
-
-        ensureSizeIsEnough(templSize, edges.type(), templEdges);
-        ensureSizeIsEnough(templSize, dx.type(), templDx);
-        ensureSizeIsEnough(templSize, dy.type(), templDy);
-
-        edges.copyTo(templEdges);
-        dx.copyTo(templDx);
-        dy.copyTo(templDy);
-
-        processTempl();
-    }
-
-    void GHT_Pos::detectImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions)
-    {
-        imageSize = edges.size();
-
-        ensureSizeIsEnough(imageSize, edges.type(), imageEdges);
-        ensureSizeIsEnough(imageSize, dx.type(), imageDx);
-        ensureSizeIsEnough(imageSize, dy.type(), imageDy);
-
-        edges.copyTo(imageEdges);
-        dx.copyTo(imageDx);
-        dy.copyTo(imageDy);
-
-        posCount = 0;
-
-        processImage();
-
-        if (posCount == 0)
-            positions.release();
-        else
-        {
-            if (minDist > 1)
-                filterMinDist();
-            convertTo(positions);
-        }
-    }
-
-    void GHT_Pos::releaseImpl()
-    {
-        templSize = Size();
-        templCenter = Point(-1, -1);
-        templEdges.release();
-        templDx.release();
-        templDy.release();
-
-        imageSize = Size();
-        imageEdges.release();
-        imageDx.release();
-        imageDy.release();
-
-        edgePointList.release();
-
-        outBuf.release();
-        posCount = 0;
-
-        releaseVector(oldPosBuf);
-        releaseVector(oldVoteBuf);
-        releaseVector(newPosBuf);
-        releaseVector(newVoteBuf);
-        releaseVector(indexies);
-    }
-
-    void GHT_Pos::buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy)
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        typedef int (*func_t)(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
-        static const func_t funcs[] =
-        {
-            0,
-            0,
-            0,
-            buildEdgePointList_gpu<short>,
-            buildEdgePointList_gpu<int>,
-            buildEdgePointList_gpu<float>,
-            0
-        };
-
-        CV_Assert(edges.type() == CV_8UC1);
-        CV_Assert(dx.size() == edges.size());
-        CV_Assert(dy.type() == dx.type() && dy.size() == edges.size());
-
-        const func_t func = funcs[dx.depth()];
-        CV_Assert(func != 0);
-
-        edgePointList.cols = (int) (edgePointList.step / sizeof(int));
-        ensureSizeIsEnough(2, edges.size().area(), CV_32SC1, edgePointList);
-
-        edgePointList.cols = func(edges, dx, dy, edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1));
-    }
-
-    struct IndexCmp
-    {
-        const int3* aux;
-
-        explicit IndexCmp(const int3* _aux) : aux(_aux) {}
-
-        bool operator ()(int l1, int l2) const
-        {
-            return aux[l1].x > aux[l2].x;
-        }
-    };
-
-    void GHT_Pos::filterMinDist()
-    {
-        oldPosBuf.resize(posCount);
-        oldVoteBuf.resize(posCount);
-
-        cudaSafeCall( cudaMemcpy(&oldPosBuf[0], outBuf.ptr(0), posCount * sizeof(float4), cudaMemcpyDeviceToHost) );
-        cudaSafeCall( cudaMemcpy(&oldVoteBuf[0], outBuf.ptr(1), posCount * sizeof(int3), cudaMemcpyDeviceToHost) );
-
-        indexies.resize(posCount);
-        for (int i = 0; i < posCount; ++i)
-            indexies[i] = i;
-        std::sort(indexies.begin(), indexies.end(), IndexCmp(&oldVoteBuf[0]));
-
-        newPosBuf.clear();
-        newVoteBuf.clear();
-        newPosBuf.reserve(posCount);
-        newVoteBuf.reserve(posCount);
-
-        const int cellSize = cvRound(minDist);
-        const int gridWidth = (imageSize.width + cellSize - 1) / cellSize;
-        const int gridHeight = (imageSize.height + cellSize - 1) / cellSize;
-
-        std::vector< std::vector<Point2f> > grid(gridWidth * gridHeight);
-
-        const double minDist2 = minDist * minDist;
-
-        for (int i = 0; i < posCount; ++i)
-        {
-            const int ind = indexies[i];
-
-            Point2f p(oldPosBuf[ind].x, oldPosBuf[ind].y);
-
-            bool good = true;
-
-            const int xCell = static_cast<int>(p.x / cellSize);
-            const int yCell = static_cast<int>(p.y / cellSize);
-
-            int x1 = xCell - 1;
-            int y1 = yCell - 1;
-            int x2 = xCell + 1;
-            int y2 = yCell + 1;
-
-            // boundary check
-            x1 = std::max(0, x1);
-            y1 = std::max(0, y1);
-            x2 = std::min(gridWidth - 1, x2);
-            y2 = std::min(gridHeight - 1, y2);
-
-            for (int yy = y1; yy <= y2; ++yy)
-            {
-                for (int xx = x1; xx <= x2; ++xx)
-                {
-                    const std::vector<Point2f>& m = grid[yy * gridWidth + xx];
-
-                    for(size_t j = 0; j < m.size(); ++j)
-                    {
-                        const Point2f d = p - m[j];
-
-                        if (d.ddot(d) < minDist2)
-                        {
-                            good = false;
-                            goto break_out;
-                        }
-                    }
-                }
-            }
-
-            break_out:
-
-            if(good)
-            {
-                grid[yCell * gridWidth + xCell].push_back(p);
-
-                newPosBuf.push_back(oldPosBuf[ind]);
-                newVoteBuf.push_back(oldVoteBuf[ind]);
-            }
-        }
-
-        posCount = static_cast<int>(newPosBuf.size());
-        cudaSafeCall( cudaMemcpy(outBuf.ptr(0), &newPosBuf[0], posCount * sizeof(float4), cudaMemcpyHostToDevice) );
-        cudaSafeCall( cudaMemcpy(outBuf.ptr(1), &newVoteBuf[0], posCount * sizeof(int3), cudaMemcpyHostToDevice) );
-    }
-
-    void GHT_Pos::convertTo(GpuMat& positions)
-    {
-        ensureSizeIsEnough(2, posCount, CV_32FC4, positions);
-        GpuMat(2, posCount, CV_32FC4, outBuf.data, outBuf.step).copyTo(positions);
-    }
-
-    /////////////////////////////////////
-    // POSITION Ballard
-
-    class GHT_Ballard_Pos : public GHT_Pos
-    {
-    public:
-        AlgorithmInfo* info() const;
-
-        GHT_Ballard_Pos();
-
-    protected:
-        void releaseImpl();
-
-        void processTempl();
-        void processImage();
-
-        virtual void calcHist();
-        virtual void findPosInHist();
-
-        int levels;
-        int votesThreshold;
-        double dp;
-
-        GpuMat r_table;
-        GpuMat r_sizes;
-
-        GpuMat hist;
-    };
-
-    CV_INIT_ALGORITHM(GHT_Ballard_Pos, "GeneralizedHough_GPU.POSITION",
-                      obj.info()->addParam(obj, "maxSize", obj.maxSize, false, 0, 0,
-                                           "Maximal size of inner buffers.");
-                      obj.info()->addParam(obj, "minDist", obj.minDist, false, 0, 0,
-                                           "Minimum distance between the centers of the detected objects.");
-                      obj.info()->addParam(obj, "levels", obj.levels, false, 0, 0,
-                                           "R-Table levels.");
-                      obj.info()->addParam(obj, "votesThreshold", obj.votesThreshold, false, 0, 0,
-                                           "The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected.");
-                      obj.info()->addParam(obj, "dp", obj.dp, false, 0, 0,
-                                           "Inverse ratio of the accumulator resolution to the image resolution."));
-
-    GHT_Ballard_Pos::GHT_Ballard_Pos()
-    {
-        levels = 360;
-        votesThreshold = 100;
-        dp = 1.0;
-    }
-
-    void GHT_Ballard_Pos::releaseImpl()
-    {
-        GHT_Pos::releaseImpl();
-
-        r_table.release();
-        r_sizes.release();
-
-        hist.release();
-    }
-
-    void GHT_Ballard_Pos::processTempl()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        CV_Assert(levels > 0);
-
-        buildEdgePointList(templEdges, templDx, templDy);
-
-        ensureSizeIsEnough(levels + 1, maxSize, CV_16SC2, r_table);
-        ensureSizeIsEnough(1, levels + 1, CV_32SC1, r_sizes);
-        r_sizes.setTo(Scalar::all(0));
-
-        if (edgePointList.cols > 0)
-        {
-            buildRTable_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
-                            r_table, r_sizes.ptr<int>(), make_short2(templCenter.x, templCenter.y), levels);
-            gpu::min(r_sizes, maxSize, r_sizes);
-        }
-    }
-
-    void GHT_Ballard_Pos::processImage()
-    {
-        calcHist();
-        findPosInHist();
-    }
-
-    void GHT_Ballard_Pos::calcHist()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1));
-        CV_Assert(dp > 0.0);
-
-        const double idp = 1.0 / dp;
-
-        buildEdgePointList(imageEdges, imageDx, imageDy);
-
-        ensureSizeIsEnough(cvCeil(imageSize.height * idp) + 2, cvCeil(imageSize.width * idp) + 2, CV_32SC1, hist);
-        hist.setTo(Scalar::all(0));
-
-        if (edgePointList.cols > 0)
-        {
-            GHT_Ballard_Pos_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
-                                         r_table, r_sizes.ptr<int>(),
-                                         hist,
-                                         (float)dp, levels);
-        }
-    }
-
-    void GHT_Ballard_Pos::findPosInHist()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        CV_Assert(votesThreshold > 0);
-
-        ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
-
-        posCount = GHT_Ballard_Pos_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)dp, votesThreshold);
-    }
-
-    /////////////////////////////////////
-    // POSITION & SCALE
-
-    class GHT_Ballard_PosScale : public GHT_Ballard_Pos
-    {
-    public:
-        AlgorithmInfo* info() const;
-
-        GHT_Ballard_PosScale();
-
-    protected:
-        void calcHist();
-        void findPosInHist();
-
-        double minScale;
-        double maxScale;
-        double scaleStep;
-    };
-
-    CV_INIT_ALGORITHM(GHT_Ballard_PosScale, "GeneralizedHough_GPU.POSITION_SCALE",
-                      obj.info()->addParam(obj, "maxSize", obj.maxSize, false, 0, 0,
-                                           "Maximal size of inner buffers.");
-                      obj.info()->addParam(obj, "minDist", obj.minDist, false, 0, 0,
-                                           "Minimum distance between the centers of the detected objects.");
-                      obj.info()->addParam(obj, "levels", obj.levels, false, 0, 0,
-                                           "R-Table levels.");
-                      obj.info()->addParam(obj, "votesThreshold", obj.votesThreshold, false, 0, 0,
-                                           "The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected.");
-                      obj.info()->addParam(obj, "dp", obj.dp, false, 0, 0,
-                                           "Inverse ratio of the accumulator resolution to the image resolution.");
-                      obj.info()->addParam(obj, "minScale", obj.minScale, false, 0, 0,
-                                           "Minimal scale to detect.");
-                      obj.info()->addParam(obj, "maxScale", obj.maxScale, false, 0, 0,
-                                           "Maximal scale to detect.");
-                      obj.info()->addParam(obj, "scaleStep", obj.scaleStep, false, 0, 0,
-                                           "Scale step."));
-
-    GHT_Ballard_PosScale::GHT_Ballard_PosScale()
-    {
-        minScale = 0.5;
-        maxScale = 2.0;
-        scaleStep = 0.05;
-    }
-
-    void GHT_Ballard_PosScale::calcHist()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1));
-        CV_Assert(dp > 0.0);
-        CV_Assert(minScale > 0.0 && minScale < maxScale);
-        CV_Assert(scaleStep > 0.0);
-
-        const double idp = 1.0 / dp;
-        const int scaleRange = cvCeil((maxScale - minScale) / scaleStep);
-        const int rows = cvCeil(imageSize.height * idp);
-        const int cols = cvCeil(imageSize.width * idp);
-
-        buildEdgePointList(imageEdges, imageDx, imageDy);
-
-        ensureSizeIsEnough((scaleRange + 2) * (rows + 2), cols + 2, CV_32SC1, hist);
-        hist.setTo(Scalar::all(0));
-
-        if (edgePointList.cols > 0)
-        {
-            GHT_Ballard_PosScale_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
-                                              r_table, r_sizes.ptr<int>(),
-                                              hist, rows, cols,
-                                              (float)minScale, (float)scaleStep, scaleRange, (float)dp, levels);
-        }
-    }
-
-    void GHT_Ballard_PosScale::findPosInHist()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        CV_Assert(votesThreshold > 0);
-
-        const double idp = 1.0 / dp;
-        const int scaleRange = cvCeil((maxScale - minScale) / scaleStep);
-        const int rows = cvCeil(imageSize.height * idp);
-        const int cols = cvCeil(imageSize.width * idp);
-
-        ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
-
-        posCount =  GHT_Ballard_PosScale_findPosInHist_gpu(hist, rows, cols, scaleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minScale, (float)scaleStep, (float)dp, votesThreshold);
-    }
-
-    /////////////////////////////////////
-    // POSITION & Rotation
-
-    class GHT_Ballard_PosRotation : public GHT_Ballard_Pos
-    {
-    public:
-        AlgorithmInfo* info() const;
-
-        GHT_Ballard_PosRotation();
-
-    protected:
-        void calcHist();
-        void findPosInHist();
-
-        double minAngle;
-        double maxAngle;
-        double angleStep;
-    };
-
-    CV_INIT_ALGORITHM(GHT_Ballard_PosRotation, "GeneralizedHough_GPU.POSITION_ROTATION",
-                      obj.info()->addParam(obj, "maxSize", obj.maxSize, false, 0, 0,
-                                           "Maximal size of inner buffers.");
-                      obj.info()->addParam(obj, "minDist", obj.minDist, false, 0, 0,
-                                           "Minimum distance between the centers of the detected objects.");
-                      obj.info()->addParam(obj, "levels", obj.levels, false, 0, 0,
-                                           "R-Table levels.");
-                      obj.info()->addParam(obj, "votesThreshold", obj.votesThreshold, false, 0, 0,
-                                           "The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected.");
-                      obj.info()->addParam(obj, "dp", obj.dp, false, 0, 0,
-                                           "Inverse ratio of the accumulator resolution to the image resolution.");
-                      obj.info()->addParam(obj, "minAngle", obj.minAngle, false, 0, 0,
-                                           "Minimal rotation angle to detect in degrees.");
-                      obj.info()->addParam(obj, "maxAngle", obj.maxAngle, false, 0, 0,
-                                           "Maximal rotation angle to detect in degrees.");
-                      obj.info()->addParam(obj, "angleStep", obj.angleStep, false, 0, 0,
-                                           "Angle step in degrees."));
-
-    GHT_Ballard_PosRotation::GHT_Ballard_PosRotation()
-    {
-        minAngle = 0.0;
-        maxAngle = 360.0;
-        angleStep = 1.0;
-    }
-
-    void GHT_Ballard_PosRotation::calcHist()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1));
-        CV_Assert(dp > 0.0);
-        CV_Assert(minAngle >= 0.0 && minAngle < maxAngle && maxAngle <= 360.0);
-        CV_Assert(angleStep > 0.0 && angleStep < 360.0);
-
-        const double idp = 1.0 / dp;
-        const int angleRange = cvCeil((maxAngle - minAngle) / angleStep);
-        const int rows = cvCeil(imageSize.height * idp);
-        const int cols = cvCeil(imageSize.width * idp);
-
-        buildEdgePointList(imageEdges, imageDx, imageDy);
-
-        ensureSizeIsEnough((angleRange + 2) * (rows + 2), cols + 2, CV_32SC1, hist);
-        hist.setTo(Scalar::all(0));
-
-        if (edgePointList.cols > 0)
-        {
-            GHT_Ballard_PosRotation_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
-                                                 r_table, r_sizes.ptr<int>(),
-                                                 hist, rows, cols,
-                                                 (float)minAngle, (float)angleStep, angleRange, (float)dp, levels);
-        }
-    }
-
-    void GHT_Ballard_PosRotation::findPosInHist()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        CV_Assert(votesThreshold > 0);
-
-        const double idp = 1.0 / dp;
-        const int angleRange = cvCeil((maxAngle - minAngle) / angleStep);
-        const int rows = cvCeil(imageSize.height * idp);
-        const int cols = cvCeil(imageSize.width * idp);
-
-        ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
-
-        posCount = GHT_Ballard_PosRotation_findPosInHist_gpu(hist, rows, cols, angleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minAngle, (float)angleStep, (float)dp, votesThreshold);
-    }
-
-    /////////////////////////////////////////
-    // POSITION & SCALE & ROTATION
-
-    double toRad(double a)
-    {
-        return a * CV_PI / 180.0;
-    }
-
-    double clampAngle(double a)
-    {
-        double res = a;
-
-        while (res > 360.0)
-            res -= 360.0;
-        while (res < 0)
-            res += 360.0;
-
-        return res;
-    }
-
-    bool angleEq(double a, double b, double eps = 1.0)
-    {
-        return (fabs(clampAngle(a - b)) <= eps);
-    }
-
-    class GHT_Guil_Full : public GHT_Pos
-    {
-    public:
-        AlgorithmInfo* info() const;
-
-        GHT_Guil_Full();
-
-    protected:
-        void releaseImpl();
-
-        void processTempl();
-        void processImage();
-
-        struct Feature
-        {
-            GpuMat p1_pos;
-            GpuMat p1_theta;
-            GpuMat p2_pos;
-
-            GpuMat d12;
-
-            GpuMat r1;
-            GpuMat r2;
-
-            GpuMat sizes;
-            int maxSize;
-
-            void create(int levels, int maxCapacity, bool isTempl);
-            void release();
-        };
-
-        typedef void (*set_func_t)(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
-        typedef void (*build_func_t)(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                     int* sizes, int maxSize,
-                                     float xi, float angleEpsilon, int levels,
-                                     float2 center, float maxDist);
-
-        void buildFeatureList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Feature& features,
-                              set_func_t set_func, build_func_t build_func, bool isTempl, Point2d center = Point2d());
-
-        void calcOrientation();
-        void calcScale(double angle);
-        void calcPosition(double angle, int angleVotes, double scale, int scaleVotes);
-
-        double xi;
-        int levels;
-        double angleEpsilon;
-
-        double minAngle;
-        double maxAngle;
-        double angleStep;
-        int angleThresh;
-
-        double minScale;
-        double maxScale;
-        double scaleStep;
-        int scaleThresh;
-
-        double dp;
-        int posThresh;
-
-        Feature templFeatures;
-        Feature imageFeatures;
-
-        std::vector< std::pair<double, int> > angles;
-        std::vector< std::pair<double, int> > scales;
-
-        GpuMat hist;
-        std::vector<int> h_buf;
-    };
-
-    CV_INIT_ALGORITHM(GHT_Guil_Full, "GeneralizedHough_GPU.POSITION_SCALE_ROTATION",
-                      obj.info()->addParam(obj, "minDist", obj.minDist, false, 0, 0,
-                                           "Minimum distance between the centers of the detected objects.");
-                      obj.info()->addParam(obj, "maxSize", obj.maxSize, false, 0, 0,
-                                           "Maximal size of inner buffers.");
-                      obj.info()->addParam(obj, "xi", obj.xi, false, 0, 0,
-                                           "Angle difference in degrees between two points in feature.");
-                      obj.info()->addParam(obj, "levels", obj.levels, false, 0, 0,
-                                           "Feature table levels.");
-                      obj.info()->addParam(obj, "angleEpsilon", obj.angleEpsilon, false, 0, 0,
-                                           "Maximal difference between angles that treated as equal.");
-                      obj.info()->addParam(obj, "minAngle", obj.minAngle, false, 0, 0,
-                                           "Minimal rotation angle to detect in degrees.");
-                      obj.info()->addParam(obj, "maxAngle", obj.maxAngle, false, 0, 0,
-                                           "Maximal rotation angle to detect in degrees.");
-                      obj.info()->addParam(obj, "angleStep", obj.angleStep, false, 0, 0,
-                                           "Angle step in degrees.");
-                      obj.info()->addParam(obj, "angleThresh", obj.angleThresh, false, 0, 0,
-                                           "Angle threshold.");
-                      obj.info()->addParam(obj, "minScale", obj.minScale, false, 0, 0,
-                                           "Minimal scale to detect.");
-                      obj.info()->addParam(obj, "maxScale", obj.maxScale, false, 0, 0,
-                                           "Maximal scale to detect.");
-                      obj.info()->addParam(obj, "scaleStep", obj.scaleStep, false, 0, 0,
-                                           "Scale step.");
-                      obj.info()->addParam(obj, "scaleThresh", obj.scaleThresh, false, 0, 0,
-                                           "Scale threshold.");
-                      obj.info()->addParam(obj, "dp", obj.dp, false, 0, 0,
-                                           "Inverse ratio of the accumulator resolution to the image resolution.");
-                      obj.info()->addParam(obj, "posThresh", obj.posThresh, false, 0, 0,
-                                           "Position threshold."));
-
-    GHT_Guil_Full::GHT_Guil_Full()
-    {
-        maxSize = 1000;
-        xi = 90.0;
-        levels = 360;
-        angleEpsilon = 1.0;
-
-        minAngle = 0.0;
-        maxAngle = 360.0;
-        angleStep = 1.0;
-        angleThresh = 15000;
-
-        minScale = 0.5;
-        maxScale = 2.0;
-        scaleStep = 0.05;
-        scaleThresh = 1000;
-
-        dp = 1.0;
-        posThresh = 100;
-    }
-
-    void GHT_Guil_Full::releaseImpl()
-    {
-        GHT_Pos::releaseImpl();
-
-        templFeatures.release();
-        imageFeatures.release();
-
-        releaseVector(angles);
-        releaseVector(scales);
-
-        hist.release();
-        releaseVector(h_buf);
-    }
-
-    void GHT_Guil_Full::processTempl()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        buildFeatureList(templEdges, templDx, templDy, templFeatures,
-            GHT_Guil_Full_setTemplFeatures, GHT_Guil_Full_buildTemplFeatureList_gpu,
-            true, templCenter);
-
-        h_buf.resize(templFeatures.sizes.cols);
-        cudaSafeCall( cudaMemcpy(&h_buf[0], templFeatures.sizes.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
-        templFeatures.maxSize = *max_element(h_buf.begin(), h_buf.end());
-    }
-
-    void GHT_Guil_Full::processImage()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        CV_Assert(levels > 0);
-        CV_Assert(templFeatures.sizes.cols == levels + 1);
-        CV_Assert(minAngle >= 0.0 && minAngle < maxAngle && maxAngle <= 360.0);
-        CV_Assert(angleStep > 0.0 && angleStep < 360.0);
-        CV_Assert(angleThresh > 0);
-        CV_Assert(minScale > 0.0 && minScale < maxScale);
-        CV_Assert(scaleStep > 0.0);
-        CV_Assert(scaleThresh > 0);
-        CV_Assert(dp > 0.0);
-        CV_Assert(posThresh > 0);
-
-        const double iAngleStep = 1.0 / angleStep;
-        const int angleRange = cvCeil((maxAngle - minAngle) * iAngleStep);
-
-        const double iScaleStep = 1.0 / scaleStep;
-        const int scaleRange = cvCeil((maxScale - minScale) * iScaleStep);
-
-        const double idp = 1.0 / dp;
-        const int histRows = cvCeil(imageSize.height * idp);
-        const int histCols = cvCeil(imageSize.width * idp);
-
-        ensureSizeIsEnough(histRows + 2, std::max(angleRange + 1, std::max(scaleRange + 1, histCols + 2)), CV_32SC1, hist);
-        h_buf.resize(std::max(angleRange + 1, scaleRange + 1));
-
-        ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
-
-        buildFeatureList(imageEdges, imageDx, imageDy, imageFeatures,
-            GHT_Guil_Full_setImageFeatures, GHT_Guil_Full_buildImageFeatureList_gpu,
-            false);
-
-        calcOrientation();
-
-        for (size_t i = 0; i < angles.size(); ++i)
-        {
-            const double angle = angles[i].first;
-            const int angleVotes = angles[i].second;
-
-            calcScale(angle);
-
-            for (size_t j = 0; j < scales.size(); ++j)
-            {
-                const double scale = scales[j].first;
-                const int scaleVotes = scales[j].second;
-
-                calcPosition(angle, angleVotes, scale, scaleVotes);
-            }
-        }
-    }
-
-    void GHT_Guil_Full::Feature::create(int levels, int maxCapacity, bool isTempl)
-    {
-        if (!isTempl)
-        {
-            ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, p1_pos);
-            ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, p2_pos);
-        }
-
-        ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC1, p1_theta);
-
-        ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC1, d12);
-
-        if (isTempl)
-        {
-            ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, r1);
-            ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, r2);
-        }
-
-        ensureSizeIsEnough(1, levels + 1, CV_32SC1, sizes);
-        sizes.setTo(Scalar::all(0));
-
-        maxSize = 0;
-    }
-
-    void GHT_Guil_Full::Feature::release()
-    {
-        p1_pos.release();
-        p1_theta.release();
-        p2_pos.release();
-
-        d12.release();
-
-        r1.release();
-        r2.release();
-
-        sizes.release();
-
-        maxSize = 0;
-    }
-
-    void GHT_Guil_Full::buildFeatureList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Feature& features,
-                                         set_func_t set_func, build_func_t build_func, bool isTempl, Point2d center)
-    {
-        CV_Assert(levels > 0);
-
-        const double maxDist = sqrt((double) templSize.width * templSize.width + templSize.height * templSize.height) * maxScale;
-
-        features.create(levels, maxSize, isTempl);
-        set_func(features.p1_pos, features.p1_theta, features.p2_pos, features.d12, features.r1, features.r2);
-
-        buildEdgePointList(edges, dx, dy);
-
-        if (edgePointList.cols > 0)
-        {
-            build_func(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
-                features.sizes.ptr<int>(), maxSize, (float)xi, (float)angleEpsilon, levels, make_float2((float)center.x, (float)center.y), (float)maxDist);
-        }
-    }
-
-    void GHT_Guil_Full::calcOrientation()
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        const double iAngleStep = 1.0 / angleStep;
-        const int angleRange = cvCeil((maxAngle - minAngle) * iAngleStep);
-
-        hist.setTo(Scalar::all(0));
-        GHT_Guil_Full_calcOHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
-            hist.ptr<int>(), (float)minAngle, (float)maxAngle, (float)angleStep, angleRange, levels, templFeatures.maxSize);
-        cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
-
-        angles.clear();
-
-        for (int n = 0; n < angleRange; ++n)
-        {
-            if (h_buf[n] >= angleThresh)
-            {
-                const double angle = minAngle + n * angleStep;
-                angles.push_back(std::make_pair(angle, h_buf[n]));
-            }
-        }
-    }
-
-    void GHT_Guil_Full::calcScale(double angle)
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        const double iScaleStep = 1.0 / scaleStep;
-        const int scaleRange = cvCeil((maxScale - minScale) * iScaleStep);
-
-        hist.setTo(Scalar::all(0));
-        GHT_Guil_Full_calcSHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
-            hist.ptr<int>(), (float)angle, (float)angleEpsilon, (float)minScale, (float)maxScale, (float)iScaleStep, scaleRange, levels, templFeatures.maxSize);
-        cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
-
-        scales.clear();
-
-        for (int s = 0; s < scaleRange; ++s)
-        {
-            if (h_buf[s] >= scaleThresh)
-            {
-                const double scale = minScale + s * scaleStep;
-                scales.push_back(std::make_pair(scale, h_buf[s]));
-            }
-        }
-    }
-
-    void GHT_Guil_Full::calcPosition(double angle, int angleVotes, double scale, int scaleVotes)
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        hist.setTo(Scalar::all(0));
-        GHT_Guil_Full_calcPHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
-            hist,(float) (float)angle, (float)angleEpsilon, (float)scale, (float)dp, levels, templFeatures.maxSize);
-
-        posCount = GHT_Guil_Full_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1),
-            posCount, maxSize, (float)angle, angleVotes, (float)scale, scaleVotes, (float)dp, posThresh);
-    }
-}
-
-Ptr<GeneralizedHough_GPU> cv::gpu::GeneralizedHough_GPU::create(int method)
-{
-    switch (method)
-    {
-    case cv::GeneralizedHough::GHT_POSITION:
-        CV_Assert( !GHT_Ballard_Pos_info_auto.name().empty() );
-        return new GHT_Ballard_Pos();
-
-    case (cv::GeneralizedHough::GHT_POSITION | cv::GeneralizedHough::GHT_SCALE):
-        CV_Assert( !GHT_Ballard_PosScale_info_auto.name().empty() );
-        return new GHT_Ballard_PosScale();
-
-    case (cv::GeneralizedHough::GHT_POSITION | cv::GeneralizedHough::GHT_ROTATION):
-        CV_Assert( !GHT_Ballard_PosRotation_info_auto.name().empty() );
-        return new GHT_Ballard_PosRotation();
-
-    case (cv::GeneralizedHough::GHT_POSITION | cv::GeneralizedHough::GHT_SCALE | cv::GeneralizedHough::GHT_ROTATION):
-        CV_Assert( !GHT_Guil_Full_info_auto.name().empty() );
-        return new GHT_Guil_Full();
-    }
-
-    CV_Error(cv::Error::StsBadArg, "Unsupported method");
-    return Ptr<GeneralizedHough_GPU>();
-}
-
-cv::gpu::GeneralizedHough_GPU::~GeneralizedHough_GPU()
-{
-}
-
-void cv::gpu::GeneralizedHough_GPU::setTemplate(const GpuMat& templ, int cannyThreshold, Point templCenter)
-{
-    CV_Assert(templ.type() == CV_8UC1);
-    CV_Assert(cannyThreshold > 0);
-
-    ensureSizeIsEnough(templ.size(), CV_8UC1, edges_);
-    Canny(templ, cannyBuf_, edges_, cannyThreshold / 2, cannyThreshold);
-
-    if (templCenter == Point(-1, -1))
-        templCenter = Point(templ.cols / 2, templ.rows / 2);
-
-    setTemplateImpl(edges_, cannyBuf_.dx, cannyBuf_.dy, templCenter);
-}
-
-void cv::gpu::GeneralizedHough_GPU::setTemplate(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter)
-{
-    if (templCenter == Point(-1, -1))
-        templCenter = Point(edges.cols / 2, edges.rows / 2);
-
-    setTemplateImpl(edges, dx, dy, templCenter);
-}
-
-void cv::gpu::GeneralizedHough_GPU::detect(const GpuMat& image, GpuMat& positions, int cannyThreshold)
-{
-    CV_Assert(image.type() == CV_8UC1);
-    CV_Assert(cannyThreshold > 0);
-
-    ensureSizeIsEnough(image.size(), CV_8UC1, edges_);
-    Canny(image, cannyBuf_, edges_, cannyThreshold / 2, cannyThreshold);
-
-    detectImpl(edges_, cannyBuf_.dx, cannyBuf_.dy, positions);
-}
-
-void cv::gpu::GeneralizedHough_GPU::detect(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions)
-{
-    detectImpl(edges, dx, dy, positions);
-}
-
-void cv::gpu::GeneralizedHough_GPU::download(const GpuMat& d_positions, OutputArray h_positions_, OutputArray h_votes_)
-{
-    if (d_positions.empty())
-    {
-        h_positions_.release();
-        if (h_votes_.needed())
-            h_votes_.release();
-        return;
-    }
-
-    CV_Assert(d_positions.rows == 2 && d_positions.type() == CV_32FC4);
-
-    h_positions_.create(1, d_positions.cols, CV_32FC4);
-    Mat h_positions = h_positions_.getMat();
-    d_positions.row(0).download(h_positions);
-
-    if (h_votes_.needed())
-    {
-        h_votes_.create(1, d_positions.cols, CV_32SC3);
-        Mat h_votes = h_votes_.getMat();
-        GpuMat d_votes(1, d_positions.cols, CV_32SC3, const_cast<int3*>(d_positions.ptr<int3>(1)));
-        d_votes.download(h_votes);
-    }
-}
-
-void cv::gpu::GeneralizedHough_GPU::release()
-{
-    edges_.release();
-    cannyBuf_.release();
-    releaseImpl();
-}
-
-#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/hough_circles.cpp b/modules/gpuimgproc/src/hough_circles.cpp
new file mode 100644
index 000000000..f123cf579
--- /dev/null
+++ b/modules/gpuimgproc/src/hough_circles.cpp
@@ -0,0 +1,297 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUFILTERS)
+
+Ptr<gpu::HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float, float, int, int, int, int, int) { throw_no_cuda(); return Ptr<HoughCirclesDetector>(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough
+    {
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
+    }
+
+    namespace hough_circles
+    {
+        void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
+        int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
+        int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
+                                   float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
+    }
+}}}
+
+namespace
+{
+    class HoughCirclesDetectorImpl : public HoughCirclesDetector
+    {
+    public:
+        HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
+
+        void detect(InputArray src, OutputArray circles);
+
+        void setDp(float dp) { dp_ = dp; }
+        float getDp() const { return dp_; }
+
+        void setMinDist(float minDist) { minDist_ = minDist; }
+        float getMinDist() const { return minDist_; }
+
+        void setCannyThreshold(int cannyThreshold) { cannyThreshold_ = cannyThreshold; }
+        int getCannyThreshold() const { return cannyThreshold_; }
+
+        void setVotesThreshold(int votesThreshold) { votesThreshold_ = votesThreshold; }
+        int getVotesThreshold() const { return votesThreshold_; }
+
+        void setMinRadius(int minRadius) { minRadius_ = minRadius; }
+        int getMinRadius() const { return minRadius_; }
+
+        void setMaxRadius(int maxRadius) { maxRadius_ = maxRadius; }
+        int getMaxRadius() const { return maxRadius_; }
+
+        void setMaxCircles(int maxCircles) { maxCircles_ = maxCircles; }
+        int getMaxCircles() const { return maxCircles_; }
+
+        void write(FileStorage& fs) const
+        {
+            fs << "name" << "HoughCirclesDetector_GPU"
+            << "dp" << dp_
+            << "minDist" << minDist_
+            << "cannyThreshold" << cannyThreshold_
+            << "votesThreshold" << votesThreshold_
+            << "minRadius" << minRadius_
+            << "maxRadius" << maxRadius_
+            << "maxCircles" << maxCircles_;
+        }
+
+        void read(const FileNode& fn)
+        {
+            CV_Assert( String(fn["name"]) == "HoughCirclesDetector_GPU" );
+            dp_ = (float)fn["dp"];
+            minDist_ = (float)fn["minDist"];
+            cannyThreshold_ = (int)fn["cannyThreshold"];
+            votesThreshold_ = (int)fn["votesThreshold"];
+            minRadius_ = (int)fn["minRadius"];
+            maxRadius_ = (int)fn["maxRadius"];
+            maxCircles_ = (int)fn["maxCircles"];
+        }
+
+    private:
+        float dp_;
+        float minDist_;
+        int cannyThreshold_;
+        int votesThreshold_;
+        int minRadius_;
+        int maxRadius_;
+        int maxCircles_;
+
+        GpuMat dx_, dy_;
+        GpuMat edges_;
+        GpuMat accum_;
+        GpuMat list_;
+        GpuMat result_;
+        Ptr<gpu::Filter> filterDx_;
+        Ptr<gpu::Filter> filterDy_;
+        Ptr<gpu::CannyEdgeDetector> canny_;
+    };
+
+    HoughCirclesDetectorImpl::HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold,
+                                                       int minRadius, int maxRadius, int maxCircles) :
+        dp_(dp), minDist_(minDist), cannyThreshold_(cannyThreshold), votesThreshold_(votesThreshold),
+        minRadius_(minRadius), maxRadius_(maxRadius), maxCircles_(maxCircles)
+    {
+        canny_ = gpu::createCannyEdgeDetector(std::max(cannyThreshold_ / 2, 1), cannyThreshold_);
+
+        filterDx_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
+        filterDy_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
+    }
+
+    void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
+    {
+        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::hough_circles;
+
+        GpuMat src = _src.getGpuMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+        CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
+        CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
+        CV_Assert( dp_ > 0 );
+        CV_Assert( minRadius_ > 0 && maxRadius_ > minRadius_ );
+        CV_Assert( cannyThreshold_ > 0 );
+        CV_Assert( votesThreshold_ > 0 );
+        CV_Assert( maxCircles_ > 0 );
+
+        const float idp = 1.0f / dp_;
+
+        filterDx_->apply(src, dx_);
+        filterDy_->apply(src, dy_);
+
+        canny_->setLowThreshold(std::max(cannyThreshold_ / 2, 1));
+        canny_->setHighThreshold(cannyThreshold_);
+
+        canny_->detect(dx_, dy_, edges_);
+
+        ensureSizeIsEnough(2, src.size().area(), CV_32SC1, list_);
+        unsigned int* srcPoints = list_.ptr<unsigned int>(0);
+        unsigned int* centers = list_.ptr<unsigned int>(1);
+
+        const int pointsCount = buildPointList_gpu(edges_, srcPoints);
+        if (pointsCount == 0)
+        {
+            circles.release();
+            return;
+        }
+
+        ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, accum_);
+        accum_.setTo(Scalar::all(0));
+
+        circlesAccumCenters_gpu(srcPoints, pointsCount, dx_, dy_, accum_, minRadius_, maxRadius_, idp);
+
+        int centersCount = buildCentersList_gpu(accum_, centers, votesThreshold_);
+        if (centersCount == 0)
+        {
+            circles.release();
+            return;
+        }
+
+        if (minDist_ > 1)
+        {
+            AutoBuffer<ushort2> oldBuf_(centersCount);
+            AutoBuffer<ushort2> newBuf_(centersCount);
+            int newCount = 0;
+
+            ushort2* oldBuf = oldBuf_;
+            ushort2* newBuf = newBuf_;
+
+            cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
+
+            const int cellSize = cvRound(minDist_);
+            const int gridWidth = (src.cols + cellSize - 1) / cellSize;
+            const int gridHeight = (src.rows + cellSize - 1) / cellSize;
+
+            std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
+
+            const float minDist2 = minDist_ * minDist_;
+
+            for (int i = 0; i < centersCount; ++i)
+            {
+                ushort2 p = oldBuf[i];
+
+                bool good = true;
+
+                int xCell = static_cast<int>(p.x / cellSize);
+                int yCell = static_cast<int>(p.y / cellSize);
+
+                int x1 = xCell - 1;
+                int y1 = yCell - 1;
+                int x2 = xCell + 1;
+                int y2 = yCell + 1;
+
+                // boundary check
+                x1 = std::max(0, x1);
+                y1 = std::max(0, y1);
+                x2 = std::min(gridWidth - 1, x2);
+                y2 = std::min(gridHeight - 1, y2);
+
+                for (int yy = y1; yy <= y2; ++yy)
+                {
+                    for (int xx = x1; xx <= x2; ++xx)
+                    {
+                        std::vector<ushort2>& m = grid[yy * gridWidth + xx];
+
+                        for(size_t j = 0; j < m.size(); ++j)
+                        {
+                            float dx = (float)(p.x - m[j].x);
+                            float dy = (float)(p.y - m[j].y);
+
+                            if (dx * dx + dy * dy < minDist2)
+                            {
+                                good = false;
+                                goto break_out;
+                            }
+                        }
+                    }
+                }
+
+                break_out:
+
+                if(good)
+                {
+                    grid[yCell * gridWidth + xCell].push_back(p);
+
+                    newBuf[newCount++] = p;
+                }
+            }
+
+            cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
+            centersCount = newCount;
+        }
+
+        ensureSizeIsEnough(1, maxCircles_, CV_32FC3, result_);
+
+        int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, result_.ptr<float3>(), maxCircles_,
+                                                  dp_, minRadius_, maxRadius_, votesThreshold_, deviceSupports(FEATURE_SET_COMPUTE_20));
+
+        if (circlesCount == 0)
+        {
+            circles.release();
+            return;
+        }
+
+        result_.cols = circlesCount;
+        result_.copyTo(circles);
+    }
+}
+
+Ptr<HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
+{
+    return makePtr<HoughCirclesDetectorImpl>(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/hough_lines.cpp b/modules/gpuimgproc/src/hough_lines.cpp
new file mode 100644
index 000000000..e3e03cba7
--- /dev/null
+++ b/modules/gpuimgproc/src/hough_lines.cpp
@@ -0,0 +1,202 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+Ptr<gpu::HoughLinesDetector> cv::gpu::createHoughLinesDetector(float, float, int, bool, int) { throw_no_cuda(); return Ptr<HoughLinesDetector>(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough
+    {
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
+    }
+
+    namespace hough_lines
+    {
+        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
+        int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
+    }
+}}}
+
+namespace
+{
+    class HoughLinesDetectorImpl : public HoughLinesDetector
+    {
+    public:
+        HoughLinesDetectorImpl(float rho, float theta, int threshold, bool doSort, int maxLines) :
+            rho_(rho), theta_(theta), threshold_(threshold), doSort_(doSort), maxLines_(maxLines)
+        {
+        }
+
+        void detect(InputArray src, OutputArray lines);
+        void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
+
+        void setRho(float rho) { rho_ = rho; }
+        float getRho() const { return rho_; }
+
+        void setTheta(float theta) { theta_ = theta; }
+        float getTheta() const { return theta_; }
+
+        void setThreshold(int threshold) { threshold_ = threshold; }
+        int getThreshold() const { return threshold_; }
+
+        void setDoSort(bool doSort) { doSort_ = doSort; }
+        bool getDoSort() const { return doSort_; }
+
+        void setMaxLines(int maxLines) { maxLines_ = maxLines; }
+        int getMaxLines() const { return maxLines_; }
+
+        void write(FileStorage& fs) const
+        {
+            fs << "name" << "HoughLinesDetector_GPU"
+            << "rho" << rho_
+            << "theta" << theta_
+            << "threshold" << threshold_
+            << "doSort" << doSort_
+            << "maxLines" << maxLines_;
+        }
+
+        void read(const FileNode& fn)
+        {
+            CV_Assert( String(fn["name"]) == "HoughLinesDetector_GPU" );
+            rho_ = (float)fn["rho"];
+            theta_ = (float)fn["theta"];
+            threshold_ = (int)fn["threshold"];
+            doSort_ = (int)fn["doSort"] != 0;
+            maxLines_ = (int)fn["maxLines"];
+        }
+
+    private:
+        float rho_;
+        float theta_;
+        int threshold_;
+        bool doSort_;
+        int maxLines_;
+
+        GpuMat accum_;
+        GpuMat list_;
+        GpuMat result_;
+    };
+
+    void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
+    {
+        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::hough_lines;
+
+        GpuMat src = _src.getGpuMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+        CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
+        CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
+
+        ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
+        unsigned int* srcPoints = list_.ptr<unsigned int>();
+
+        const int pointsCount = buildPointList_gpu(src, srcPoints);
+        if (pointsCount == 0)
+        {
+            lines.release();
+            return;
+        }
+
+        const int numangle = cvRound(CV_PI / theta_);
+        const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
+        CV_Assert( numangle > 0 && numrho > 0 );
+
+        ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
+        accum_.setTo(Scalar::all(0));
+
+        DeviceInfo devInfo;
+        linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
+
+        ensureSizeIsEnough(2, maxLines_, CV_32FC2, result_);
+
+        int linesCount = linesGetResult_gpu(accum_, result_.ptr<float2>(0), result_.ptr<int>(1), maxLines_, rho_, theta_, threshold_, doSort_);
+
+        if (linesCount == 0)
+        {
+            lines.release();
+            return;
+        }
+
+        result_.cols = linesCount;
+        result_.copyTo(lines);
+    }
+
+    void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes)
+    {
+        GpuMat d_lines = _d_lines.getGpuMat();
+
+        if (d_lines.empty())
+        {
+            h_lines.release();
+            if (h_votes.needed())
+                h_votes.release();
+            return;
+        }
+
+        CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 );
+
+        d_lines.row(0).download(h_lines);
+
+        if (h_votes.needed())
+        {
+            GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr<int>(1));
+            d_votes.download(h_votes);
+        }
+    }
+}
+
+Ptr<HoughLinesDetector> cv::gpu::createHoughLinesDetector(float rho, float theta, int threshold, bool doSort, int maxLines)
+{
+    return makePtr<HoughLinesDetectorImpl>(rho, theta, threshold, doSort, maxLines);
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/hough_segments.cpp b/modules/gpuimgproc/src/hough_segments.cpp
new file mode 100644
index 000000000..6f888a2c0
--- /dev/null
+++ b/modules/gpuimgproc/src/hough_segments.cpp
@@ -0,0 +1,183 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+Ptr<gpu::HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float, float, int, int, int) { throw_no_cuda(); return Ptr<HoughSegmentDetector>(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough
+    {
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
+    }
+
+    namespace hough_lines
+    {
+        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
+    }
+
+    namespace hough_segments
+    {
+        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength);
+    }
+}}}
+
+namespace
+{
+    class HoughSegmentDetectorImpl : public HoughSegmentDetector
+    {
+    public:
+        HoughSegmentDetectorImpl(float rho, float theta, int minLineLength, int maxLineGap, int maxLines) :
+            rho_(rho), theta_(theta), minLineLength_(minLineLength), maxLineGap_(maxLineGap), maxLines_(maxLines)
+        {
+        }
+
+        void detect(InputArray src, OutputArray lines);
+
+        void setRho(float rho) { rho_ = rho; }
+        float getRho() const { return rho_; }
+
+        void setTheta(float theta) { theta_ = theta; }
+        float getTheta() const { return theta_; }
+
+        void setMinLineLength(int minLineLength) { minLineLength_ = minLineLength; }
+        int getMinLineLength() const { return minLineLength_; }
+
+        void setMaxLineGap(int maxLineGap) { maxLineGap_ = maxLineGap; }
+        int getMaxLineGap() const { return maxLineGap_; }
+
+        void setMaxLines(int maxLines) { maxLines_ = maxLines; }
+        int getMaxLines() const { return maxLines_; }
+
+        void write(FileStorage& fs) const
+        {
+            fs << "name" << "PHoughLinesDetector_GPU"
+            << "rho" << rho_
+            << "theta" << theta_
+            << "minLineLength" << minLineLength_
+            << "maxLineGap" << maxLineGap_
+            << "maxLines" << maxLines_;
+        }
+
+        void read(const FileNode& fn)
+        {
+            CV_Assert( String(fn["name"]) == "PHoughLinesDetector_GPU" );
+            rho_ = (float)fn["rho"];
+            theta_ = (float)fn["theta"];
+            minLineLength_ = (int)fn["minLineLength"];
+            maxLineGap_ = (int)fn["maxLineGap"];
+            maxLines_ = (int)fn["maxLines"];
+        }
+
+    private:
+        float rho_;
+        float theta_;
+        int minLineLength_;
+        int maxLineGap_;
+        int maxLines_;
+
+        GpuMat accum_;
+        GpuMat list_;
+        GpuMat result_;
+    };
+
+    void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines)
+    {
+        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::hough_lines;
+        using namespace cv::gpu::cudev::hough_segments;
+
+        GpuMat src = _src.getGpuMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+        CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
+        CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
+
+        ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
+        unsigned int* srcPoints = list_.ptr<unsigned int>();
+
+        const int pointsCount = buildPointList_gpu(src, srcPoints);
+        if (pointsCount == 0)
+        {
+            lines.release();
+            return;
+        }
+
+        const int numangle = cvRound(CV_PI / theta_);
+        const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
+        CV_Assert( numangle > 0 && numrho > 0 );
+
+        ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
+        accum_.setTo(Scalar::all(0));
+
+        DeviceInfo devInfo;
+        linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
+
+        ensureSizeIsEnough(1, maxLines_, CV_32SC4, result_);
+
+        int linesCount = houghLinesProbabilistic_gpu(src, accum_, result_.ptr<int4>(), maxLines_, rho_, theta_, maxLineGap_, minLineLength_);
+
+        if (linesCount == 0)
+        {
+            lines.release();
+            return;
+        }
+
+        result_.cols = linesCount;
+        result_.copyTo(lines);
+    }
+}
+
+Ptr<HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
+{
+    return makePtr<HoughSegmentDetectorImpl>(rho, theta, minLineLength, maxLineGap, maxLines);
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/match_template.cpp b/modules/gpuimgproc/src/match_template.cpp
index 059d41ca9..aeebd01d9 100644
--- a/modules/gpuimgproc/src/match_template.cpp
+++ b/modules/gpuimgproc/src/match_template.cpp
@@ -47,7 +47,7 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_GPUARITHM) || defined (CUDA_DISABLER)
 
-void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
+Ptr<gpu::TemplateMatching> cv::gpu::createTemplateMatching(int, int, Size) { throw_no_cuda(); return Ptr<gpu::TemplateMatching>(); }
 
 #else
 
@@ -137,11 +137,8 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-using namespace ::cv::gpu::cudev::match_template;
-
 namespace
 {
-
     // Evaluates optimal template's area threshold. If
     // template's area is less  than the threshold, we use naive match
     // template version, otherwise FFT-based (if available)
@@ -149,135 +146,317 @@ namespace
     {
         switch (method)
         {
-        case cv::TM_CCORR:
+        case TM_CCORR:
             if (depth == CV_32F) return 250;
             if (depth == CV_8U) return 300;
             break;
-        case cv::TM_SQDIFF:
+
+        case TM_SQDIFF:
             if (depth == CV_8U) return 300;
             break;
         }
-        CV_Error(cv::Error::StsBadArg, "getTemplateThreshold: unsupported match template mode");
+
+        CV_Error(Error::StsBadArg, "unsupported match template mode");
         return 0;
     }
 
+    ///////////////////////////////////////////////////////////////
+    // CCORR_32F
 
-    void matchTemplate_CCORR_32F(
-            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
+    class Match_CCORR_32F : public TemplateMatching
     {
-        result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
-        if (templ.size().area() < getTemplateThreshold(cv::TM_CCORR, CV_32F))
+    public:
+        explicit Match_CCORR_32F(Size user_block_size);
+
+        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
+
+    private:
+        Ptr<gpu::Convolution> conv_;
+        GpuMat result_;
+    };
+
+    Match_CCORR_32F::Match_CCORR_32F(Size user_block_size)
+    {
+        conv_ = gpu::createConvolution(user_block_size);
+    }
+
+    void Match_CCORR_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& _stream)
+    {
+        using namespace cv::gpu::cudev::match_template;
+
+        GpuMat image = _image.getGpuMat();
+        GpuMat templ = _templ.getGpuMat();
+
+        CV_Assert( image.depth() == CV_32F );
+        CV_Assert( image.type() == templ.type() );
+        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
+
+        cudaStream_t stream = StreamAccessor::getStream(_stream);
+
+        _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
+        GpuMat result = _result.getGpuMat();
+
+        if (templ.size().area() < getTemplateThreshold(TM_CCORR, CV_32F))
         {
-            matchTemplateNaive_CCORR_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
+            matchTemplateNaive_CCORR_32F(image, templ, result, image.channels(), stream);
             return;
         }
 
-        Ptr<gpu::Convolution> conv = gpu::createConvolution(buf.user_block_size);
-
         if (image.channels() == 1)
         {
-            conv->convolve(image.reshape(1), templ.reshape(1), result, true, stream);
+            conv_->convolve(image.reshape(1), templ.reshape(1), result, true, _stream);
         }
         else
         {
-            GpuMat result_;
-            conv->convolve(image.reshape(1), templ.reshape(1), result_, true, stream);
-            extractFirstChannel_32F(result_, result, image.channels(), StreamAccessor::getStream(stream));
+            conv_->convolve(image.reshape(1), templ.reshape(1), result_, true, _stream);
+            extractFirstChannel_32F(result_, result, image.channels(), stream);
         }
     }
 
+    ///////////////////////////////////////////////////////////////
+    // CCORR_8U
 
-    void matchTemplate_CCORR_8U(
-            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
+    class Match_CCORR_8U : public TemplateMatching
     {
-        if (templ.size().area() < getTemplateThreshold(cv::TM_CCORR, CV_8U))
+    public:
+        explicit Match_CCORR_8U(Size user_block_size) : match32F_(user_block_size)
         {
-            result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
+        }
+
+        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
+
+    private:
+        GpuMat imagef_, templf_;
+        Match_CCORR_32F match32F_;
+    };
+
+    void Match_CCORR_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::match_template;
+
+        GpuMat image = _image.getGpuMat();
+        GpuMat templ = _templ.getGpuMat();
+
+        CV_Assert( image.depth() == CV_8U );
+        CV_Assert( image.type() == templ.type() );
+        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
+
+        if (templ.size().area() < getTemplateThreshold(TM_CCORR, CV_8U))
+        {
+            _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
+            GpuMat result = _result.getGpuMat();
+
             matchTemplateNaive_CCORR_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
             return;
         }
 
-        image.convertTo(buf.imagef, CV_32F, stream);
-        templ.convertTo(buf.templf, CV_32F, stream);
+        image.convertTo(imagef_, CV_32F, stream);
+        templ.convertTo(templf_, CV_32F, stream);
 
-        matchTemplate_CCORR_32F(buf.imagef, buf.templf, result, buf, stream);
+        match32F_.match(imagef_, templf_, _result, stream);
     }
 
+    ///////////////////////////////////////////////////////////////
+    // CCORR_NORMED_8U
 
-    void matchTemplate_CCORR_NORMED_8U(
-            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
+    class Match_CCORR_NORMED_8U : public TemplateMatching
     {
-        matchTemplate_CCORR_8U(image, templ, result, buf, stream);
+    public:
+        explicit Match_CCORR_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
+        {
+        }
 
-        buf.image_sqsums.resize(1);
-        gpu::sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);
+        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
 
-        unsigned long long templ_sqsum = (unsigned long long)gpu::sqrSum(templ.reshape(1))[0];
-        normalize_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
+    private:
+        Match_CCORR_8U match_CCORR_;
+        GpuMat image_sqsums_;
+        GpuMat intBuffer_;
+    };
+
+    void Match_CCORR_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::match_template;
+
+        GpuMat image = _image.getGpuMat();
+        GpuMat templ = _templ.getGpuMat();
+
+        CV_Assert( image.depth() == CV_8U );
+        CV_Assert( image.type() == templ.type() );
+        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
+
+        match_CCORR_.match(image, templ, _result, stream);
+        GpuMat result = _result.getGpuMat();
+
+        gpu::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+
+        unsigned long long templ_sqsum = (unsigned long long) gpu::sqrSum(templ.reshape(1))[0];
+
+        normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
     }
 
+    ///////////////////////////////////////////////////////////////
+    // SQDIFF_32F
 
-    void matchTemplate_SQDIFF_32F(
-            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
+    class Match_SQDIFF_32F : public TemplateMatching
     {
-        (void)buf;
-        result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
+    public:
+        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
+    };
+
+    void Match_SQDIFF_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::match_template;
+
+        GpuMat image = _image.getGpuMat();
+        GpuMat templ = _templ.getGpuMat();
+
+        CV_Assert( image.depth() == CV_32F );
+        CV_Assert( image.type() == templ.type() );
+        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
+
+        _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
+        GpuMat result = _result.getGpuMat();
+
         matchTemplateNaive_SQDIFF_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
     }
 
+    ///////////////////////////////////////////////////////////////
+    // SQDIFF_8U
 
-    void matchTemplate_SQDIFF_8U(
-            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
+    class Match_SQDIFF_8U : public TemplateMatching
     {
-        if (templ.size().area() < getTemplateThreshold(cv::TM_SQDIFF, CV_8U))
+    public:
+        explicit Match_SQDIFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
         {
-            result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
+        }
+
+        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
+
+    private:
+        GpuMat image_sqsums_;
+        GpuMat intBuffer_;
+        Match_CCORR_8U match_CCORR_;
+    };
+
+    void Match_SQDIFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::match_template;
+
+        GpuMat image = _image.getGpuMat();
+        GpuMat templ = _templ.getGpuMat();
+
+        CV_Assert( image.depth() == CV_8U );
+        CV_Assert( image.type() == templ.type() );
+        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
+
+        if (templ.size().area() < getTemplateThreshold(TM_SQDIFF, CV_8U))
+        {
+            _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
+            GpuMat result = _result.getGpuMat();
+
             matchTemplateNaive_SQDIFF_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
             return;
         }
 
-        buf.image_sqsums.resize(1);
-        gpu::sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);
+        gpu::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
 
-        unsigned long long templ_sqsum = (unsigned long long)gpu::sqrSum(templ.reshape(1))[0];
+        unsigned long long templ_sqsum = (unsigned long long) gpu::sqrSum(templ.reshape(1))[0];
 
-        matchTemplate_CCORR_8U(image, templ, result, buf, stream);
-        matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
+        match_CCORR_.match(image, templ, _result, stream);
+        GpuMat result = _result.getGpuMat();
+
+        matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
     }
 
+    ///////////////////////////////////////////////////////////////
+    // SQDIFF_NORMED_8U
 
-    void matchTemplate_SQDIFF_NORMED_8U(
-            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
+    class Match_SQDIFF_NORMED_8U : public TemplateMatching
     {
-        buf.image_sqsums.resize(1);
-        gpu::sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);
+    public:
+        explicit Match_SQDIFF_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
+        {
+        }
 
-        unsigned long long templ_sqsum = (unsigned long long)gpu::sqrSum(templ.reshape(1))[0];
+        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
 
-        matchTemplate_CCORR_8U(image, templ, result, buf, stream);
-        matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
+    private:
+        GpuMat image_sqsums_;
+        GpuMat intBuffer_;
+        Match_CCORR_8U match_CCORR_;
+    };
+
+    void Match_SQDIFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::match_template;
+
+        GpuMat image = _image.getGpuMat();
+        GpuMat templ = _templ.getGpuMat();
+
+        CV_Assert( image.depth() == CV_8U );
+        CV_Assert( image.type() == templ.type() );
+        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
+
+        gpu::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+
+        unsigned long long templ_sqsum = (unsigned long long) gpu::sqrSum(templ.reshape(1))[0];
+
+        match_CCORR_.match(image, templ, _result, stream);
+        GpuMat result = _result.getGpuMat();
+
+        matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
     }
 
+    ///////////////////////////////////////////////////////////////
+    // CCOFF_8U
 
-    void matchTemplate_CCOFF_8U(
-            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
+    class Match_CCOEFF_8U : public TemplateMatching
     {
-        matchTemplate_CCORR_8U(image, templ, result, buf, stream);
+    public:
+        explicit Match_CCOEFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
+        {
+        }
+
+        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
+
+    private:
+        GpuMat intBuffer_;
+        std::vector<GpuMat> images_;
+        std::vector<GpuMat> image_sums_;
+        Match_CCORR_8U match_CCORR_;
+    };
+
+    void Match_CCOEFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::match_template;
+
+        GpuMat image = _image.getGpuMat();
+        GpuMat templ = _templ.getGpuMat();
+
+        CV_Assert( image.depth() == CV_8U );
+        CV_Assert( image.type() == templ.type() );
+        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
+
+        match_CCORR_.match(image, templ, _result, stream);
+        GpuMat result = _result.getGpuMat();
 
         if (image.channels() == 1)
         {
-            buf.image_sums.resize(1);
-            gpu::integral(image, buf.image_sums[0], stream);
+            image_sums_.resize(1);
+            gpu::integral(image, image_sums_[0], intBuffer_, stream);
 
-            unsigned int templ_sum = (unsigned int)gpu::sum(templ)[0];
-            matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, buf.image_sums[0], templ_sum, result, StreamAccessor::getStream(stream));
+            unsigned int templ_sum = (unsigned int) gpu::sum(templ)[0];
+
+            matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, StreamAccessor::getStream(stream));
         }
         else
         {
-            gpu::split(image, buf.images);
-            buf.image_sums.resize(buf.images.size());
+            gpu::split(image, images_);
+
+            image_sums_.resize(images_.size());
             for (int i = 0; i < image.channels(); ++i)
-                gpu::integral(buf.images[i], buf.image_sums[i], stream);
+                gpu::integral(images_[i], image_sums_[i], intBuffer_, stream);
 
             Scalar templ_sum = gpu::sum(templ);
 
@@ -285,60 +464,91 @@ namespace
             {
             case 2:
                 matchTemplatePrepared_CCOFF_8UC2(
-                        templ.cols, templ.rows, buf.image_sums[0], buf.image_sums[1],
-                        (unsigned int)templ_sum[0], (unsigned int)templ_sum[1],
+                        templ.cols, templ.rows, image_sums_[0], image_sums_[1],
+                        (unsigned int) templ_sum[0], (unsigned int) templ_sum[1],
                         result, StreamAccessor::getStream(stream));
                 break;
             case 3:
                 matchTemplatePrepared_CCOFF_8UC3(
-                        templ.cols, templ.rows, buf.image_sums[0], buf.image_sums[1], buf.image_sums[2],
-                        (unsigned int)templ_sum[0], (unsigned int)templ_sum[1], (unsigned int)templ_sum[2],
+                        templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2],
+                        (unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2],
                         result, StreamAccessor::getStream(stream));
                 break;
             case 4:
                 matchTemplatePrepared_CCOFF_8UC4(
-                        templ.cols, templ.rows, buf.image_sums[0], buf.image_sums[1], buf.image_sums[2], buf.image_sums[3],
-                        (unsigned int)templ_sum[0], (unsigned int)templ_sum[1], (unsigned int)templ_sum[2],
-                        (unsigned int)templ_sum[3], result, StreamAccessor::getStream(stream));
+                        templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], image_sums_[3],
+                        (unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2], (unsigned int) templ_sum[3],
+                        result, StreamAccessor::getStream(stream));
                 break;
             default:
-                CV_Error(cv::Error::StsBadArg, "matchTemplate: unsupported number of channels");
+                CV_Error(Error::StsBadArg, "unsupported number of channels");
             }
         }
     }
 
+    ///////////////////////////////////////////////////////////////
+    // CCOFF_NORMED_8U
 
-    void matchTemplate_CCOFF_NORMED_8U(
-            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
+    class Match_CCOEFF_NORMED_8U : public TemplateMatching
     {
-        image.convertTo(buf.imagef, CV_32F, stream);
-        templ.convertTo(buf.templf, CV_32F, stream);
+    public:
+        explicit Match_CCOEFF_NORMED_8U(Size user_block_size) : match_CCORR_32F_(user_block_size)
+        {
+        }
 
-        matchTemplate_CCORR_32F(buf.imagef, buf.templf, result, buf, stream);
+        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
+
+    private:
+        GpuMat imagef_, templf_;
+        Match_CCORR_32F match_CCORR_32F_;
+        GpuMat intBuffer_;
+        std::vector<GpuMat> images_;
+        std::vector<GpuMat> image_sums_;
+        std::vector<GpuMat> image_sqsums_;
+    };
+
+    void Match_CCOEFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
+    {
+        using namespace cv::gpu::cudev::match_template;
+
+        GpuMat image = _image.getGpuMat();
+        GpuMat templ = _templ.getGpuMat();
+
+        CV_Assert( image.depth() == CV_8U );
+        CV_Assert( image.type() == templ.type() );
+        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
+
+        image.convertTo(imagef_, CV_32F, stream);
+        templ.convertTo(templf_, CV_32F, stream);
+
+        match_CCORR_32F_.match(imagef_, templf_, _result, stream);
+        GpuMat result = _result.getGpuMat();
 
         if (image.channels() == 1)
         {
-            buf.image_sums.resize(1);
-            gpu::integral(image, buf.image_sums[0], stream);
-            buf.image_sqsums.resize(1);
-            gpu::sqrIntegral(image, buf.image_sqsums[0], stream);
+            image_sums_.resize(1);
+            gpu::integral(image, image_sums_[0], intBuffer_, stream);
 
-            unsigned int templ_sum = (unsigned int)gpu::sum(templ)[0];
-            unsigned long long templ_sqsum = (unsigned long long)gpu::sqrSum(templ)[0];
+            image_sqsums_.resize(1);
+            gpu::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
+
+            unsigned int templ_sum = (unsigned int) gpu::sum(templ)[0];
+            unsigned long long templ_sqsum = (unsigned long long) gpu::sqrSum(templ)[0];
 
             matchTemplatePrepared_CCOFF_NORMED_8U(
-                    templ.cols, templ.rows, buf.image_sums[0], buf.image_sqsums[0],
+                    templ.cols, templ.rows, image_sums_[0], image_sqsums_[0],
                     templ_sum, templ_sqsum, result, StreamAccessor::getStream(stream));
         }
         else
         {
-            gpu::split(image, buf.images);
-            buf.image_sums.resize(buf.images.size());
-            buf.image_sqsums.resize(buf.images.size());
+            gpu::split(image, images_);
+
+            image_sums_.resize(images_.size());
+            image_sqsums_.resize(images_.size());
             for (int i = 0; i < image.channels(); ++i)
             {
-                gpu::integral(buf.images[i], buf.image_sums[i], stream);
-                gpu::sqrIntegral(buf.images[i], buf.image_sqsums[i], stream);
+                gpu::integral(images_[i], image_sums_[i], intBuffer_, stream);
+                gpu::sqrIntegral(images_[i], image_sqsums_[i], intBuffer_, stream);
             }
 
             Scalar templ_sum = gpu::sum(templ);
@@ -349,8 +559,8 @@ namespace
             case 2:
                 matchTemplatePrepared_CCOFF_NORMED_8UC2(
                         templ.cols, templ.rows,
-                        buf.image_sums[0], buf.image_sqsums[0],
-                        buf.image_sums[1], buf.image_sqsums[1],
+                        image_sums_[0], image_sqsums_[0],
+                        image_sums_[1], image_sqsums_[1],
                         (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
                         (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
                         result, StreamAccessor::getStream(stream));
@@ -358,9 +568,9 @@ namespace
             case 3:
                 matchTemplatePrepared_CCOFF_NORMED_8UC3(
                         templ.cols, templ.rows,
-                        buf.image_sums[0], buf.image_sqsums[0],
-                        buf.image_sums[1], buf.image_sqsums[1],
-                        buf.image_sums[2], buf.image_sqsums[2],
+                        image_sums_[0], image_sqsums_[0],
+                        image_sums_[1], image_sqsums_[1],
+                        image_sums_[2], image_sqsums_[2],
                         (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
                         (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
                         (unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
@@ -369,10 +579,10 @@ namespace
             case 4:
                 matchTemplatePrepared_CCOFF_NORMED_8UC4(
                         templ.cols, templ.rows,
-                        buf.image_sums[0], buf.image_sqsums[0],
-                        buf.image_sums[1], buf.image_sqsums[1],
-                        buf.image_sums[2], buf.image_sqsums[2],
-                        buf.image_sums[3], buf.image_sqsums[3],
+                        image_sums_[0], image_sqsums_[0],
+                        image_sums_[1], image_sqsums_[1],
+                        image_sums_[2], image_sqsums_[2],
+                        image_sums_[3], image_sqsums_[3],
                         (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
                         (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
                         (unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
@@ -380,46 +590,60 @@ namespace
                         result, StreamAccessor::getStream(stream));
                 break;
             default:
-                CV_Error(cv::Error::StsBadArg, "matchTemplate: unsupported number of channels");
+                CV_Error(Error::StsBadArg, "unsupported number of channels");
             }
         }
     }
 }
 
-
-void cv::gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream& stream)
+Ptr<gpu::TemplateMatching> cv::gpu::createTemplateMatching(int srcType, int method, Size user_block_size)
 {
-    MatchTemplateBuf buf;
-    matchTemplate(image, templ, result, method, buf, stream);
-}
+    const int sdepth = CV_MAT_DEPTH(srcType);
 
+    CV_Assert( sdepth == CV_8U || sdepth == CV_32F );
 
-void cv::gpu::matchTemplate(
-        const GpuMat& image, const GpuMat& templ, GpuMat& result, int method,
-        MatchTemplateBuf &buf, Stream& stream)
-{
-    CV_Assert(image.type() == templ.type());
-    CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
-
-    typedef void (*Caller)(const GpuMat&, const GpuMat&, GpuMat&, MatchTemplateBuf&, Stream& stream);
-
-    static const Caller callers8U[] = { ::matchTemplate_SQDIFF_8U, ::matchTemplate_SQDIFF_NORMED_8U,
-                                        ::matchTemplate_CCORR_8U, ::matchTemplate_CCORR_NORMED_8U,
-                                        ::matchTemplate_CCOFF_8U, ::matchTemplate_CCOFF_NORMED_8U };
-    static const Caller callers32F[] = { ::matchTemplate_SQDIFF_32F, 0,
-                                         ::matchTemplate_CCORR_32F, 0, 0, 0 };
-
-    const Caller* callers = 0;
-    switch (image.depth())
+    if (sdepth == CV_32F)
     {
-        case CV_8U: callers = callers8U; break;
-        case CV_32F: callers = callers32F; break;
-        default: CV_Error(cv::Error::StsBadArg, "matchTemplate: unsupported data type");
-    }
+        switch (method)
+        {
+        case TM_SQDIFF:
+            return makePtr<Match_SQDIFF_32F>();
 
-    Caller caller = callers[method];
-    CV_Assert(caller);
-    caller(image, templ, result, buf, stream);
+        case TM_CCORR:
+            return makePtr<Match_CCORR_32F>(user_block_size);
+
+        default:
+            CV_Error( Error::StsBadFlag, "Unsopported method" );
+            return Ptr<gpu::TemplateMatching>();
+        }
+    }
+    else
+    {
+        switch (method)
+        {
+        case TM_SQDIFF:
+            return makePtr<Match_SQDIFF_8U>(user_block_size);
+
+        case TM_SQDIFF_NORMED:
+            return makePtr<Match_SQDIFF_NORMED_8U>(user_block_size);
+
+        case TM_CCORR:
+            return makePtr<Match_CCORR_8U>(user_block_size);
+
+        case TM_CCORR_NORMED:
+            return makePtr<Match_CCORR_NORMED_8U>(user_block_size);
+
+        case TM_CCOEFF:
+            return makePtr<Match_CCOEFF_8U>(user_block_size);
+
+        case TM_CCOEFF_NORMED:
+            return makePtr<Match_CCOEFF_NORMED_8U>(user_block_size);
+
+        default:
+            CV_Error( Error::StsBadFlag, "Unsopported method" );
+            return Ptr<gpu::TemplateMatching>();
+        }
+    }
 }
 
 #endif
diff --git a/modules/gpuimgproc/src/mean_shift.cpp b/modules/gpuimgproc/src/mean_shift.cpp
index e30f95bf9..26368ca5a 100644
--- a/modules/gpuimgproc/src/mean_shift.cpp
+++ b/modules/gpuimgproc/src/mean_shift.cpp
@@ -47,13 +47,13 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
-void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
+void cv::gpu::meanShiftFiltering(InputArray, OutputArray, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
+void cv::gpu::meanShiftProc(InputArray, OutputArray, OutputArray, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
 ////////////////////////////////////////////////////////////////////////
-// meanShiftFiltering_GPU
+// meanShiftFiltering
 
 namespace cv { namespace gpu { namespace cudev
 {
@@ -63,27 +63,26 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria, Stream& stream)
+void cv::gpu::meanShiftFiltering(InputArray _src, OutputArray _dst, int sp, int sr, TermCriteria criteria, Stream& stream)
 {
     using namespace ::cv::gpu::cudev::imgproc;
 
-    if( src.empty() )
-        CV_Error( cv::Error::StsBadArg, "The input image is empty" );
+    GpuMat src = _src.getGpuMat();
 
-    if( src.depth() != CV_8U || src.channels() != 4 )
-        CV_Error( cv::Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
+    CV_Assert( src.type() == CV_8UC4 );
 
-    dst.create( src.size(), CV_8UC4 );
+    _dst.create(src.size(), CV_8UC4);
+    GpuMat dst = _dst.getGpuMat();
 
-    if( !(criteria.type & TermCriteria::MAX_ITER) )
+    if (!(criteria.type & TermCriteria::MAX_ITER))
         criteria.maxCount = 5;
 
     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
 
-    float eps;
-    if( !(criteria.type & TermCriteria::EPS) )
-        eps = 1.f;
-    eps = (float)std::max(criteria.epsilon, 0.0);
+    if (!(criteria.type & TermCriteria::EPS))
+        criteria.epsilon = 1.f;
+
+    float eps = (float) std::max(criteria.epsilon, 0.0);
 
     meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
 }
@@ -99,28 +98,29 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
+void cv::gpu::meanShiftProc(InputArray _src, OutputArray _dstr, OutputArray _dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
 {
     using namespace ::cv::gpu::cudev::imgproc;
 
-    if( src.empty() )
-        CV_Error( cv::Error::StsBadArg, "The input image is empty" );
+    GpuMat src = _src.getGpuMat();
 
-    if( src.depth() != CV_8U || src.channels() != 4 )
-        CV_Error( cv::Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
+    CV_Assert( src.type() == CV_8UC4 );
 
-    dstr.create( src.size(), CV_8UC4 );
-    dstsp.create( src.size(), CV_16SC2 );
+    _dstr.create(src.size(), CV_8UC4);
+    _dstsp.create(src.size(), CV_16SC2);
 
-    if( !(criteria.type & TermCriteria::MAX_ITER) )
+    GpuMat dstr = _dstr.getGpuMat();
+    GpuMat dstsp = _dstsp.getGpuMat();
+
+    if (!(criteria.type & TermCriteria::MAX_ITER))
         criteria.maxCount = 5;
 
     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
 
-    float eps;
-    if( !(criteria.type & TermCriteria::EPS) )
-        eps = 1.f;
-    eps = (float)std::max(criteria.epsilon, 0.0);
+    if (!(criteria.type & TermCriteria::EPS))
+        criteria.epsilon = 1.f;
+
+    float eps = (float) std::max(criteria.epsilon, 0.0);
 
     meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
 }
diff --git a/modules/gpuimgproc/src/mssegmentation.cpp b/modules/gpuimgproc/src/mssegmentation.cpp
index 7f02168e1..ec1c5feb4 100644
--- a/modules/gpuimgproc/src/mssegmentation.cpp
+++ b/modules/gpuimgproc/src/mssegmentation.cpp
@@ -43,7 +43,7 @@
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::meanShiftSegmentation(const GpuMat&, Mat&, int, int, int, TermCriteria) { throw_no_cuda(); }
+void cv::gpu::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria) { throw_no_cuda(); }
 
 #else
 
@@ -222,9 +222,12 @@ inline int dist2(const cv::Vec2s& lhs, const cv::Vec2s& rhs)
 } // anonymous namespace
 
 
-void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize, TermCriteria criteria)
+void cv::gpu::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria)
 {
-    CV_Assert(src.type() == CV_8UC4);
+    GpuMat src = _src.getGpuMat();
+
+    CV_Assert( src.type() == CV_8UC4 );
+
     const int nrows = src.rows;
     const int ncols = src.cols;
     const int hr = sr;
@@ -232,7 +235,7 @@ void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr,
 
     // Perform mean shift procedure and obtain region and spatial maps
     GpuMat d_rmap, d_spmap;
-    meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
+    gpu::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
     Mat rmap(d_rmap);
     Mat spmap(d_spmap);
 
@@ -337,7 +340,7 @@ void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr,
     }
 
     // Sort all graph's edges connecting differnet components (in asceding order)
-    sort(edges.begin(), edges.end());
+    std::sort(edges.begin(), edges.end());
 
     // Exclude small components (starting from the nearest couple)
     for (size_t i = 0; i < edges.size(); ++i)
@@ -366,7 +369,8 @@ void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr,
     }
 
     // Create final image, color of each segment is the average color of its pixels
-    dst.create(src.size(), src.type());
+    _dst.create(src.size(), src.type());
+    Mat dst = _dst.getMat();
 
     for (int y = 0; y < nrows; ++y)
     {
diff --git a/modules/gpuimgproc/src/precomp.cpp b/modules/gpuimgproc/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpuimgproc/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpuimgproc/src/precomp.hpp b/modules/gpuimgproc/src/precomp.hpp
index 1417c874b..b1edbf123 100644
--- a/modules/gpuimgproc/src/precomp.hpp
+++ b/modules/gpuimgproc/src/precomp.hpp
@@ -44,8 +44,8 @@
 #define __OPENCV_PRECOMP_H__
 
 #include "opencv2/gpuimgproc.hpp"
-#include "opencv2/gpufilters.hpp"
 
+#include "opencv2/core/utility.hpp"
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/private.gpu.hpp"
 
@@ -55,4 +55,8 @@
 #  include "opencv2/gpuarithm.hpp"
 #endif
 
+#ifdef HAVE_OPENCV_GPUFILTERS
+#  include "opencv2/gpufilters.hpp"
+#endif
+
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpuimgproc/test/test_canny.cpp b/modules/gpuimgproc/test/test_canny.cpp
index b3ab5addc..3d9d35016 100644
--- a/modules/gpuimgproc/test/test_canny.cpp
+++ b/modules/gpuimgproc/test/test_canny.cpp
@@ -81,28 +81,15 @@ GPU_TEST_P(Canny, Accuracy)
     double low_thresh = 50.0;
     double high_thresh = 100.0;
 
-    if (!supportFeature(devInfo, cv::gpu::SHARED_ATOMICS))
-    {
-        try
-        {
-        cv::gpu::GpuMat edges;
-        cv::gpu::Canny(loadMat(img), edges, low_thresh, high_thresh, apperture_size, useL2gradient);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat edges;
-        cv::gpu::Canny(loadMat(img, useRoi), edges, low_thresh, high_thresh, apperture_size, useL2gradient);
+    cv::Ptr<cv::gpu::CannyEdgeDetector> canny = cv::gpu::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
 
-        cv::Mat edges_gold;
-        cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
+    cv::gpu::GpuMat edges;
+    canny->detect(loadMat(img, useRoi), edges);
 
-        EXPECT_MAT_SIMILAR(edges_gold, edges, 2e-2);
-    }
+    cv::Mat edges_gold;
+    cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
+
+    EXPECT_MAT_SIMILAR(edges_gold, edges, 2e-2);
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
diff --git a/modules/gpuimgproc/test/test_corners.cpp b/modules/gpuimgproc/test/test_corners.cpp
index 54d8df457..262548056 100644
--- a/modules/gpuimgproc/test/test_corners.cpp
+++ b/modules/gpuimgproc/test/test_corners.cpp
@@ -82,8 +82,10 @@ GPU_TEST_P(CornerHarris, Accuracy)
 
     double k = randomDouble(0.1, 0.9);
 
+    cv::Ptr<cv::gpu::CornernessCriteria> harris = cv::gpu::createHarrisCorner(src.type(), blockSize, apertureSize, k, borderType);
+
     cv::gpu::GpuMat dst;
-    cv::gpu::cornerHarris(loadMat(src), dst, blockSize, apertureSize, k, borderType);
+    harris->compute(loadMat(src), dst);
 
     cv::Mat dst_gold;
     cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
@@ -126,8 +128,10 @@ GPU_TEST_P(CornerMinEigen, Accuracy)
     cv::Mat src = readImageType("stereobm/aloe-L.png", type);
     ASSERT_FALSE(src.empty());
 
+    cv::Ptr<cv::gpu::CornernessCriteria> minEigenVal = cv::gpu::createMinEigenValCorner(src.type(), blockSize, apertureSize, borderType);
+
     cv::gpu::GpuMat dst;
-    cv::gpu::cornerMinEigenVal(loadMat(src), dst, blockSize, apertureSize, borderType);
+    minEigenVal->compute(loadMat(src), dst);
 
     cv::Mat dst_gold;
     cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
diff --git a/modules/gpuimgproc/test/test_gftt.cpp b/modules/gpuimgproc/test/test_gftt.cpp
index b20df33ae..6ba6e0cff 100644
--- a/modules/gpuimgproc/test/test_gftt.cpp
+++ b/modules/gpuimgproc/test/test_gftt.cpp
@@ -76,10 +76,10 @@ GPU_TEST_P(GoodFeaturesToTrack, Accuracy)
     int maxCorners = 1000;
     double qualityLevel = 0.01;
 
-    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(maxCorners, qualityLevel, minDistance);
+    cv::Ptr<cv::gpu::CornersDetector> detector = cv::gpu::createGoodFeaturesToTrackDetector(image.type(), maxCorners, qualityLevel, minDistance);
 
     cv::gpu::GpuMat d_pts;
-    detector(loadMat(image), d_pts);
+    detector->detect(loadMat(image), d_pts);
 
     ASSERT_FALSE(d_pts.empty());
 
@@ -114,12 +114,12 @@ GPU_TEST_P(GoodFeaturesToTrack, EmptyCorners)
     int maxCorners = 1000;
     double qualityLevel = 0.01;
 
-    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(maxCorners, qualityLevel, minDistance);
-
     cv::gpu::GpuMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
     cv::gpu::GpuMat corners(1, maxCorners, CV_32FC2);
 
-    detector(src, corners);
+    cv::Ptr<cv::gpu::CornersDetector> detector = cv::gpu::createGoodFeaturesToTrackDetector(src.type(), maxCorners, qualityLevel, minDistance);
+
+    detector->detect(src, corners);
 
     ASSERT_TRUE(corners.empty());
 }
diff --git a/modules/gpuimgproc/test/test_histogram.cpp b/modules/gpuimgproc/test/test_histogram.cpp
index c3d17d2a1..556211729 100644
--- a/modules/gpuimgproc/test/test_histogram.cpp
+++ b/modules/gpuimgproc/test/test_histogram.cpp
@@ -49,13 +49,16 @@ using namespace cvtest;
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // HistEven
 
-struct HistEven : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(HistEven, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo;
 
+    cv::Size size;
+
     virtual void SetUp()
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
     }
@@ -63,57 +66,34 @@ struct HistEven : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 GPU_TEST_P(HistEven, Accuracy)
 {
-    cv::Mat img = readImage("stereobm/aloe-L.png");
-    ASSERT_FALSE(img.empty());
-
-    cv::Mat hsv;
-    cv::cvtColor(img, hsv, cv::COLOR_BGR2HSV);
+    cv::Mat src = randomMat(size, CV_8UC1);
 
     int hbins = 30;
-    float hranges[] = {0.0f, 180.0f};
-
-    std::vector<cv::gpu::GpuMat> srcs;
-    cv::gpu::split(loadMat(hsv), srcs);
+    float hranges[] = {50.0f, 200.0f};
 
     cv::gpu::GpuMat hist;
-    cv::gpu::histEven(srcs[0], hist, hbins, (int)hranges[0], (int)hranges[1]);
+    cv::gpu::histEven(loadMat(src), hist, hbins, (int) hranges[0], (int) hranges[1]);
+
+    cv::Mat hist_gold;
 
-    cv::MatND histnd;
     int histSize[] = {hbins};
     const float* ranges[] = {hranges};
     int channels[] = {0};
-    cv::calcHist(&hsv, 1, channels, cv::Mat(), histnd, 1, histSize, ranges);
+    cv::calcHist(&src, 1, channels, cv::Mat(), hist_gold, 1, histSize, ranges);
 
-    cv::Mat hist_gold = histnd;
     hist_gold = hist_gold.t();
     hist_gold.convertTo(hist_gold, CV_32S);
 
     EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HistEven, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HistEven, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // CalcHist
 
-namespace
-{
-    void calcHistGold(const cv::Mat& src, cv::Mat& hist)
-    {
-        hist.create(1, 256, CV_32SC1);
-        hist.setTo(cv::Scalar::all(0));
-
-        int* hist_row = hist.ptr<int>();
-        for (int y = 0; y < src.rows; ++y)
-        {
-            const uchar* src_row = src.ptr(y);
-
-            for (int x = 0; x < src.cols; ++x)
-                ++hist_row[src_row[x]];
-        }
-    }
-}
-
 PARAM_TEST_CASE(CalcHist, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo;
@@ -137,7 +117,16 @@ GPU_TEST_P(CalcHist, Accuracy)
     cv::gpu::calcHist(loadMat(src), hist);
 
     cv::Mat hist_gold;
-    calcHistGold(src, hist_gold);
+
+    const int hbins = 256;
+    const float hranges[] = {0.0f, 256.0f};
+    const int histSize[] = {hbins};
+    const float* ranges[] = {hranges};
+    const int channels[] = {0};
+
+    cv::calcHist(&src, 1, channels, cv::Mat(), hist_gold, 1, histSize, ranges);
+    hist_gold = hist_gold.reshape(1, 1);
+    hist_gold.convertTo(hist_gold, CV_32S);
 
     EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
 }
diff --git a/modules/gpuimgproc/test/test_hough.cpp b/modules/gpuimgproc/test/test_hough.cpp
index a04490104..969899d8b 100644
--- a/modules/gpuimgproc/test/test_hough.cpp
+++ b/modules/gpuimgproc/test/test_hough.cpp
@@ -94,11 +94,13 @@ GPU_TEST_P(HoughLines, Accuracy)
     cv::Mat src(size, CV_8UC1);
     generateLines(src);
 
+    cv::Ptr<cv::gpu::HoughLinesDetector> hough = cv::gpu::createHoughLinesDetector(rho, theta, threshold);
+
     cv::gpu::GpuMat d_lines;
-    cv::gpu::HoughLines(loadMat(src, useRoi), d_lines, rho, theta, threshold);
+    hough->detect(loadMat(src, useRoi), d_lines);
 
     std::vector<cv::Vec2f> lines;
-    cv::gpu::HoughLinesDownload(d_lines, lines);
+    hough->downloadResults(d_lines, lines);
 
     cv::Mat dst(size, CV_8UC1);
     drawLines(dst, lines);
@@ -148,11 +150,13 @@ GPU_TEST_P(HoughCircles, Accuracy)
     cv::Mat src(size, CV_8UC1);
     drawCircles(src, circles_gold, true);
 
+    cv::Ptr<cv::gpu::HoughCirclesDetector> houghCircles = cv::gpu::createHoughCirclesDetector(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
+
     cv::gpu::GpuMat d_circles;
-    cv::gpu::HoughCircles(loadMat(src, useRoi), d_circles, cv::HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
+    houghCircles->detect(loadMat(src, useRoi), d_circles);
 
     std::vector<cv::Vec3f> circles;
-    cv::gpu::HoughCirclesDownload(d_circles, circles);
+    d_circles.download(circles);
 
     ASSERT_FALSE(circles.empty());
 
@@ -189,7 +193,7 @@ PARAM_TEST_CASE(GeneralizedHough, cv::gpu::DeviceInfo, UseRoi)
 {
 };
 
-GPU_TEST_P(GeneralizedHough, POSITION)
+GPU_TEST_P(GeneralizedHough, Ballard)
 {
     const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
@@ -214,16 +218,16 @@ GPU_TEST_P(GeneralizedHough, POSITION)
         templ.copyTo(imageROI);
     }
 
-    cv::Ptr<cv::gpu::GeneralizedHough_GPU> hough = cv::gpu::GeneralizedHough_GPU::create(cv::GeneralizedHough::GHT_POSITION);
-    hough->set("votesThreshold", 200);
+    cv::Ptr<cv::GeneralizedHoughBallard> alg = cv::gpu::createGeneralizedHoughBallard();
+    alg->setVotesThreshold(200);
 
-    hough->setTemplate(loadMat(templ, useRoi));
+    alg->setTemplate(loadMat(templ, useRoi));
 
     cv::gpu::GpuMat d_pos;
-    hough->detect(loadMat(image, useRoi), d_pos);
+    alg->detect(loadMat(image, useRoi), d_pos);
 
     std::vector<cv::Vec4f> pos;
-    hough->download(d_pos, pos);
+    d_pos.download(pos);
 
     ASSERT_EQ(gold_count, pos.size());
 
diff --git a/modules/gpuimgproc/test/test_match_template.cpp b/modules/gpuimgproc/test/test_match_template.cpp
index d18757923..b6fd16140 100644
--- a/modules/gpuimgproc/test/test_match_template.cpp
+++ b/modules/gpuimgproc/test/test_match_template.cpp
@@ -82,8 +82,10 @@ GPU_TEST_P(MatchTemplate8U, Accuracy)
     cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
     cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
 
+    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+
     cv::gpu::GpuMat dst;
-    cv::gpu::matchTemplate(loadMat(image), loadMat(templ), dst, method);
+    alg->match(loadMat(image), loadMat(templ), dst);
 
     cv::Mat dst_gold;
     cv::matchTemplate(image, templ, dst_gold, method);
@@ -128,8 +130,10 @@ GPU_TEST_P(MatchTemplate32F, Regression)
     cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
     cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
 
+    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+
     cv::gpu::GpuMat dst;
-    cv::gpu::matchTemplate(loadMat(image), loadMat(templ), dst, method);
+    alg->match(loadMat(image), loadMat(templ), dst);
 
     cv::Mat dst_gold;
     cv::matchTemplate(image, templ, dst_gold, method);
@@ -169,8 +173,10 @@ GPU_TEST_P(MatchTemplateBlackSource, Accuracy)
     cv::Mat pattern = readImage("matchtemplate/cat.png");
     ASSERT_FALSE(pattern.empty());
 
+    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+
     cv::gpu::GpuMat d_dst;
-    cv::gpu::matchTemplate(loadMat(image), loadMat(pattern), d_dst, method);
+    alg->match(loadMat(image), loadMat(pattern), d_dst);
 
     cv::Mat dst(d_dst);
 
@@ -214,8 +220,10 @@ GPU_TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
     cv::Mat pattern = readImage(patternName);
     ASSERT_FALSE(pattern.empty());
 
+    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), cv::TM_CCOEFF_NORMED);
+
     cv::gpu::GpuMat d_dst;
-    cv::gpu::matchTemplate(loadMat(image), loadMat(pattern), d_dst, cv::TM_CCOEFF_NORMED);
+    alg->match(loadMat(image), loadMat(pattern), d_dst);
 
     cv::Mat dst(d_dst);
 
@@ -263,8 +271,10 @@ GPU_TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF_NORMED)
     cv::Mat templ = readImage("matchtemplate/template.png");
     ASSERT_FALSE(templ.empty());
 
+    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(scene.type(), cv::TM_SQDIFF_NORMED);
+
     cv::gpu::GpuMat d_result;
-    cv::gpu::matchTemplate(loadMat(scene), loadMat(templ), d_result, cv::TM_SQDIFF_NORMED);
+    alg->match(loadMat(scene), loadMat(templ), d_result);
 
     cv::Mat result(d_result);
 
@@ -286,8 +296,10 @@ GPU_TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF)
     cv::Mat templ = readImage("matchtemplate/template.png");
     ASSERT_FALSE(templ.empty());
 
+    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(scene.type(), cv::TM_SQDIFF);
+
     cv::gpu::GpuMat d_result;
-    cv::gpu::matchTemplate(loadMat(scene), loadMat(templ), d_result, cv::TM_SQDIFF);
+    alg->match(loadMat(scene), loadMat(templ), d_result);
 
     cv::Mat result(d_result);
 
diff --git a/modules/gpuimgproc/test/test_precomp.cpp b/modules/gpuimgproc/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpuimgproc/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpuimgproc/test/test_precomp.hpp b/modules/gpuimgproc/test/test_precomp.hpp
index 4196aa9fe..7a8b1cbcd 100644
--- a/modules/gpuimgproc/test/test_precomp.hpp
+++ b/modules/gpuimgproc/test/test_precomp.hpp
@@ -55,7 +55,8 @@
 #include "opencv2/ts/gpu_test.hpp"
 
 #include "opencv2/gpuimgproc.hpp"
-#include "opencv2/gpuarithm.hpp"
 #include "opencv2/imgproc.hpp"
 
+#include "cvconfig.h"
+
 #endif
diff --git a/modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
index e993c6408..cb84c23ad 100644
--- a/modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
@@ -126,7 +126,7 @@ typedef                int Ncv32s;
 typedef       unsigned int Ncv32u;
 typedef              short Ncv16s;
 typedef     unsigned short Ncv16u;
-typedef               char Ncv8s;
+typedef        signed char Ncv8s;
 typedef      unsigned char Ncv8u;
 typedef              float Ncv32f;
 typedef             double Ncv64f;
diff --git a/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
index c8aaaeace..3ac1e9491 100644
--- a/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
+++ b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
@@ -2138,8 +2138,8 @@ static NCVStatus loadFromXML(const cv::String &filename,
     haarClassifierNodes.resize(0);
     haarFeatures.resize(0);
 
-    cv::Ptr<CvHaarClassifierCascade> oldCascade = (CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0);
-    if (oldCascade.empty())
+    cv::Ptr<CvHaarClassifierCascade> oldCascade((CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0));
+    if (!oldCascade)
     {
         return NCV_HAAR_XML_LOADING_EXCEPTION;
     }
diff --git a/modules/gpulegacy/src/cuda/NCVPixelOperations.hpp b/modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
index 6409fab94..5cf902a9f 100644
--- a/modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
+++ b/modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
@@ -51,7 +51,7 @@ template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
 template<> static inline __host__ __device__ Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
 template<> static inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
 template<> static inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return  UINT_MAX;}
-template<> static inline __host__ __device__ Ncv8s  _pixMaxVal<Ncv8s>()  {return  CHAR_MAX;}
+template<> static inline __host__ __device__ Ncv8s  _pixMaxVal<Ncv8s>()  {return  SCHAR_MAX;}
 template<> static inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return  SHRT_MAX;}
 template<> static inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return   INT_MAX;}
 template<> static inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return   FLT_MAX;}
@@ -61,7 +61,7 @@ template<typename TBase> inline __host__ __device__ TBase _pixMinVal();
 template<> static inline __host__ __device__ Ncv8u  _pixMinVal<Ncv8u>()  {return 0;}
 template<> static inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
 template<> static inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
-template<> static inline __host__ __device__ Ncv8s  _pixMinVal<Ncv8s>()  {return CHAR_MIN;}
+template<> static inline __host__ __device__ Ncv8s  _pixMinVal<Ncv8s>()  {return SCHAR_MIN;}
 template<> static inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
 template<> static inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
 template<> static inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}
diff --git a/modules/gpulegacy/src/precomp.cpp b/modules/gpulegacy/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpulegacy/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpulegacy/test/test_precomp.cpp b/modules/gpulegacy/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpulegacy/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpulegacy/test/test_precomp.hpp b/modules/gpulegacy/test/test_precomp.hpp
index 90485a90a..e1f2d5def 100644
--- a/modules/gpulegacy/test/test_precomp.hpp
+++ b/modules/gpulegacy/test/test_precomp.hpp
@@ -74,6 +74,8 @@
 
 #include "opencv2/core/private.gpu.hpp"
 
+#include "cvconfig.h"
+
 #include "NCVTest.hpp"
 #include "NCVAutoTestLister.hpp"
 #include "NCVTestSourceProvider.hpp"
diff --git a/modules/gpuoptflow/doc/optflow.rst b/modules/gpuoptflow/doc/optflow.rst
index 5962b2b81..937b209ae 100644
--- a/modules/gpuoptflow/doc/optflow.rst
+++ b/modules/gpuoptflow/doc/optflow.rst
@@ -3,6 +3,10 @@ Optical Flow
 
 .. highlight:: cpp
 
+.. note::
+
+   * A general optical flow example can be found at opencv_source_code/samples/gpu/optical_flow.cpp
+   * A general optical flow example using the Nvidia API can be found at opencv_source_code/samples/gpu/opticalflow_nvidia_api.cpp
 
 
 gpu::BroxOpticalFlow
@@ -44,6 +48,9 @@ Class computing the optical flow for two images using Brox et al Optical Flow al
         GpuMat buf;
     };
 
+.. note::
+
+   * An example illustrating the Brox et al optical flow algorithm can be found at opencv_source_code/samples/gpu/brox_optical_flow.cpp
 
 
 gpu::FarnebackOpticalFlow
@@ -138,6 +145,9 @@ The class can calculate an optical flow for a sparse feature set or dense optica
 
 .. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
 
+.. note::
+
+   * An example of the Lucas Kanade optical flow algorithm can be found at opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp
 
 
 gpu::PyrLKOpticalFlow::sparse
diff --git a/modules/gpuoptflow/perf/perf_main.cpp b/modules/gpuoptflow/perf/perf_main.cpp
index dad5e52bb..e261932f8 100644
--- a/modules/gpuoptflow/perf/perf_main.cpp
+++ b/modules/gpuoptflow/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpuoptflow, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpuoptflow)
diff --git a/modules/gpuoptflow/perf/perf_optflow.cpp b/modules/gpuoptflow/perf/perf_optflow.cpp
index 545225d62..562eb4b20 100644
--- a/modules/gpuoptflow/perf/perf_optflow.cpp
+++ b/modules/gpuoptflow/perf/perf_optflow.cpp
@@ -368,8 +368,8 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1,
 
         TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v);
 
-        GPU_SANITY_CHECK(u, 1e-2);
-        GPU_SANITY_CHECK(v, 1e-2);
+        GPU_SANITY_CHECK(u, 1e-1);
+        GPU_SANITY_CHECK(v, 1e-1);
     }
     else
     {
diff --git a/modules/gpuoptflow/perf/perf_precomp.cpp b/modules/gpuoptflow/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpuoptflow/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpuoptflow/src/cuda/needle_map.cu b/modules/gpuoptflow/src/cuda/needle_map.cu
index d361bcfc6..e0b1ef6b7 100644
--- a/modules/gpuoptflow/src/cuda/needle_map.cu
+++ b/modules/gpuoptflow/src/cuda/needle_map.cu
@@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace cudev
                 const float u_avg_val = u_avg(y, x);
                 const float v_avg_val = v_avg(y, x);
 
-                const float theta = ::atan2f(v_avg_val, u_avg_val);// + CV_PI;
+                const float theta = ::atan2f(v_avg_val, u_avg_val);
 
                 float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val);
                 r = fmin(14.0f * (r / max_flow), 14.0f);
diff --git a/modules/gpuoptflow/src/cuda/tvl1flow.cu b/modules/gpuoptflow/src/cuda/tvl1flow.cu
index 3d1c61264..75d30a624 100644
--- a/modules/gpuoptflow/src/cuda/tvl1flow.cu
+++ b/modules/gpuoptflow/src/cuda/tvl1flow.cu
@@ -211,7 +211,7 @@ namespace tvl1flow
                               const PtrStepf grad, const PtrStepf rho_c,
                               const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
                               PtrStepf u1, PtrStepf u2, PtrStepf error,
-                              const float l_t, const float theta)
+                              const float l_t, const float theta, const bool calcError)
     {
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
         const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -265,21 +265,24 @@ namespace tvl1flow
         u1(y, x) = u1NewVal;
         u2(y, x) = u2NewVal;
 
-        const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
-        const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
-        error(y, x) = n1 + n2;
+        if (calcError)
+        {
+            const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+            const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+            error(y, x) = n1 + n2;
+        }
     }
 
     void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
                    PtrStepSzf grad, PtrStepSzf rho_c,
                    PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
                    PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
-                   float l_t, float theta)
+                   float l_t, float theta, bool calcError)
     {
         const dim3 block(32, 8);
         const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
 
-        estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
+        estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta, calcError);
         cudaSafeCall( cudaGetLastError() );
 
         cudaSafeCall( cudaDeviceSynchronize() );
diff --git a/modules/gpuoptflow/src/needle_map.cpp b/modules/gpuoptflow/src/needle_map.cpp
index 1fdc16262..9ca8fe5e4 100644
--- a/modules/gpuoptflow/src/needle_map.cpp
+++ b/modules/gpuoptflow/src/needle_map.cpp
@@ -94,7 +94,7 @@ void cv::gpu::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMa
 
     CreateOpticalFlowNeedleMap_gpu(u_avg, v_avg, vertex.ptr<float>(), colors.ptr<float>(), max_flow, 1.0f / u.cols, 1.0f / u.rows);
 
-    cvtColor(colors, colors, COLOR_HSV2RGB);
+    gpu::cvtColor(colors, colors, COLOR_HSV2RGB);
 }
 
 #endif /* HAVE_CUDA */
diff --git a/modules/gpuoptflow/src/precomp.cpp b/modules/gpuoptflow/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpuoptflow/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpuoptflow/src/tvl1flow.cpp b/modules/gpuoptflow/src/tvl1flow.cpp
index e725e0f27..50b280157 100644
--- a/modules/gpuoptflow/src/tvl1flow.cpp
+++ b/modules/gpuoptflow/src/tvl1flow.cpp
@@ -173,7 +173,7 @@ namespace tvl1flow
                    PtrStepSzf grad, PtrStepSzf rho_c,
                    PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
                    PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
-                   float l_t, float theta);
+                   float l_t, float theta, bool calcError);
     void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut);
 }
 
@@ -218,11 +218,24 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const Gpu
         warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
 
         double error = std::numeric_limits<double>::max();
+        double prevError = 0.0;
         for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
         {
-            estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta));
+            // some tweaks to make sum operation less frequently
+            bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon);
 
-            error = gpu::sum(diff, norm_buf)[0];
+            estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta), calcError);
+
+            if (calcError)
+            {
+                error = gpu::sum(diff, norm_buf)[0];
+                prevError = error;
+            }
+            else
+            {
+                error = std::numeric_limits<double>::max();
+                prevError -= scaledEpsilon;
+            }
 
             estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
         }
diff --git a/modules/gpuoptflow/test/test_optflow.cpp b/modules/gpuoptflow/test/test_optflow.cpp
index c20260e19..fce07551d 100644
--- a/modules/gpuoptflow/test/test_optflow.cpp
+++ b/modules/gpuoptflow/test/test_optflow.cpp
@@ -80,7 +80,7 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
     brox(loadMat(frame0), loadMat(frame1), u, v);
 
     std::string fname(cvtest::TS::ptr()->get_data_path());
-    if (devInfo.major() >= 2)
+    if (devInfo.majorVersion() >= 2)
         fname += "opticalflow/brox_optical_flow_cc20.bin";
     else
         fname += "opticalflow/brox_optical_flow.bin";
diff --git a/modules/gpuoptflow/test/test_precomp.cpp b/modules/gpuoptflow/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpuoptflow/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpuoptflow/test/test_precomp.hpp b/modules/gpuoptflow/test/test_precomp.hpp
index 32a7443e8..afac60d87 100644
--- a/modules/gpuoptflow/test/test_precomp.hpp
+++ b/modules/gpuoptflow/test/test_precomp.hpp
@@ -59,4 +59,6 @@
 #include "opencv2/gpuoptflow.hpp"
 #include "opencv2/video.hpp"
 
+#include "cvconfig.h"
+
 #endif
diff --git a/modules/gpustereo/doc/stereo.rst b/modules/gpustereo/doc/stereo.rst
index 4064fe0a3..f87d43ff8 100644
--- a/modules/gpustereo/doc/stereo.rst
+++ b/modules/gpustereo/doc/stereo.rst
@@ -3,7 +3,11 @@ Stereo Correspondence
 
 .. highlight:: cpp
 
+.. note::
 
+   * A basic stereo matching example can be found at opencv_source_code/samples/gpu/stereo_match.cpp
+   * A stereo matching example using several GPU's can be found at opencv_source_code/samples/gpu/stereo_multi.cpp
+   * A stereo matching example using several GPU's and driver API can be found at opencv_source_code/samples/gpu/driver_api_stereo_multi.cpp
 
 gpu::StereoBM
 -------------
diff --git a/modules/gpustereo/perf/perf_main.cpp b/modules/gpustereo/perf/perf_main.cpp
index d681cdb4d..0766ccf05 100644
--- a/modules/gpustereo/perf/perf_main.cpp
+++ b/modules/gpustereo/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpustereo, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpustereo)
diff --git a/modules/gpustereo/perf/perf_precomp.cpp b/modules/gpustereo/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpustereo/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpustereo/src/disparity_bilateral_filter.cpp b/modules/gpustereo/src/disparity_bilateral_filter.cpp
index 689a9e76e..9808b7f66 100644
--- a/modules/gpustereo/src/disparity_bilateral_filter.cpp
+++ b/modules/gpustereo/src/disparity_bilateral_filter.cpp
@@ -200,7 +200,7 @@ namespace
 
 Ptr<gpu::DisparityBilateralFilter> cv::gpu::createDisparityBilateralFilter(int ndisp, int radius, int iters)
 {
-    return new DispBilateralFilterImpl(ndisp, radius, iters);
+    return makePtr<DispBilateralFilterImpl>(ndisp, radius, iters);
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpustereo/src/precomp.cpp b/modules/gpustereo/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpustereo/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpustereo/src/stereobm.cpp b/modules/gpustereo/src/stereobm.cpp
index 9b32cf7e9..2ef896d57 100644
--- a/modules/gpustereo/src/stereobm.cpp
+++ b/modules/gpustereo/src/stereobm.cpp
@@ -98,8 +98,8 @@ namespace
         int getPreFilterCap() const { return preFilterCap_; }
         void setPreFilterCap(int preFilterCap) { preFilterCap_ = preFilterCap; }
 
-        int getTextureThreshold() const { return avergeTexThreshold_; }
-        void setTextureThreshold(int textureThreshold) { avergeTexThreshold_ = textureThreshold; }
+        int getTextureThreshold() const { return static_cast<int>(avergeTexThreshold_); }
+        void setTextureThreshold(int textureThreshold) { avergeTexThreshold_ = static_cast<float>(textureThreshold); }
 
         int getUniquenessRatio() const { return 0; }
         void setUniquenessRatio(int /*uniquenessRatio*/) {}
@@ -179,7 +179,7 @@ namespace
 
 Ptr<gpu::StereoBM> cv::gpu::createStereoBM(int numDisparities, int blockSize)
 {
-    return new StereoBMImpl(numDisparities, blockSize);
+    return makePtr<StereoBMImpl>(numDisparities, blockSize);
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpustereo/src/stereobp.cpp b/modules/gpustereo/src/stereobp.cpp
index ac3bcfe33..f1ea9adf4 100644
--- a/modules/gpustereo/src/stereobp.cpp
+++ b/modules/gpustereo/src/stereobp.cpp
@@ -361,7 +361,7 @@ namespace
 
 Ptr<gpu::StereoBeliefPropagation> cv::gpu::createStereoBeliefPropagation(int ndisp, int iters, int levels, int msg_type)
 {
-    return new StereoBPImpl(ndisp, iters, levels, msg_type);
+    return makePtr<StereoBPImpl>(ndisp, iters, levels, msg_type);
 }
 
 void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
diff --git a/modules/gpustereo/src/stereocsbp.cpp b/modules/gpustereo/src/stereocsbp.cpp
index 9afd8d14e..b1135b276 100644
--- a/modules/gpustereo/src/stereocsbp.cpp
+++ b/modules/gpustereo/src/stereocsbp.cpp
@@ -366,7 +366,7 @@ namespace
 
 Ptr<gpu::StereoConstantSpaceBP> cv::gpu::createStereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, int msg_type)
 {
-    return new StereoCSBPImpl(ndisp, iters, levels, nr_plane, msg_type);
+    return makePtr<StereoCSBPImpl>(ndisp, iters, levels, nr_plane, msg_type);
 }
 
 void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
diff --git a/modules/gpustereo/test/test_precomp.cpp b/modules/gpustereo/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpustereo/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpustereo/test/test_precomp.hpp b/modules/gpustereo/test/test_precomp.hpp
index d55b1ec0d..eb34fcb56 100644
--- a/modules/gpustereo/test/test_precomp.hpp
+++ b/modules/gpustereo/test/test_precomp.hpp
@@ -57,4 +57,6 @@
 #include "opencv2/gpustereo.hpp"
 #include "opencv2/calib3d.hpp"
 
+#include "cvconfig.h"
+
 #endif
diff --git a/modules/gpuwarping/doc/warping.rst b/modules/gpuwarping/doc/warping.rst
index b2c95e2b0..5c88eb772 100644
--- a/modules/gpuwarping/doc/warping.rst
+++ b/modules/gpuwarping/doc/warping.rst
@@ -9,7 +9,7 @@ gpu::remap
 --------------
 Applies a generic geometrical transformation to an image.
 
-.. ocv:function:: void gpu::remap( const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::remap(InputArray src, OutputArray dst, InputArray xmap, InputArray ymap, int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null())
 
     :param src: Source image.
 
@@ -43,7 +43,7 @@ gpu::resize
 ---------------
 Resizes an image.
 
-.. ocv:function:: void gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
 
     :param src: Source image.
 
@@ -80,7 +80,7 @@ gpu::warpAffine
 -------------------
 Applies an affine transformation to an image.
 
-.. ocv:function:: void gpu::warpAffine( const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::warpAffine(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null())
 
     :param src: Source image.  ``CV_8U`` , ``CV_16U`` , ``CV_32S`` , or  ``CV_32F`` depth and 1, 3, or 4 channels are supported.
 
@@ -102,7 +102,7 @@ gpu::buildWarpAffineMaps
 ------------------------
 Builds transformation maps for affine transformation.
 
-.. ocv:function:: void gpu::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::buildWarpAffineMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null())
 
     :param M: *2x3*  transformation matrix.
 
@@ -124,7 +124,7 @@ gpu::warpPerspective
 ------------------------
 Applies a perspective transformation to an image.
 
-.. ocv:function:: void gpu::warpPerspective( const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::warpPerspective(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null())
 
     :param src: Source image. ``CV_8U`` , ``CV_16U`` , ``CV_32S`` , or  ``CV_32F`` depth and 1, 3, or 4 channels are supported.
 
@@ -146,7 +146,7 @@ gpu::buildWarpPerspectiveMaps
 -----------------------------
 Builds transformation maps for perspective transformation.
 
-.. ocv:function:: void gpu::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::buildWarpAffineMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null())
 
     :param M: *3x3*  transformation matrix.
 
@@ -164,11 +164,41 @@ Builds transformation maps for perspective transformation.
 
 
 
+gpu::buildWarpPlaneMaps
+-----------------------
+Builds plane warping maps.
+
+.. ocv:function:: void gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, InputArray T, float scale, OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null())
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::buildWarpCylindricalMaps
+-----------------------------
+Builds cylindrical warping maps.
+
+.. ocv:function:: void gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale, OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null())
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::buildWarpSphericalMaps
+---------------------------
+Builds spherical warping maps.
+
+.. ocv:function:: void gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale, OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null())
+
+    :param stream: Stream for the asynchronous version.
+
+
+
 gpu::rotate
 ---------------
 Rotates an image around the origin (0,0) and then shifts it.
 
-.. ocv:function:: void gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::rotate(InputArray src, OutputArray dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
 
     :param src: Source image. Supports 1, 3 or 4 channels images with ``CV_8U`` , ``CV_16U`` or ``CV_32F`` depth.
 
@@ -190,41 +220,11 @@ Rotates an image around the origin (0,0) and then shifts it.
 
 
 
-gpu::buildWarpPlaneMaps
------------------------
-Builds plane warping maps.
-
-.. ocv:function:: void gpu::buildWarpPlaneMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, const Mat & T, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() )
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::buildWarpCylindricalMaps
------------------------------
-Builds cylindrical warping maps.
-
-.. ocv:function:: void gpu::buildWarpCylindricalMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() )
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::buildWarpSphericalMaps
----------------------------
-Builds spherical warping maps.
-
-.. ocv:function:: void gpu::buildWarpSphericalMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() )
-
-    :param stream: Stream for the asynchronous version.
-
-
-
 gpu::pyrDown
 -------------------
 Smoothes an image and downsamples it.
 
-.. ocv:function:: void gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::pyrDown(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
 
     :param src: Source image.
 
@@ -240,7 +240,7 @@ gpu::pyrUp
 -------------------
 Upsamples an image and then smoothes it.
 
-.. ocv:function:: void gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::pyrUp(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
 
     :param src: Source image.
 
diff --git a/modules/gpuwarping/include/opencv2/gpuwarping.hpp b/modules/gpuwarping/include/opencv2/gpuwarping.hpp
index 18b42ce60..ed17464ec 100644
--- a/modules/gpuwarping/include/opencv2/gpuwarping.hpp
+++ b/modules/gpuwarping/include/opencv2/gpuwarping.hpp
@@ -54,78 +54,60 @@ namespace cv { namespace gpu {
 
 //! DST[x,y] = SRC[xmap[x,y],ymap[x,y]]
 //! supports only CV_32FC1 map type
-CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
+CV_EXPORTS void remap(InputArray src, OutputArray dst, InputArray xmap, InputArray ymap,
                       int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(),
                       Stream& stream = Stream::Null());
 
 //! resizes the image
 //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA
-CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
+CV_EXPORTS void resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
 
 //! warps the image using affine transformation
 //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
+CV_EXPORTS void warpAffine(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR,
     int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
 
-CV_EXPORTS void buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
+CV_EXPORTS void buildWarpAffineMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null());
 
 //! warps the image using perspective transformation
 //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
+CV_EXPORTS void warpPerspective(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR,
     int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
 
-CV_EXPORTS void buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
+CV_EXPORTS void buildWarpPerspectiveMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null());
 
 //! builds plane warping maps
-CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, float scale,
-                                   GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, InputArray T, float scale,
+                                   OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
 
 //! builds cylindrical warping maps
-CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                         GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale,
+                                         OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
 
 //! builds spherical warping maps
-CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                       GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale,
+                                       OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
 
 //! rotates an image around the origin (0,0) and then shifts it
 //! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 //! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth
-CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
+CV_EXPORTS void rotate(InputArray src, OutputArray dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
                        int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
 
 //! smoothes the source image and downsamples it
-CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+CV_EXPORTS void pyrDown(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
 
 //! upsamples the source image and then smoothes it
-CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+CV_EXPORTS void pyrUp(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
 
-class CV_EXPORTS ImagePyramid
+class CV_EXPORTS ImagePyramid : public Algorithm
 {
 public:
-    inline ImagePyramid() : nLayers_(0) {}
-    inline ImagePyramid(const GpuMat& img, int nLayers, Stream& stream = Stream::Null())
-    {
-        build(img, nLayers, stream);
-    }
-
-    void build(const GpuMat& img, int nLayers, Stream& stream = Stream::Null());
-
-    void getLayer(GpuMat& outImg, Size outRoi, Stream& stream = Stream::Null()) const;
-
-    inline void release()
-    {
-        layer0_.release();
-        pyramid_.clear();
-        nLayers_ = 0;
-    }
-
-private:
-    GpuMat layer0_;
-    std::vector<GpuMat> pyramid_;
-    int nLayers_;
+    virtual void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const = 0;
 };
 
+CV_EXPORTS Ptr<ImagePyramid> createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null());
+
 }} // namespace cv { namespace gpu {
 
 #endif /* __OPENCV_GPUWARPING_HPP__ */
diff --git a/modules/gpuwarping/perf/perf_main.cpp b/modules/gpuwarping/perf/perf_main.cpp
index a7c1d5c85..8566b097d 100644
--- a/modules/gpuwarping/perf/perf_main.cpp
+++ b/modules/gpuwarping/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpuwarping, printCudaInfo())
+CV_PERF_TEST_CUDA_MAIN(gpuwarping)
diff --git a/modules/gpuwarping/perf/perf_precomp.cpp b/modules/gpuwarping/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpuwarping/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpuwarping/perf/perf_warping.cpp b/modules/gpuwarping/perf/perf_warping.cpp
index fd555cbe5..266475bb0 100644
--- a/modules/gpuwarping/perf/perf_warping.cpp
+++ b/modules/gpuwarping/perf/perf_warping.cpp
@@ -515,45 +515,6 @@ PERF_TEST_P(Sz_Depth_Cn, PyrUp,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// ImagePyramidBuild
-
-PERF_TEST_P(Sz_Depth_Cn, ImagePyramidBuild,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    const int nLayers = 5;
-    const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-
-        cv::gpu::ImagePyramid d_pyr;
-
-        TEST_CYCLE() d_pyr.build(d_src, nLayers);
-
-        cv::gpu::GpuMat dst;
-        d_pyr.getLayer(dst, dstSize);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // ImagePyramidGetLayer
 
@@ -579,9 +540,9 @@ PERF_TEST_P(Sz_Depth_Cn, ImagePyramidGetLayer,
         const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat dst;
 
-        cv::gpu::ImagePyramid d_pyr(d_src, nLayers);
+        cv::Ptr<cv::gpu::ImagePyramid> d_pyr = cv::gpu::createImagePyramid(d_src, nLayers);
 
-        TEST_CYCLE() d_pyr.getLayer(dst, dstSize);
+        TEST_CYCLE() d_pyr->getLayer(dst, dstSize);
 
         GPU_SANITY_CHECK(dst);
     }
diff --git a/modules/gpuwarping/src/cuda/resize.cu b/modules/gpuwarping/src/cuda/resize.cu
index 04c1fb2ac..94422db9d 100644
--- a/modules/gpuwarping/src/cuda/resize.cu
+++ b/modules/gpuwarping/src/cuda/resize.cu
@@ -49,254 +49,434 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/filters.hpp"
-#include "opencv2/core/cuda/scan.hpp"
 
 namespace cv { namespace gpu { namespace cudev
 {
-    namespace imgproc
+    // kernels
+
+    template <typename T> __global__ void resize_nearest(const PtrStep<T> src, PtrStepSz<T> dst, const float fy, const float fx)
     {
-        template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
+        const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
+
+        if (dst_x < dst.cols && dst_y < dst.rows)
         {
-            const int x = blockDim.x * blockIdx.x + threadIdx.x;
-            const int y = blockDim.y * blockIdx.y + threadIdx.y;
+            const float src_x = dst_x * fx;
+            const float src_y = dst_y * fy;
 
-            if (x < dst.cols && y < dst.rows)
-            {
-                const float xcoo = x * fx;
-                const float ycoo = y * fy;
-
-                dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));
-            }
+            dst(dst_y, dst_x) = src(__float2int_rz(src_y), __float2int_rz(src_x));
         }
+    }
 
-        template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
+    template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, PtrStepSz<T> dst, const float fy, const float fx)
+    {
+        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
+
+        const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
+
+        if (dst_x < dst.cols && dst_y < dst.rows)
         {
-            const int x = blockDim.x * blockIdx.x + threadIdx.x;
-            const int y = blockDim.y * blockIdx.y + threadIdx.y;
+            const float src_x = dst_x * fx;
+            const float src_y = dst_y * fy;
 
-            if (x < dst.cols && y < dst.rows)
-            {
-                dst(y, x) = saturate_cast<T>(src(y, x));
-            }
+            work_type out = VecTraits<work_type>::all(0);
+
+            const int x1 = __float2int_rd(src_x);
+            const int y1 = __float2int_rd(src_y);
+            const int x2 = x1 + 1;
+            const int y2 = y1 + 1;
+            const int x2_read = ::min(x2, src.cols - 1);
+            const int y2_read = ::min(y2, src.rows - 1);
+
+            T src_reg = src(y1, x1);
+            out = out + src_reg * ((x2 - src_x) * (y2 - src_y));
+
+            src_reg = src(y1, x2_read);
+            out = out + src_reg * ((src_x - x1) * (y2 - src_y));
+
+            src_reg = src(y2_read, x1);
+            out = out + src_reg * ((x2 - src_x) * (src_y - y1));
+
+            src_reg = src(y2_read, x2_read);
+            out = out + src_reg * ((src_x - x1) * (src_y - y1));
+
+            dst(dst_y, dst_x) = saturate_cast<T>(out);
         }
+    }
 
-        template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
+    template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx)
+    {
+        const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
+
+        if (dst_x < dst.cols && dst_y < dst.rows)
         {
-            static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
-            {
-                dim3 block(32, 8);
-                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+            const float src_x = dst_x * fx;
+            const float src_y = dst_y * fy;
 
-                BrdReplicate<T> brd(src.rows, src.cols);
-                BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
-                Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);
+            dst(dst_y, dst_x) = src(src_y, src_x);
+        }
+    }
 
-                resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
-                cudaSafeCall( cudaGetLastError() );
-            }
+    template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, PtrStepSz<T> dst)
+    {
+        const int x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+        if (x < dst.cols && y < dst.rows)
+        {
+            dst(y, x) = src(y, x);
+        }
+    }
+
+    // textures
+
+    template <typename T> struct TextureAccessor;
+
+    #define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
+        texture<type, cudaTextureType2D, cudaReadModeElementType> tex_resize_##type (0, cudaFilterModePoint, cudaAddressModeClamp); \
+        template <> struct TextureAccessor<type> \
+        { \
+            typedef type elem_type; \
+            typedef int index_type; \
+            int xoff; \
+            int yoff; \
+            __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
+            { \
+                return tex2D(tex_resize_##type, x + xoff, y + yoff); \
+            } \
+            __host__ static void bind(const PtrStepSz<type>& mat) \
+            { \
+                bindTexture(&tex_resize_##type, mat); \
+            } \
         };
 
-        template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
-        {
-            static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
-            {
-                dim3 block(32, 8);
-                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
 
-                BrdConstant<T> brd(src.rows, src.cols);
-                BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
-                AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
-                resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
-                cudaSafeCall( cudaGetLastError() );
-                if (stream == 0)
-                    cudaSafeCall( cudaDeviceSynchronize() );
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
+
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
+
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
+
+    #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
+
+    template <typename T>
+    TextureAccessor<T> texAccessor(const PtrStepSz<T>& mat, int yoff, int xoff)
+    {
+        TextureAccessor<T>::bind(mat);
+
+        TextureAccessor<T> t;
+        t.xoff = xoff;
+        t.yoff = yoff;
+
+        return t;
+    }
+
+    // callers for nearest interpolation
+
+    template <typename T>
+    void call_resize_nearest_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+
+        resize_nearest<<<grid, block, 0, stream>>>(src, dst, fy, fx);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    template <typename T>
+    void call_resize_nearest_tex(const PtrStepSz<T>& /*src*/, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+
+        resize<<<grid, block>>>(texAccessor(srcWhole, yoff, xoff), dst, fy, fx);
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    // callers for linear interpolation
+
+    template <typename T>
+    void call_resize_linear_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+
+        resize_linear<<<grid, block>>>(src, dst, fy, fx);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    template <typename T>
+    void call_resize_linear_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+
+        if (srcWhole.data == src.data)
+        {
+            TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
+            LinearFilter< TextureAccessor<T> > filteredSrc(texSrc);
+
+            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
+        }
+        else
+        {
+            TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);
+
+            BrdReplicate<T> brd(src.rows, src.cols);
+            BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
+            LinearFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
+
+            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
+        }
+
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    // callers for cubic interpolation
+
+    template <typename T>
+    void call_resize_cubic_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+
+        BrdReplicate<T> brd(src.rows, src.cols);
+        BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
+        CubicFilter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
+
+        resize<<<grid, block, 0, stream>>>(filteredSrc, dst, fy, fx);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    template <typename T>
+    void call_resize_cubic_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+
+        if (srcWhole.data == src.data)
+        {
+            TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
+            CubicFilter< TextureAccessor<T> > filteredSrc(texSrc);
+
+            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
+        }
+        else
+        {
+            TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);
+
+            BrdReplicate<T> brd(src.rows, src.cols);
+            BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
+            CubicFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
+
+            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
+        }
+
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    // ResizeNearestDispatcher
+
+    template <typename T> struct ResizeNearestDispatcher
+    {
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& /*srcWhole*/, int /*yoff*/, int /*xoff*/, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+        {
+            call_resize_nearest_glob(src, dst, fy, fx, stream);
+        }
+    };
+
+    template <typename T> struct SelectImplForNearest
+    {
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+        {
+            if (stream)
+                call_resize_nearest_glob(src, dst, fy, fx, stream);
+            else
+            {
+                if (fx > 1 || fy > 1)
+                    call_resize_nearest_glob(src, dst, fy, fx, 0);
+                else
+                    call_resize_nearest_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
             }
-        };
+        }
+    };
 
-        template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
+    template <> struct ResizeNearestDispatcher<uchar> : SelectImplForNearest<uchar> {};
+    template <> struct ResizeNearestDispatcher<uchar4> : SelectImplForNearest<uchar4> {};
+
+    template <> struct ResizeNearestDispatcher<ushort> : SelectImplForNearest<ushort> {};
+    template <> struct ResizeNearestDispatcher<ushort4> : SelectImplForNearest<ushort4> {};
+
+    template <> struct ResizeNearestDispatcher<short> : SelectImplForNearest<short> {};
+    template <> struct ResizeNearestDispatcher<short4> : SelectImplForNearest<short4> {};
+
+    template <> struct ResizeNearestDispatcher<float> : SelectImplForNearest<float> {};
+    template <> struct ResizeNearestDispatcher<float4> : SelectImplForNearest<float4> {};
+
+    // ResizeLinearDispatcher
+
+    template <typename T> struct ResizeLinearDispatcher
+    {
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
         {
-            static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
+            call_resize_linear_glob(src, dst, fy, fx, stream);
+        }
+    };
+
+    template <typename T> struct SelectImplForLinear
+    {
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+        {
+            if (stream)
+                call_resize_linear_glob(src, dst, fy, fx, stream);
+            else
+            {
+                if (fx > 1 || fy > 1)
+                    call_resize_linear_glob(src, dst, fy, fx, 0);
+                else
+                    call_resize_linear_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
+            }
+        }
+    };
+
+    template <> struct ResizeLinearDispatcher<uchar> : SelectImplForLinear<uchar> {};
+    template <> struct ResizeLinearDispatcher<uchar4> : SelectImplForLinear<uchar4> {};
+
+    template <> struct ResizeLinearDispatcher<ushort> : SelectImplForLinear<ushort> {};
+    template <> struct ResizeLinearDispatcher<ushort4> : SelectImplForLinear<ushort4> {};
+
+    template <> struct ResizeLinearDispatcher<short> : SelectImplForLinear<short> {};
+    template <> struct ResizeLinearDispatcher<short4> : SelectImplForLinear<short4> {};
+
+    template <> struct ResizeLinearDispatcher<float> : SelectImplForLinear<float> {};
+    template <> struct ResizeLinearDispatcher<float4> : SelectImplForLinear<float4> {};
+
+    // ResizeCubicDispatcher
+
+    template <typename T> struct ResizeCubicDispatcher
+    {
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+        {
+            call_resize_cubic_glob(src, dst, fy, fx, stream);
+        }
+    };
+
+    template <typename T> struct SelectImplForCubic
+    {
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+        {
+            if (stream)
+                call_resize_cubic_glob(src, dst, fy, fx, stream);
+            else
+                call_resize_cubic_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
+        }
+    };
+
+    template <> struct ResizeCubicDispatcher<uchar> : SelectImplForCubic<uchar> {};
+    template <> struct ResizeCubicDispatcher<uchar4> : SelectImplForCubic<uchar4> {};
+
+    template <> struct ResizeCubicDispatcher<ushort> : SelectImplForCubic<ushort> {};
+    template <> struct ResizeCubicDispatcher<ushort4> : SelectImplForCubic<ushort4> {};
+
+    template <> struct ResizeCubicDispatcher<short> : SelectImplForCubic<short> {};
+    template <> struct ResizeCubicDispatcher<short4> : SelectImplForCubic<short4> {};
+
+    template <> struct ResizeCubicDispatcher<float> : SelectImplForCubic<float> {};
+    template <> struct ResizeCubicDispatcher<float4> : SelectImplForCubic<float4> {};
+
+    // ResizeAreaDispatcher
+
+    template <typename T> struct ResizeAreaDispatcher
+    {
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>&, int, int, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+        {
+            const int iscale_x = (int) round(fx);
+            const int iscale_y = (int) round(fy);
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+
+            if (std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
             {
-                dim3 block(32, 8);
-                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
                 BrdConstant<T> brd(src.rows, src.cols);
                 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
                 IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
-                resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
-                cudaSafeCall( cudaGetLastError() );
-                if (stream == 0)
-                    cudaSafeCall( cudaDeviceSynchronize() );
+
+                resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
             }
-        };
-
-        template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
-        {
-            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst)
+            else
             {
-                (void)srcWhole;
-                (void)xoff;
-                (void)yoff;
+                BrdConstant<T> brd(src.rows, src.cols);
+                BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
+                AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
 
-                dim3 block(32, 8);
-                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+                resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
+            }
 
-                BrdReplicate<T> brd(src.rows, src.cols);
-                BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
-                Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
-
-                resize<<<grid, block>>>(filteredSrc, fx, fy, dst);
-                cudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
 
+            if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
-            }
-        };
-
-        #define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
-            texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
-            struct tex_resize_ ## type ## _reader \
-            { \
-                typedef type elem_type; \
-                typedef int index_type; \
-                const int xoff; \
-                const int yoff; \
-                __host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
-                __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
-                { \
-                    return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \
-                } \
-            }; \
-            template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
-            { \
-                static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
-                { \
-                    dim3 block(32, 8); \
-                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
-                    bindTexture(&tex_resize_ ## type, srcWhole); \
-                    tex_resize_ ## type ## _reader texSrc(xoff, yoff); \
-                    if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
-                    { \
-                        Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
-                        resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
-                    } \
-                    else \
-                    { \
-                        BrdReplicate< type > brd(src.rows, src.cols); \
-                        BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
-                        Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
-                        resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
-                    } \
-                    cudaSafeCall( cudaGetLastError() ); \
-                    cudaSafeCall( cudaDeviceSynchronize() ); \
-                } \
-            };
-
-        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
-        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
-
-        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
-        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)
-
-        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
-        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
-
-        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
-        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
-
-        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
-        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)
-
-        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
-        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
-
-        #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
-
-        template <template <typename> class Filter, typename T> struct ResizeDispatcher
-        {
-            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
-            {
-                if (stream == 0)
-                    ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
-                else
-                    ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
-            }
-        };
-
-        template <typename T> struct ResizeDispatcher<AreaFilter, T>
-        {
-            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
-            {
-                (void)srcWhole;
-                (void)xoff;
-                (void)yoff;
-                int iscale_x = (int)round(fx);
-                int iscale_y = (int)round(fy);
-
-                if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
-                    ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
-                else
-                    ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
-            }
-        };
-
-        template <typename T> void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
-            PtrStepSzb dst, int interpolation, cudaStream_t stream)
-        {
-            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream);
-
-            static const caller_t callers[4] =
-            {
-                ResizeDispatcher<PointFilter, T>::call,
-                ResizeDispatcher<LinearFilter, T>::call,
-                ResizeDispatcher<CubicFilter, T>::call,
-                ResizeDispatcher<AreaFilter, T>::call
-            };
-            // chenge to linear if area interpolation upscaling
-            if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
-                interpolation = 1;
-
-            callers[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, fx, fy,
-                static_cast< PtrStepSz<T> >(dst), stream);
         }
+    };
 
-        template void resize_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
+    // resize
 
-        //template void resize_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-
-        template void resize_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-
-        template void resize_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-
-        //template void resize_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-
-        template void resize_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-
-        template<typename T> struct scan_traits{};
-
-        template<> struct scan_traits<uchar>
+    template <typename T> void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream)
+    {
+        typedef void (*func_t)(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream);
+        static const func_t funcs[4] =
         {
-            typedef float scan_line_type;
+            ResizeNearestDispatcher<T>::call,
+            ResizeLinearDispatcher<T>::call,
+            ResizeCubicDispatcher<T>::call,
+            ResizeAreaDispatcher<T>::call
         };
 
-    } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cudev
+        // change to linear if area interpolation upscaling
+        if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
+            interpolation = 1;
 
+        funcs[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), yoff, xoff, static_cast< PtrStepSz<T> >(dst), fy, fx, stream);
+    }
+
+    template void resize<uchar >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+    template void resize<uchar3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+    template void resize<uchar4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+
+    template void resize<ushort >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+    template void resize<ushort3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+    template void resize<ushort4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+
+    template void resize<short >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+    template void resize<short3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+    template void resize<short4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+
+    template void resize<float >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+    template void resize<float3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+    template void resize<float4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+}}}
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuwarping/src/precomp.cpp b/modules/gpuwarping/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpuwarping/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpuwarping/src/pyramids.cpp b/modules/gpuwarping/src/pyramids.cpp
index 19d5dcf94..aa86e3a40 100644
--- a/modules/gpuwarping/src/pyramids.cpp
+++ b/modules/gpuwarping/src/pyramids.cpp
@@ -42,14 +42,15 @@
 
 #include "precomp.hpp"
 
+using namespace cv;
+using namespace cv::gpu;
+
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::pyrDown(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::gpu::pyrUp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-
-void cv::gpu::ImagePyramid::build(const GpuMat&, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::ImagePyramid::getLayer(GpuMat&, Size, Stream&) const { throw_no_cuda(); }
+Ptr<ImagePyramid> cv::gpu::createImagePyramid(InputArray, int, Stream&) { throw_no_cuda(); return Ptr<ImagePyramid>(); }
 
 #else // HAVE_CUDA
 
@@ -64,12 +65,11 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
+void cv::gpu::pyrDown(InputArray _src, OutputArray _dst, Stream& stream)
 {
     using namespace cv::gpu::cudev::imgproc;
 
     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-
     static const func_t funcs[6][4] =
     {
         {pyrDown_gpu<uchar>      , 0 /*pyrDown_gpu<uchar2>*/ , pyrDown_gpu<uchar3>      , pyrDown_gpu<uchar4>      },
@@ -80,12 +80,15 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
         {pyrDown_gpu<float>      , 0 /*pyrDown_gpu<float2>*/ , pyrDown_gpu<float3>      , pyrDown_gpu<float4>      }
     };
 
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
+    GpuMat src = _src.getGpuMat();
+
+    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
 
     const func_t func = funcs[src.depth()][src.channels() - 1];
-    CV_Assert(func != 0);
+    CV_Assert( func != 0 );
 
-    dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
+    _dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
+    GpuMat dst = _dst.getGpuMat();
 
     func(src, dst, StreamAccessor::getStream(stream));
 }
@@ -102,12 +105,11 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
+void cv::gpu::pyrUp(InputArray _src, OutputArray _dst, Stream& stream)
 {
     using namespace cv::gpu::cudev::imgproc;
 
     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-
     static const func_t funcs[6][4] =
     {
         {pyrUp_gpu<uchar>      , 0 /*pyrUp_gpu<uchar2>*/ , pyrUp_gpu<uchar3>      , pyrUp_gpu<uchar4>      },
@@ -118,98 +120,124 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
         {pyrUp_gpu<float>      , 0 /*pyrUp_gpu<float2>*/ , pyrUp_gpu<float3>      , pyrUp_gpu<float4>      }
     };
 
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
+    GpuMat src = _src.getGpuMat();
+
+    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
 
     const func_t func = funcs[src.depth()][src.channels() - 1];
-    CV_Assert(func != 0);
+    CV_Assert( func != 0 );
 
-    dst.create(src.rows * 2, src.cols * 2, src.type());
+    _dst.create(src.rows * 2, src.cols * 2, src.type());
+    GpuMat dst = _dst.getGpuMat();
 
     func(src, dst, StreamAccessor::getStream(stream));
 }
 
-
 //////////////////////////////////////////////////////////////////////////////
 // ImagePyramid
 
-void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stream)
+#ifdef HAVE_OPENCV_GPULEGACY
+
+namespace
+{
+    class ImagePyramidImpl : public ImagePyramid
+    {
+    public:
+        ImagePyramidImpl(InputArray img, int nLayers, Stream& stream);
+
+        void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const;
+
+    private:
+        GpuMat layer0_;
+        std::vector<GpuMat> pyramid_;
+        int nLayers_;
+    };
+
+    ImagePyramidImpl::ImagePyramidImpl(InputArray _img, int numLayers, Stream& stream)
+    {
+        GpuMat img = _img.getGpuMat();
+
+        CV_Assert( img.depth() <= CV_32F && img.channels() <= 4 );
+
+        img.copyTo(layer0_, stream);
+
+        Size szLastLayer = img.size();
+        nLayers_ = 1;
+
+        if (numLayers <= 0)
+            numLayers = 255; // it will cut-off when any of the dimensions goes 1
+
+        pyramid_.resize(numLayers);
+
+        for (int i = 0; i < numLayers - 1; ++i)
+        {
+            Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
+
+            if (szCurLayer.width == 0 || szCurLayer.height == 0)
+                break;
+
+            ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
+            nLayers_++;
+
+            const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
+
+            cv::gpu::cudev::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
+
+            szLastLayer = szCurLayer;
+        }
+    }
+
+    void ImagePyramidImpl::getLayer(OutputArray _outImg, Size outRoi, Stream& stream) const
+    {
+        CV_Assert( outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0 );
+
+        ensureSizeIsEnough(outRoi, layer0_.type(), _outImg);
+        GpuMat outImg = _outImg.getGpuMat();
+
+        if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
+        {
+            layer0_.copyTo(outImg, stream);
+            return;
+        }
+
+        float lastScale = 1.0f;
+        float curScale;
+        GpuMat lastLayer = layer0_;
+        GpuMat curLayer;
+
+        for (int i = 0; i < nLayers_ - 1; ++i)
+        {
+            curScale = lastScale * 0.5f;
+            curLayer = pyramid_[i];
+
+            if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
+            {
+                curLayer.copyTo(outImg, stream);
+            }
+
+            if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
+                break;
+
+            lastScale = curScale;
+            lastLayer = curLayer;
+        }
+
+        cv::gpu::cudev::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
+    }
+}
+
+#endif
+
+Ptr<ImagePyramid> cv::gpu::createImagePyramid(InputArray img, int nLayers, Stream& stream)
 {
 #ifndef HAVE_OPENCV_GPULEGACY
     (void) img;
     (void) numLayers;
     (void) stream;
     throw_no_cuda();
+    return Ptr<ImagePyramid>();
 #else
-    CV_Assert(img.depth() <= CV_32F && img.channels() <= 4);
-
-    layer0_ = img;
-    Size szLastLayer = img.size();
-    nLayers_ = 1;
-
-    if (numLayers <= 0)
-        numLayers = 255; //it will cut-off when any of the dimensions goes 1
-
-    pyramid_.resize(numLayers);
-
-    for (int i = 0; i < numLayers - 1; ++i)
-    {
-        Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
-
-        if (szCurLayer.width == 0 || szCurLayer.height == 0)
-            break;
-
-        ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
-        nLayers_++;
-
-        const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
-
-        cudev::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
-
-        szLastLayer = szCurLayer;
-    }
-#endif
-}
-
-void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream) const
-{
-#ifndef HAVE_OPENCV_GPULEGACY
-    (void) outImg;
-    (void) outRoi;
-    (void) stream;
-    throw_no_cuda();
-#else
-    CV_Assert(outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0);
-
-    ensureSizeIsEnough(outRoi, layer0_.type(), outImg);
-
-    if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
-    {
-        layer0_.copyTo(outImg, stream);
-    }
-
-    float lastScale = 1.0f;
-    float curScale;
-    GpuMat lastLayer = layer0_;
-    GpuMat curLayer;
-
-    for (int i = 0; i < nLayers_ - 1; ++i)
-    {
-        curScale = lastScale * 0.5f;
-        curLayer = pyramid_[i];
-
-        if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
-        {
-            curLayer.copyTo(outImg, stream);
-        }
-
-        if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
-            break;
-
-        lastScale = curScale;
-        lastLayer = curLayer;
-    }
-
-    cudev::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
+    return Ptr<ImagePyramid>(new ImagePyramidImpl(img, nLayers, stream));
 #endif
 }
 
diff --git a/modules/gpuwarping/src/remap.cpp b/modules/gpuwarping/src/remap.cpp
index 131f93763..c3d797783 100644
--- a/modules/gpuwarping/src/remap.cpp
+++ b/modules/gpuwarping/src/remap.cpp
@@ -44,7 +44,7 @@
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, Scalar, Stream&){ throw_no_cuda(); }
+void cv::gpu::remap(InputArray, OutputArray, InputArray, InputArray, int, int, Scalar, Stream&){ throw_no_cuda(); }
 
 #else // HAVE_CUDA
 
@@ -58,13 +58,12 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
+void cv::gpu::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
 {
     using namespace cv::gpu::cudev::imgproc;
 
     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
         int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
-
     static const func_t funcs[6][4] =
     {
         {remap_gpu<uchar>      , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3>     , remap_gpu<uchar4>     },
@@ -75,15 +74,21 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
         {remap_gpu<float>      , 0 /*remap_gpu<float2>*/ , remap_gpu<float3>     , remap_gpu<float4>     }
     };
 
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-    CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
-    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
+    GpuMat src = _src.getGpuMat();
+    GpuMat xmap = _xmap.getGpuMat();
+    GpuMat ymap = _ymap.getGpuMat();
+
+    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
+    CV_Assert( xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size() );
+    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
+    CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
 
     const func_t func = funcs[src.depth()][src.channels() - 1];
-    CV_Assert(func != 0);
+    if (!func)
+        CV_Error(Error::StsUnsupportedFormat, "Unsupported input type");
 
-    dst.create(xmap.size(), src.type());
+    _dst.create(xmap.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
 
     Scalar_<float> borderValueFloat;
     borderValueFloat = borderValue;
diff --git a/modules/gpuwarping/src/resize.cpp b/modules/gpuwarping/src/resize.cpp
index 5cb518483..9c82318f9 100644
--- a/modules/gpuwarping/src/resize.cpp
+++ b/modules/gpuwarping/src/resize.cpp
@@ -44,105 +44,65 @@
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_no_cuda(); }
+void cv::gpu::resize(InputArray, OutputArray, Size, double, double, int, Stream&) { throw_no_cuda(); }
 
 #else // HAVE_CUDA
 
 namespace cv { namespace gpu { namespace cudev
 {
-    namespace imgproc
-    {
-        template <typename T>
-        void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
-                        PtrStepSzb dst, int interpolation, cudaStream_t stream);
-    }
+    template <typename T>
+    void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
 }}}
 
-void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
+void cv::gpu::resize(InputArray _src, OutputArray _dst, Size dsize, double fx, double fy, int interpolation, Stream& stream)
 {
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR
-            || interpolation == INTER_CUBIC || interpolation == INTER_AREA);
-    CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));
+    GpuMat src = _src.getGpuMat();
+
+    typedef void (*func_t)(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
+    static const func_t funcs[6][4] =
+    {
+        {cudev::resize<uchar>      , 0 /*cudev::resize<uchar2>*/ , cudev::resize<uchar3>     , cudev::resize<uchar4>     },
+        {0 /*cudev::resize<schar>*/, 0 /*cudev::resize<char2>*/  , 0 /*cudev::resize<char3>*/, 0 /*cudev::resize<char4>*/},
+        {cudev::resize<ushort>     , 0 /*cudev::resize<ushort2>*/, cudev::resize<ushort3>    , cudev::resize<ushort4>    },
+        {cudev::resize<short>      , 0 /*cudev::resize<short2>*/ , cudev::resize<short3>     , cudev::resize<short4>     },
+        {0 /*cudev::resize<int>*/  , 0 /*cudev::resize<int2>*/   , 0 /*cudev::resize<int3>*/ , 0 /*cudev::resize<int4>*/ },
+        {cudev::resize<float>      , 0 /*cudev::resize<float2>*/ , cudev::resize<float3>     , cudev::resize<float4>     }
+    };
+
+    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
+    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA );
+    CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
 
     if (dsize == Size())
+    {
         dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
+    }
     else
     {
         fx = static_cast<double>(dsize.width) / src.cols;
         fy = static_cast<double>(dsize.height) / src.rows;
     }
-    if (dsize != dst.size())
-        dst.create(dsize, src.type());
+
+    _dst.create(dsize, src.type());
+    GpuMat dst = _dst.getGpuMat();
 
     if (dsize == src.size())
     {
-        src.copyTo(dst, s);
+        src.copyTo(dst, stream);
         return;
     }
 
-    cudaStream_t stream = StreamAccessor::getStream(s);
+    const func_t func = funcs[src.depth()][src.channels() - 1];
+
+    if (!func)
+        CV_Error(Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
 
     Size wholeSize;
     Point ofs;
     src.locateROI(wholeSize, ofs);
+    PtrStepSzb wholeSrc(wholeSize.height, wholeSize.width, src.datastart, src.step);
 
-    bool useNpp = (src.type() == CV_8UC1 || src.type() == CV_8UC4);
-    useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR);
-
-    if (useNpp)
-    {
-        typedef NppStatus (*func_t)(const Npp8u * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, Npp8u * pDst, int nDstStep, NppiSize dstROISize,
-                                    double xFactor, double yFactor, int eInterpolation);
-
-        const func_t funcs[4] = { nppiResize_8u_C1R, 0, 0, nppiResize_8u_C4R };
-
-        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
-
-        NppiSize srcsz;
-        srcsz.width  = wholeSize.width;
-        srcsz.height = wholeSize.height;
-
-        NppiRect srcrect;
-        srcrect.x = ofs.x;
-        srcrect.y = ofs.y;
-        srcrect.width  = src.cols;
-        srcrect.height = src.rows;
-
-        NppiSize dstsz;
-        dstsz.width  = dst.cols;
-        dstsz.height = dst.rows;
-
-        NppStreamHandler h(stream);
-
-        nppSafeCall( funcs[src.channels() - 1](src.datastart, srcsz, static_cast<int>(src.step), srcrect,
-                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else
-    {
-        using namespace ::cv::gpu::cudev::imgproc;
-
-        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-
-        static const func_t funcs[6][4] =
-        {
-            {resize_gpu<uchar>      , 0 /*resize_gpu<uchar2>*/ , resize_gpu<uchar3>     , resize_gpu<uchar4>     },
-            {0 /*resize_gpu<schar>*/, 0 /*resize_gpu<char2>*/  , 0 /*resize_gpu<char3>*/, 0 /*resize_gpu<char4>*/},
-            {resize_gpu<ushort>     , 0 /*resize_gpu<ushort2>*/, resize_gpu<ushort3>    , resize_gpu<ushort4>    },
-            {resize_gpu<short>      , 0 /*resize_gpu<short2>*/ , resize_gpu<short3>     , resize_gpu<short4>     },
-            {0 /*resize_gpu<int>*/  , 0 /*resize_gpu<int2>*/   , 0 /*resize_gpu<int3>*/ , 0 /*resize_gpu<int4>*/ },
-            {resize_gpu<float>      , 0 /*resize_gpu<float2>*/ , resize_gpu<float3>     , resize_gpu<float4>     }
-        };
-
-        const func_t func = funcs[src.depth()][src.channels() - 1];
-        CV_Assert(func != 0);
-
-        func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
-            static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, stream);
-    }
+    func(src, wholeSrc, ofs.y, ofs.x, dst, static_cast<float>(1.0 / fy), static_cast<float>(1.0 / fx), interpolation, StreamAccessor::getStream(stream));
 }
 
 #endif // HAVE_CUDA
diff --git a/modules/gpuwarping/src/warp.cpp b/modules/gpuwarping/src/warp.cpp
index b3c44e562..2775fc084 100644
--- a/modules/gpuwarping/src/warp.cpp
+++ b/modules/gpuwarping/src/warp.cpp
@@ -47,17 +47,17 @@ using namespace cv::gpu;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpAffineMaps(const Mat&, bool, Size, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
+void cv::gpu::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpPerspectiveMaps(const Mat&, bool, Size, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::warpPerspective(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
+void cv::gpu::buildWarpPerspectiveMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::buildWarpPlaneMaps(Size, Rect, InputArray, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::gpu::buildWarpCylindricalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::gpu::buildWarpSphericalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
+void cv::gpu::rotate(InputArray, OutputArray, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
 
 #else // HAVE_CUDA
 
@@ -79,14 +79,19 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream)
+void cv::gpu::buildWarpAffineMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
 {
     using namespace cv::gpu::cudev::imgproc;
 
-    CV_Assert(M.rows == 2 && M.cols == 3);
+    Mat M = _M.getMat();
 
-    xmap.create(dsize, CV_32FC1);
-    ymap.create(dsize, CV_32FC1);
+    CV_Assert( M.rows == 2 && M.cols == 3 );
+
+    _xmap.create(dsize, CV_32FC1);
+    _ymap.create(dsize, CV_32FC1);
+
+    GpuMat xmap = _xmap.getGpuMat();
+    GpuMat ymap = _ymap.getGpuMat();
 
     float coeffs[2 * 3];
     Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);
@@ -103,14 +108,19 @@ void cv::gpu::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat
     buildWarpAffineMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream)
+void cv::gpu::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
 {
     using namespace cv::gpu::cudev::imgproc;
 
-    CV_Assert(M.rows == 3 && M.cols == 3);
+    Mat M = _M.getMat();
 
-    xmap.create(dsize, CV_32FC1);
-    ymap.create(dsize, CV_32FC1);
+    CV_Assert( M.rows == 3 && M.cols == 3 );
+
+    _xmap.create(dsize, CV_32FC1);
+    _ymap.create(dsize, CV_32FC1);
+
+    GpuMat xmap = _xmap.getGpuMat();
+    GpuMat ymap = _ymap.getGpuMat();
 
     float coeffs[3 * 3];
     Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);
@@ -174,17 +184,21 @@ namespace
     };
 }
 
-void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& s)
+void cv::gpu::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
 {
-    CV_Assert(M.rows == 2 && M.cols == 3);
+    GpuMat src = _src.getGpuMat();
+    Mat M = _M.getMat();
 
-    int interpolation = flags & INTER_MAX;
+    CV_Assert( M.rows == 2 && M.cols == 3 );
 
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
-    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
+    const int interpolation = flags & INTER_MAX;
 
-    dst.create(dsize, src.type());
+    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
+    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
+    CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
+
+    _dst.create(dsize, src.type());
+    GpuMat dst = _dst.getGpuMat();
 
     Size wholeSize;
     Point ofs;
@@ -258,7 +272,7 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
             }
         };
 
-        dst.setTo(borderValue);
+        dst.setTo(borderValue, stream);
 
         double coeffs[2][3];
         Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
@@ -267,7 +281,7 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
         const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
         CV_Assert(func != 0);
 
-        func(src, dst, coeffs, interpolation, StreamAccessor::getStream(s));
+        func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
     }
     else
     {
@@ -305,21 +319,25 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
         borderValueFloat = borderValue;
 
         func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
-            dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
+            dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
     }
 }
 
-void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& s)
+void cv::gpu::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
 {
-    CV_Assert(M.rows == 3 && M.cols == 3);
+    GpuMat src = _src.getGpuMat();
+    Mat M = _M.getMat();
 
-    int interpolation = flags & INTER_MAX;
+    CV_Assert( M.rows == 3 && M.cols == 3 );
 
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
-    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
+    const int interpolation = flags & INTER_MAX;
 
-    dst.create(dsize, src.type());
+    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
+    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
+    CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP) ;
+
+    _dst.create(dsize, src.type());
+    GpuMat dst = _dst.getGpuMat();
 
     Size wholeSize;
     Point ofs;
@@ -393,7 +411,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
             }
         };
 
-        dst.setTo(borderValue);
+        dst.setTo(borderValue, stream);
 
         double coeffs[3][3];
         Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
@@ -402,7 +420,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
         const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
         CV_Assert(func != 0);
 
-        func(src, dst, coeffs, interpolation, StreamAccessor::getStream(s));
+        func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
     }
     else
     {
@@ -440,7 +458,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
         borderValueFloat = borderValue;
 
         func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
-            dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
+            dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
     }
 }
 
@@ -457,23 +475,30 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T,
-                                 float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)
+void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, InputArray _T,
+                                 float scale, OutputArray _map_x, OutputArray _map_y, Stream& stream)
 {
-    (void)src_size;
-    using namespace ::cv::gpu::cudev::imgproc;
+    (void) src_size;
 
-    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
-    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
-    CV_Assert((T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32F && T.isContinuous());
+    Mat K = _K.getMat();
+    Mat R = _R.getMat();
+    Mat T = _T.getMat();
+
+    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
+    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
+    CV_Assert( (T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32FC1 && T.isContinuous() );
 
     Mat K_Rinv = K * R.t();
     Mat R_Kinv = R * K.inv();
-    CV_Assert(K_Rinv.isContinuous());
-    CV_Assert(R_Kinv.isContinuous());
+    CV_Assert( K_Rinv.isContinuous() );
+    CV_Assert( R_Kinv.isContinuous() );
+
+    _map_x.create(dst_roi.size(), CV_32FC1);
+    _map_y.create(dst_roi.size(), CV_32FC1);
+
+    GpuMat map_x = _map_x.getGpuMat();
+    GpuMat map_y = _map_y.getGpuMat();
 
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
     cudev::imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
                        T.ptr<float>(), scale, StreamAccessor::getStream(stream));
 }
@@ -491,22 +516,28 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                       GpuMat& map_x, GpuMat& map_y, Stream& stream)
+void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
+                                       OutputArray _map_x, OutputArray _map_y, Stream& stream)
 {
-    (void)src_size;
-    using namespace ::cv::gpu::cudev::imgproc;
+    (void) src_size;
 
-    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
-    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
+    Mat K = _K.getMat();
+    Mat R = _R.getMat();
+
+    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
+    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
 
     Mat K_Rinv = K * R.t();
     Mat R_Kinv = R * K.inv();
-    CV_Assert(K_Rinv.isContinuous());
-    CV_Assert(R_Kinv.isContinuous());
+    CV_Assert( K_Rinv.isContinuous() );
+    CV_Assert( R_Kinv.isContinuous() );
+
+    _map_x.create(dst_roi.size(), CV_32FC1);
+    _map_y.create(dst_roi.size(), CV_32FC1);
+
+    GpuMat map_x = _map_x.getGpuMat();
+    GpuMat map_y = _map_y.getGpuMat();
 
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
     cudev::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
 }
 
@@ -524,22 +555,28 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                     GpuMat& map_x, GpuMat& map_y, Stream& stream)
+void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
+                                     OutputArray _map_x, OutputArray _map_y, Stream& stream)
 {
-    (void)src_size;
-    using namespace ::cv::gpu::cudev::imgproc;
+    (void) src_size;
 
-    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
-    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
+    Mat K = _K.getMat();
+    Mat R = _R.getMat();
+
+    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
+    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
 
     Mat K_Rinv = K * R.t();
     Mat R_Kinv = R * K.inv();
-    CV_Assert(K_Rinv.isContinuous());
-    CV_Assert(R_Kinv.isContinuous());
+    CV_Assert( K_Rinv.isContinuous() );
+    CV_Assert( R_Kinv.isContinuous() );
+
+    _map_x.create(dst_roi.size(), CV_32FC1);
+    _map_y.create(dst_roi.size(), CV_32FC1);
+
+    GpuMat map_x = _map_x.getGpuMat();
+    GpuMat map_y = _map_y.getGpuMat();
 
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
     cudev::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
 }
 
@@ -589,10 +626,9 @@ namespace
     };
 }
 
-void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
+void cv::gpu::rotate(InputArray _src, OutputArray _dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
-
     static const func_t funcs[6][4] =
     {
         {NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
@@ -603,12 +639,16 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
         {NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
     };
 
-    CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-    CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
+    GpuMat src = _src.getGpuMat();
 
-    dst.create(dsize, src.type());
-    dst.setTo(Scalar::all(0));
+    CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
+    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
+    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
+
+    _dst.create(dsize, src.type());
+    GpuMat dst = _dst.getGpuMat();
+
+    dst.setTo(Scalar::all(0), stream);
 
     funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
 }
diff --git a/modules/gpuwarping/test/test_precomp.cpp b/modules/gpuwarping/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpuwarping/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpuwarping/test/test_precomp.hpp b/modules/gpuwarping/test/test_precomp.hpp
index 90a28fe1a..9ac7fc0b8 100644
--- a/modules/gpuwarping/test/test_precomp.hpp
+++ b/modules/gpuwarping/test/test_precomp.hpp
@@ -57,6 +57,8 @@
 #include "opencv2/gpuwarping.hpp"
 #include "opencv2/imgproc.hpp"
 
+#include "cvconfig.h"
+
 #include "interpolation.hpp"
 
 #endif
diff --git a/modules/gpuwarping/test/test_resize.cpp b/modules/gpuwarping/test/test_resize.cpp
index 27289834a..13326c07b 100644
--- a/modules/gpuwarping/test/test_resize.cpp
+++ b/modules/gpuwarping/test/test_resize.cpp
@@ -155,7 +155,7 @@ GPU_TEST_P(Resize, Accuracy)
 INSTANTIATE_TEST_CASE_P(GPU_Warping, Resize, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
     testing::Values(0.3, 0.5, 1.5, 2.0),
     testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
     WHOLE_SUBMAT));
@@ -201,50 +201,9 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy)
 INSTANTIATE_TEST_CASE_P(GPU_Warping, ResizeSameAsHost, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
     testing::Values(0.3, 0.5),
-    testing::Values(Interpolation(cv::INTER_AREA), Interpolation(cv::INTER_NEAREST)),  //, Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_AREA)),
     WHOLE_SUBMAT));
 
-///////////////////////////////////////////////////////////////////
-// Test NPP
-
-PARAM_TEST_CASE(ResizeNPP, cv::gpu::DeviceInfo, MatType, double, Interpolation)
-{
-    cv::gpu::DeviceInfo devInfo;
-    double coeff;
-    int interpolation;
-    int type;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        type = GET_PARAM(1);
-        coeff = GET_PARAM(2);
-        interpolation = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(ResizeNPP, Accuracy)
-{
-    cv::Mat src = readImageType("stereobp/aloe-L.png", type);
-    ASSERT_FALSE(src.empty());
-
-    cv::gpu::GpuMat dst;
-    cv::gpu::resize(loadMat(src), dst, cv::Size(), coeff, coeff, interpolation);
-
-    cv::Mat dst_gold;
-    resizeGold(src, dst_gold, coeff, coeff, interpolation);
-
-    EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-1);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Warping, ResizeNPP, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
-    testing::Values(0.3, 0.5, 1.5, 2.0),
-    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR))));
-
 #endif // HAVE_CUDA
diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt
index aec364a00..02ede4d2f 100644
--- a/modules/highgui/CMakeLists.txt
+++ b/modules/highgui/CMakeLists.txt
@@ -9,13 +9,12 @@ ocv_add_module(highgui opencv_imgproc OPTIONAL opencv_androidcamera)
 
 ocv_clear_vars(GRFMT_LIBS)
 
-if(WITH_PNG OR WITH_TIFF OR WITH_OPENEXR)
-  ocv_include_directories(${ZLIB_INCLUDE_DIR})
+if(HAVE_PNG OR HAVE_TIFF OR HAVE_OPENEXR)
+  ocv_include_directories(${ZLIB_INCLUDE_DIRS})
   list(APPEND GRFMT_LIBS ${ZLIB_LIBRARIES})
 endif()
 
-if(WITH_JPEG)
-  add_definitions(-DHAVE_JPEG)
+if(HAVE_JPEG)
   ocv_include_directories(${JPEG_INCLUDE_DIR})
   list(APPEND GRFMT_LIBS ${JPEG_LIBRARIES})
 endif()
@@ -26,27 +25,23 @@ if(WITH_WEBP)
   list(APPEND GRFMT_LIBS ${WEBP_LIBRARIES})
 endif()
 
-if(WITH_PNG)
-  add_definitions(-DHAVE_PNG)
+if(HAVE_PNG)
   add_definitions(${PNG_DEFINITIONS})
   ocv_include_directories(${PNG_INCLUDE_DIR})
   list(APPEND GRFMT_LIBS ${PNG_LIBRARIES})
 endif()
 
-if(WITH_TIFF)
-  add_definitions(-DHAVE_TIFF)
+if(HAVE_TIFF)
   ocv_include_directories(${TIFF_INCLUDE_DIR})
   list(APPEND GRFMT_LIBS ${TIFF_LIBRARIES})
 endif()
 
-if(WITH_JASPER)
-  add_definitions(-DHAVE_JASPER)
+if(HAVE_JASPER)
   ocv_include_directories(${JASPER_INCLUDE_DIR})
   list(APPEND GRFMT_LIBS ${JASPER_LIBRARIES})
 endif()
 
-if(WITH_OPENEXR)
-  add_definitions(-DHAVE_OPENEXR)
+if(HAVE_OPENEXR)
   include_directories(SYSTEM ${OPENEXR_INCLUDE_PATHS})
   list(APPEND GRFMT_LIBS ${OPENEXR_LIBRARIES})
 endif()
@@ -71,7 +66,6 @@ set(highgui_srcs
     src/cap_images.cpp
     src/cap_ffmpeg.cpp
     src/loadsave.cpp
-    src/precomp.cpp
     src/utils.cpp
     src/window.cpp
     )
@@ -116,16 +110,12 @@ elseif(HAVE_WIN32UI)
   list(APPEND highgui_srcs src/window_w32.cpp)
 elseif(HAVE_GTK)
   list(APPEND highgui_srcs src/window_gtk.cpp)
-elseif(APPLE)
-  if(WITH_CARBON)
-    add_definitions(-DHAVE_CARBON=1)
-    list(APPEND highgui_srcs src/window_carbon.cpp)
-    list(APPEND HIGHGUI_LIBRARIES "-framework Carbon" "-framework QuickTime")
-  elseif(NOT IOS)
-    add_definitions(-DHAVE_COCOA=1)
-    list(APPEND highgui_srcs src/window_cocoa.mm)
-    list(APPEND HIGHGUI_LIBRARIES "-framework Cocoa")
-  endif()
+elseif(HAVE_CARBON)
+  list(APPEND highgui_srcs src/window_carbon.cpp)
+  list(APPEND HIGHGUI_LIBRARIES "-framework Carbon" "-framework QuickTime")
+elseif(HAVE_COCOA)
+  list(APPEND highgui_srcs src/window_cocoa.mm)
+  list(APPEND HIGHGUI_LIBRARIES "-framework Cocoa")
 endif()
 
 if(WIN32 AND NOT ARM)
@@ -205,6 +195,7 @@ endif(HAVE_FFMPEG)
 
 if(HAVE_PVAPI)
   add_definitions(-DHAVE_PVAPI)
+  add_definitions(${PVAPI_DEFINITIONS})
   ocv_include_directories(${PVAPI_INCLUDE_PATH})
   set(highgui_srcs src/cap_pvapi.cpp ${highgui_srcs})
   list(APPEND HIGHGUI_LIBRARIES ${PVAPI_LIBRARY})
@@ -218,24 +209,22 @@ if(HAVE_GIGE_API)
   list(APPEND highgui_srcs src/cap_giganetix.cpp)
 endif(HAVE_GIGE_API)
 
-if(WITH_AVFOUNDATION)
-  add_definitions(-DHAVE_AVFOUNDATION=1)
+if(HAVE_AVFOUNDATION)
   list(APPEND highgui_srcs src/cap_avfoundation.mm)
   list(APPEND HIGHGUI_LIBRARIES "-framework AVFoundation" "-framework QuartzCore")
-elseif(APPLE)
-  add_definitions(-DHAVE_QUICKTIME=1)
-  if(WITH_QUICKTIME)
-    list(APPEND highgui_srcs src/cap_qt.cpp)
-    list(APPEND HIGHGUI_LIBRARIES "-framework Carbon" "-framework QuickTime" "-framework CoreFoundation" "-framework QuartzCore")
-  else()
-    list(APPEND highgui_srcs src/cap_qtkit.mm)
-    list(APPEND HIGHGUI_LIBRARIES "-framework QTKit" "-framework QuartzCore" "-framework AppKit")
-  endif()
+endif()
+
+if(HAVE_QUICKTIME)
+  list(APPEND highgui_srcs src/cap_qt.cpp)
+  list(APPEND HIGHGUI_LIBRARIES "-framework Carbon" "-framework QuickTime" "-framework CoreFoundation" "-framework QuartzCore")
+elseif(HAVE_QTKIT)
+  list(APPEND highgui_srcs src/cap_qtkit.mm)
+  list(APPEND HIGHGUI_LIBRARIES "-framework QTKit" "-framework QuartzCore" "-framework AppKit")
 endif()
 
 if(IOS)
   add_definitions(-DHAVE_IOS=1)
-  list(APPEND highgui_srcs src/cap_ios_abstract_camera.mm src/cap_ios_photo_camera.mm src/cap_ios_video_camera.mm)
+  list(APPEND highgui_srcs src/ios_conversions.mm src/cap_ios_abstract_camera.mm src/cap_ios_photo_camera.mm src/cap_ios_video_camera.mm)
   list(APPEND HIGHGUI_LIBRARIES "-framework Accelerate" "-framework AVFoundation" "-framework CoreGraphics" "-framework CoreImage" "-framework CoreMedia" "-framework CoreVideo" "-framework QuartzCore" "-framework AssetsLibrary")
 endif()
 
@@ -305,11 +294,6 @@ if(WIN32 AND WITH_FFMPEG)
   set(ffmpeg_bare_name_ver "opencv_ffmpeg${OPENCV_DLLVERSION}${FFMPEG_SUFFIX}.dll")
   set(ffmpeg_path "${OpenCV_SOURCE_DIR}/3rdparty/ffmpeg/${ffmpeg_bare_name}")
 
-  #if(MSVC AND CMAKE_VERSION VERSION_GREATER "2.8.2")
-  #  add_custom_command(TARGET ${the_module} POST_BUILD
-  #                     COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/$<CONFIGURATION>/${ffmpeg_bare_name_ver}"
-  #                     COMMENT "Copying ${ffmpeg_path} to the output directory")
-  #else
   if(MSVC_IDE)
     add_custom_command(TARGET ${the_module} POST_BUILD
                        COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/Release/${ffmpeg_bare_name_ver}"
diff --git a/modules/highgui/doc/qt_new_functions.rst b/modules/highgui/doc/qt_new_functions.rst
index ee77e4d58..3d11445b1 100644
--- a/modules/highgui/doc/qt_new_functions.rst
+++ b/modules/highgui/doc/qt_new_functions.rst
@@ -334,5 +334,3 @@ See below various examples of the ``createButton`` function call: ::
     createButton("button6",callbackButton2,NULL,CV_PUSH_BUTTON,1);
 
 ..
-
-
diff --git a/modules/highgui/doc/reading_and_writing_images_and_video.rst b/modules/highgui/doc/reading_and_writing_images_and_video.rst
index d52e0ecfb..6b956b40d 100644
--- a/modules/highgui/doc/reading_and_writing_images_and_video.rst
+++ b/modules/highgui/doc/reading_and_writing_images_and_video.rst
@@ -223,6 +223,15 @@ The class provides C++ API for capturing video from cameras or for reading video
 
 .. note:: In C API the black-box structure ``CvCapture`` is used instead of ``VideoCapture``.
 
+.. note::
+
+   * A basic sample on using the VideoCapture interface can be found at opencv_source_code/samples/cpp/starter_video.cpp
+   * Another basic video processing sample can be found at opencv_source_code/samples/cpp/video_dmtx.cpp
+
+   * (Python) A basic sample on using the VideoCapture interface can be found at opencv_source_code/samples/python2/video.py
+   * (Python) Another basic video processing sample can be found at opencv_source_code/samples/python2/video_dmtx.py
+   * (Python) A multi threaded video processing sample can be found at opencv_source_code/samples/python2/video_threaded.py
+
 
 VideoCapture::VideoCapture
 ------------------------------
@@ -530,4 +539,3 @@ Writes the next video frame
     :param image: The written frame
 
 The functions/methods write the specified image to video file. It must have the same size as has been specified when opening the video writer.
-
diff --git a/modules/highgui/doc/user_interface.rst b/modules/highgui/doc/user_interface.rst
index f84a04c21..d4055ef6f 100644
--- a/modules/highgui/doc/user_interface.rst
+++ b/modules/highgui/doc/user_interface.rst
@@ -9,6 +9,8 @@ Creates a trackbar and attaches it to the specified window.
 
 .. ocv:function:: int createTrackbar( const String& trackbarname, const String& winname, int* value, int count, TrackbarCallback onChange=0, void* userdata=0)
 
+.. ocv:pyfunction:: cv2.createTrackbar(trackbarName, windowName, value, count, onChange) -> None
+
 .. ocv:cfunction:: int cvCreateTrackbar( const char* trackbar_name, const char* window_name, int* value, int count, CvTrackbarCallback on_change=NULL )
 
     :param trackbarname: Name of the created trackbar.
@@ -31,6 +33,10 @@ The function ``createTrackbar`` creates a trackbar (a slider or range control) w
 
 Clicking the label of each trackbar enables editing the trackbar values manually.
 
+.. note::
+
+   * An example of using the trackbar functionality can be found at opencv_source_code/samples/cpp/connected_components.cpp
+
 getTrackbarPos
 ------------------
 Returns the trackbar position.
@@ -73,6 +79,10 @@ The function ``imshow`` displays an image in the specified window. If the window
 
     * If the image is 32-bit floating-point, the pixel values are multiplied by 255. That is, the value range [0,1] is mapped to [0,255].
 
+If window was created with OpenGL support, ``imshow`` also support :ocv:class:`ogl::Buffer` ,  :ocv:class:`ogl::Texture2D` and  :ocv:class:`gpu::GpuMat` as input.
+
+.. note:: This function should be followed by ``waitKey`` function which displays the image for specified milliseconds. Otherwise, it won't display the image. For example, ``waitKey(0)`` will display the window infinitely until any keypress (it is suitable for image display). ``waitKey(25)`` will display a frame for 25 ms, after which display will be automatically closed. (If you put it in a loop to read videos, it will display the video frame-by-frame)
+
 
 namedWindow
 ---------------
@@ -86,7 +96,13 @@ Creates a window.
 
     :param name: Name of the window in the window caption that may be used as a window identifier.
 
-    :param flags: Flags of the window. Currently the only supported flag is  ``CV_WINDOW_AUTOSIZE`` . If this is set, the window size is automatically adjusted to fit the displayed image (see  :ocv:func:`imshow` ), and you cannot change the window size manually.
+    :param flags: Flags of the window. The supported flags are:
+
+        * **WINDOW_NORMAL** If this is set, the user can resize the window (no constraint).
+
+        * **WINDOW_AUTOSIZE** If this is set, the window size is automatically adjusted to fit the displayed image (see  :ocv:func:`imshow` ), and you cannot change the window size manually.
+
+        * **WINDOW_OPENGL** If this is set, the window will be created with OpenGL support.
 
 The function ``namedWindow`` creates a window that can be used as a placeholder for images and trackbars. Created windows are referred to by their names.
 
@@ -181,6 +197,8 @@ Sets mouse handler for the specified window
 
 .. ocv:function:: void setMouseCallback( const String& winname, MouseCallback onMouse, void* userdata=0 )
 
+.. ocv:pyfunction:: cv2.setMouseCallback(windowName, onMouse [, param]) -> None
+
 .. ocv:cfunction:: void cvSetMouseCallback( const char* window_name, CvMouseCallback on_mouse, void* param=NULL )
 
     :param winname: Window name
@@ -234,3 +252,31 @@ The function ``waitKey`` waits for a key event infinitely (when
 .. note::
 
     The function only works if there is at least one HighGUI window created and the window is active. If there are several HighGUI windows, any of them can be active.
+
+setOpenGlDrawCallback
+---------------------
+Set OpenGL render handler for the specified window.
+
+.. ocv:function:: void setOpenGlDrawCallback(const string& winname, OpenGlDrawCallback onOpenGlDraw, void* userdata = 0)
+
+    :param winname: Window name
+
+    :param onOpenGlDraw: Draw callback.
+
+    :param userdata: The optional parameter passed to the callback.
+
+setOpenGlContext
+----------------
+Sets the specified window as current OpenGL context.
+
+.. ocv:function:: void setOpenGlContext(const String& winname)
+
+    :param winname: Window name
+
+updateWindow
+------------
+Force window to redraw its context and call draw callback ( :ocv:func:`setOpenGlDrawCallback` ).
+
+.. ocv:function:: void updateWindow(const String& winname)
+
+    :param winname: Window name
diff --git a/modules/highgui/include/opencv2/highgui.hpp b/modules/highgui/include/opencv2/highgui.hpp
index c20cf883e..c4fc73a81 100644
--- a/modules/highgui/include/opencv2/highgui.hpp
+++ b/modules/highgui/include/opencv2/highgui.hpp
@@ -544,8 +544,8 @@ protected:
     Ptr<CvVideoWriter> writer;
 };
 
-template<> CV_EXPORTS void Ptr<CvCapture>::delete_obj();
-template<> CV_EXPORTS void Ptr<CvVideoWriter>::delete_obj();
+template<> CV_EXPORTS void DefaultDeleter<CvCapture>::operator ()(CvCapture* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvVideoWriter>::operator ()(CvVideoWriter* obj) const;
 
 } // cv
 
diff --git a/modules/highgui/include/opencv2/highgui/cap_ios.h b/modules/highgui/include/opencv2/highgui/cap_ios.h
index 97d9a08af..4d270aba9 100644
--- a/modules/highgui/include/opencv2/highgui/cap_ios.h
+++ b/modules/highgui/include/opencv2/highgui/cap_ios.h
@@ -1,6 +1,4 @@
-/*
- *  cap_ios.h
- *  For iOS video I/O
+/*  For iOS video I/O
  *  by Eduard Feicho on 29/07/12
  *  Copyright 2012. All rights reserved.
  *
@@ -90,6 +88,12 @@
 - (void)createVideoPreviewLayer;
 - (void)updateOrientation;
 
+- (void)lockFocus;
+- (void)unlockFocus;
+- (void)lockExposure;
+- (void)unlockExposure;
+- (void)lockBalance;
+- (void)unlockBalance;
 
 @end
 
@@ -116,6 +120,7 @@
     BOOL grayscaleMode;
 
     BOOL recordVideo;
+    BOOL rotateVideo;
     AVAssetWriterInput* recordAssetWriterInput;
     AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
     AVAssetWriter* recordAssetWriter;
@@ -128,6 +133,7 @@
 @property (nonatomic, assign) BOOL grayscaleMode;
 
 @property (nonatomic, assign) BOOL recordVideo;
+@property (nonatomic, assign) BOOL rotateVideo;
 @property (nonatomic, retain) AVAssetWriterInput* recordAssetWriterInput;
 @property (nonatomic, retain) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
 @property (nonatomic, retain) AVAssetWriter* recordAssetWriter;
diff --git a/modules/highgui/include/opencv2/highgui/highgui.hpp b/modules/highgui/include/opencv2/highgui/highgui.hpp
index cb84ef620..160c9cf4a 100644
--- a/modules/highgui/include/opencv2/highgui/highgui.hpp
+++ b/modules/highgui/include/opencv2/highgui/highgui.hpp
@@ -45,4 +45,4 @@
 #error this is a compatibility header which should not be used inside the OpenCV library
 #endif
 
-#include "opencv2/highgui.hpp"
\ No newline at end of file
+#include "opencv2/highgui.hpp"
diff --git a/modules/highgui/include/opencv2/highgui/highgui_c.h b/modules/highgui/include/opencv2/highgui/highgui_c.h
index 5c9fde38d..2ebea0d30 100644
--- a/modules/highgui/include/opencv2/highgui/highgui_c.h
+++ b/modules/highgui/include/opencv2/highgui/highgui_c.h
@@ -559,9 +559,11 @@ CVAPI(int)    cvGetCaptureDomain( CvCapture* capture);
 /* "black box" video file writer structure */
 typedef struct CvVideoWriter CvVideoWriter;
 
+#define CV_FOURCC_MACRO(c1, c2, c3, c4) (((c1) & 255) + (((c2) & 255) << 8) + (((c3) & 255) << 16) + (((c4) & 255) << 24))
+
 CV_INLINE int CV_FOURCC(char c1, char c2, char c3, char c4)
 {
-    return (c1 & 255) + ((c2 & 255) << 8) + ((c3 & 255) << 16) + ((c4 & 255) << 24);
+    return CV_FOURCC_MACRO(c1, c2, c3, c4);
 }
 
 #define CV_FOURCC_PROMPT -1  /* Open Codec Selection Dialog (Windows only) */
diff --git a/modules/highgui/include/opencv2/highgui/ios.h b/modules/highgui/include/opencv2/highgui/ios.h
new file mode 100644
index 000000000..a7f0395d7
--- /dev/null
+++ b/modules/highgui/include/opencv2/highgui/ios.h
@@ -0,0 +1,49 @@
+
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/core/core.hpp"
+#import "opencv2/highgui/cap_ios.h"
+
+UIImage* MatToUIImage(const cv::Mat& image);
+void UIImageToMat(const UIImage* image,
+                         cv::Mat& m, bool alphaExist = false);
diff --git a/modules/highgui/perf/perf_input.cpp b/modules/highgui/perf/perf_input.cpp
index b32f88da9..10ce30a83 100644
--- a/modules/highgui/perf/perf_input.cpp
+++ b/modules/highgui/perf/perf_input.cpp
@@ -11,11 +11,21 @@ using std::tr1::get;
 
 typedef perf::TestBaseWithParam<std::string> VideoCapture_Reading;
 
+#if defined(HAVE_MSMF)
+// MPEG2 is not supported by Media Foundation yet
+// http://social.msdn.microsoft.com/Forums/en-US/mediafoundationdevelopment/thread/39a36231-8c01-40af-9af5-3c105d684429
+PERF_TEST_P(VideoCapture_Reading, ReadFile, testing::Values( "highgui/video/big_buck_bunny.avi",
+                                               "highgui/video/big_buck_bunny.mov",
+                                               "highgui/video/big_buck_bunny.mp4",
+                                               "highgui/video/big_buck_bunny.wmv" ) )
+
+#else
 PERF_TEST_P(VideoCapture_Reading, ReadFile, testing::Values( "highgui/video/big_buck_bunny.avi",
                                                "highgui/video/big_buck_bunny.mov",
                                                "highgui/video/big_buck_bunny.mp4",
                                                "highgui/video/big_buck_bunny.mpg",
                                                "highgui/video/big_buck_bunny.wmv" ) )
+#endif
 {
   string filename = getDataPath(GetParam());
 
@@ -27,4 +37,4 @@ PERF_TEST_P(VideoCapture_Reading, ReadFile, testing::Values( "highgui/video/big_
   SANITY_CHECK(dummy);
 }
 
-#endif // BUILD_WITH_VIDEO_INPUT_SUPPORT
\ No newline at end of file
+#endif // BUILD_WITH_VIDEO_INPUT_SUPPORT
diff --git a/modules/highgui/perf/perf_output.cpp b/modules/highgui/perf/perf_output.cpp
index ee26f3a42..8e7ec54a9 100644
--- a/modules/highgui/perf/perf_output.cpp
+++ b/modules/highgui/perf/perf_output.cpp
@@ -22,13 +22,19 @@ PERF_TEST_P(VideoWriter_Writing, WriteFrame,
 {
   string filename = getDataPath(get<0>(GetParam()));
   bool isColor = get<1>(GetParam());
+  Mat image = imread(filename, 1);
+#if defined(HAVE_MSMF) && !defined(HAVE_VFW) && !defined(HAVE_FFMPEG) // VFW has greater priority
+  VideoWriter writer(cv::tempfile(".wmv"), VideoWriter::fourcc('W', 'M', 'V', '3'),
+                            25, cv::Size(image.cols, image.rows), isColor);
+#else
+  VideoWriter writer(cv::tempfile(".avi"), VideoWriter::fourcc('X', 'V', 'I', 'D'),
+                            25, cv::Size(image.cols, image.rows), isColor);
+#endif
 
-  VideoWriter writer(cv::tempfile(".avi"), VideoWriter::fourcc('X', 'V', 'I', 'D'), 25, cv::Size(640, 480), isColor);
-
-  TEST_CYCLE() { Mat image = imread(filename, 1); writer << image; }
+  TEST_CYCLE() { image = imread(filename, 1); writer << image; }
 
   bool dummy = writer.isOpened();
   SANITY_CHECK(dummy);
 }
 
-#endif // BUILD_WITH_VIDEO_OUTPUT_SUPPORT
\ No newline at end of file
+#endif // BUILD_WITH_VIDEO_OUTPUT_SUPPORT
diff --git a/modules/highgui/perf/perf_precomp.cpp b/modules/highgui/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/highgui/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/highgui/perf/perf_precomp.hpp b/modules/highgui/perf/perf_precomp.hpp
index 16880d1e4..faf34617e 100644
--- a/modules/highgui/perf/perf_precomp.hpp
+++ b/modules/highgui/perf/perf_precomp.hpp
@@ -19,8 +19,10 @@
 #if defined(HAVE_XINE)         || \
     defined(HAVE_GSTREAMER)    || \
     defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
     defined(HAVE_AVFOUNDATION) || \
     defined(HAVE_FFMPEG)       || \
+    defined(HAVE_MSMF)         || \
     defined(HAVE_VFW)
     /*defined(HAVE_OPENNI) too specialized */ \
 
@@ -32,8 +34,10 @@
 #if /*defined(HAVE_XINE)       || */\
     defined(HAVE_GSTREAMER)    || \
     defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
     defined(HAVE_AVFOUNDATION) || \
     defined(HAVE_FFMPEG)       || \
+    defined(HAVE_MSMF)         || \
     defined(HAVE_VFW)
 #  define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1
 #else
diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp
index 8eb9d0d6e..be62ce9d3 100644
--- a/modules/highgui/src/cap.cpp
+++ b/modules/highgui/src/cap.cpp
@@ -49,10 +49,10 @@
 namespace cv
 {
 
-template<> void Ptr<CvCapture>::delete_obj()
+template<> void DefaultDeleter<CvCapture>::operator ()(CvCapture* obj) const
 { cvReleaseCapture(&obj); }
 
-template<> void Ptr<CvVideoWriter>::delete_obj()
+template<> void DefaultDeleter<CvVideoWriter>::operator ()(CvVideoWriter* obj) const
 { cvReleaseVideoWriter(&obj); }
 
 }
@@ -117,6 +117,9 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
 #ifdef HAVE_DSHOW
         CV_CAP_DSHOW,
 #endif
+#ifdef HAVE_MSMF
+        CV_CAP_MSMF,
+#endif
 #if 1
         CV_CAP_IEEE1394,   // identical to CV_CAP_DC1394
 #endif
@@ -132,7 +135,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
 #ifdef HAVE_MIL
         CV_CAP_MIL,
 #endif
-#ifdef HAVE_QUICKTIME
+#if defined(HAVE_QUICKTIME) || defined(HAVE_QTKIT)
         CV_CAP_QT,
 #endif
 #ifdef HAVE_UNICAP
@@ -182,6 +185,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
     defined(HAVE_CMU1394)      || \
     defined(HAVE_MIL)          || \
     defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
     defined(HAVE_UNICAP)       || \
     defined(HAVE_PVAPI)        || \
     defined(HAVE_OPENNI)       || \
@@ -196,13 +200,6 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
 
         switch (domains[i])
         {
-#ifdef HAVE_MSMF
-        case CV_CAP_MSMF:
-             capture = cvCreateCameraCapture_MSMF (index);
-             if (capture)
-                 return capture;
-            break;
-#endif
 #ifdef HAVE_DSHOW
         case CV_CAP_DSHOW:
              capture = cvCreateCameraCapture_DShow (index);
@@ -210,7 +207,13 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
                  return capture;
             break;
 #endif
-
+#ifdef HAVE_MSMF
+        case CV_CAP_MSMF:
+             capture = cvCreateCameraCapture_MSMF (index);
+             if (capture)
+                 return capture;
+            break;
+#endif
 #ifdef HAVE_TYZX
         case CV_CAP_STEREO:
             capture = cvCreateCameraCapture_TYZX (index);
@@ -218,14 +221,12 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
                 return capture;
             break;
 #endif
-
         case CV_CAP_VFW:
 #ifdef HAVE_VFW
             capture = cvCreateCameraCapture_VFW (index);
             if (capture)
                 return capture;
 #endif
-
 #if defined HAVE_LIBV4L || defined HAVE_CAMV4L || defined HAVE_CAMV4L2 || defined HAVE_VIDEOIO
             capture = cvCreateCameraCapture_V4L (index);
             if (capture)
@@ -277,7 +278,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
             break;
 #endif
 
-#ifdef HAVE_QUICKTIME
+#if defined(HAVE_QUICKTIME) || defined(HAVE_QTKIT)
         case CV_CAP_QT:
             capture = cvCreateCameraCapture_QT (index);
             if (capture)
@@ -359,6 +360,16 @@ CV_IMPL CvCapture * cvCreateFileCapture (const char * filename)
     if (! result)
         result = cvCreateFileCapture_FFMPEG_proxy (filename);
 
+#ifdef HAVE_VFW
+    if (! result)
+        result = cvCreateFileCapture_VFW (filename);
+#endif
+
+#ifdef HAVE_MSMF
+    if (! result)
+        result = cvCreateFileCapture_MSMF (filename);
+#endif
+
 #ifdef HAVE_XINE
     if (! result)
         result = cvCreateFileCapture_XINE (filename);
@@ -369,7 +380,7 @@ CV_IMPL CvCapture * cvCreateFileCapture (const char * filename)
         result = cvCreateCapture_GStreamer (CV_CAP_GSTREAMER_FILE, filename);
 #endif
 
-#ifdef HAVE_QUICKTIME
+#if defined(HAVE_QUICKTIME) || defined(HAVE_QTKIT)
     if (! result)
         result = cvCreateFileCapture_QT (filename);
 #endif
@@ -407,6 +418,16 @@ CV_IMPL CvVideoWriter* cvCreateVideoWriter( const char* filename, int fourcc,
     if(!result)
         result = cvCreateVideoWriter_FFMPEG_proxy (filename, fourcc, fps, frameSize, is_color);
 
+#ifdef HAVE_VFW
+    if(!result)
+        result = cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, is_color);
+#endif
+
+#ifdef HAVE_MSMF
+    if (!result)
+        result = cvCreateVideoWriter_MSMF(filename, fourcc, fps, frameSize, is_color);
+#endif
+
 /*  #ifdef HAVE_XINE
     if(!result)
         result = cvCreateVideoWriter_XINE(filename, fourcc, fps, frameSize, is_color);
@@ -417,7 +438,7 @@ CV_IMPL CvVideoWriter* cvCreateVideoWriter( const char* filename, int fourcc,
         result = cvCreateVideoWriter_AVFoundation(filename, fourcc, fps, frameSize, is_color);
 #endif
 
-#ifdef HAVE_QUICKTIME
+#if defined(HAVE_QUICKTIME) || defined(HAVE_QTKIT)
     if(!result)
         result = cvCreateVideoWriter_QT(filename, fourcc, fps, frameSize, is_color);
 #endif
@@ -470,15 +491,15 @@ VideoCapture::~VideoCapture()
 
 bool VideoCapture::open(const String& filename)
 {
-    if (!isOpened())
-    cap = cvCreateFileCapture(filename.c_str());
+    if (isOpened()) release();
+    cap.reset(cvCreateFileCapture(filename.c_str()));
     return isOpened();
 }
 
 bool VideoCapture::open(int device)
 {
-    if (!isOpened())
-    cap = cvCreateCameraCapture(device);
+    if (isOpened()) release();
+    cap.reset(cvCreateCameraCapture(device));
     return isOpened();
 }
 
@@ -557,7 +578,7 @@ VideoWriter::~VideoWriter()
 
 bool VideoWriter::open(const String& filename, int _fourcc, double fps, Size frameSize, bool isColor)
 {
-    writer = cvCreateVideoWriter(filename.c_str(), _fourcc, fps, frameSize, isColor);
+    writer.reset(cvCreateVideoWriter(filename.c_str(), _fourcc, fps, frameSize, isColor));
     return isOpened();
 }
 
diff --git a/modules/highgui/src/cap_avfoundation.mm b/modules/highgui/src/cap_avfoundation.mm
index c7471e2e8..71777f875 100644
--- a/modules/highgui/src/cap_avfoundation.mm
+++ b/modules/highgui/src/cap_avfoundation.mm
@@ -1325,4 +1325,3 @@ bool CvVideoWriter_AVFoundation::writeFrame(const IplImage* iplimage) {
     }
 
 }
-
diff --git a/modules/highgui/src/cap_dc1394_v2.cpp b/modules/highgui/src/cap_dc1394_v2.cpp
index ea7e4b2b8..0d5f89818 100644
--- a/modules/highgui/src/cap_dc1394_v2.cpp
+++ b/modules/highgui/src/cap_dc1394_v2.cpp
@@ -304,6 +304,11 @@ bool CvCaptureCAM_DC1394_v2_CPP::startCapture()
         return false;
     if (isoSpeed > 0)
     {
+        // if capable set operation mode to 1394b for iso speeds above 400
+        if (isoSpeed > 400 && dcCam->bmode_capable == DC1394_TRUE)
+        {
+            dc1394_video_set_operation_mode(dcCam, DC1394_OPERATION_MODE_1394B);
+        }
         code = dc1394_video_set_iso_speed(dcCam,
                                           isoSpeed <= 100 ? DC1394_ISO_SPEED_100 :
                                           isoSpeed <= 200 ? DC1394_ISO_SPEED_200 :
diff --git a/modules/highgui/src/cap_dshow.cpp b/modules/highgui/src/cap_dshow.cpp
index 8e5baaa40..90ffa0003 100644
--- a/modules/highgui/src/cap_dshow.cpp
+++ b/modules/highgui/src/cap_dshow.cpp
@@ -3162,18 +3162,18 @@ void CvCaptureCAM_DShow::close()
 // Initialize camera input
 bool CvCaptureCAM_DShow::open( int _index )
 {
-    int try_index = _index;
     int devices = 0;
 
     close();
     devices = VI.listDevices(true);
     if (devices == 0)
         return false;
-    try_index = try_index < 0 ? 0 : (try_index > devices-1 ? devices-1 : try_index);
-    VI.setupDevice(try_index);
-    if( !VI.isDeviceSetup(try_index) )
+    if (_index < 0 || _index > devices-1)
         return false;
-    index = try_index;
+    VI.setupDevice(_index);
+    if( !VI.isDeviceSetup(_index) )
+        return false;
+    index = _index;
     return true;
 }
 
diff --git a/modules/highgui/src/cap_ffmpeg.cpp b/modules/highgui/src/cap_ffmpeg.cpp
index 57f67dab9..192c0da69 100644
--- a/modules/highgui/src/cap_ffmpeg.cpp
+++ b/modules/highgui/src/cap_ffmpeg.cpp
@@ -85,6 +85,16 @@ private:
     icvInitFFMPEG()
     {
     #if defined WIN32 || defined _WIN32
+    # ifdef HAVE_WINRT
+        const wchar_t* module_name = L"opencv_ffmpeg"
+            CVAUX_STRW(CV_MAJOR_VERSION) CVAUX_STRW(CV_MINOR_VERSION) CVAUX_STRW(CV_SUBMINOR_VERSION)
+        #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__)
+            L"_64"
+        #endif
+            L".dll";
+
+        icvFFOpenCV = LoadPackagedLibrary( module_name, 0 );
+    # else
         const char* module_name = "opencv_ffmpeg"
             CVAUX_STR(CV_MAJOR_VERSION) CVAUX_STR(CV_MINOR_VERSION) CVAUX_STR(CV_SUBMINOR_VERSION)
         #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__)
@@ -93,6 +103,8 @@ private:
             ".dll";
 
         icvFFOpenCV = LoadLibrary( module_name );
+    # endif
+
         if( icvFFOpenCV )
         {
             icvCreateFileCapture_FFMPEG_p =
@@ -209,11 +221,7 @@ CvCapture* cvCreateFileCapture_FFMPEG_proxy(const char * filename)
     if( result->open( filename ))
         return result;
     delete result;
-#ifdef HAVE_VFW
-    return cvCreateFileCapture_VFW(filename);
-#else
     return 0;
-#endif
 }
 
 class CvVideoWriter_FFMPEG_proxy :
@@ -263,9 +271,5 @@ CvVideoWriter* cvCreateVideoWriter_FFMPEG_proxy( const char* filename, int fourc
     if( result->open( filename, fourcc, fps, frameSize, isColor != 0 ))
         return result;
     delete result;
-#ifdef HAVE_VFW
-     return cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, isColor);
- #else
     return 0;
-#endif
 }
diff --git a/modules/highgui/src/cap_ffmpeg_api.hpp b/modules/highgui/src/cap_ffmpeg_api.hpp
index 1fe488c04..e7a956063 100644
--- a/modules/highgui/src/cap_ffmpeg_api.hpp
+++ b/modules/highgui/src/cap_ffmpeg_api.hpp
@@ -7,9 +7,11 @@ extern "C"
 #endif
 
 #if defined WIN32 || defined _WIN32
-#define OPENCV_FFMPEG_API __declspec(dllexport)
+#   define OPENCV_FFMPEG_API __declspec(dllexport)
+#elif defined __GNUC__ && __GNUC__ >= 4
+#   define OPENCV_FFMPEG_API __attribute__ ((visibility ("default")))
 #else
-#define OPENCV_FFMPEG_API
+#   define OPENCV_FFMPEG_API
 #endif
 
 enum
@@ -94,4 +96,3 @@ typedef int (*Read_InputMediaStream_FFMPEG_Plugin)(struct InputMediaStream_FFMPE
 #endif
 
 #endif
-
diff --git a/modules/highgui/src/cap_ffmpeg_impl.hpp b/modules/highgui/src/cap_ffmpeg_impl.hpp
index 99da45f4c..91fc8953d 100644
--- a/modules/highgui/src/cap_ffmpeg_impl.hpp
+++ b/modules/highgui/src/cap_ffmpeg_impl.hpp
@@ -374,7 +374,15 @@ private:
 
 struct ImplMutex::Impl
 {
-    void init() { InitializeCriticalSection(&cs); refcount = 1; }
+    void init()
+    {
+#if (_WIN32_WINNT >= 0x0600)
+        ::InitializeCriticalSectionEx(&cs, 1000, 0);
+#else
+        ::InitializeCriticalSection(&cs);
+#endif
+        refcount = 1;
+    }
     void destroy() { DeleteCriticalSection(&cs); }
 
     void lock() { EnterCriticalSection(&cs); }
@@ -1204,7 +1212,7 @@ static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc,
 #if LIBAVUTIL_BUILD > CALC_FFMPEG_VERSION(51,11,0)
     /* Some settings for libx264 encoding, restore dummy values for gop_size
      and qmin since they will be set to reasonable defaults by the libx264
-     preset system. Also, use a crf encode with the default quality rating, 
+     preset system. Also, use a crf encode with the default quality rating,
      this seems easier than finding an appropriate default bitrate. */
     if (c->codec_id == CODEC_ID_H264) {
       c->gop_size = -1;
@@ -1392,8 +1400,6 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int
 /// close video output stream and free associated memory
 void CvVideoWriter_FFMPEG::close()
 {
-    unsigned i;
-
     // nothing to do if already released
     if ( !picture )
         return;
@@ -1449,13 +1455,6 @@ void CvVideoWriter_FFMPEG::close()
 
     av_free(outbuf);
 
-    /* free the streams */
-    for(i = 0; i < oc->nb_streams; i++)
-    {
-        av_freep(&oc->streams[i]->codec);
-        av_freep(&oc->streams[i]);
-    }
-
     if (!(fmt->flags & AVFMT_NOFILE))
     {
         /* close the output file */
@@ -1473,7 +1472,7 @@ void CvVideoWriter_FFMPEG::close()
     }
 
     /* free the stream */
-    av_free(oc);
+    avformat_free_context(oc);
 
     if( temp_image.data )
     {
diff --git a/modules/highgui/src/cap_ios_abstract_camera.mm b/modules/highgui/src/cap_ios_abstract_camera.mm
index b6a7d944f..a77e200a8 100644
--- a/modules/highgui/src/cap_ios_abstract_camera.mm
+++ b/modules/highgui/src/cap_ios_abstract_camera.mm
@@ -2,6 +2,7 @@
  *  cap_ios_abstract_camera.mm
  *  For iOS video I/O
  *  by Eduard Feicho on 29/07/12
+ *  by Alexander Shishkov on 17/07/13
  *  Copyright 2012. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -405,4 +406,88 @@
     }
 }
 
+- (void)lockFocus;
+{
+    AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
+    if ([device isFocusModeSupported:AVCaptureFocusModeLocked]) {
+        NSError *error = nil;
+        if ([device lockForConfiguration:&error]) {
+            device.focusMode = AVCaptureFocusModeLocked;
+            [device unlockForConfiguration];
+        } else {
+            NSLog(@"unable to lock device for locked focus configuration %@", [error localizedDescription]);
+        }
+    }
+}
+
+- (void) unlockFocus;
+{
+    AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
+    if ([device isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) {
+        NSError *error = nil;
+        if ([device lockForConfiguration:&error]) {
+            device.focusMode = AVCaptureFocusModeContinuousAutoFocus;
+            [device unlockForConfiguration];
+        } else {
+            NSLog(@"unable to lock device for autofocus configuration %@", [error localizedDescription]);
+        }
+    }
+}
+
+- (void)lockExposure;
+{
+    AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
+    if ([device isExposureModeSupported:AVCaptureExposureModeLocked]) {
+        NSError *error = nil;
+        if ([device lockForConfiguration:&error]) {
+            device.exposureMode = AVCaptureExposureModeLocked;
+            [device unlockForConfiguration];
+        } else {
+            NSLog(@"unable to lock device for locked exposure configuration %@", [error localizedDescription]);
+        }
+    }
+}
+
+- (void) unlockExposure;
+{
+    AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
+    if ([device isExposureModeSupported:AVCaptureExposureModeContinuousAutoExposure]) {
+        NSError *error = nil;
+        if ([device lockForConfiguration:&error]) {
+            device.exposureMode = AVCaptureExposureModeContinuousAutoExposure;
+            [device unlockForConfiguration];
+        } else {
+            NSLog(@"unable to lock device for autoexposure configuration %@", [error localizedDescription]);
+        }
+    }
+}
+
+- (void)lockBalance;
+{
+    AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
+    if ([device isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeLocked]) {
+        NSError *error = nil;
+        if ([device lockForConfiguration:&error]) {
+            device.whiteBalanceMode = AVCaptureWhiteBalanceModeLocked;
+            [device unlockForConfiguration];
+        } else {
+            NSLog(@"unable to lock device for locked white balance configuration %@", [error localizedDescription]);
+        }
+    }
+}
+
+- (void) unlockBalance;
+{
+    AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
+    if ([device isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance]) {
+        NSError *error = nil;
+        if ([device lockForConfiguration:&error]) {
+            device.whiteBalanceMode = AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance;
+            [device unlockForConfiguration];
+        } else {
+            NSLog(@"unable to lock device for auto white balance configuration %@", [error localizedDescription]);
+        }
+    }
+}
+
 @end
diff --git a/modules/highgui/src/cap_ios_video_camera.mm b/modules/highgui/src/cap_ios_video_camera.mm
index 1f9ea14bf..99f8a75ae 100644
--- a/modules/highgui/src/cap_ios_video_camera.mm
+++ b/modules/highgui/src/cap_ios_video_camera.mm
@@ -2,6 +2,7 @@
  *  cap_ios_video_camera.mm
  *  For iOS video I/O
  *  by Eduard Feicho on 29/07/12
+ *  by Alexander Shishkov on 17/07/13
  *  Copyright 2012. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -30,7 +31,6 @@
 
 #import "opencv2/highgui/cap_ios.h"
 #include "precomp.hpp"
-
 #import <AssetsLibrary/AssetsLibrary.h>
 
 
@@ -70,6 +70,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
 @synthesize videoDataOutput;
 
 @synthesize recordVideo;
+@synthesize rotateVideo;
 //@synthesize videoFileOutput;
 @synthesize recordAssetWriterInput;
 @synthesize recordPixelBufferAdaptor;
@@ -85,6 +86,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
     if (self) {
         self.useAVCaptureVideoPreviewLayer = NO;
         self.recordVideo = NO;
+        self.rotateVideo = NO;
     }
     return self;
 }
@@ -269,13 +271,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
 
 }
 
-
-
-
 #pragma mark - Private Interface
 
-
-
 - (void)createVideoDataOutput;
 {
     // Make a video data output
@@ -389,6 +386,38 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
     [self.parentView.layer addSublayer:self.customPreviewLayer];
 }
 
+- (CVPixelBufferRef) pixelBufferFromCGImage: (CGImageRef) image
+{
+
+    CGSize frameSize = CGSizeMake(CGImageGetWidth(image), CGImageGetHeight(image));
+    NSDictionary *options = [NSDictionary dictionaryWithObjectsAndKeys:
+                             [NSNumber numberWithBool:NO], kCVPixelBufferCGImageCompatibilityKey,
+                             [NSNumber numberWithBool:NO], kCVPixelBufferCGBitmapContextCompatibilityKey,
+                             nil];
+    CVPixelBufferRef pxbuffer = NULL;
+    CVReturn status = CVPixelBufferCreate(kCFAllocatorDefault, frameSize.width,
+                                          frameSize.height,  kCVPixelFormatType_32ARGB, (CFDictionaryRef) CFBridgingRetain(options),
+                                          &pxbuffer);
+    NSParameterAssert(status == kCVReturnSuccess && pxbuffer != NULL);
+
+    CVPixelBufferLockBaseAddress(pxbuffer, 0);
+    void *pxdata = CVPixelBufferGetBaseAddress(pxbuffer);
+
+
+    CGColorSpaceRef rgbColorSpace = CGColorSpaceCreateDeviceRGB();
+    CGContextRef context = CGBitmapContextCreate(pxdata, frameSize.width,
+                                                 frameSize.height, 8, 4*frameSize.width, rgbColorSpace,
+                                                 kCGImageAlphaPremultipliedFirst);
+
+    CGContextDrawImage(context, CGRectMake(0, 0, CGImageGetWidth(image),
+                                           CGImageGetHeight(image)), image);
+    CGColorSpaceRelease(rgbColorSpace);
+    CGContextRelease(context);
+
+    CVPixelBufferUnlockBaseAddress(pxbuffer, 0);
+
+    return pxbuffer;
+}
 
 #pragma mark - Protocol AVCaptureVideoDataOutputSampleBufferDelegate
 
@@ -522,7 +551,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
             }
 
             if (self.recordAssetWriterInput.readyForMoreMediaData) {
-                if (! [self.recordPixelBufferAdaptor appendPixelBuffer:imageBuffer
+                CVImageBufferRef pixelBuffer = [self pixelBufferFromCGImage:dstImage];
+                if (! [self.recordPixelBufferAdaptor appendPixelBuffer:pixelBuffer
                                                   withPresentationTime:lastSampleTime] ) {
                     NSLog(@"Video Writing Error");
                 }
@@ -543,9 +573,12 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
 
 - (void)updateOrientation;
 {
-    NSLog(@"rotate..");
-    self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height);
-    [self layoutPreviewLayer];
+    if (self.rotateVideo == YES)
+    {
+        NSLog(@"rotate..");
+        self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height);
+        [self layoutPreviewLayer];
+    }
 }
 
 
diff --git a/modules/highgui/src/cap_libv4l.cpp b/modules/highgui/src/cap_libv4l.cpp
index b081621b1..3fd6dd59a 100644
--- a/modules/highgui/src/cap_libv4l.cpp
+++ b/modules/highgui/src/cap_libv4l.cpp
@@ -14,7 +14,9 @@ It has been tested with the motempl sample program
 First Patch:  August 24, 2004 Travis Wood   TravisOCV@tkwood.com
 For Release:  OpenCV-Linux Beta4  opencv-0.9.6
 Tested On:    LMLBT44 with 8 video inputs
-Problems?     Post problems/fixes to OpenCV group on groups.yahoo.com
+Problems?     Post your questions at answers.opencv.org,
+              Report bugs at code.opencv.org,
+              Submit your fixes at https://github.com/Itseez/opencv/
 Patched Comments:
 
 TW: The cv cam utils that came with the initial release of OpenCV for LINUX Beta4
diff --git a/modules/highgui/src/cap_msmf.cpp b/modules/highgui/src/cap_msmf.cpp
index 52b780463..df52f04bd 100644
--- a/modules/highgui/src/cap_msmf.cpp
+++ b/modules/highgui/src/cap_msmf.cpp
@@ -53,7 +53,8 @@
 #include <Mfapi.h>
 #include <mfplay.h>
 #include <mfobjects.h>
-#include "Strsafe.h"
+#include <strsafe.h>
+#include <Mfreadwrite.h>
 #include <new>
 #include <map>
 #include <vector>
@@ -61,18 +62,27 @@
 #include <stdio.h>
 #include <stdarg.h>
 #include <string.h>
+
 #pragma warning(disable:4503)
 #pragma comment(lib, "mfplat")
 #pragma comment(lib, "mf")
 #pragma comment(lib, "mfuuid")
 #pragma comment(lib, "Strmiids")
+#pragma comment(lib, "Mfreadwrite")
 #pragma comment(lib, "MinCore_Downlevel")
+
+// for ComPtr usage
+#include <wrl/client.h>
+using namespace Microsoft::WRL;
+
 struct IMFMediaType;
 struct IMFActivate;
 struct IMFMediaSource;
 struct IMFAttributes;
+
 namespace
 {
+
 template <class T> void SafeRelease(T **ppT)
 {
     if (*ppT)
@@ -81,7 +91,8 @@ template <class T> void SafeRelease(T **ppT)
         *ppT = NULL;
     }
 }
- /// Class for printing info into consol
+
+/// Class for printing info into consol
 class DebugPrintOut
 {
 public:
@@ -93,6 +104,7 @@ public:
 private:
     DebugPrintOut(void);
 };
+
 // Structure for collecting info about types of video, which are supported by current video device
 struct MediaType
 {
@@ -101,14 +113,14 @@ struct MediaType
     unsigned int width;
     unsigned int MF_MT_YUV_MATRIX;
     unsigned int MF_MT_VIDEO_LIGHTING;
-    unsigned int MF_MT_DEFAULT_STRIDE;
+    int MF_MT_DEFAULT_STRIDE; // stride is negative if image is bottom-up
     unsigned int MF_MT_VIDEO_CHROMA_SITING;
     GUID MF_MT_AM_FORMAT_TYPE;
     wchar_t *pMF_MT_AM_FORMAT_TYPEName;
     unsigned int MF_MT_FIXED_SIZE_SAMPLES;
     unsigned int MF_MT_VIDEO_NOMINAL_RANGE;
-    unsigned int MF_MT_FRAME_RATE;
-    unsigned int MF_MT_FRAME_RATE_low;
+    unsigned int MF_MT_FRAME_RATE_NUMERATOR;
+    unsigned int MF_MT_FRAME_RATE_DENOMINATOR;
     unsigned int MF_MT_PIXEL_ASPECT_RATIO;
     unsigned int MF_MT_PIXEL_ASPECT_RATIO_low;
     unsigned int MF_MT_ALL_SAMPLES_INDEPENDENT;
@@ -127,6 +139,7 @@ struct MediaType
     ~MediaType();
     void Clear();
 };
+
 /// Class for parsing info from IMFMediaType into the local MediaType
 class FormatReader
 {
@@ -136,9 +149,10 @@ public:
 private:
     FormatReader(void);
 };
+
 DWORD WINAPI MainThreadFunction( LPVOID lpParam );
 typedef void(*emergensyStopEventCallback)(int, void *);
-typedef unsigned char BYTE;
+
 class RawImage
 {
 public:
@@ -156,6 +170,7 @@ private:
     unsigned char *ri_pixels;
     RawImage(unsigned int size);
 };
+
 // Class for grabbing image from video stream
 class ImageGrabber : public IMFSampleGrabberSinkCallback
 {
@@ -163,13 +178,21 @@ public:
     ~ImageGrabber(void);
     HRESULT initImageGrabber(IMFMediaSource *pSource, GUID VideoFormat);
     HRESULT startGrabbing(void);
+    void pauseGrabbing();
+    void resumeGrabbing();
     void stopGrabbing();
     RawImage *getRawImage();
     // Function of creation of the instance of the class
-    static HRESULT CreateInstance(ImageGrabber **ppIG,unsigned int deviceID);
+    static HRESULT CreateInstance(ImageGrabber **ppIG, unsigned int deviceID, bool synchronous = false);
+
+    const HANDLE ig_hFrameReady;
+    const HANDLE ig_hFrameGrabbed;
+    const HANDLE ig_hFinish;
+
 private:
     bool ig_RIE;
     bool ig_Close;
+    bool ig_Synchronous;
     long m_cRef;
     unsigned int ig_DeviceID;
     IMFMediaSource *ig_pSource;
@@ -178,19 +201,11 @@ private:
     RawImage *ig_RIFirst;
     RawImage *ig_RISecond;
     RawImage *ig_RIOut;
-    ImageGrabber(unsigned int deviceID);
+    ImageGrabber(unsigned int deviceID, bool synchronous);
     HRESULT CreateTopology(IMFMediaSource *pSource, IMFActivate *pSinkActivate, IMFTopology **ppTopo);
-    HRESULT AddSourceNode(
-    IMFTopology *pTopology,
-    IMFMediaSource *pSource,
-    IMFPresentationDescriptor *pPD,
-    IMFStreamDescriptor *pSD,
-    IMFTopologyNode **ppNode);
-    HRESULT AddOutputNode(
-    IMFTopology *pTopology,
-    IMFActivate *pActivate,
-    DWORD dwId,
-    IMFTopologyNode **ppNode);
+    HRESULT AddSourceNode(IMFTopology *pTopology, IMFMediaSource *pSource,
+        IMFPresentationDescriptor *pPD, IMFStreamDescriptor *pSD, IMFTopologyNode **ppNode);
+    HRESULT AddOutputNode(IMFTopology *pTopology, IMFActivate *pActivate, DWORD dwId, IMFTopologyNode **ppNode);
     // IUnknown methods
     STDMETHODIMP QueryInterface(REFIID iid, void** ppv);
     STDMETHODIMP_(ULONG) AddRef();
@@ -208,13 +223,14 @@ private:
         DWORD dwSampleSize);
     STDMETHODIMP OnShutdown();
 };
+
 /// Class for controlling of thread of the grabbing raw data from video device
 class ImageGrabberThread
 {
     friend DWORD WINAPI MainThreadFunction( LPVOID lpParam );
 public:
     ~ImageGrabberThread(void);
-    static HRESULT CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID);
+    static HRESULT CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID, bool synchronious = false);
     void start();
     void stop();
     void setEmergencyStopEvent(void *userData, void(*func)(int, void *));
@@ -222,7 +238,7 @@ public:
 protected:
     virtual void run();
 private:
-    ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID);
+    ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID, bool synchronious);
     HANDLE igt_Handle;
     DWORD   igt_ThreadIdArray;
     ImageGrabber *igt_pImageGrabber;
@@ -231,6 +247,7 @@ private:
     bool igt_stop;
     unsigned int igt_DeviceID;
 };
+
 // Structure for collecting info about one parametr of current video device
 struct Parametr
 {
@@ -242,6 +259,7 @@ struct Parametr
     long Flag;
     Parametr();
 };
+
 // Structure for collecting info about 17 parametrs of current video device
 struct CamParametrs
 {
@@ -263,11 +281,13 @@ struct CamParametrs
         Parametr Iris;
         Parametr Focus;
 };
+
 typedef std::wstring String;
 typedef std::vector<int> vectorNum;
 typedef std::map<String, vectorNum> SUBTYPEMap;
 typedef std::map<UINT64, SUBTYPEMap> FrameRateMap;
 typedef void(*emergensyStopEventCallback)(int, void *);
+
 /// Class for controlling of video device
 class videoDevice
 {
@@ -311,7 +331,7 @@ private:
     IMFMediaSource *vd_pSource;
     emergensyStopEventCallback vd_func;
     void *vd_userData;
-    long enumerateCaptureFormats(IMFMediaSource *pSource);
+    HRESULT enumerateCaptureFormats(IMFMediaSource *pSource);
     long setDeviceFormat(IMFMediaSource *pSource, unsigned long dwFormatIndex);
     void buildLibraryofTypes();
     int findType(unsigned int size, unsigned int frameRate = 0);
@@ -319,6 +339,7 @@ private:
     long initDevice();
     long checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice);
 };
+
 /// Class for managing of list of video devices
 class videoDevices
 {
@@ -334,6 +355,7 @@ private:
     std::vector<videoDevice *> vds_Devices;
     videoDevices(void);
 };
+
 // Class for creating of Media Foundation context
 class Media_Foundation
 {
@@ -344,6 +366,7 @@ public:
 private:
     Media_Foundation(void);
 };
+
 /// The only visiable class for controlling of video devices in format singelton
 class videoInput
 {
@@ -393,23 +416,27 @@ public:
     bool isFrameNew(int deviceID);
     // Writing of Raw Data pixels from video device with deviceID with correction of RedAndBlue flipping flipRedAndBlue and vertical flipping flipImage
     bool getPixels(int deviceID, unsigned char * pixels, bool flipRedAndBlue = false, bool flipImage = false);
+    static void processPixels(unsigned char * src, unsigned char * dst, unsigned int width, unsigned int height, unsigned int bpp, bool bRGB, bool bFlip);
 private:
     bool accessToDevices;
     videoInput(void);
-    void processPixels(unsigned char * src, unsigned char * dst, unsigned int width, unsigned int height, unsigned int bpp, bool bRGB, bool bFlip);
     void updateListOfDevices();
 };
+
 DebugPrintOut::DebugPrintOut(void):verbose(true)
 {
 }
+
 DebugPrintOut::~DebugPrintOut(void)
 {
 }
+
 DebugPrintOut& DebugPrintOut::getInstance()
 {
     static DebugPrintOut instance;
     return instance;
 }
+
 void DebugPrintOut::printOut(const wchar_t *format, ...)
 {
     if(verbose)
@@ -430,14 +457,17 @@ void DebugPrintOut::printOut(const wchar_t *format, ...)
         va_end (args);
     }
 }
+
 void DebugPrintOut::setVerbose(bool state)
 {
     verbose = state;
 }
+
 LPCWSTR GetGUIDNameConstNew(const GUID& guid);
 HRESULT GetGUIDNameNew(const GUID& guid, WCHAR **ppwsz);
 HRESULT LogAttributeValueByIndexNew(IMFAttributes *pAttr, DWORD index);
 HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaType &out);
+
 unsigned int *GetParametr(GUID guid, MediaType &out)
 {
     if(guid == MF_MT_YUV_MATRIX)
@@ -445,7 +475,7 @@ unsigned int *GetParametr(GUID guid, MediaType &out)
     if(guid == MF_MT_VIDEO_LIGHTING)
         return &(out.MF_MT_VIDEO_LIGHTING);
     if(guid == MF_MT_DEFAULT_STRIDE)
-        return &(out.MF_MT_DEFAULT_STRIDE);
+        return (unsigned int*)&(out.MF_MT_DEFAULT_STRIDE);
     if(guid == MF_MT_VIDEO_CHROMA_SITING)
         return &(out.MF_MT_VIDEO_CHROMA_SITING);
     if(guid == MF_MT_VIDEO_NOMINAL_RANGE)
@@ -462,6 +492,7 @@ unsigned int *GetParametr(GUID guid, MediaType &out)
         return &(out.MF_MT_INTERLACE_MODE);
     return NULL;
 }
+
 HRESULT LogAttributeValueByIndexNew(IMFAttributes *pAttr, DWORD index, MediaType &out)
 {
     WCHAR *pGuidName = NULL;
@@ -548,6 +579,7 @@ done:
     PropVariantClear(&var);
     return hr;
 }
+
 HRESULT GetGUIDNameNew(const GUID& guid, WCHAR **ppwsz)
 {
     HRESULT hr = S_OK;
@@ -589,14 +621,17 @@ done:
     }
     return hr;
 }
+
 void LogUINT32AsUINT64New(const PROPVARIANT& var, UINT32 &uHigh, UINT32 &uLow)
 {
     Unpack2UINT32AsUINT64(var.uhVal.QuadPart, &uHigh, &uLow);
 }
+
 float OffsetToFloatNew(const MFOffset& offset)
 {
     return offset.value + (static_cast<float>(offset.fract) / 65536.0f);
 }
+
 HRESULT LogVideoAreaNew(const PROPVARIANT& var)
 {
     if (var.caub.cElems < sizeof(MFVideoArea))
@@ -605,8 +640,13 @@ HRESULT LogVideoAreaNew(const PROPVARIANT& var)
     }
     return S_OK;
 }
+
 HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaType &out)
 {
+    if (guid == MF_MT_DEFAULT_STRIDE)
+    {
+        out.MF_MT_DEFAULT_STRIDE = var.intVal;
+    } else
     if (guid == MF_MT_FRAME_SIZE)
     {
         UINT32 uHigh = 0, uLow = 0;
@@ -620,8 +660,8 @@ HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaTyp
     {
         UINT32 uHigh = 0, uLow = 0;
         LogUINT32AsUINT64New(var, uHigh, uLow);
-        out.MF_MT_FRAME_RATE = uHigh;
-        out.MF_MT_FRAME_RATE_low = uLow;
+        out.MF_MT_FRAME_RATE_NUMERATOR = uHigh;
+        out.MF_MT_FRAME_RATE_DENOMINATOR = uLow;
     }
     else
     if (guid == MF_MT_FRAME_RATE_RANGE_MAX)
@@ -653,9 +693,11 @@ HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaTyp
     }
     return S_OK;
 }
+
 #ifndef IF_EQUAL_RETURN
 #define IF_EQUAL_RETURN(param, val) if(val == param) return L#val
 #endif
+
 LPCWSTR GetGUIDNameConstNew(const GUID& guid)
 {
     IF_EQUAL_RETURN(guid, MF_MT_MAJOR_TYPE);
@@ -800,9 +842,11 @@ LPCWSTR GetGUIDNameConstNew(const GUID& guid)
     IF_EQUAL_RETURN(guid, MFAudioFormat_ADTS); //             WAVE_FORMAT_MPEG_ADTS_AAC
     return NULL;
 }
+
 FormatReader::FormatReader(void)
 {
 }
+
 MediaType FormatReader::Read(IMFMediaType *pType)
 {
     UINT32 count = 0;
@@ -833,32 +877,57 @@ MediaType FormatReader::Read(IMFMediaType *pType)
     }
     return out;
 }
+
 FormatReader::~FormatReader(void)
 {
 }
+
 #define CHECK_HR(x) if (FAILED(x)) { goto done; }
-ImageGrabber::ImageGrabber(unsigned int deviceID): m_cRef(1), ig_DeviceID(deviceID), ig_pSource(NULL), ig_pSession(NULL), ig_pTopology(NULL), ig_RIE(true), ig_Close(false)
-{
-}
+
+ImageGrabber::ImageGrabber(unsigned int deviceID, bool synchronous):
+    m_cRef(1),
+    ig_DeviceID(deviceID),
+    ig_pSource(NULL),
+    ig_pSession(NULL),
+    ig_pTopology(NULL),
+    ig_RIE(true),
+    ig_Close(false),
+    ig_Synchronous(synchronous),
+    ig_hFrameReady(synchronous ? CreateEvent(NULL, FALSE, FALSE, NULL): 0),
+    ig_hFrameGrabbed(synchronous ? CreateEvent(NULL, FALSE, TRUE, NULL): 0),
+    ig_hFinish(CreateEvent(NULL, TRUE, FALSE, NULL))
+{}
+
 ImageGrabber::~ImageGrabber(void)
 {
     if (ig_pSession)
     {
         ig_pSession->Shutdown();
     }
-    //SafeRelease(&ig_pSession);
-    //SafeRelease(&ig_pTopology);
+
+    CloseHandle(ig_hFinish);
+
+    if (ig_Synchronous)
+    {
+        CloseHandle(ig_hFrameReady);
+        CloseHandle(ig_hFrameGrabbed);
+    }
+
+    SafeRelease(&ig_pSession);
+    SafeRelease(&ig_pTopology);
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
-    DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Destroing instance of the ImageGrabber class \n", ig_DeviceID);
+
+    DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Destroing instance of the ImageGrabber class\n", ig_DeviceID);
 }
+
 HRESULT ImageGrabber::initImageGrabber(IMFMediaSource *pSource, GUID VideoFormat)
 {
-    IMFActivate *pSinkActivate = NULL;
-    IMFMediaType *pType = NULL;
-    IMFPresentationDescriptor *pPD = NULL;
-    IMFStreamDescriptor *pSD = NULL;
-    IMFMediaTypeHandler *pHandler = NULL;
-    IMFMediaType *pCurrentType = NULL;
+    ComPtr<IMFActivate> pSinkActivate = NULL;
+    ComPtr<IMFMediaType> pType = NULL;
+    ComPtr<IMFPresentationDescriptor> pPD = NULL;
+    ComPtr<IMFStreamDescriptor> pSD = NULL;
+    ComPtr<IMFMediaTypeHandler> pHandler = NULL;
+    ComPtr<IMFMediaType> pCurrentType = NULL;
     HRESULT hr = S_OK;
     MediaType MT;
      // Clean up.
@@ -871,30 +940,32 @@ HRESULT ImageGrabber::initImageGrabber(IMFMediaSource *pSource, GUID VideoFormat
     ig_pSource = pSource;
     hr = pSource->CreatePresentationDescriptor(&pPD);
     if (FAILED(hr))
+    {
         goto err;
+    }
     BOOL fSelected;
     hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD);
-    if (FAILED(hr))
+    if (FAILED(hr)) {
         goto err;
+    }
     hr = pSD->GetMediaTypeHandler(&pHandler);
-    if (FAILED(hr))
+    if (FAILED(hr)) {
         goto err;
+    }
     DWORD cTypes = 0;
     hr = pHandler->GetMediaTypeCount(&cTypes);
-    if (FAILED(hr))
+    if (FAILED(hr)) {
         goto err;
+    }
     if(cTypes > 0)
     {
         hr = pHandler->GetCurrentMediaType(&pCurrentType);
-        if (FAILED(hr))
+        if (FAILED(hr)) {
             goto err;
-        MT = FormatReader::Read(pCurrentType);
+        }
+        MT = FormatReader::Read(pCurrentType.Get());
     }
 err:
-    SafeRelease(&pPD);
-    SafeRelease(&pSD);
-    SafeRelease(&pHandler);
-    SafeRelease(&pCurrentType);
     unsigned int sizeRawImage = 0;
     if(VideoFormat == MFVideoFormat_RGB24)
     {
@@ -910,17 +981,17 @@ err:
     // Configure the media type that the Sample Grabber will receive.
     // Setting the major and subtype is usually enough for the topology loader
     // to resolve the topology.
-    CHECK_HR(hr = MFCreateMediaType(&pType));
+    CHECK_HR(hr = MFCreateMediaType(pType.GetAddressOf()));
     CHECK_HR(hr = pType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
     CHECK_HR(hr = pType->SetGUID(MF_MT_SUBTYPE, VideoFormat));
     // Create the sample grabber sink.
-    CHECK_HR(hr = MFCreateSampleGrabberSinkActivate(pType, this, &pSinkActivate));
+    CHECK_HR(hr = MFCreateSampleGrabberSinkActivate(pType.Get(), this, pSinkActivate.GetAddressOf()));
     // To run as fast as possible, set this attribute (requires Windows 7):
     CHECK_HR(hr = pSinkActivate->SetUINT32(MF_SAMPLEGRABBERSINK_IGNORE_CLOCK, TRUE));
     // Create the Media Session.
     CHECK_HR(hr = MFCreateMediaSession(NULL, &ig_pSession));
     // Create the topology.
-    CHECK_HR(hr = CreateTopology(pSource, pSinkActivate, &ig_pTopology));
+    CHECK_HR(hr = CreateTopology(pSource, pSinkActivate.Get(), &ig_pTopology));
 done:
     // Clean up.
     if (FAILED(hr))
@@ -932,10 +1003,10 @@ done:
         SafeRelease(&ig_pSession);
         SafeRelease(&ig_pTopology);
     }
-    SafeRelease(&pSinkActivate);
-    SafeRelease(&pType);
+
     return hr;
 }
+
 void ImageGrabber::stopGrabbing()
 {
     if(ig_pSession)
@@ -943,16 +1014,17 @@ void ImageGrabber::stopGrabbing()
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
     DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Stopping of of grabbing of images\n", ig_DeviceID);
 }
+
 HRESULT ImageGrabber::startGrabbing(void)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
-    IMFMediaEvent *pEvent = NULL;
+    ComPtr<IMFMediaEvent> pEvent = NULL;
     PROPVARIANT var;
     PropVariantInit(&var);
     HRESULT hr = S_OK;
-    CHECK_HR(hr = ig_pSession->SetTopology(0, ig_pTopology));
-    CHECK_HR(hr = ig_pSession->Start(&GUID_NULL, &var));
+    hr = ig_pSession->SetTopology(0, ig_pTopology);
     DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Start Grabbing of the images\n", ig_DeviceID);
+    hr = ig_pSession->Start(&GUID_NULL, &var);
     for(;;)
     {
         HRESULT hrStatus = S_OK;
@@ -992,27 +1064,41 @@ HRESULT ImageGrabber::startGrabbing(void)
             DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MEVideoCaptureDeviceRemoved \n", ig_DeviceID);
             break;
         }
-        SafeRelease(&pEvent);
+        if ((met == MEError) || (met == MENonFatalError))
+        {
+            pEvent->GetStatus(&hrStatus);
+            DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MEError | MENonFatalError: %u\n", ig_DeviceID, hrStatus);
+            break;
+        }
     }
     DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Finish startGrabbing \n", ig_DeviceID);
+
 done:
-    SafeRelease(&pEvent);
-    SafeRelease(&ig_pSession);
-    SafeRelease(&ig_pTopology);
+    SetEvent(ig_hFinish);
+
     return hr;
 }
+
+void ImageGrabber::pauseGrabbing()
+{
+}
+
+void ImageGrabber::resumeGrabbing()
+{
+}
+
 HRESULT ImageGrabber::CreateTopology(IMFMediaSource *pSource, IMFActivate *pSinkActivate, IMFTopology **ppTopo)
 {
-    IMFTopology *pTopology = NULL;
-    IMFPresentationDescriptor *pPD = NULL;
-    IMFStreamDescriptor *pSD = NULL;
-    IMFMediaTypeHandler *pHandler = NULL;
-    IMFTopologyNode *pNode1 = NULL;
-    IMFTopologyNode *pNode2 = NULL;
+    IMFTopology* pTopology = NULL;
+    ComPtr<IMFPresentationDescriptor> pPD = NULL;
+    ComPtr<IMFStreamDescriptor> pSD = NULL;
+    ComPtr<IMFMediaTypeHandler> pHandler = NULL;
+    ComPtr<IMFTopologyNode> pNode1 = NULL;
+    ComPtr<IMFTopologyNode> pNode2 = NULL;
     HRESULT hr = S_OK;
     DWORD cStreams = 0;
     CHECK_HR(hr = MFCreateTopology(&pTopology));
-    CHECK_HR(hr = pSource->CreatePresentationDescriptor(&pPD));
+    CHECK_HR(hr = pSource->CreatePresentationDescriptor(pPD.GetAddressOf()));
     CHECK_HR(hr = pPD->GetStreamDescriptorCount(&cStreams));
     for (DWORD i = 0; i < cStreams; i++)
     {
@@ -1024,29 +1110,23 @@ HRESULT ImageGrabber::CreateTopology(IMFMediaSource *pSource, IMFActivate *pSink
         CHECK_HR(hr = pHandler->GetMajorType(&majorType));
         if (majorType == MFMediaType_Video && fSelected)
         {
-            CHECK_HR(hr = AddSourceNode(pTopology, pSource, pPD, pSD, &pNode1));
-            CHECK_HR(hr = AddOutputNode(pTopology, pSinkActivate, 0, &pNode2));
-            CHECK_HR(hr = pNode1->ConnectOutput(0, pNode2, 0));
+            CHECK_HR(hr = AddSourceNode(pTopology, pSource, pPD.Get(), pSD.Get(), pNode1.GetAddressOf()));
+            CHECK_HR(hr = AddOutputNode(pTopology, pSinkActivate, 0, pNode2.GetAddressOf()));
+            CHECK_HR(hr = pNode1->ConnectOutput(0, pNode2.Get(), 0));
             break;
         }
         else
         {
             CHECK_HR(hr = pPD->DeselectStream(i));
         }
-        SafeRelease(&pSD);
-        SafeRelease(&pHandler);
     }
     *ppTopo = pTopology;
     (*ppTopo)->AddRef();
+
 done:
-    SafeRelease(&pTopology);
-    SafeRelease(&pNode1);
-    SafeRelease(&pNode2);
-    SafeRelease(&pPD);
-    SafeRelease(&pSD);
-    SafeRelease(&pHandler);
     return hr;
 }
+
 HRESULT ImageGrabber::AddSourceNode(
     IMFTopology *pTopology,           // Topology.
     IMFMediaSource *pSource,          // Media source.
@@ -1054,43 +1134,45 @@ HRESULT ImageGrabber::AddSourceNode(
     IMFStreamDescriptor *pSD,         // Stream descriptor.
     IMFTopologyNode **ppNode)         // Receives the node pointer.
 {
-    IMFTopologyNode *pNode = NULL;
+    ComPtr<IMFTopologyNode> pNode = NULL;
     HRESULT hr = S_OK;
-    CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_SOURCESTREAM_NODE, &pNode));
+    CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_SOURCESTREAM_NODE, pNode.GetAddressOf()));
     CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_SOURCE, pSource));
     CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_PRESENTATION_DESCRIPTOR, pPD));
     CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_STREAM_DESCRIPTOR, pSD));
-    CHECK_HR(hr = pTopology->AddNode(pNode));
+    CHECK_HR(hr = pTopology->AddNode(pNode.Get()));
     // Return the pointer to the caller.
-    *ppNode = pNode;
+    *ppNode = pNode.Get();
     (*ppNode)->AddRef();
+
 done:
-    SafeRelease(&pNode);
     return hr;
 }
+
 HRESULT ImageGrabber::AddOutputNode(
     IMFTopology *pTopology,     // Topology.
     IMFActivate *pActivate,     // Media sink activation object.
     DWORD dwId,                 // Identifier of the stream sink.
     IMFTopologyNode **ppNode)   // Receives the node pointer.
 {
-    IMFTopologyNode *pNode = NULL;
+    ComPtr<IMFTopologyNode> pNode = NULL;
     HRESULT hr = S_OK;
-    CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_OUTPUT_NODE, &pNode));
+    CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_OUTPUT_NODE, pNode.GetAddressOf()));
     CHECK_HR(hr = pNode->SetObject(pActivate));
     CHECK_HR(hr = pNode->SetUINT32(MF_TOPONODE_STREAMID, dwId));
     CHECK_HR(hr = pNode->SetUINT32(MF_TOPONODE_NOSHUTDOWN_ON_REMOVE, FALSE));
-    CHECK_HR(hr = pTopology->AddNode(pNode));
+    CHECK_HR(hr = pTopology->AddNode(pNode.Get()));
     // Return the pointer to the caller.
-    *ppNode = pNode;
+    *ppNode = pNode.Get();
     (*ppNode)->AddRef();
+
 done:
-    SafeRelease(&pNode);
     return hr;
 }
-HRESULT ImageGrabber::CreateInstance(ImageGrabber **ppIG, unsigned int deviceID)
+
+HRESULT ImageGrabber::CreateInstance(ImageGrabber **ppIG, unsigned int deviceID, bool synchronious)
 {
-    *ppIG = new (std::nothrow) ImageGrabber(deviceID);
+    *ppIG = new (std::nothrow) ImageGrabber(deviceID, synchronious);
     if (ppIG == NULL)
     {
         return E_OUTOFMEMORY;
@@ -1099,6 +1181,7 @@ HRESULT ImageGrabber::CreateInstance(ImageGrabber **ppIG, unsigned int deviceID)
     DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Creating instance of ImageGrabber\n", deviceID);
     return S_OK;
 }
+
 STDMETHODIMP ImageGrabber::QueryInterface(REFIID riid, void** ppv)
 {
     HRESULT hr = E_NOINTERFACE;
@@ -1119,10 +1202,12 @@ STDMETHODIMP ImageGrabber::QueryInterface(REFIID riid, void** ppv)
     }
     return hr;
 }
+
 STDMETHODIMP_(ULONG) ImageGrabber::AddRef()
 {
     return InterlockedIncrement(&m_cRef);
 }
+
 STDMETHODIMP_(ULONG) ImageGrabber::Release()
 {
     ULONG cRef = InterlockedDecrement(&m_cRef);
@@ -1132,38 +1217,45 @@ STDMETHODIMP_(ULONG) ImageGrabber::Release()
     }
     return cRef;
 }
+
 STDMETHODIMP ImageGrabber::OnClockStart(MFTIME hnsSystemTime, LONGLONG llClockStartOffset)
 {
     (void)hnsSystemTime;
     (void)llClockStartOffset;
     return S_OK;
 }
+
 STDMETHODIMP ImageGrabber::OnClockStop(MFTIME hnsSystemTime)
 {
     (void)hnsSystemTime;
     return S_OK;
 }
+
 STDMETHODIMP ImageGrabber::OnClockPause(MFTIME hnsSystemTime)
 {
     (void)hnsSystemTime;
     return S_OK;
 }
+
 STDMETHODIMP ImageGrabber::OnClockRestart(MFTIME hnsSystemTime)
 {
     (void)hnsSystemTime;
     return S_OK;
 }
+
 STDMETHODIMP ImageGrabber::OnClockSetRate(MFTIME hnsSystemTime, float flRate)
 {
     (void)flRate;
     (void)hnsSystemTime;
     return S_OK;
 }
+
 STDMETHODIMP ImageGrabber::OnSetPresentationClock(IMFPresentationClock* pClock)
 {
     (void)pClock;
     return S_OK;
 }
+
 STDMETHODIMP ImageGrabber::OnProcessSample(REFGUID guidMajorMediaType, DWORD dwSampleFlags,
     LONGLONG llSampleTime, LONGLONG llSampleDuration, const BYTE * pSampleBuffer,
     DWORD dwSampleSize)
@@ -1173,6 +1265,16 @@ STDMETHODIMP ImageGrabber::OnProcessSample(REFGUID guidMajorMediaType, DWORD dwS
     (void)dwSampleFlags;
     (void)llSampleDuration;
     (void)dwSampleSize;
+
+    HANDLE tmp[] = {ig_hFinish, ig_hFrameGrabbed, NULL};
+
+    DWORD status = WaitForMultipleObjects(2, tmp, FALSE, INFINITE);
+    if (status == WAIT_OBJECT_0)
+    {
+        printf("OnProcessFrame called after ig_hFinish event\n");
+        return S_OK;
+    }
+
     if(ig_RIE)
     {
         ig_RIFirst->fastCopy(pSampleBuffer);
@@ -1183,27 +1285,41 @@ STDMETHODIMP ImageGrabber::OnProcessSample(REFGUID guidMajorMediaType, DWORD dwS
         ig_RISecond->fastCopy(pSampleBuffer);
         ig_RIOut = ig_RISecond;
     }
-    ig_RIE = !ig_RIE;
+
+    if (ig_Synchronous)
+    {
+        SetEvent(ig_hFrameReady);
+    }
+    else
+    {
+        ig_RIE = !ig_RIE;
+    }
+
     return S_OK;
 }
+
 STDMETHODIMP ImageGrabber::OnShutdown()
 {
+    SetEvent(ig_hFinish);
     return S_OK;
 }
+
 RawImage *ImageGrabber::getRawImage()
 {
     return ig_RIOut;
 }
+
 DWORD WINAPI MainThreadFunction( LPVOID lpParam )
 {
     ImageGrabberThread *pIGT = (ImageGrabberThread *)lpParam;
     pIGT->run();
     return 0;
 }
-HRESULT ImageGrabberThread::CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID)
+
+HRESULT ImageGrabberThread::CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID, bool synchronious)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
-    *ppIGT = new (std::nothrow) ImageGrabberThread(pSource, deviceID);
+    *ppIGT = new (std::nothrow) ImageGrabberThread(pSource, deviceID, synchronious);
     if (ppIGT == NULL)
     {
         DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Memory cannot be allocated\n", deviceID);
@@ -1213,10 +1329,14 @@ HRESULT ImageGrabberThread::CreateInstance(ImageGrabberThread **ppIGT, IMFMediaS
         DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Creating of the instance of ImageGrabberThread\n", deviceID);
     return S_OK;
 }
-ImageGrabberThread::ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID): igt_Handle(NULL), igt_stop(false)
+
+ImageGrabberThread::ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID, bool synchronious):
+    igt_func(NULL),
+    igt_Handle(NULL),
+    igt_stop(false)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
-    HRESULT hr = ImageGrabber::CreateInstance(&igt_pImageGrabber, deviceID);
+    HRESULT hr = ImageGrabber::CreateInstance(&igt_pImageGrabber, deviceID, synchronious);
     igt_DeviceID = deviceID;
     if(SUCCEEDED(hr))
     {
@@ -1235,6 +1355,7 @@ ImageGrabberThread::ImageGrabberThread(IMFMediaSource *pSource, unsigned int dev
         DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i There is a problem with creation of the instance of the ImageGrabber class\n", deviceID);
     }
 }
+
 void ImageGrabberThread::setEmergencyStopEvent(void *userData, void(*func)(int, void *))
 {
     if(func)
@@ -1243,12 +1364,16 @@ void ImageGrabberThread::setEmergencyStopEvent(void *userData, void(*func)(int,
         igt_userData = userData;
     }
 }
+
 ImageGrabberThread::~ImageGrabberThread(void)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
     DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Destroing ImageGrabberThread\n", igt_DeviceID);
+    if (igt_Handle)
+        WaitForSingleObject(igt_Handle, INFINITE);
     delete igt_pImageGrabber;
 }
+
 void ImageGrabberThread::stop()
 {
     igt_stop = true;
@@ -1257,16 +1382,18 @@ void ImageGrabberThread::stop()
         igt_pImageGrabber->stopGrabbing();
     }
 }
+
 void ImageGrabberThread::start()
 {
     igt_Handle = CreateThread(
-            NULL,                   // default security attributes
-            0,                      // use default stack size
-            MainThreadFunction,       // thread function name
-            this,          // argument to thread function
-            0,                      // use default creation flags
+            NULL,                  // default security attributes
+            0,                     // use default stack size
+            MainThreadFunction,    // thread function name
+            this,                  // argument to thread function
+            0,                     // use default creation flags
             &igt_ThreadIdArray);   // returns the thread identifier
 }
+
 void ImageGrabberThread::run()
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -1294,10 +1421,12 @@ void ImageGrabberThread::run()
     else
         DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Finish thread\n", igt_DeviceID);
 }
+
 ImageGrabber *ImageGrabberThread::getImageGrabber()
 {
     return igt_pImageGrabber;
 }
+
 Media_Foundation::Media_Foundation(void)
 {
     HRESULT hr = MFStartup(MF_VERSION);
@@ -1307,6 +1436,7 @@ Media_Foundation::Media_Foundation(void)
         DPO->printOut(L"MEDIA FOUNDATION: It cannot be created!!!\n");
     }
 }
+
 Media_Foundation::~Media_Foundation(void)
 {
     HRESULT hr = MFShutdown();
@@ -1316,12 +1446,13 @@ Media_Foundation::~Media_Foundation(void)
         DPO->printOut(L"MEDIA FOUNDATION: Resources cannot be released\n");
     }
 }
+
 bool Media_Foundation::buildListOfDevices()
 {
     HRESULT hr = S_OK;
-    IMFAttributes *pAttributes = NULL;
+    ComPtr<IMFAttributes> pAttributes = NULL;
     CoInitialize(NULL);
-    hr = MFCreateAttributes(&pAttributes, 1);
+    hr = MFCreateAttributes(pAttributes.GetAddressOf(), 1);
     if (SUCCEEDED(hr))
     {
         hr = pAttributes->SetGUID(
@@ -1332,40 +1463,46 @@ bool Media_Foundation::buildListOfDevices()
     if (SUCCEEDED(hr))
     {
         videoDevices *vDs = &videoDevices::getInstance();
-        hr = vDs->initDevices(pAttributes);
+        hr = vDs->initDevices(pAttributes.Get());
     }
     else
     {
        DebugPrintOut *DPO = &DebugPrintOut::getInstance();
        DPO->printOut(L"MEDIA FOUNDATION: The access to the video cameras denied\n");
     }
-    SafeRelease(&pAttributes);
+
     return (SUCCEEDED(hr));
 }
+
 Media_Foundation& Media_Foundation::getInstance()
 {
     static Media_Foundation instance;
     return instance;
 }
+
 RawImage::RawImage(unsigned int size): ri_new(false), ri_pixels(NULL)
 {
     ri_size = size;
     ri_pixels = new unsigned char[size];
     memset((void *)ri_pixels,0,ri_size);
 }
+
 bool RawImage::isNew()
 {
     return ri_new;
 }
+
 unsigned int RawImage::getSize()
 {
     return ri_size;
 }
+
 RawImage::~RawImage(void)
 {
     delete []ri_pixels;
     ri_pixels = NULL;
 }
+
 long RawImage::CreateInstance(RawImage **ppRImage,unsigned int size)
 {
     *ppRImage = new (std::nothrow) RawImage(size);
@@ -1375,25 +1512,30 @@ long RawImage::CreateInstance(RawImage **ppRImage,unsigned int size)
     }
     return S_OK;
 }
+
 void RawImage::setCopy(const BYTE * pSampleBuffer)
 {
     memcpy(ri_pixels, pSampleBuffer, ri_size);
     ri_new = true;
 }
+
 void RawImage::fastCopy(const BYTE * pSampleBuffer)
 {
     memcpy(ri_pixels, pSampleBuffer, ri_size);
     ri_new = true;
 }
+
 unsigned char * RawImage::getpPixels()
 {
     ri_new = false;
     return ri_pixels;
 }
+
 videoDevice::videoDevice(void): vd_IsSetuped(false), vd_LockOut(OpenLock), vd_pFriendlyName(NULL),
     vd_Width(0), vd_Height(0), vd_pSource(NULL), vd_func(NULL), vd_userData(NULL)
 {
 }
+
 void videoDevice::setParametrs(CamParametrs parametrs)
 {
     if(vd_IsSetuped)
@@ -1428,6 +1570,7 @@ void videoDevice::setParametrs(CamParametrs parametrs)
         }
     }
 }
+
 CamParametrs videoDevice::getParametrs()
 {
     CamParametrs out;
@@ -1472,6 +1615,7 @@ CamParametrs videoDevice::getParametrs()
     }
     return out;
 }
+
 long videoDevice::resetDevice(IMFActivate *pActivate)
 {
     HRESULT hr = -1;
@@ -1503,6 +1647,7 @@ long videoDevice::resetDevice(IMFActivate *pActivate)
     }
     return hr;
 }
+
 long videoDevice::readInfoOfDevice(IMFActivate *pActivate, unsigned int Num)
 {
     HRESULT hr = -1;
@@ -1510,6 +1655,7 @@ long videoDevice::readInfoOfDevice(IMFActivate *pActivate, unsigned int Num)
     hr = resetDevice(pActivate);
     return hr;
 }
+
 long videoDevice::checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice)
 {
     HRESULT hr = S_OK;
@@ -1568,14 +1714,15 @@ long videoDevice::checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice)
     }
     return hr;
 }
+
 long videoDevice::initDevice()
 {
     HRESULT hr = -1;
-    IMFAttributes *pAttributes = NULL;
-    IMFActivate * vd_pActivate= NULL;
+    ComPtr<IMFAttributes> pAttributes = NULL;
+    IMFActivate *vd_pActivate = NULL;
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
     CoInitialize(NULL);
-    hr = MFCreateAttributes(&pAttributes, 1);
+    hr = MFCreateAttributes(pAttributes.GetAddressOf(), 1);
     if (SUCCEEDED(hr))
     {
         hr = pAttributes->SetGUID(
@@ -1585,7 +1732,7 @@ long videoDevice::initDevice()
     }
     if (SUCCEEDED(hr))
     {
-        hr = checkDevice(pAttributes, &vd_pActivate);
+        hr = checkDevice(pAttributes.Get(), &vd_pActivate);
         if (SUCCEEDED(hr) && vd_pActivate)
         {
             SafeRelease(&vd_pSource);
@@ -1607,9 +1754,10 @@ long videoDevice::initDevice()
     {
         DPO->printOut(L"VIDEODEVICE %i: The attribute of video cameras cannot be getting \n", vd_CurrentNumber);
     }
-    SafeRelease(&pAttributes);
+
     return hr;
 }
+
 MediaType videoDevice::getFormat(unsigned int id)
 {
     if(id < vd_CurrentFormats.size())
@@ -1713,6 +1861,7 @@ int videoDevice::findType(unsigned int size, unsigned int frameRate)
         return 0;
     return VN[0];
 }
+
 void videoDevice::buildLibraryofTypes()
 {
     unsigned int size;
@@ -1722,7 +1871,7 @@ void videoDevice::buildLibraryofTypes()
     for(; i != vd_CurrentFormats.end(); i++)
     {
         size = (*i).MF_MT_FRAME_SIZE;
-        framerate = (*i).MF_MT_FRAME_RATE;
+        framerate = (*i).MF_MT_FRAME_RATE_NUMERATOR;
         FrameRateMap FRM = vd_CaptureFormats[size];
         SUBTYPEMap STM = FRM[framerate];
         String subType((*i).pMF_MT_SUBTYPEName);
@@ -1734,45 +1883,45 @@ void videoDevice::buildLibraryofTypes()
         count++;
     }
 }
+
 long videoDevice::setDeviceFormat(IMFMediaSource *pSource, unsigned long  dwFormatIndex)
 {
-    IMFPresentationDescriptor *pPD = NULL;
-    IMFStreamDescriptor *pSD = NULL;
-    IMFMediaTypeHandler *pHandler = NULL;
-    IMFMediaType *pType = NULL;
-    HRESULT hr = pSource->CreatePresentationDescriptor(&pPD);
+    ComPtr<IMFPresentationDescriptor> pPD = NULL;
+    ComPtr<IMFStreamDescriptor> pSD = NULL;
+    ComPtr<IMFMediaTypeHandler> pHandler = NULL;
+    ComPtr<IMFMediaType> pType = NULL;
+    HRESULT hr = pSource->CreatePresentationDescriptor(pPD.GetAddressOf());
     if (FAILED(hr))
     {
         goto done;
     }
     BOOL fSelected;
-    hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD);
+    hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, pSD.GetAddressOf());
     if (FAILED(hr))
     {
         goto done;
     }
-    hr = pSD->GetMediaTypeHandler(&pHandler);
+    hr = pSD->GetMediaTypeHandler(pHandler.GetAddressOf());
     if (FAILED(hr))
     {
         goto done;
     }
-    hr = pHandler->GetMediaTypeByIndex((DWORD)dwFormatIndex, &pType);
+    hr = pHandler->GetMediaTypeByIndex((DWORD)dwFormatIndex, pType.GetAddressOf());
     if (FAILED(hr))
     {
         goto done;
     }
-    hr = pHandler->SetCurrentMediaType(pType);
+    hr = pHandler->SetCurrentMediaType(pType.Get());
+
 done:
-    SafeRelease(&pPD);
-    SafeRelease(&pSD);
-    SafeRelease(&pHandler);
-    SafeRelease(&pType);
     return hr;
 }
+
 bool videoDevice::isDeviceSetup()
 {
     return vd_IsSetuped;
 }
+
 RawImage * videoDevice::getRawImageOut()
 {
     if(!vd_IsSetuped) return NULL;
@@ -1785,6 +1934,7 @@ RawImage * videoDevice::getRawImageOut()
     }
     return NULL;
 }
+
 bool videoDevice::isFrameNew()
 {
     if(!vd_IsSetuped) return false;
@@ -1809,16 +1959,19 @@ bool videoDevice::isFrameNew()
     }
     return false;
 }
+
 bool videoDevice::isDeviceMediaSource()
 {
     if(vd_LockOut == MediaSourceLock) return true;
     return false;
 }
+
 bool videoDevice::isDeviceRawDataSource()
 {
     if(vd_LockOut == RawDataLock) return true;
     return false;
 }
+
 bool videoDevice::setupDevice(unsigned int id)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -1849,15 +2002,18 @@ bool videoDevice::setupDevice(unsigned int id)
         return false;
     }
 }
+
 bool videoDevice::setupDevice(unsigned int w, unsigned int h, unsigned int idealFramerate)
 {
     unsigned int id = findType(w * h, idealFramerate);
     return setupDevice(id);
 }
+
 wchar_t *videoDevice::getName()
 {
     return vd_pFriendlyName;
 }
+
 videoDevice::~videoDevice(void)
 {
     closeDevice();
@@ -1865,24 +2021,25 @@ videoDevice::~videoDevice(void)
     if(vd_pFriendlyName)
         CoTaskMemFree(vd_pFriendlyName);
 }
-long videoDevice::enumerateCaptureFormats(IMFMediaSource *pSource)
+
+HRESULT videoDevice::enumerateCaptureFormats(IMFMediaSource *pSource)
 {
-    IMFPresentationDescriptor *pPD = NULL;
-    IMFStreamDescriptor *pSD = NULL;
-    IMFMediaTypeHandler *pHandler = NULL;
-    IMFMediaType *pType = NULL;
-    HRESULT hr = pSource->CreatePresentationDescriptor(&pPD);
+    ComPtr<IMFPresentationDescriptor> pPD = NULL;
+    ComPtr<IMFStreamDescriptor> pSD = NULL;
+    ComPtr<IMFMediaTypeHandler> pHandler = NULL;
+    ComPtr<IMFMediaType> pType = NULL;
+    HRESULT hr = pSource->CreatePresentationDescriptor(pPD.GetAddressOf());
     if (FAILED(hr))
     {
         goto done;
     }
     BOOL fSelected;
-    hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD);
+    hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, pSD.GetAddressOf());
     if (FAILED(hr))
     {
         goto done;
     }
-    hr = pSD->GetMediaTypeHandler(&pHandler);
+    hr = pSD->GetMediaTypeHandler(pHandler.GetAddressOf());
     if (FAILED(hr))
     {
         goto done;
@@ -1895,24 +2052,22 @@ long videoDevice::enumerateCaptureFormats(IMFMediaSource *pSource)
     }
     for (DWORD i = 0; i < cTypes; i++)
     {
-        hr = pHandler->GetMediaTypeByIndex(i, &pType);
+        hr = pHandler->GetMediaTypeByIndex(i, pType.GetAddressOf());
         if (FAILED(hr))
         {
             goto done;
         }
-        MediaType MT = FormatReader::Read(pType);
+        MediaType MT = FormatReader::Read(pType.Get());
         vd_CurrentFormats.push_back(MT);
-        SafeRelease(&pType);
     }
+
 done:
-    SafeRelease(&pPD);
-    SafeRelease(&pSD);
-    SafeRelease(&pHandler);
-    SafeRelease(&pType);
     return hr;
 }
+
 videoDevices::videoDevices(void): count(0)
 {}
+
 void videoDevices::clearDevices()
 {
     std::vector<videoDevice *>::iterator i = vds_Devices.begin();
@@ -1920,10 +2075,12 @@ void videoDevices::clearDevices()
         delete (*i);
     vds_Devices.clear();
 }
+
 videoDevices::~videoDevices(void)
 {
     clearDevices();
 }
+
 videoDevice * videoDevices::getDevice(unsigned int i)
 {
     if(i >= vds_Devices.size())
@@ -1936,6 +2093,7 @@ videoDevice * videoDevices::getDevice(unsigned int i)
     }
     return vds_Devices[i];
 }
+
 long videoDevices::initDevices(IMFAttributes *pAttributes)
 {
     HRESULT hr = S_OK;
@@ -1965,15 +2123,18 @@ long videoDevices::initDevices(IMFAttributes *pAttributes)
     }
     return hr;
 }
-size_t videoDevices::getCount()
+
+unsigned int videoDevices::getCount()
 {
     return vds_Devices.size();
 }
+
 videoDevices& videoDevices::getInstance()
 {
     static videoDevices instance;
     return instance;
 }
+
 Parametr::Parametr()
 {
     CurrentValue = 0;
@@ -1983,6 +2144,7 @@ Parametr::Parametr()
     Default = 0;
     Flag = 0;
 }
+
 MediaType::MediaType()
 {
     pMF_MT_AM_FORMAT_TYPEName = NULL;
@@ -1990,10 +2152,12 @@ MediaType::MediaType()
     pMF_MT_SUBTYPEName = NULL;
     Clear();
 }
+
 MediaType::~MediaType()
 {
     Clear();
 }
+
 void MediaType::Clear()
 {
     MF_MT_FRAME_SIZE = 0;
@@ -2005,8 +2169,8 @@ void MediaType::Clear()
     MF_MT_VIDEO_CHROMA_SITING = 0;
     MF_MT_FIXED_SIZE_SAMPLES = 0;
     MF_MT_VIDEO_NOMINAL_RANGE = 0;
-    MF_MT_FRAME_RATE = 0;
-    MF_MT_FRAME_RATE_low = 0;
+    MF_MT_FRAME_RATE_NUMERATOR = 0;
+    MF_MT_FRAME_RATE_DENOMINATOR = 0;
     MF_MT_PIXEL_ASPECT_RATIO = 0;
     MF_MT_PIXEL_ASPECT_RATIO_low = 0;
     MF_MT_ALL_SAMPLES_INDEPENDENT = 0;
@@ -2021,6 +2185,7 @@ void MediaType::Clear()
     memset(&MF_MT_AM_FORMAT_TYPE, 0, sizeof(GUID));
     memset(&MF_MT_SUBTYPE, 0, sizeof(GUID));
 }
+
 videoInput::videoInput(void): accessToDevices(false)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2029,6 +2194,7 @@ videoInput::videoInput(void): accessToDevices(false)
     if(!accessToDevices)
         DPO->printOut(L"INITIALIZATION: Ther is not any suitable video device\n");
 }
+
 void videoInput::updateListOfDevices()
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2037,11 +2203,13 @@ void videoInput::updateListOfDevices()
     if(!accessToDevices)
         DPO->printOut(L"UPDATING: Ther is not any suitable video device\n");
 }
+
 videoInput::~videoInput(void)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
     DPO->printOut(L"\n***** CLOSE VIDEOINPUT LIBRARY - 2013 *****\n\n");
 }
+
 IMFMediaSource *videoInput::getMediaSource(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2063,6 +2231,7 @@ IMFMediaSource *videoInput::getMediaSource(int deviceID)
     }
     return NULL;
 }
+
 bool videoInput::setupDevice(int deviceID, unsigned int id)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2089,6 +2258,7 @@ bool videoInput::setupDevice(int deviceID, unsigned int id)
     }
     return false;
 }
+
 bool videoInput::setupDevice(int deviceID, unsigned int w, unsigned int h, unsigned int idealFramerate)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2115,6 +2285,7 @@ bool videoInput::setupDevice(int deviceID, unsigned int w, unsigned int h, unsig
     }
     return false;
 }
+
 MediaType videoInput::getFormat(int deviceID, unsigned int id)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2136,6 +2307,7 @@ MediaType videoInput::getFormat(int deviceID, unsigned int id)
     }
     return MediaType();
 }
+
 bool videoInput::isDeviceSetup(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2157,6 +2329,7 @@ bool videoInput::isDeviceSetup(int deviceID)
     }
     return false;
 }
+
 bool videoInput::isDeviceMediaSource(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2178,6 +2351,7 @@ bool videoInput::isDeviceMediaSource(int deviceID)
     }
     return false;
 }
+
 bool videoInput::isDeviceRawDataSource(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2202,6 +2376,7 @@ bool videoInput::isDeviceRawDataSource(int deviceID)
     }
     return false;
 }
+
 bool videoInput::isFrameNew(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2230,6 +2405,7 @@ bool videoInput::isFrameNew(int deviceID)
     }
     return false;
 }
+
 unsigned int videoInput::getCountFormats(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2251,12 +2427,14 @@ unsigned int videoInput::getCountFormats(int deviceID)
     }
     return 0;
 }
+
 void videoInput::closeAllDevices()
 {
     videoDevices *VDS = &videoDevices::getInstance();
     for(unsigned int i = 0; i < VDS->getCount(); i++)
         closeDevice(i);
 }
+
 void videoInput::setParametrs(int deviceID, CamParametrs parametrs)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2277,6 +2455,7 @@ void videoInput::setParametrs(int deviceID, CamParametrs parametrs)
         DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n");
     }
 }
+
 CamParametrs videoInput::getParametrs(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2299,6 +2478,7 @@ CamParametrs videoInput::getParametrs(int deviceID)
     }
     return out;
 }
+
 void videoInput::closeDevice(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2319,6 +2499,7 @@ void videoInput::closeDevice(int deviceID)
         DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n");
     }
 }
+
 unsigned int videoInput::getWidth(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2340,6 +2521,7 @@ unsigned int videoInput::getWidth(int deviceID)
     }
     return 0;
 }
+
 unsigned int videoInput::getHeight(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2361,6 +2543,7 @@ unsigned int videoInput::getHeight(int deviceID)
     }
     return 0;
 }
+
 wchar_t *videoInput::getNameVideoDevice(int deviceID)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2382,6 +2565,7 @@ wchar_t *videoInput::getNameVideoDevice(int deviceID)
     }
     return L"Empty";
 }
+
 unsigned int videoInput::listDevices(bool silent)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2405,20 +2589,24 @@ unsigned int videoInput::listDevices(bool silent)
     }
     return out;
 }
+
 videoInput& videoInput::getInstance()
 {
     static videoInput instance;
     return instance;
 }
+
 bool videoInput::isDevicesAcceable()
 {
     return accessToDevices;
 }
+
 void videoInput::setVerbose(bool state)
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
     DPO->setVerbose(state);
 }
+
 void videoInput::setEmergencyStopEvent(int deviceID, void *userData, void(*func)(int, void *))
 {
     DebugPrintOut *DPO = &DebugPrintOut::getInstance();
@@ -2442,6 +2630,7 @@ void videoInput::setEmergencyStopEvent(int deviceID, void *userData, void(*func)
         DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n");
     }
 }
+
 bool videoInput::getPixels(int deviceID, unsigned char * dstBuffer, bool flipRedAndBlue, bool flipImage)
 {
     bool success = false;
@@ -2491,6 +2680,7 @@ bool videoInput::getPixels(int deviceID, unsigned char * dstBuffer, bool flipRed
     }
     return success;
 }
+
 void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigned int width,
                                 unsigned int height, unsigned int bpp, bool bRGB, bool bFlip)
 {
@@ -2553,6 +2743,7 @@ void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigne
     }
 }
 }
+
 /******* Capturing video from camera via Microsoft Media Foundation **********/
 class CvCaptureCAM_MSMF : public CvCapture
 {
@@ -2568,33 +2759,35 @@ public:
     virtual int getCaptureDomain() { return CV_CAP_MSMF; } // Return the type of the capture object: CV_CAP_VFW, etc...
 protected:
     void init();
-    int index, width, height,fourcc;
-    int widthSet, heightSet;
+    int index, width, height, fourcc;
     IplImage* frame;
     videoInput VI;
 };
+
 struct SuppressVideoInputMessages
 {
     SuppressVideoInputMessages() { videoInput::setVerbose(true); }
 };
+
 static SuppressVideoInputMessages do_it;
+
 CvCaptureCAM_MSMF::CvCaptureCAM_MSMF():
     index(-1),
     width(-1),
     height(-1),
     fourcc(-1),
-    widthSet(-1),
-    heightSet(-1),
-    frame(0),
+    frame(NULL),
     VI(videoInput::getInstance())
 {
     CoInitialize(0);
 }
+
 CvCaptureCAM_MSMF::~CvCaptureCAM_MSMF()
 {
     close();
     CoUninitialize();
 }
+
 void CvCaptureCAM_MSMF::close()
 {
     if( index >= 0 )
@@ -2603,8 +2796,9 @@ void CvCaptureCAM_MSMF::close()
         index = -1;
         cvReleaseImage(&frame);
     }
-    widthSet = heightSet = width = height = -1;
+    width = height = -1;
 }
+
 // Initialize camera input
 bool CvCaptureCAM_MSMF::open( int _index )
 {
@@ -2621,10 +2815,14 @@ bool CvCaptureCAM_MSMF::open( int _index )
     index = try_index;
     return true;
 }
+
 bool CvCaptureCAM_MSMF::grabFrame()
 {
-    return true;
+    while (VI.isDeviceSetup(index) && !VI.isFrameNew(index))
+        Sleep(1);
+    return VI.isDeviceSetup(index);
 }
+
 IplImage* CvCaptureCAM_MSMF::retrieveFrame(int)
 {
     if( !frame || (int)VI.getWidth(index) != frame->width || (int)VI.getHeight(index) != frame->height )
@@ -2637,6 +2835,7 @@ IplImage* CvCaptureCAM_MSMF::retrieveFrame(int)
     VI.getPixels( index, (uchar*)frame->imageData, false, true );
     return frame;
 }
+
 double CvCaptureCAM_MSMF::getProperty( int property_id )
 {
     // image format proprrties
@@ -2646,50 +2845,7 @@ double CvCaptureCAM_MSMF::getProperty( int property_id )
         return VI.getWidth(index);
     case CV_CAP_PROP_FRAME_HEIGHT:
         return VI.getHeight(index);
-    case CV_CAP_PROP_FOURCC:
-        // FIXME: implement method in VideoInput back end
-        //return VI.getFourcc(index);
-        ;
-    case CV_CAP_PROP_FPS:
-        // FIXME: implement method in VideoInput back end
-        //return VI.getFPS(index);
-        ;
     }
-    // video filter properties
-    switch( property_id )
-    {
-    case CV_CAP_PROP_BRIGHTNESS:
-    case CV_CAP_PROP_CONTRAST:
-    case CV_CAP_PROP_HUE:
-    case CV_CAP_PROP_SATURATION:
-    case CV_CAP_PROP_SHARPNESS:
-    case CV_CAP_PROP_GAMMA:
-    case CV_CAP_PROP_MONOCROME:
-    case CV_CAP_PROP_WHITE_BALANCE_BLUE_U:
-    case CV_CAP_PROP_BACKLIGHT:
-    case CV_CAP_PROP_GAIN:
-        // FIXME: implement method in VideoInput back end
-        // if ( VI.getVideoSettingFilter(index, VI.getVideoPropertyFromCV(property_id), min_value,
-        //                               max_value, stepping_delta, current_value, flags,defaultValue) )
-        //     return (double)current_value;
-        return 0.;
-    }
-    // camera properties
-    switch( property_id )
-    {
-    case CV_CAP_PROP_PAN:
-    case CV_CAP_PROP_TILT:
-    case CV_CAP_PROP_ROLL:
-    case CV_CAP_PROP_ZOOM:
-    case CV_CAP_PROP_EXPOSURE:
-    case CV_CAP_PROP_IRIS:
-    case CV_CAP_PROP_FOCUS:
-    // FIXME: implement method in VideoInput back end
-    //     if (VI.getVideoSettingCamera(index,VI.getCameraPropertyFromCV(property_id),min_value,
-    //          max_value,stepping_delta,current_value,flags,defaultValue) ) return (double)current_value;
-        return 0.;
-    }
-    // unknown parameter or value not available
     return -1;
 }
 bool CvCaptureCAM_MSMF::setProperty( int property_id, double value )
@@ -2706,91 +2862,272 @@ bool CvCaptureCAM_MSMF::setProperty( int property_id, double value )
         height = cvRound(value);
         handled = true;
         break;
-    case CV_CAP_PROP_FOURCC:
-        fourcc = (int)(unsigned long)(value);
-        if ( fourcc == -1 ) {
-            // following cvCreateVideo usage will pop up caprturepindialog here if fourcc=-1
-            // TODO - how to create a capture pin dialog
-        }
-        handled = true;
-        break;
-    case CV_CAP_PROP_FPS:
-        // FIXME: implement method in VideoInput back end
-        // int fps = cvRound(value);
-        // if (fps != VI.getFPS(index))
-        // {
-        //     VI.stopDevice(index);
-        //     VI.setIdealFramerate(index,fps);
-        //     if (widthSet > 0 && heightSet > 0)
-        //         VI.setupDevice(index, widthSet, heightSet);
-        //     else
-        //         VI.setupDevice(index);
-        // }
-        // return VI.isDeviceSetup(index);
-        ;
     }
+
     if ( handled ) {
-        // a stream setting
         if( width > 0 && height > 0 )
         {
-            if( width != (int)VI.getWidth(index) || height != (int)VI.getHeight(index) )//|| fourcc != VI.getFourcc(index) )
+            if( width != (int)VI.getWidth(index) || height != (int)VI.getHeight(index)  && VI.isDeviceSetup(index))//|| fourcc != VI.getFourcc(index) )
             {
-                // FIXME: implement method in VideoInput back end
-                // int fps = static_cast<int>(VI.getFPS(index));
-                // VI.stopDevice(index);
-                // VI.setIdealFramerate(index, fps);
-                // VI.setupDeviceFourcc(index, width, height, fourcc);
+                VI.closeDevice(index);
+                VI.setupDevice(index, width, height);
             }
-            bool success = VI.isDeviceSetup(index);
-            if (success)
-            {
-                widthSet = width;
-                heightSet = height;
-                width = height = fourcc = -1;
-            }
-            return success;
+            return VI.isDeviceSetup(index);
         }
         return true;
     }
-    // show video/camera filter dialog
-    // FIXME: implement method in VideoInput back end
-    // if ( property_id == CV_CAP_PROP_SETTINGS ) {
-    //     VI.showSettingsWindow(index);
-    //     return true;
-    // }
-    //video Filter properties
-    switch( property_id )
-    {
-    case CV_CAP_PROP_BRIGHTNESS:
-    case CV_CAP_PROP_CONTRAST:
-    case CV_CAP_PROP_HUE:
-    case CV_CAP_PROP_SATURATION:
-    case CV_CAP_PROP_SHARPNESS:
-    case CV_CAP_PROP_GAMMA:
-    case CV_CAP_PROP_MONOCROME:
-    case CV_CAP_PROP_WHITE_BALANCE_BLUE_U:
-    case CV_CAP_PROP_BACKLIGHT:
-    case CV_CAP_PROP_GAIN:
-        // FIXME: implement method in VideoInput back end
-        //return VI.setVideoSettingFilter(index,VI.getVideoPropertyFromCV(property_id),(long)value);
-        ;
-    }
-    //camera properties
-    switch( property_id )
-    {
-    case CV_CAP_PROP_PAN:
-    case CV_CAP_PROP_TILT:
-    case CV_CAP_PROP_ROLL:
-    case CV_CAP_PROP_ZOOM:
-    case CV_CAP_PROP_EXPOSURE:
-    case CV_CAP_PROP_IRIS:
-    case CV_CAP_PROP_FOCUS:
-        // FIXME: implement method in VideoInput back end
-        //return VI.setVideoSettingCamera(index,VI.getCameraPropertyFromCV(property_id),(long)value);
-        ;
-    }
+
     return false;
 }
+
+class CvCaptureFile_MSMF : public CvCapture
+{
+public:
+    CvCaptureFile_MSMF();
+    virtual ~CvCaptureFile_MSMF();
+
+    virtual bool open( const char* filename );
+    virtual void close();
+
+    virtual double getProperty(int);
+    virtual bool setProperty(int, double);
+    virtual bool grabFrame();
+    virtual IplImage* retrieveFrame(int);
+    virtual int getCaptureDomain() { return CV_CAP_MSMF; }
+protected:
+    ImageGrabberThread* grabberThread;
+    IMFMediaSource* videoFileSource;
+    std::vector<MediaType> captureFormats;
+    int captureFormatIndex;
+    IplImage* frame;
+    bool isOpened;
+
+    HRESULT enumerateCaptureFormats(IMFMediaSource *pSource);
+    HRESULT getSourceDuration(IMFMediaSource *pSource, MFTIME *pDuration);
+};
+
+CvCaptureFile_MSMF::CvCaptureFile_MSMF():
+    grabberThread(NULL),
+    videoFileSource(NULL),
+    captureFormatIndex(0),
+    frame(NULL),
+    isOpened(false)
+{
+    MFStartup(MF_VERSION);
+}
+
+CvCaptureFile_MSMF::~CvCaptureFile_MSMF()
+{
+    close();
+    MFShutdown();
+}
+
+bool CvCaptureFile_MSMF::open(const char* filename)
+{
+    if (!filename)
+        return false;
+
+    wchar_t* unicodeFileName = new wchar_t[strlen(filename)+1];
+    MultiByteToWideChar(CP_ACP, 0, filename, -1, unicodeFileName, strlen(filename)+1);
+
+    HRESULT hr = S_OK;
+
+    MF_OBJECT_TYPE ObjectType = MF_OBJECT_INVALID;
+
+    ComPtr<IMFSourceResolver> pSourceResolver = NULL;
+    IUnknown* pUnkSource = NULL;
+
+    hr = MFCreateSourceResolver(pSourceResolver.GetAddressOf());
+
+    if (SUCCEEDED(hr))
+    {
+        hr = pSourceResolver->CreateObjectFromURL(
+            unicodeFileName,
+            MF_RESOLUTION_MEDIASOURCE,
+            NULL, // Optional property store.
+            &ObjectType,
+            &pUnkSource
+            );
+    }
+
+    // Get the IMFMediaSource from the IUnknown pointer.
+    if (SUCCEEDED(hr))
+    {
+        hr = pUnkSource->QueryInterface(IID_PPV_ARGS(&videoFileSource));
+    }
+
+    SafeRelease(&pUnkSource);
+
+    if (SUCCEEDED(hr))
+    {
+        hr = enumerateCaptureFormats(videoFileSource);
+    }
+
+    if (SUCCEEDED(hr))
+    {
+        hr = ImageGrabberThread::CreateInstance(&grabberThread, videoFileSource, (unsigned int)-2, true);
+    }
+
+    if (SUCCEEDED(hr))
+    {
+        grabberThread->start();
+    }
+
+    isOpened = SUCCEEDED(hr);
+
+    return isOpened;
+}
+
+void CvCaptureFile_MSMF::close()
+{
+    if (grabberThread)
+    {
+        isOpened = false;
+        SetEvent(grabberThread->getImageGrabber()->ig_hFinish);
+        grabberThread->stop();
+        delete grabberThread;
+    }
+
+    if (videoFileSource)
+    {
+        videoFileSource->Shutdown();
+    }
+}
+
+bool CvCaptureFile_MSMF::setProperty(int property_id, double value)
+{
+    // image capture properties
+    // FIXME: implement method in VideoInput back end
+    (void) property_id;
+    (void) value;
+    return false;
+}
+
+double CvCaptureFile_MSMF::getProperty(int property_id)
+{
+    // image format proprrties
+    switch( property_id )
+    {
+    case CV_CAP_PROP_FRAME_WIDTH:
+        return captureFormats[captureFormatIndex].width;
+    case CV_CAP_PROP_FRAME_HEIGHT:
+        return captureFormats[captureFormatIndex].height;
+    case CV_CAP_PROP_FRAME_COUNT:
+        {
+            MFTIME duration;
+            getSourceDuration(this->videoFileSource, &duration);
+            double fps = ((double)captureFormats[captureFormatIndex].MF_MT_FRAME_RATE_NUMERATOR) /
+            ((double)captureFormats[captureFormatIndex].MF_MT_FRAME_RATE_DENOMINATOR);
+            return (double)floor(((double)duration/1e7)*fps+0.5);
+        }
+    case CV_CAP_PROP_FOURCC:
+        return captureFormats[captureFormatIndex].MF_MT_SUBTYPE.Data1;
+    case CV_CAP_PROP_FPS:
+        return ((double)captureFormats[captureFormatIndex].MF_MT_FRAME_RATE_NUMERATOR) /
+            ((double)captureFormats[captureFormatIndex].MF_MT_FRAME_RATE_DENOMINATOR);
+    }
+
+    return -1;
+}
+
+bool CvCaptureFile_MSMF::grabFrame()
+{
+    DWORD waitResult = (DWORD)-1;
+    if (isOpened)
+    {
+        SetEvent(grabberThread->getImageGrabber()->ig_hFrameGrabbed);
+        HANDLE tmp[] = {grabberThread->getImageGrabber()->ig_hFrameReady, grabberThread->getImageGrabber()->ig_hFinish, 0};
+        waitResult = WaitForMultipleObjects(2, tmp, FALSE, INFINITE);
+    }
+
+    return isOpened && grabberThread->getImageGrabber()->getRawImage()->isNew() && (waitResult == WAIT_OBJECT_0);
+}
+
+IplImage* CvCaptureFile_MSMF::retrieveFrame(int)
+{
+    unsigned int width = captureFormats[captureFormatIndex].width;
+    unsigned int height = captureFormats[captureFormatIndex].height;
+    unsigned int bytes = 3;
+    if( !frame || (int)width != frame->width || (int)height != frame->height )
+    {
+        if (frame)
+            cvReleaseImage( &frame );
+        frame = cvCreateImage( cvSize(width,height), 8, 3 );
+    }
+
+    RawImage *RIOut = grabberThread->getImageGrabber()->getRawImage();
+    unsigned int size = bytes * width * height;
+
+    bool verticalFlip = captureFormats[captureFormatIndex].MF_MT_DEFAULT_STRIDE < 0;
+
+    if(RIOut && size == RIOut->getSize())
+    {
+         videoInput::processPixels(RIOut->getpPixels(), (unsigned char*)frame->imageData, width,
+             height, bytes, false, verticalFlip);
+    }
+
+    return frame;
+}
+
+HRESULT CvCaptureFile_MSMF::enumerateCaptureFormats(IMFMediaSource *pSource)
+{
+    ComPtr<IMFPresentationDescriptor> pPD = NULL;
+    ComPtr<IMFStreamDescriptor> pSD = NULL;
+    ComPtr<IMFMediaTypeHandler> pHandler = NULL;
+    ComPtr<IMFMediaType> pType = NULL;
+    HRESULT hr = pSource->CreatePresentationDescriptor(pPD.GetAddressOf());
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    BOOL fSelected;
+    hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, pSD.GetAddressOf());
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+    hr = pSD->GetMediaTypeHandler(pHandler.GetAddressOf());
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+    DWORD cTypes = 0;
+    hr = pHandler->GetMediaTypeCount(&cTypes);
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+    for (DWORD i = 0; i < cTypes; i++)
+    {
+        hr = pHandler->GetMediaTypeByIndex(i, pType.GetAddressOf());
+        if (FAILED(hr))
+        {
+            goto done;
+        }
+        MediaType MT = FormatReader::Read(pType.Get());
+        captureFormats.push_back(MT);
+    }
+
+done:
+    return hr;
+}
+
+HRESULT CvCaptureFile_MSMF::getSourceDuration(IMFMediaSource *pSource, MFTIME *pDuration)
+{
+    *pDuration = 0;
+
+    IMFPresentationDescriptor *pPD = NULL;
+
+    HRESULT hr = pSource->CreatePresentationDescriptor(&pPD);
+    if (SUCCEEDED(hr))
+    {
+        hr = pPD->GetUINT64(MF_PD_DURATION, (UINT64*)pDuration);
+        pPD->Release();
+    }
+    return hr;
+}
+
 CvCapture* cvCreateCameraCapture_MSMF( int index )
 {
     CvCaptureCAM_MSMF* capture = new CvCaptureCAM_MSMF;
@@ -2807,4 +3144,392 @@ CvCapture* cvCreateCameraCapture_MSMF( int index )
     delete capture;
     return 0;
 }
-#endif
\ No newline at end of file
+
+CvCapture* cvCreateFileCapture_MSMF (const char* filename)
+{
+    CvCaptureFile_MSMF* capture = new CvCaptureFile_MSMF;
+    try
+    {
+        if( capture->open(filename) )
+            return capture;
+        else
+        {
+            delete capture;
+            return NULL;
+        }
+    }
+    catch(...)
+    {
+        delete capture;
+        throw;
+    }
+}
+
+//
+//
+// Media Foundation-based Video Writer
+//
+//
+
+class CvVideoWriter_MSMF : public CvVideoWriter
+{
+public:
+    CvVideoWriter_MSMF();
+    virtual ~CvVideoWriter_MSMF();
+    virtual bool open(const char* filename, int fourcc,
+                       double fps, CvSize frameSize, bool isColor);
+    virtual void close();
+    virtual bool writeFrame(const IplImage* img);
+
+private:
+    UINT32 videoWidth;
+    UINT32 videoHeight;
+    double fps;
+    UINT32 bitRate;
+    UINT32 frameSize;
+    GUID   encodingFormat;
+    GUID   inputFormat;
+
+    DWORD  streamIndex;
+    ComPtr<IMFSinkWriter> sinkWriter;
+
+    bool   initiated;
+
+    LONGLONG rtStart;
+    UINT64 rtDuration;
+
+    HRESULT InitializeSinkWriter(const char* filename);
+    static const GUID FourCC2GUID(int fourcc);
+    HRESULT WriteFrame(DWORD *videoFrameBuffer, const LONGLONG& rtStart, const LONGLONG& rtDuration);
+};
+
+CvVideoWriter_MSMF::CvVideoWriter_MSMF():
+    initiated(false)
+{
+}
+
+CvVideoWriter_MSMF::~CvVideoWriter_MSMF()
+{
+    close();
+}
+
+const GUID CvVideoWriter_MSMF::FourCC2GUID(int fourcc)
+{
+    switch(fourcc)
+    {
+        case CV_FOURCC_MACRO('d', 'v', '2', '5'):
+            return MFVideoFormat_DV25; break;
+        case CV_FOURCC_MACRO('d', 'v', '5', '0'):
+            return MFVideoFormat_DV50; break;
+        case CV_FOURCC_MACRO('d', 'v', 'c', ' '):
+            return MFVideoFormat_DVC; break;
+        case CV_FOURCC_MACRO('d', 'v', 'h', '1'):
+            return MFVideoFormat_DVH1; break;
+        case CV_FOURCC_MACRO('d', 'v', 'h', 'd'):
+            return MFVideoFormat_DVHD; break;
+        case CV_FOURCC_MACRO('d', 'v', 's', 'd'):
+            return MFVideoFormat_DVSD; break;
+        case CV_FOURCC_MACRO('d', 'v', 's', 'l'):
+                return MFVideoFormat_DVSL; break;
+        case CV_FOURCC_MACRO('H', '2', '6', '3'):
+                return MFVideoFormat_H263; break;
+        case CV_FOURCC_MACRO('H', '2', '6', '4'):
+                return MFVideoFormat_H264; break;
+        case CV_FOURCC_MACRO('M', '4', 'S', '2'):
+                return MFVideoFormat_M4S2; break;
+        case CV_FOURCC_MACRO('M', 'J', 'P', 'G'):
+                return MFVideoFormat_MJPG; break;
+        case CV_FOURCC_MACRO('M', 'P', '4', '3'):
+                return MFVideoFormat_MP43; break;
+        case CV_FOURCC_MACRO('M', 'P', '4', 'S'):
+                return MFVideoFormat_MP4S; break;
+        case CV_FOURCC_MACRO('M', 'P', '4', 'V'):
+                return MFVideoFormat_MP4V; break;
+        case CV_FOURCC_MACRO('M', 'P', 'G', '1'):
+                return MFVideoFormat_MPG1; break;
+        case CV_FOURCC_MACRO('M', 'S', 'S', '1'):
+                return MFVideoFormat_MSS1; break;
+        case CV_FOURCC_MACRO('M', 'S', 'S', '2'):
+                return MFVideoFormat_MSS2; break;
+        case CV_FOURCC_MACRO('W', 'M', 'V', '1'):
+                return MFVideoFormat_WMV1; break;
+        case CV_FOURCC_MACRO('W', 'M', 'V', '2'):
+                return MFVideoFormat_WMV2; break;
+        case CV_FOURCC_MACRO('W', 'M', 'V', '3'):
+                return MFVideoFormat_WMV3; break;
+        case CV_FOURCC_MACRO('W', 'V', 'C', '1'):
+                return MFVideoFormat_WVC1; break;
+        default:
+            return MFVideoFormat_H264;
+    }
+}
+
+bool CvVideoWriter_MSMF::open( const char* filename, int fourcc,
+                       double _fps, CvSize frameSize, bool /*isColor*/ )
+{
+    videoWidth = frameSize.width;
+    videoHeight = frameSize.height;
+    fps = _fps;
+    bitRate = (UINT32)fps*videoWidth*videoHeight; // 1-bit per pixel
+    encodingFormat = FourCC2GUID(fourcc);
+    inputFormat = MFVideoFormat_RGB32;
+
+    HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
+    if (SUCCEEDED(hr))
+    {
+        hr = MFStartup(MF_VERSION);
+        if (SUCCEEDED(hr))
+        {
+            hr = InitializeSinkWriter(filename);
+            if (SUCCEEDED(hr))
+            {
+                initiated = true;
+                rtStart = 0;
+                MFFrameRateToAverageTimePerFrame((UINT32)fps, 1, &rtDuration);
+            }
+        }
+    }
+
+    return SUCCEEDED(hr);
+}
+
+void CvVideoWriter_MSMF::close()
+{
+    if (!initiated)
+    {
+        return;
+    }
+
+    initiated = false;
+    sinkWriter->Finalize();
+    MFShutdown();
+}
+
+bool CvVideoWriter_MSMF::writeFrame(const IplImage* img)
+{
+    if (!img)
+        return false;
+
+    int length = img->width * img->height * 4;
+    DWORD* target = new DWORD[length];
+
+    for (int rowIdx = 0; rowIdx < img->height; rowIdx++)
+    {
+        char* rowStart = img->imageData + rowIdx*img->widthStep;
+        for (int colIdx = 0; colIdx < img->width; colIdx++)
+        {
+            BYTE b = rowStart[colIdx * img->nChannels + 0];
+            BYTE g = rowStart[colIdx * img->nChannels + 1];
+            BYTE r = rowStart[colIdx * img->nChannels + 2];
+
+            target[rowIdx*img->width+colIdx] = (r << 16) + (g << 8) + b;
+        }
+    }
+
+    // Send frame to the sink writer.
+    HRESULT hr = WriteFrame(target, rtStart, rtDuration);
+    if (FAILED(hr))
+    {
+        delete[] target;
+        return false;
+    }
+    rtStart += rtDuration;
+
+    delete[] target;
+
+    return true;
+}
+
+HRESULT CvVideoWriter_MSMF::InitializeSinkWriter(const char* filename)
+{
+    ComPtr<IMFAttributes> spAttr;
+    ComPtr<IMFMediaType>  mediaTypeOut;
+    ComPtr<IMFMediaType>  mediaTypeIn;
+    ComPtr<IMFByteStream> spByteStream;
+
+    MFCreateAttributes(&spAttr, 10);
+    spAttr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, true);
+
+    wchar_t* unicodeFileName = new wchar_t[strlen(filename)+1];
+    MultiByteToWideChar(CP_ACP, 0, filename, -1, unicodeFileName, strlen(filename)+1);
+
+    HRESULT hr = MFCreateSinkWriterFromURL(unicodeFileName, NULL, spAttr.Get(), &sinkWriter);
+
+    delete[] unicodeFileName;
+
+    // Set the output media type.
+    if (SUCCEEDED(hr))
+    {
+        hr = MFCreateMediaType(&mediaTypeOut);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = mediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = mediaTypeOut->SetGUID(MF_MT_SUBTYPE, encodingFormat);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = mediaTypeOut->SetUINT32(MF_MT_AVG_BITRATE, bitRate);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = mediaTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = MFSetAttributeSize(mediaTypeOut.Get(), MF_MT_FRAME_SIZE, videoWidth, videoHeight);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = MFSetAttributeRatio(mediaTypeOut.Get(), MF_MT_FRAME_RATE, (UINT32)fps, 1);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = MFSetAttributeRatio(mediaTypeOut.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
+    }
+
+    if (SUCCEEDED(hr))
+    {
+        hr = sinkWriter->AddStream(mediaTypeOut.Get(), &streamIndex);
+    }
+
+    // Set the input media type.
+    if (SUCCEEDED(hr))
+    {
+        hr = MFCreateMediaType(&mediaTypeIn);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = mediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = mediaTypeIn->SetGUID(MF_MT_SUBTYPE, inputFormat);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = mediaTypeIn->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = MFSetAttributeSize(mediaTypeIn.Get(), MF_MT_FRAME_SIZE, videoWidth, videoHeight);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = MFSetAttributeRatio(mediaTypeIn.Get(), MF_MT_FRAME_RATE, (UINT32)fps, 1);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = MFSetAttributeRatio(mediaTypeIn.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
+    }
+
+    if (SUCCEEDED(hr))
+    {
+        hr = sinkWriter->SetInputMediaType(streamIndex, mediaTypeIn.Get(), NULL);
+    }
+
+    // Tell the sink writer to start accepting data.
+    if (SUCCEEDED(hr))
+    {
+        hr = sinkWriter->BeginWriting();
+    }
+
+    return hr;
+}
+
+HRESULT CvVideoWriter_MSMF::WriteFrame(DWORD *videoFrameBuffer, const LONGLONG& Start, const LONGLONG& Duration)
+{
+    ComPtr<IMFSample> sample;
+    ComPtr<IMFMediaBuffer> buffer;
+
+    const LONG cbWidth = 4 * videoWidth;
+    const DWORD cbBuffer = cbWidth * videoHeight;
+
+    BYTE *pData = NULL;
+
+    // Create a new memory buffer.
+    HRESULT hr = MFCreateMemoryBuffer(cbBuffer, &buffer);
+
+    // Lock the buffer and copy the video frame to the buffer.
+    if (SUCCEEDED(hr))
+    {
+        hr = buffer->Lock(&pData, NULL, NULL);
+    }
+
+    if (SUCCEEDED(hr))
+    {
+#if defined(_M_ARM)
+        hr = MFCopyImage(
+            pData,                      // Destination buffer.
+            -cbWidth,                   // Destination stride.
+            (BYTE*)videoFrameBuffer,    // First row in source image.
+            cbWidth,                    // Source stride.
+            cbWidth,                    // Image width in bytes.
+            videoHeight                 // Image height in pixels.
+            );
+#else
+        hr = MFCopyImage(
+            pData,                      // Destination buffer.
+            cbWidth,                    // Destination stride.
+            (BYTE*)videoFrameBuffer,    // First row in source image.
+            cbWidth,                    // Source stride.
+            cbWidth,                    // Image width in bytes.
+            videoHeight                 // Image height in pixels.
+            );
+#endif
+    }
+
+    if (buffer)
+    {
+        buffer->Unlock();
+    }
+
+    // Set the data length of the buffer.
+    if (SUCCEEDED(hr))
+    {
+        hr = buffer->SetCurrentLength(cbBuffer);
+    }
+
+    // Create a media sample and add the buffer to the sample.
+    if (SUCCEEDED(hr))
+    {
+        hr = MFCreateSample(&sample);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = sample->AddBuffer(buffer.Get());
+    }
+
+    // Set the time stamp and the duration.
+    if (SUCCEEDED(hr))
+    {
+        hr = sample->SetSampleTime(Start);
+    }
+    if (SUCCEEDED(hr))
+    {
+        hr = sample->SetSampleDuration(Duration);
+    }
+
+    // Send the sample to the Sink Writer.
+    if (SUCCEEDED(hr))
+    {
+        hr = sinkWriter->WriteSample(streamIndex, sample.Get());
+    }
+
+    return hr;
+}
+
+CvVideoWriter* cvCreateVideoWriter_MSMF( const char* filename, int fourcc,
+                                        double fps, CvSize frameSize, int isColor )
+{
+    CvVideoWriter_MSMF* writer = new CvVideoWriter_MSMF;
+    if( writer->open( filename, fourcc, fps, frameSize, isColor != 0 ))
+        return writer;
+    delete writer;
+    return NULL;
+}
+
+#endif
diff --git a/modules/highgui/src/cap_qtkit.mm b/modules/highgui/src/cap_qtkit.mm
index d44b7636d..8a7b3d84e 100644
--- a/modules/highgui/src/cap_qtkit.mm
+++ b/modules/highgui/src/cap_qtkit.mm
@@ -1,32 +1,44 @@
-/*
- *  CvCapture.mm
- *
- *  Created by Nicholas Butko on 11/3/09.
- *  Copyright 2009. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
- * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the contributor be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*////////////////////////////////////////////////////////////////////////////////////////
+
 
 #include "precomp.hpp"
 #include "opencv2/imgproc.hpp"
@@ -399,6 +411,9 @@ int CvCaptureCAM::startCaptureDevice(int cameraNum) {
 
 void CvCaptureCAM::setWidthHeight() {
     NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];
+
+    [mCaptureSession stopRunning];
+
     NSDictionary* pixelBufferOptions = [NSDictionary dictionaryWithObjectsAndKeys:
                           [NSNumber numberWithDouble:1.0*width], (id)kCVPixelBufferWidthKey,
                           [NSNumber numberWithDouble:1.0*height], (id)kCVPixelBufferHeightKey,
@@ -407,6 +422,9 @@ void CvCaptureCAM::setWidthHeight() {
                           nil];
 
     [mCaptureDecompressedVideoOutput setPixelBufferAttributes:pixelBufferOptions];
+
+    [mCaptureSession startRunning];
+
     grabFrame(60);
     [localpool drain];
 }
@@ -1026,4 +1044,3 @@ bool CvVideoWriter_QT::writeFrame(const IplImage* image) {
 
     return 1;
 }
-
diff --git a/modules/highgui/src/cap_v4l.cpp b/modules/highgui/src/cap_v4l.cpp
index 6fc122fd0..045c6f889 100644
--- a/modules/highgui/src/cap_v4l.cpp
+++ b/modules/highgui/src/cap_v4l.cpp
@@ -14,7 +14,9 @@ It has been tested with the motempl sample program
 First Patch:  August 24, 2004 Travis Wood   TravisOCV@tkwood.com
 For Release:  OpenCV-Linux Beta4  opencv-0.9.6
 Tested On:    LMLBT44 with 8 video inputs
-Problems?     Post problems/fixes to OpenCV group on groups.yahoo.com
+Problems?     Post your questions at answers.opencv.org,
+              Report bugs at code.opencv.org,
+              Submit your fixes at https://github.com/Itseez/opencv/
 Patched Comments:
 
 TW: The cv cam utils that came with the initial release of OpenCV for LINUX Beta4
diff --git a/modules/highgui/src/cap_vfw.cpp b/modules/highgui/src/cap_vfw.cpp
index d419a4891..d845953f8 100644
--- a/modules/highgui/src/cap_vfw.cpp
+++ b/modules/highgui/src/cap_vfw.cpp
@@ -613,8 +613,10 @@ bool CvVideoWriter_VFW::open( const char* filename, int _fourcc, double _fps, Cv
             close();
             return false;
         }
+        return true;
     }
-    return true;
+    else
+        return false;
 }
 
 
diff --git a/modules/highgui/src/cap_ximea.cpp b/modules/highgui/src/cap_ximea.cpp
index 5acf2c09d..7292727b7 100644
--- a/modules/highgui/src/cap_ximea.cpp
+++ b/modules/highgui/src/cap_ximea.cpp
@@ -52,6 +52,8 @@ void CvCaptureCAM_XIMEA::init()
 {
     xiGetNumberDevices( &numDevices);
     hmv = NULL;
+    frame = NULL;
+    timeout = 0;
     memset(&image, 0, sizeof(XI_IMG));
 }
 
@@ -60,6 +62,8 @@ void CvCaptureCAM_XIMEA::init()
 // Initialize camera input
 bool CvCaptureCAM_XIMEA::open( int wIndex )
 {
+#define HandleXiResult(res) if (res!=XI_OK)  goto error;
+
     int mvret = XI_OK;
 
     if(numDevices == 0)
@@ -73,26 +77,42 @@ bool CvCaptureCAM_XIMEA::open( int wIndex )
 
     // always use auto exposure/gain
     mvret = xiSetParamInt( hmv, XI_PRM_AEAG, 1);
-    if(mvret != XI_OK) goto error;
-
-    // always use auto white ballance
-    mvret = xiSetParamInt( hmv, XI_PRM_AUTO_WB, 1);
-    if(mvret != XI_OK) goto error;
-    
-    // default image format RGB24
-    mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, XI_RGB24);
-    if(mvret != XI_OK) goto error;
+    HandleXiResult(mvret);
 
     int width = 0;
     mvret = xiGetParamInt( hmv, XI_PRM_WIDTH, &width);
-    if(mvret != XI_OK) goto error;
+    HandleXiResult(mvret);
 
     int height = 0;
     mvret = xiGetParamInt( hmv, XI_PRM_HEIGHT, &height);
-    if(mvret != XI_OK) goto error;
+    HandleXiResult(mvret);
 
-    // allocate frame buffer for RGB24 image
-    frame = cvCreateImage(cvSize( width, height), IPL_DEPTH_8U, 3);
+    int isColor = 0;
+    mvret = xiGetParamInt(hmv, XI_PRM_IMAGE_IS_COLOR, &isColor);
+    HandleXiResult(mvret);
+
+    if(isColor)	// for color cameras
+    {
+        // default image format RGB24
+        mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, XI_RGB24);
+        HandleXiResult(mvret);
+
+        // always use auto white ballance for color cameras
+        mvret = xiSetParamInt( hmv, XI_PRM_AUTO_WB, 1);
+        HandleXiResult(mvret);
+
+        // allocate frame buffer for RGB24 image
+        frame = cvCreateImage(cvSize( width, height), IPL_DEPTH_8U, 3);
+    }
+    else // for mono cameras
+    {
+        // default image format MONO8
+        mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, XI_MONO8);
+        HandleXiResult(mvret);
+
+        // allocate frame buffer for MONO8 image
+        frame = cvCreateImage(cvSize( width, height), IPL_DEPTH_8U, 1);
+    }
 
     //default capture timeout 10s
     timeout = 10000;
@@ -119,8 +139,11 @@ void CvCaptureCAM_XIMEA::close()
     if(frame)
         cvReleaseImage(&frame);
 
-    xiStopAcquisition(hmv);
-    xiCloseDevice(hmv);
+    if(hmv)
+    {
+        xiStopAcquisition(hmv);
+        xiCloseDevice(hmv);
+    }
     hmv = NULL;
 }
 
@@ -153,11 +176,11 @@ IplImage* CvCaptureCAM_XIMEA::retrieveFrame(int)
 {
     // update cvImage after format has changed
     resetCvImage();
-    
+
     // copy pixel data
     switch( image.frm)
     {
-    case XI_MONO8       : 
+    case XI_MONO8       :
     case XI_RAW8        : memcpy( frame->imageData, image.bp, image.width*image.height); break;
     case XI_MONO16      :
     case XI_RAW16       : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(WORD)); break;
@@ -187,15 +210,15 @@ void CvCaptureCAM_XIMEA::resetCvImage()
         {
         case XI_MONO8       :
         case XI_RAW8        : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 1); break;
-        case XI_MONO16      : 
+        case XI_MONO16      :
         case XI_RAW16       : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_16U, 1); break;
-        case XI_RGB24       : 
+        case XI_RGB24       :
         case XI_RGB_PLANAR  : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 3); break;
         case XI_RGB32       : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 4); break;
         default :
             return;
         }
-    }	
+    }
     cvZero(frame);
 }
 /**********************************************************************************/
@@ -315,9 +338,9 @@ int  CvCaptureCAM_XIMEA::getBpp()
     {
     case XI_MONO8       :
     case XI_RAW8        : return 1;
-    case XI_MONO16      : 
+    case XI_MONO16      :
     case XI_RAW16       : return 2;
-    case XI_RGB24       : 
+    case XI_RGB24       :
     case XI_RGB_PLANAR  : return 3;
     case XI_RGB32       : return 4;
     default :
@@ -325,4 +348,4 @@ int  CvCaptureCAM_XIMEA::getBpp()
     }
 }
 
-/**********************************************************************************/
\ No newline at end of file
+/**********************************************************************************/
diff --git a/modules/highgui/src/files_Qt/Milky/48/1.png b/modules/highgui/src/files_Qt/Milky/48/1.png
index af3dc132c..69b4dee0a 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/1.png and b/modules/highgui/src/files_Qt/Milky/48/1.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/10.png b/modules/highgui/src/files_Qt/Milky/48/10.png
index d01626050..34185e1fa 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/10.png and b/modules/highgui/src/files_Qt/Milky/48/10.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/11.png b/modules/highgui/src/files_Qt/Milky/48/11.png
index ac236491b..565ad498e 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/11.png and b/modules/highgui/src/files_Qt/Milky/48/11.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/12.png b/modules/highgui/src/files_Qt/Milky/48/12.png
index e297afe22..72712fe9e 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/12.png and b/modules/highgui/src/files_Qt/Milky/48/12.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/13.png b/modules/highgui/src/files_Qt/Milky/48/13.png
index 691e73bf9..6cb5c5dca 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/13.png and b/modules/highgui/src/files_Qt/Milky/48/13.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/14.png b/modules/highgui/src/files_Qt/Milky/48/14.png
index ede537cef..8d217be46 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/14.png and b/modules/highgui/src/files_Qt/Milky/48/14.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/16.png b/modules/highgui/src/files_Qt/Milky/48/16.png
index 3e675d4b1..2e011c739 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/16.png and b/modules/highgui/src/files_Qt/Milky/48/16.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/17.png b/modules/highgui/src/files_Qt/Milky/48/17.png
index 32ddc3eba..4a7e5de41 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/17.png and b/modules/highgui/src/files_Qt/Milky/48/17.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/18.png b/modules/highgui/src/files_Qt/Milky/48/18.png
index be8e3e716..43f5405f5 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/18.png and b/modules/highgui/src/files_Qt/Milky/48/18.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/19.png b/modules/highgui/src/files_Qt/Milky/48/19.png
index e5145d052..203510ddd 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/19.png and b/modules/highgui/src/files_Qt/Milky/48/19.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/2.png b/modules/highgui/src/files_Qt/Milky/48/2.png
index d2ad4a4c3..8f4903eea 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/2.png and b/modules/highgui/src/files_Qt/Milky/48/2.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/20.png b/modules/highgui/src/files_Qt/Milky/48/20.png
index c53289aae..1a591ca41 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/20.png and b/modules/highgui/src/files_Qt/Milky/48/20.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/21.png b/modules/highgui/src/files_Qt/Milky/48/21.png
index 2df56ef77..e65e4acd3 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/21.png and b/modules/highgui/src/files_Qt/Milky/48/21.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/22.png b/modules/highgui/src/files_Qt/Milky/48/22.png
index 36d41cc63..a81aca191 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/22.png and b/modules/highgui/src/files_Qt/Milky/48/22.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/23.png b/modules/highgui/src/files_Qt/Milky/48/23.png
index eb51b8385..ab9e60cfc 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/23.png and b/modules/highgui/src/files_Qt/Milky/48/23.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/24.png b/modules/highgui/src/files_Qt/Milky/48/24.png
index b0033cf64..4e5629cb4 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/24.png and b/modules/highgui/src/files_Qt/Milky/48/24.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/25.png b/modules/highgui/src/files_Qt/Milky/48/25.png
index d41d79295..da93a5962 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/25.png and b/modules/highgui/src/files_Qt/Milky/48/25.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/26.png b/modules/highgui/src/files_Qt/Milky/48/26.png
index 055c496a2..6ba5d6c10 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/26.png and b/modules/highgui/src/files_Qt/Milky/48/26.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/27.png b/modules/highgui/src/files_Qt/Milky/48/27.png
index 34f5f0c8c..a14e20420 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/27.png and b/modules/highgui/src/files_Qt/Milky/48/27.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/28.png b/modules/highgui/src/files_Qt/Milky/48/28.png
index 9c94db1aa..f0df2d35e 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/28.png and b/modules/highgui/src/files_Qt/Milky/48/28.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/29.png b/modules/highgui/src/files_Qt/Milky/48/29.png
index 9ca7137ed..6d79d929f 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/29.png and b/modules/highgui/src/files_Qt/Milky/48/29.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/3.png b/modules/highgui/src/files_Qt/Milky/48/3.png
index 5144bbfc3..40d594668 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/3.png and b/modules/highgui/src/files_Qt/Milky/48/3.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/30.png b/modules/highgui/src/files_Qt/Milky/48/30.png
index db76e78f8..44037a72f 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/30.png and b/modules/highgui/src/files_Qt/Milky/48/30.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/31.png b/modules/highgui/src/files_Qt/Milky/48/31.png
index e79c0dfda..b9d421337 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/31.png and b/modules/highgui/src/files_Qt/Milky/48/31.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/32.png b/modules/highgui/src/files_Qt/Milky/48/32.png
index 414eecfde..d72749c50 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/32.png and b/modules/highgui/src/files_Qt/Milky/48/32.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/33.png b/modules/highgui/src/files_Qt/Milky/48/33.png
index 2ec44d312..85bb86ff0 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/33.png and b/modules/highgui/src/files_Qt/Milky/48/33.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/34.png b/modules/highgui/src/files_Qt/Milky/48/34.png
index 63f3b0465..fd095ee77 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/34.png and b/modules/highgui/src/files_Qt/Milky/48/34.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/35.png b/modules/highgui/src/files_Qt/Milky/48/35.png
index 41a041599..abb64d612 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/35.png and b/modules/highgui/src/files_Qt/Milky/48/35.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/37.png b/modules/highgui/src/files_Qt/Milky/48/37.png
index 70be99dac..fc9f361ec 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/37.png and b/modules/highgui/src/files_Qt/Milky/48/37.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/38.png b/modules/highgui/src/files_Qt/Milky/48/38.png
index 1faaa2e59..81cd7e139 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/38.png and b/modules/highgui/src/files_Qt/Milky/48/38.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/39.png b/modules/highgui/src/files_Qt/Milky/48/39.png
index b5b7b980f..d76effcd7 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/39.png and b/modules/highgui/src/files_Qt/Milky/48/39.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/4.png b/modules/highgui/src/files_Qt/Milky/48/4.png
index fe93c3503..a6a8d07a7 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/4.png and b/modules/highgui/src/files_Qt/Milky/48/4.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/40.png b/modules/highgui/src/files_Qt/Milky/48/40.png
index 103590607..f17ad6aa1 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/40.png and b/modules/highgui/src/files_Qt/Milky/48/40.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/41.png b/modules/highgui/src/files_Qt/Milky/48/41.png
index 622e1d8b2..4553c0458 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/41.png and b/modules/highgui/src/files_Qt/Milky/48/41.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/42.png b/modules/highgui/src/files_Qt/Milky/48/42.png
index 2c20bf60f..fb5f9a2a6 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/42.png and b/modules/highgui/src/files_Qt/Milky/48/42.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/43.png b/modules/highgui/src/files_Qt/Milky/48/43.png
index b849f939b..3c958420b 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/43.png and b/modules/highgui/src/files_Qt/Milky/48/43.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/44.png b/modules/highgui/src/files_Qt/Milky/48/44.png
index 3902ba1e3..ef3c114d4 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/44.png and b/modules/highgui/src/files_Qt/Milky/48/44.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/45.png b/modules/highgui/src/files_Qt/Milky/48/45.png
index cd4d6deeb..a77fb9e14 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/45.png and b/modules/highgui/src/files_Qt/Milky/48/45.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/5.png b/modules/highgui/src/files_Qt/Milky/48/5.png
index 60827ff7e..46df26f5e 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/5.png and b/modules/highgui/src/files_Qt/Milky/48/5.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/6.png b/modules/highgui/src/files_Qt/Milky/48/6.png
index ed04e555d..72853e685 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/6.png and b/modules/highgui/src/files_Qt/Milky/48/6.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/7.png b/modules/highgui/src/files_Qt/Milky/48/7.png
index 6eca1fadf..832772b59 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/7.png and b/modules/highgui/src/files_Qt/Milky/48/7.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/48/9.png b/modules/highgui/src/files_Qt/Milky/48/9.png
index e6a9be154..4ece823fe 100644
Binary files a/modules/highgui/src/files_Qt/Milky/48/9.png and b/modules/highgui/src/files_Qt/Milky/48/9.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/1.png b/modules/highgui/src/files_Qt/Milky/64/1.png
index 9222f069f..36a19f6e0 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/1.png and b/modules/highgui/src/files_Qt/Milky/64/1.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/10.png b/modules/highgui/src/files_Qt/Milky/64/10.png
index e450e4ae3..28e0be1f5 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/10.png and b/modules/highgui/src/files_Qt/Milky/64/10.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/100.png b/modules/highgui/src/files_Qt/Milky/64/100.png
index 2920ba3db..fbcfbaabb 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/100.png and b/modules/highgui/src/files_Qt/Milky/64/100.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/11.png b/modules/highgui/src/files_Qt/Milky/64/11.png
index 09e42528a..1002f3457 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/11.png and b/modules/highgui/src/files_Qt/Milky/64/11.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/12.png b/modules/highgui/src/files_Qt/Milky/64/12.png
index 42ddcf2fa..822bc42e2 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/12.png and b/modules/highgui/src/files_Qt/Milky/64/12.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/126.png b/modules/highgui/src/files_Qt/Milky/64/126.png
index 4d34cfeb1..d8715004a 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/126.png and b/modules/highgui/src/files_Qt/Milky/64/126.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/13.png b/modules/highgui/src/files_Qt/Milky/64/13.png
index a5d1aff9c..c4ae0c42f 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/13.png and b/modules/highgui/src/files_Qt/Milky/64/13.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/14.png b/modules/highgui/src/files_Qt/Milky/64/14.png
index 091cf5847..ce96e04ce 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/14.png and b/modules/highgui/src/files_Qt/Milky/64/14.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/15.png b/modules/highgui/src/files_Qt/Milky/64/15.png
index f55ea7dd4..eb9dca50a 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/15.png and b/modules/highgui/src/files_Qt/Milky/64/15.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/16.png b/modules/highgui/src/files_Qt/Milky/64/16.png
index 008823301..9acf8c20b 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/16.png and b/modules/highgui/src/files_Qt/Milky/64/16.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/17.png b/modules/highgui/src/files_Qt/Milky/64/17.png
index 7ab1b957f..18d286ab2 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/17.png and b/modules/highgui/src/files_Qt/Milky/64/17.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/18.png b/modules/highgui/src/files_Qt/Milky/64/18.png
index c0772630f..3b76256ba 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/18.png and b/modules/highgui/src/files_Qt/Milky/64/18.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/19.png b/modules/highgui/src/files_Qt/Milky/64/19.png
index d167b9ace..b0de5da46 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/19.png and b/modules/highgui/src/files_Qt/Milky/64/19.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/2.png b/modules/highgui/src/files_Qt/Milky/64/2.png
index f54098767..3523cc670 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/2.png and b/modules/highgui/src/files_Qt/Milky/64/2.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/21.png b/modules/highgui/src/files_Qt/Milky/64/21.png
index d1ae8e648..4c94f5442 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/21.png and b/modules/highgui/src/files_Qt/Milky/64/21.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/22.png b/modules/highgui/src/files_Qt/Milky/64/22.png
index ca026ecbc..f7f0c3d9e 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/22.png and b/modules/highgui/src/files_Qt/Milky/64/22.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/23.png b/modules/highgui/src/files_Qt/Milky/64/23.png
index a374ea029..11b0899f5 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/23.png and b/modules/highgui/src/files_Qt/Milky/64/23.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/24.png b/modules/highgui/src/files_Qt/Milky/64/24.png
index 5566ebd4d..94a8e9496 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/24.png and b/modules/highgui/src/files_Qt/Milky/64/24.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/25.png b/modules/highgui/src/files_Qt/Milky/64/25.png
index 368f7a1c6..4e650dc81 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/25.png and b/modules/highgui/src/files_Qt/Milky/64/25.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/26.png b/modules/highgui/src/files_Qt/Milky/64/26.png
index 10ecc3be9..6dec63383 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/26.png and b/modules/highgui/src/files_Qt/Milky/64/26.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/27.png b/modules/highgui/src/files_Qt/Milky/64/27.png
index 9946afb50..1ab2410c7 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/27.png and b/modules/highgui/src/files_Qt/Milky/64/27.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/28.png b/modules/highgui/src/files_Qt/Milky/64/28.png
index 9094ba190..7d4d62435 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/28.png and b/modules/highgui/src/files_Qt/Milky/64/28.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/29.png b/modules/highgui/src/files_Qt/Milky/64/29.png
index dc593dfcd..74a499650 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/29.png and b/modules/highgui/src/files_Qt/Milky/64/29.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/30.png b/modules/highgui/src/files_Qt/Milky/64/30.png
index 2430966be..d1fbb7208 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/30.png and b/modules/highgui/src/files_Qt/Milky/64/30.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/31.png b/modules/highgui/src/files_Qt/Milky/64/31.png
index 1748d7135..70d95908f 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/31.png and b/modules/highgui/src/files_Qt/Milky/64/31.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/32.png b/modules/highgui/src/files_Qt/Milky/64/32.png
index d225fb3d9..11b66ad44 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/32.png and b/modules/highgui/src/files_Qt/Milky/64/32.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/33.png b/modules/highgui/src/files_Qt/Milky/64/33.png
index 3fb4ac5b1..c76151cc3 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/33.png and b/modules/highgui/src/files_Qt/Milky/64/33.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/34.png b/modules/highgui/src/files_Qt/Milky/64/34.png
index 0dd5f23ef..1b009aac0 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/34.png and b/modules/highgui/src/files_Qt/Milky/64/34.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/35.png b/modules/highgui/src/files_Qt/Milky/64/35.png
index 5cdb35c97..c9b408445 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/35.png and b/modules/highgui/src/files_Qt/Milky/64/35.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/36.png b/modules/highgui/src/files_Qt/Milky/64/36.png
index d2b244419..d5aad30b6 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/36.png and b/modules/highgui/src/files_Qt/Milky/64/36.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/37.png b/modules/highgui/src/files_Qt/Milky/64/37.png
index ef2b8a56d..b0898c8cc 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/37.png and b/modules/highgui/src/files_Qt/Milky/64/37.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/38.png b/modules/highgui/src/files_Qt/Milky/64/38.png
index f1a700c14..0aa9224a6 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/38.png and b/modules/highgui/src/files_Qt/Milky/64/38.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/39.png b/modules/highgui/src/files_Qt/Milky/64/39.png
index 121a3f8a0..f67ae06b5 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/39.png and b/modules/highgui/src/files_Qt/Milky/64/39.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/4.png b/modules/highgui/src/files_Qt/Milky/64/4.png
index 264f8bd6a..eb1a5facd 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/4.png and b/modules/highgui/src/files_Qt/Milky/64/4.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/40.png b/modules/highgui/src/files_Qt/Milky/64/40.png
index a04765de1..f39b08537 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/40.png and b/modules/highgui/src/files_Qt/Milky/64/40.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/41.png b/modules/highgui/src/files_Qt/Milky/64/41.png
index 3bed0ebda..3061701f5 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/41.png and b/modules/highgui/src/files_Qt/Milky/64/41.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/42.png b/modules/highgui/src/files_Qt/Milky/64/42.png
index c17662099..1b979766a 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/42.png and b/modules/highgui/src/files_Qt/Milky/64/42.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/43.png b/modules/highgui/src/files_Qt/Milky/64/43.png
index 295d2886d..f3d9e5d43 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/43.png and b/modules/highgui/src/files_Qt/Milky/64/43.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/44.png b/modules/highgui/src/files_Qt/Milky/64/44.png
index a8fba7545..a549bfe0c 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/44.png and b/modules/highgui/src/files_Qt/Milky/64/44.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/45.png b/modules/highgui/src/files_Qt/Milky/64/45.png
index d65e2bd93..123fa1a3d 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/45.png and b/modules/highgui/src/files_Qt/Milky/64/45.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/5.png b/modules/highgui/src/files_Qt/Milky/64/5.png
index 185219d26..9d3c5a368 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/5.png and b/modules/highgui/src/files_Qt/Milky/64/5.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/6.png b/modules/highgui/src/files_Qt/Milky/64/6.png
index fa55cbc04..5ae8a8e67 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/6.png and b/modules/highgui/src/files_Qt/Milky/64/6.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/65.png b/modules/highgui/src/files_Qt/Milky/64/65.png
index 46ca28ee1..f4e9a2880 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/65.png and b/modules/highgui/src/files_Qt/Milky/64/65.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/7.png b/modules/highgui/src/files_Qt/Milky/64/7.png
index 98083e066..e97ab37ce 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/7.png and b/modules/highgui/src/files_Qt/Milky/64/7.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/8.png b/modules/highgui/src/files_Qt/Milky/64/8.png
index 2daaaeffc..639ac9a1f 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/8.png and b/modules/highgui/src/files_Qt/Milky/64/8.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/9.png b/modules/highgui/src/files_Qt/Milky/64/9.png
index 5eab48e62..cce6c7ef8 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/9.png and b/modules/highgui/src/files_Qt/Milky/64/9.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/94.png b/modules/highgui/src/files_Qt/Milky/64/94.png
index 9be117889..aca0ff936 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/94.png and b/modules/highgui/src/files_Qt/Milky/64/94.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/98.png b/modules/highgui/src/files_Qt/Milky/64/98.png
index a2e58c7bb..0a11c643b 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/98.png and b/modules/highgui/src/files_Qt/Milky/64/98.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/64/99.png b/modules/highgui/src/files_Qt/Milky/64/99.png
index 5f952b8b5..122cf2de0 100644
Binary files a/modules/highgui/src/files_Qt/Milky/64/99.png and b/modules/highgui/src/files_Qt/Milky/64/99.png differ
diff --git a/modules/highgui/src/files_Qt/Milky/README.txt b/modules/highgui/src/files_Qt/Milky/README.txt
index df2a2e83f..01ab43145 100644
--- a/modules/highgui/src/files_Qt/Milky/README.txt
+++ b/modules/highgui/src/files_Qt/Milky/README.txt
@@ -16,4 +16,4 @@ The license does not permit the following uses:
 
 You may not use, or allow anyone else to use the icons to create pornographic, libelous, obscene, or defamatory material.
 
-All icon files are provided "as is". You agree not to hold IconEden.com liable for any damages that may occur due to use, or inability to use, icons or image data from IconEden.com.
\ No newline at end of file
+All icon files are provided "as is". You agree not to hold IconEden.com liable for any damages that may occur due to use, or inability to use, icons or image data from IconEden.com.
diff --git a/modules/highgui/src/grfmt_bmp.cpp b/modules/highgui/src/grfmt_bmp.cpp
index 5f1083e43..f7147b5fc 100644
--- a/modules/highgui/src/grfmt_bmp.cpp
+++ b/modules/highgui/src/grfmt_bmp.cpp
@@ -70,7 +70,7 @@ void  BmpDecoder::close()
 
 ImageDecoder BmpDecoder::newDecoder() const
 {
-    return new BmpDecoder;
+    return makePtr<BmpDecoder>();
 }
 
 bool  BmpDecoder::readHeader()
@@ -496,7 +496,7 @@ BmpEncoder::~BmpEncoder()
 
 ImageEncoder BmpEncoder::newEncoder() const
 {
-    return new BmpEncoder;
+    return makePtr<BmpEncoder>();
 }
 
 bool  BmpEncoder::write( const Mat& img, const std::vector<int>& )
@@ -563,4 +563,3 @@ bool  BmpEncoder::write( const Mat& img, const std::vector<int>& )
 }
 
 }
-
diff --git a/modules/highgui/src/grfmt_exr.cpp b/modules/highgui/src/grfmt_exr.cpp
index 33d0ad0f5..079de5834 100644
--- a/modules/highgui/src/grfmt_exr.cpp
+++ b/modules/highgui/src/grfmt_exr.cpp
@@ -551,7 +551,7 @@ void  ExrDecoder::RGBToGray( float *in, float *out )
 
 ImageDecoder ExrDecoder::newDecoder() const
 {
-    return new ExrDecoder;
+    return makePtr<ExrDecoder>();
 }
 
 /////////////////////// ExrEncoder ///////////////////
@@ -726,7 +726,7 @@ bool  ExrEncoder::write( const Mat& img, const std::vector<int>& )
 
 ImageEncoder ExrEncoder::newEncoder() const
 {
-    return new ExrEncoder;
+    return makePtr<ExrEncoder>();
 }
 
 }
diff --git a/modules/highgui/src/grfmt_jpeg.cpp b/modules/highgui/src/grfmt_jpeg.cpp
index 09db677a4..28c52e859 100644
--- a/modules/highgui/src/grfmt_jpeg.cpp
+++ b/modules/highgui/src/grfmt_jpeg.cpp
@@ -208,7 +208,7 @@ void  JpegDecoder::close()
 
 ImageDecoder JpegDecoder::newDecoder() const
 {
-    return new JpegDecoder;
+    return makePtr<JpegDecoder>();
 }
 
 bool  JpegDecoder::readHeader()
@@ -539,7 +539,7 @@ JpegEncoder::~JpegEncoder()
 
 ImageEncoder JpegEncoder::newEncoder() const
 {
-    return new JpegEncoder;
+    return makePtr<JpegEncoder>();
 }
 
 bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
diff --git a/modules/highgui/src/grfmt_jpeg2000.cpp b/modules/highgui/src/grfmt_jpeg2000.cpp
index 502e77b5a..d711846ce 100644
--- a/modules/highgui/src/grfmt_jpeg2000.cpp
+++ b/modules/highgui/src/grfmt_jpeg2000.cpp
@@ -53,12 +53,6 @@
 #endif
 #endif
 
-#undef PACKAGE
-#undef PACKAGE_BUGREPORT
-#undef PACKAGE_NAME
-#undef PACKAGE_STRING
-#undef PACKAGE_TARNAME
-#undef PACKAGE_VERSION
 #undef VERSION
 
 #include <jasper/jasper.h>
@@ -94,7 +88,7 @@ Jpeg2KDecoder::~Jpeg2KDecoder()
 
 ImageDecoder Jpeg2KDecoder::newDecoder() const
 {
-    return new Jpeg2KDecoder;
+    return makePtr<Jpeg2KDecoder>();
 }
 
 void  Jpeg2KDecoder::close()
@@ -409,7 +403,7 @@ Jpeg2KEncoder::~Jpeg2KEncoder()
 
 ImageEncoder Jpeg2KEncoder::newEncoder() const
 {
-    return new Jpeg2KEncoder;
+    return makePtr<Jpeg2KEncoder>();
 }
 
 bool  Jpeg2KEncoder::isFormatSupported( int depth ) const
diff --git a/modules/highgui/src/grfmt_png.cpp b/modules/highgui/src/grfmt_png.cpp
index eb68ec892..4d5c7793e 100644
--- a/modules/highgui/src/grfmt_png.cpp
+++ b/modules/highgui/src/grfmt_png.cpp
@@ -51,7 +51,6 @@
     and png2bmp sample from libpng distribution (Copyright (C) 1999-2001 MIYASAKA Masaru)
 \****************************************************************************************/
 
-#undef HAVE_UNISTD_H //to avoid redefinition
 #ifndef _LFS64_LARGEFILE
 #  define _LFS64_LARGEFILE 0
 #endif
@@ -102,7 +101,7 @@ PngDecoder::~PngDecoder()
 
 ImageDecoder PngDecoder::newDecoder() const
 {
-    return new PngDecoder;
+    return makePtr<PngDecoder>();
 }
 
 void  PngDecoder::close()
@@ -172,7 +171,9 @@ bool  PngDecoder::readHeader()
                 if( !m_buf.empty() || m_f )
                 {
                     png_uint_32 wdth, hght;
-                    int bit_depth, color_type;
+                    int bit_depth, color_type, num_trans=0;
+                    png_bytep trans;
+                    png_color_16p trans_values;
 
                     png_read_info( png_ptr, info_ptr );
 
@@ -188,15 +189,22 @@ bool  PngDecoder::readHeader()
                     {
                         switch(color_type)
                         {
-                           case PNG_COLOR_TYPE_RGB:
-                           case PNG_COLOR_TYPE_PALETTE:
-                               m_type = CV_8UC3;
-                               break;
-                          case PNG_COLOR_TYPE_RGB_ALPHA:
-                               m_type = CV_8UC4;
-                               break;
-                          default:
-                               m_type = CV_8UC1;
+                            case PNG_COLOR_TYPE_RGB:
+                                m_type = CV_8UC3;
+                                break;
+                            case PNG_COLOR_TYPE_PALETTE:
+                                png_get_tRNS( png_ptr, info_ptr, &trans, &num_trans, &trans_values);
+                                //Check if there is a transparency value in the palette
+                                if ( num_trans > 0 )
+                                    m_type = CV_8UC4;
+                                else
+                                    m_type = CV_8UC3;
+                                break;
+                            case PNG_COLOR_TYPE_RGB_ALPHA:
+                                m_type = CV_8UC4;
+                                break;
+                            default:
+                                m_type = CV_8UC1;
                         }
                         if( bit_depth == 16 )
                             m_type = CV_MAKETYPE(CV_16U, CV_MAT_CN(m_type));
@@ -269,6 +277,7 @@ bool  PngDecoder::readData( Mat& img )
             else
                 png_set_rgb_to_gray( png_ptr, 1, 0.299, 0.587 ); // RGB->Gray
 
+            png_set_interlace_handling( png_ptr );
             png_read_update_info( png_ptr, info_ptr );
 
             for( y = 0; y < m_height; y++ )
@@ -308,7 +317,7 @@ bool  PngEncoder::isFormatSupported( int depth ) const
 
 ImageEncoder PngEncoder::newEncoder() const
 {
-    return new PngEncoder;
+    return makePtr<PngEncoder>();
 }
 
 
diff --git a/modules/highgui/src/grfmt_pxm.cpp b/modules/highgui/src/grfmt_pxm.cpp
index 0acf195dd..425a296cb 100644
--- a/modules/highgui/src/grfmt_pxm.cpp
+++ b/modules/highgui/src/grfmt_pxm.cpp
@@ -116,7 +116,7 @@ bool PxMDecoder::checkSignature( const String& signature ) const
 
 ImageDecoder PxMDecoder::newDecoder() const
 {
-    return new PxMDecoder;
+    return makePtr<PxMDecoder>();
 }
 
 void  PxMDecoder::close()
@@ -357,7 +357,7 @@ PxMEncoder::~PxMEncoder()
 
 ImageEncoder  PxMEncoder::newEncoder() const
 {
-    return new PxMEncoder;
+    return makePtr<PxMEncoder>();
 }
 
 
diff --git a/modules/highgui/src/grfmt_sunras.cpp b/modules/highgui/src/grfmt_sunras.cpp
index a16e5d800..b67400f33 100644
--- a/modules/highgui/src/grfmt_sunras.cpp
+++ b/modules/highgui/src/grfmt_sunras.cpp
@@ -63,7 +63,7 @@ SunRasterDecoder::~SunRasterDecoder()
 
 ImageDecoder SunRasterDecoder::newDecoder() const
 {
-    return new SunRasterDecoder;
+    return makePtr<SunRasterDecoder>();
 }
 
 void  SunRasterDecoder::close()
@@ -388,7 +388,7 @@ SunRasterEncoder::SunRasterEncoder()
 
 ImageEncoder SunRasterEncoder::newEncoder() const
 {
-    return new SunRasterEncoder;
+    return makePtr<SunRasterEncoder>();
 }
 
 SunRasterEncoder::~SunRasterEncoder()
diff --git a/modules/highgui/src/grfmt_tiff.cpp b/modules/highgui/src/grfmt_tiff.cpp
index abcd7de41..d9a73c783 100644
--- a/modules/highgui/src/grfmt_tiff.cpp
+++ b/modules/highgui/src/grfmt_tiff.cpp
@@ -110,7 +110,7 @@ int TiffDecoder::normalizeChannelsNumber(int channels) const
 
 ImageDecoder TiffDecoder::newDecoder() const
 {
-    return new TiffDecoder;
+    return makePtr<TiffDecoder>();
 }
 
 bool TiffDecoder::readHeader()
@@ -445,7 +445,7 @@ TiffEncoder::~TiffEncoder()
 
 ImageEncoder TiffEncoder::newEncoder() const
 {
-    return new TiffEncoder;
+    return makePtr<TiffEncoder>();
 }
 
 bool TiffEncoder::isFormatSupported( int depth ) const
diff --git a/modules/highgui/src/grfmt_webp.cpp b/modules/highgui/src/grfmt_webp.cpp
index 3db4369fe..1c64adec6 100644
--- a/modules/highgui/src/grfmt_webp.cpp
+++ b/modules/highgui/src/grfmt_webp.cpp
@@ -46,6 +46,7 @@
 
 #include <webp/decode.h>
 #include <webp/encode.h>
+
 #include <stdio.h>
 #include <limits.h>
 
@@ -53,36 +54,43 @@
 
 #include "opencv2/imgproc.hpp"
 
+const size_t WEBP_HEADER_SIZE = 32;
+
 namespace cv
 {
 
 WebPDecoder::WebPDecoder()
 {
-    m_signature = "RIFF....WEBPVP8 ";
     m_buf_supported = true;
 }
 
-WebPDecoder::~WebPDecoder()
+WebPDecoder::~WebPDecoder() {}
+
+size_t WebPDecoder::signatureLength() const
 {
+    return WEBP_HEADER_SIZE;
+}
+
+bool WebPDecoder::checkSignature(const String & signature) const
+{
+    bool ret = false;
+
+    if(signature.size() >= WEBP_HEADER_SIZE)
+    {
+        WebPBitstreamFeatures features;
+        if(VP8_STATUS_OK == WebPGetFeatures((uint8_t *)signature.c_str(),
+                                            WEBP_HEADER_SIZE, &features))
+        {
+            ret = true;
+        }
+    }
+
+    return ret;
 }
 
 ImageDecoder WebPDecoder::newDecoder() const
 {
-    return new WebPDecoder;
-}
-
-bool WebPDecoder::checkSignature( const String& signature ) const
-{
-    size_t len = signatureLength();
-    bool ret = false;
-
-    if(signature.size() >= len)
-    {
-        ret = ( (memcmp(signature.c_str(), m_signature.c_str(), 4) == 0) &&
-            (memcmp(signature.c_str() + 8, m_signature.c_str() + 8, 4) == 0) );
-    }
-
-    return ret;
+    return makePtr<WebPDecoder>();
 }
 
 bool WebPDecoder::readHeader()
@@ -99,16 +107,16 @@ bool WebPDecoder::readHeader()
         }
 
         fseek(wfile, 0, SEEK_END);
-        size_t wfile_size = ftell(wfile);
+        long int wfile_size = ftell(wfile);
         fseek(wfile, 0, SEEK_SET);
 
-        if(wfile_size > (size_t)INT_MAX)
+        if(wfile_size > static_cast<long int>(INT_MAX))
         {
             fclose(wfile);
             return false;
         }
 
-        data.create(1, (int)wfile_size, CV_8U);
+        data.create(1, wfile_size, CV_8U);
 
         size_t data_size = fread(data.data, 1, wfile_size, wfile);
 
@@ -117,7 +125,7 @@ bool WebPDecoder::readHeader()
             fclose(wfile);
         }
 
-        if( data_size < wfile_size )
+        if(static_cast<long int>(data_size) != wfile_size)
         {
             return false;
         }
@@ -127,9 +135,23 @@ bool WebPDecoder::readHeader()
         data = m_buf;
     }
 
-    if(WebPGetInfo(data.data, data.total(), &m_width, &m_height) == 1)
+    WebPBitstreamFeatures features;
+    if(VP8_STATUS_OK == WebPGetFeatures(data.data, WEBP_HEADER_SIZE, &features))
     {
-        m_type = CV_8UC3;
+        m_width  = features.width;
+        m_height = features.height;
+
+        if (features.has_alpha)
+        {
+            m_type = CV_8UC4;
+            channels = 4;
+        }
+        else
+        {
+            m_type = CV_8UC3;
+            channels = 3;
+        }
+
         return true;
     }
 
@@ -140,10 +162,25 @@ bool WebPDecoder::readData(Mat &img)
 {
     if( m_width > 0 && m_height > 0 )
     {
-        uchar* out_data = img.data;
-        unsigned int out_data_size = m_width * m_height * 3 * sizeof(uchar);
+        if (img.cols != m_width || img.rows != m_height || img.type() != m_type)
+        {
+            img.create(m_height, m_width, m_type);
+        }
 
-        uchar *res_ptr = WebPDecodeBGRInto(data.data, data.total(), out_data, out_data_size, m_width * 3);
+        uchar* out_data = img.data;
+        size_t out_data_size = img.cols * img.rows * img.elemSize();
+
+        uchar *res_ptr = 0;
+        if (channels == 3)
+        {
+            res_ptr = WebPDecodeBGRInto(data.data, data.total(), out_data,
+                                        out_data_size, img.step);
+        }
+        else if (channels == 4)
+        {
+            res_ptr = WebPDecodeBGRAInto(data.data, data.total(), out_data,
+                                         out_data_size, img.step);
+        }
 
         if(res_ptr == out_data)
         {
@@ -160,13 +197,11 @@ WebPEncoder::WebPEncoder()
     m_buf_supported = true;
 }
 
-WebPEncoder::~WebPEncoder()
-{
-}
+WebPEncoder::~WebPEncoder() { }
 
 ImageEncoder WebPEncoder::newEncoder() const
 {
-    return new WebPEncoder();
+    return makePtr<WebPEncoder>();
 }
 
 bool WebPEncoder::write(const Mat& img, const std::vector<int>& params)
@@ -179,19 +214,19 @@ bool WebPEncoder::write(const Mat& img, const std::vector<int>& params)
     size_t size = 0;
 
     bool comp_lossless = true;
-    int quality = 100;
+    float quality = 100.0f;
 
     if (params.size() > 1)
     {
         if (params[0] == CV_IMWRITE_WEBP_QUALITY)
         {
             comp_lossless = false;
-            quality = params[1];
-            if (quality < 1)
+            quality = static_cast<float>(params[1]);
+            if (quality < 1.0f)
             {
-                quality = 1;
+                quality = 1.0f;
             }
-            if (quality > 100)
+            if (quality > 100.0f)
             {
                 comp_lossless = true;
             }
@@ -211,14 +246,32 @@ bool WebPEncoder::write(const Mat& img, const std::vector<int>& params)
         image = &temp;
         channels = 3;
     }
+    else if (channels == 2)
+    {
+        return false;
+    }
 
     if (comp_lossless)
     {
-        size = WebPEncodeLosslessBGR(image->data, width, height, ((width * 3 + 3) & ~3), &out);
+        if(channels == 3)
+        {
+            size = WebPEncodeLosslessBGR(image->data, width, height, image->step, &out);
+        }
+        else if(channels == 4)
+        {
+            size = WebPEncodeLosslessBGRA(image->data, width, height, image->step, &out);
+        }
     }
     else
     {
-        size = WebPEncodeBGR(image->data, width, height, ((width * 3 + 3) & ~3), (float)quality, &out);
+        if(channels == 3)
+        {
+            size = WebPEncodeBGR(image->data, width, height, image->step, quality, &out);
+        }
+        else if(channels == 4)
+        {
+            size = WebPEncodeBGRA(image->data, width, height, image->step, quality, &out);
+        }
     }
 
     if(size > 0)
diff --git a/modules/highgui/src/grfmt_webp.hpp b/modules/highgui/src/grfmt_webp.hpp
index f37e6e9f4..ea692bf8d 100644
--- a/modules/highgui/src/grfmt_webp.hpp
+++ b/modules/highgui/src/grfmt_webp.hpp
@@ -47,6 +47,8 @@
 
 #ifdef HAVE_WEBP
 
+
+
 namespace cv
 {
 
@@ -60,12 +62,15 @@ public:
     bool readData( Mat& img );
     bool readHeader();
     void close();
-    bool checkSignature( const String& signature ) const;
+
+    size_t signatureLength() const;
+    bool checkSignature( const String& signature) const;
 
     ImageDecoder newDecoder() const;
 
 protected:
     Mat data;
+    int channels;
 };
 
 class WebPEncoder : public BaseImageEncoder
diff --git a/modules/highgui/src/ios_conversions.mm b/modules/highgui/src/ios_conversions.mm
new file mode 100644
index 000000000..fa6208a17
--- /dev/null
+++ b/modules/highgui/src/ios_conversions.mm
@@ -0,0 +1,117 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#import "opencv2/highgui/cap_ios.h"
+#include "precomp.hpp"
+
+UIImage* MatToUIImage(const cv::Mat& image) {
+
+    NSData *data = [NSData dataWithBytes:image.data
+                                  length:image.elemSize()*image.total()];
+
+    CGColorSpaceRef colorSpace;
+
+    if (image.elemSize() == 1) {
+        colorSpace = CGColorSpaceCreateDeviceGray();
+    } else {
+        colorSpace = CGColorSpaceCreateDeviceRGB();
+    }
+
+    CGDataProviderRef provider =
+            CGDataProviderCreateWithCFData((__bridge CFDataRef)data);
+
+    // Creating CGImage from cv::Mat
+    CGImageRef imageRef = CGImageCreate(image.cols,
+                                        image.rows,
+                                        8,
+                                        8 * image.elemSize(),
+                                        image.step.p[0],
+                                        colorSpace,
+                                        kCGImageAlphaNone|
+                                        kCGBitmapByteOrderDefault,
+                                        provider,
+                                        NULL,
+                                        false,
+                                        kCGRenderingIntentDefault
+                                        );
+
+
+    // Getting UIImage from CGImage
+    UIImage *finalImage = [UIImage imageWithCGImage:imageRef];
+    CGImageRelease(imageRef);
+    CGDataProviderRelease(provider);
+    CGColorSpaceRelease(colorSpace);
+
+    return finalImage;
+}
+
+void UIImageToMat(const UIImage* image,
+                         cv::Mat& m, bool alphaExist) {
+    CGColorSpaceRef colorSpace = CGImageGetColorSpace(image.CGImage);
+    CGFloat cols = image.size.width, rows = image.size.height;
+    CGContextRef contextRef;
+    CGBitmapInfo bitmapInfo = kCGImageAlphaPremultipliedLast;
+    if (CGColorSpaceGetModel(colorSpace) == 0)
+    {
+        m.create(rows, cols, CV_8UC1); // 8 bits per component, 1 channel
+        bitmapInfo = kCGImageAlphaNone;
+        if (!alphaExist)
+            bitmapInfo = kCGImageAlphaNone;
+        contextRef = CGBitmapContextCreate(m.data, m.cols, m.rows, 8,
+                                           m.step[0], colorSpace,
+                                           bitmapInfo);
+    }
+    else
+    {
+        m.create(rows, cols, CV_8UC4); // 8 bits per component, 4 channels
+        if (!alphaExist)
+            bitmapInfo = kCGImageAlphaNoneSkipLast |
+                                kCGBitmapByteOrderDefault;
+        contextRef = CGBitmapContextCreate(m.data, m.cols, m.rows, 8,
+                                           m.step[0], colorSpace,
+                                           bitmapInfo);
+    }
+    CGContextDrawImage(contextRef, CGRectMake(0, 0, cols, rows),
+                       image.CGImage);
+    CGContextRelease(contextRef);
+    CGColorSpaceRelease(colorSpace);
+}
diff --git a/modules/highgui/src/loadsave.cpp b/modules/highgui/src/loadsave.cpp
index c75691698..8f481553f 100644
--- a/modules/highgui/src/loadsave.cpp
+++ b/modules/highgui/src/loadsave.cpp
@@ -59,37 +59,42 @@ struct ImageCodecInitializer
 {
     ImageCodecInitializer()
     {
+<<<<<<< HEAD
         decoders.push_back( new BmpDecoder );
         encoders.push_back( new BmpEncoder );
 		decoders.push_back( new HdrDecoder );
         encoders.push_back( new HdrEncoder );
+=======
+        decoders.push_back( makePtr<BmpDecoder>() );
+        encoders.push_back( makePtr<BmpEncoder>() );
+>>>>>>> 99a43257d5912ff215016e1cf5f4e0c2a934b72f
     #ifdef HAVE_JPEG
-        decoders.push_back( new JpegDecoder );
-        encoders.push_back( new JpegEncoder );
+        decoders.push_back( makePtr<JpegDecoder>() );
+        encoders.push_back( makePtr<JpegEncoder>() );
     #endif
     #ifdef HAVE_WEBP
-        decoders.push_back( new WebPDecoder );
-        encoders.push_back( new WebPEncoder );
+        decoders.push_back( makePtr<WebPDecoder>() );
+        encoders.push_back( makePtr<WebPEncoder>() );
     #endif
-        decoders.push_back( new SunRasterDecoder );
-        encoders.push_back( new SunRasterEncoder );
-        decoders.push_back( new PxMDecoder );
-        encoders.push_back( new PxMEncoder );
+        decoders.push_back( makePtr<SunRasterDecoder>() );
+        encoders.push_back( makePtr<SunRasterEncoder>() );
+        decoders.push_back( makePtr<PxMDecoder>() );
+        encoders.push_back( makePtr<PxMEncoder>() );
     #ifdef HAVE_TIFF
-        decoders.push_back( new TiffDecoder );
+        decoders.push_back( makePtr<TiffDecoder>() );
     #endif
-        encoders.push_back( new TiffEncoder );
+        encoders.push_back( makePtr<TiffEncoder>() );
     #ifdef HAVE_PNG
-        decoders.push_back( new PngDecoder );
-        encoders.push_back( new PngEncoder );
+        decoders.push_back( makePtr<PngDecoder>() );
+        encoders.push_back( makePtr<PngEncoder>() );
     #endif
     #ifdef HAVE_JASPER
-        decoders.push_back( new Jpeg2KDecoder );
-        encoders.push_back( new Jpeg2KEncoder );
+        decoders.push_back( makePtr<Jpeg2KDecoder>() );
+        encoders.push_back( makePtr<Jpeg2KEncoder>() );
     #endif
     #ifdef HAVE_OPENEXR
-        decoders.push_back( new ExrDecoder );
-        encoders.push_back( new ExrEncoder );
+        decoders.push_back( makePtr<ExrDecoder>() );
+        encoders.push_back( makePtr<ExrEncoder>() );
     #endif
     }
 
@@ -201,7 +206,7 @@ imread_( const String& filename, int flags, int hdrtype, Mat* mat=0 )
     Mat temp, *data = &temp;
 
     ImageDecoder decoder = findDecoder(filename);
-    if( decoder.empty() )
+    if( !decoder )
         return 0;
     decoder->setSource(filename);
     if( !decoder->readHeader() )
@@ -271,7 +276,7 @@ static bool imwrite_( const String& filename, const Mat& image,
     CV_Assert( image.channels() == 1 || image.channels() == 3 || image.channels() == 4 );
 
     ImageEncoder encoder = findEncoder( filename );
-    if( encoder.empty() )
+    if( !encoder )
         CV_Error( CV_StsError, "could not find a writer for the specified extension" );
     if( !encoder->isFormatSupported(image.depth()) )
     {
@@ -310,7 +315,7 @@ imdecode_( const Mat& buf, int flags, int hdrtype, Mat* mat=0 )
     String filename;
 
     ImageDecoder decoder = findDecoder(buf);
-    if( decoder.empty() )
+    if( !decoder )
         return 0;
 
     if( !decoder->setSource(buf) )
@@ -410,7 +415,7 @@ bool imencode( const String& ext, InputArray _image,
     CV_Assert( channels == 1 || channels == 3 || channels == 4 );
 
     ImageEncoder encoder = findEncoder( ext );
-    if( encoder.empty() )
+    if( !encoder )
         CV_Error( CV_StsError, "could not find encoder for the specified extension" );
 
     if( !encoder->isFormatSupported(image.depth()) )
diff --git a/modules/highgui/src/precomp.cpp b/modules/highgui/src/precomp.cpp
deleted file mode 100644
index d6f6e18f7..000000000
--- a/modules/highgui/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
diff --git a/modules/highgui/src/precomp.hpp b/modules/highgui/src/precomp.hpp
index 81ccb3203..d225cb314 100644
--- a/modules/highgui/src/precomp.hpp
+++ b/modules/highgui/src/precomp.hpp
@@ -120,6 +120,9 @@ CvVideoWriter* cvCreateVideoWriter_VFW( const char* filename, int fourcc,
                                         double fps, CvSize frameSize, int is_color );
 CvCapture* cvCreateCameraCapture_DShow( int index );
 CvCapture* cvCreateCameraCapture_MSMF( int index );
+CvCapture* cvCreateFileCapture_MSMF (const char* filename);
+CvVideoWriter* cvCreateVideoWriter_MSMF( const char* filename, int fourcc,
+                                        double fps, CvSize frameSize, int is_color );
 CvCapture* cvCreateCameraCapture_OpenNI( int index );
 CvCapture* cvCreateFileCapture_OpenNI( const char* filename );
 CvCapture* cvCreateCameraCapture_Android( int index );
diff --git a/modules/highgui/src/window_QT.cpp b/modules/highgui/src/window_QT.cpp
index eda75bb3c..2b7775bd8 100644
--- a/modules/highgui/src/window_QT.cpp
+++ b/modules/highgui/src/window_QT.cpp
@@ -38,6 +38,7 @@
 
 //--------------------Google Code 2010 -- Yannick Verdie--------------------//
 
+#include "precomp.hpp"
 
 #if defined(HAVE_QT)
 
@@ -2474,34 +2475,32 @@ void DefaultViewPort::saveView()
     {
         QString extension = fileName.right(3);
 
-        //   (no need anymore) create the image resized to receive the 'screenshot'
-        //    image2Draw_qt_resized = QImage(viewport()->width(), viewport()->height(),QImage::Format_RGB888);
-
-        QPainter saveimage(&image2Draw_qt_resized);
-        this->render(&saveimage);
+        // Create a new pixmap to render the viewport into
+        QPixmap viewportPixmap(viewport()->size());
+        viewport()->render(&viewportPixmap);
 
         // Save it..
         if (QString::compare(extension, "png", Qt::CaseInsensitive) == 0)
         {
-            image2Draw_qt_resized.save(fileName, "PNG");
+            viewportPixmap.save(fileName, "PNG");
             return;
         }
 
         if (QString::compare(extension, "jpg", Qt::CaseInsensitive) == 0)
         {
-            image2Draw_qt_resized.save(fileName, "JPG");
+            viewportPixmap.save(fileName, "JPG");
             return;
         }
 
         if (QString::compare(extension, "bmp", Qt::CaseInsensitive) == 0)
         {
-            image2Draw_qt_resized.save(fileName, "BMP");
+            viewportPixmap.save(fileName, "BMP");
             return;
         }
 
         if (QString::compare(extension, "jpeg", Qt::CaseInsensitive) == 0)
         {
-            image2Draw_qt_resized.save(fileName, "JPEG");
+            viewportPixmap.save(fileName, "JPEG");
             return;
         }
 
@@ -2651,17 +2650,16 @@ void DefaultViewPort::paintEvent(QPaintEvent* evnt)
     //Now disable matrixWorld for overlay display
     myPainter.setWorldMatrixEnabled(false);
 
+    //overlay pixel values if zoomed in far enough
+    if (param_matrixWorld.m11()*ratioX >= threshold_zoom_img_region &&
+        param_matrixWorld.m11()*ratioY >= threshold_zoom_img_region)
+    {
+        drawImgRegion(&myPainter);
+    }
+
     //in mode zoom/panning
     if (param_matrixWorld.m11() > 1)
     {
-        if (param_matrixWorld.m11() >= threshold_zoom_img_region)
-        {
-            if (centralWidget->param_flags == CV_WINDOW_NORMAL)
-                startDisplayInfo("WARNING: The values displayed are the resized image's values. If you want the original image's values, use CV_WINDOW_AUTOSIZE", 1000);
-
-            drawImgRegion(&myPainter);
-        }
-
         drawViewOverview(&myPainter);
     }
 
@@ -2887,22 +2885,24 @@ void DefaultViewPort::drawStatusBar()
 //accept only CV_8UC1 and CV_8UC8 image for now
 void DefaultViewPort::drawImgRegion(QPainter *painter)
 {
-
     if (nbChannelOriginImage!=CV_8UC1 && nbChannelOriginImage!=CV_8UC3)
         return;
 
-    qreal offsetX = param_matrixWorld.dx()/param_matrixWorld.m11();
+    double pixel_width = param_matrixWorld.m11()*ratioX;
+    double pixel_height = param_matrixWorld.m11()*ratioY;
+
+    qreal offsetX = param_matrixWorld.dx()/pixel_width;
     offsetX = offsetX - floor(offsetX);
-    qreal offsetY = param_matrixWorld.dy()/param_matrixWorld.m11();
+    qreal offsetY = param_matrixWorld.dy()/pixel_height;
     offsetY = offsetY - floor(offsetY);
 
     QSize view = size();
     QVarLengthArray<QLineF, 30> linesX;
-    for (qreal _x = offsetX*param_matrixWorld.m11(); _x < view.width(); _x += param_matrixWorld.m11() )
+    for (qreal _x = offsetX*pixel_width; _x < view.width(); _x += pixel_width )
         linesX.append(QLineF(_x, 0, _x, view.height()));
 
     QVarLengthArray<QLineF, 30> linesY;
-    for (qreal _y = offsetY*param_matrixWorld.m11(); _y < view.height(); _y += param_matrixWorld.m11() )
+    for (qreal _y = offsetY*pixel_height; _y < view.height(); _y += pixel_height )
         linesY.append(QLineF(0, _y, view.width(), _y));
 
 
@@ -2910,27 +2910,25 @@ void DefaultViewPort::drawImgRegion(QPainter *painter)
     int original_font_size = f.pointSize();
     //change font size
     //f.setPointSize(4+(param_matrixWorld.m11()-threshold_zoom_img_region)/5);
-    f.setPixelSize(10+(param_matrixWorld.m11()-threshold_zoom_img_region)/5);
+    f.setPixelSize(10+(pixel_height-threshold_zoom_img_region)/5);
     painter->setFont(f);
-    QString val;
-    QRgb rgbValue;
 
-    QPointF point1;//sorry, I do not know how to name it
-    QPointF point2;//idem
 
-    for (int j=-1;j<height()/param_matrixWorld.m11();j++)//-1 because display the pixels top rows left colums
-        for (int i=-1;i<width()/param_matrixWorld.m11();i++)//-1
+    for (int j=-1;j<height()/pixel_height;j++)//-1 because display the pixels top rows left columns
+        for (int i=-1;i<width()/pixel_width;i++)//-1
         {
-            point1.setX((i+offsetX)*param_matrixWorld.m11());
-            point1.setY((j+offsetY)*param_matrixWorld.m11());
+            // Calculate top left of the pixel's position in the viewport (screen space)
+            QPointF pos_in_view((i+offsetX)*pixel_width, (j+offsetY)*pixel_height);
 
-            matrixWorld_inv.map(point1.x(),point1.y(),&point2.rx(),&point2.ry());
+            // Calculate top left of the pixel's position in the image (image space)
+            QPointF pos_in_image = matrixWorld_inv.map(pos_in_view);// Top left of pixel in view
+            pos_in_image.rx() = pos_in_image.x()/ratioX;
+            pos_in_image.ry() = pos_in_image.y()/ratioY;
+            QPoint point_in_image(pos_in_image.x() + 0.5f,pos_in_image.y() + 0.5f);// Add 0.5 for rounding
 
-            point2.rx()= (long) (point2.x() + 0.5);
-            point2.ry()= (long) (point2.y() + 0.5);
-
-            if (point2.x() >= 0 && point2.y() >= 0)
-                rgbValue = image2Draw_qt_resized.pixel(QPoint(point2.x(),point2.y()));
+            QRgb rgbValue;
+            if (image2Draw_qt.valid(point_in_image))
+                rgbValue = image2Draw_qt.pixel(point_in_image);
             else
                 rgbValue = qRgb(0,0,0);
 
@@ -2943,29 +2941,29 @@ void DefaultViewPort::drawImgRegion(QPainter *painter)
                 painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/2),
                     Qt::AlignCenter, val);
                 */
+                QString val;
 
                 val = tr("%1").arg(qRed(rgbValue));
                 painter->setPen(QPen(Qt::red, 1));
-                painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/3),
+                painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height/3),
                     Qt::AlignCenter, val);
 
                 val = tr("%1").arg(qGreen(rgbValue));
                 painter->setPen(QPen(Qt::green, 1));
-                painter->drawText(QRect(point1.x(),point1.y()+param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3),
+                painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+pixel_height/3,pixel_width,pixel_height/3),
                     Qt::AlignCenter, val);
 
                 val = tr("%1").arg(qBlue(rgbValue));
                 painter->setPen(QPen(Qt::blue, 1));
-                painter->drawText(QRect(point1.x(),point1.y()+2*param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3),
+                painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+2*pixel_height/3,pixel_width,pixel_height/3),
                     Qt::AlignCenter, val);
 
             }
 
             if (nbChannelOriginImage==CV_8UC1)
             {
-
-                val = tr("%1").arg(qRed(rgbValue));
-                painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()),
+                QString val = tr("%1").arg(qRed(rgbValue));
+                painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height),
                     Qt::AlignCenter, val);
             }
         }
diff --git a/modules/highgui/src/window_QT.h b/modules/highgui/src/window_QT.h
index 089997f51..a96a8c6e6 100644
--- a/modules/highgui/src/window_QT.h
+++ b/modules/highgui/src/window_QT.h
@@ -522,7 +522,6 @@ private:
 
     CvMat* image2Draw_mat;
     QImage image2Draw_qt;
-    QImage image2Draw_qt_resized;
     int nbChannelOriginImage;
 
     //for mouse callback
diff --git a/modules/highgui/test/test_fourcc.cpp b/modules/highgui/test/test_fourcc.cpp
index 584b8c596..82f699b09 100644
--- a/modules/highgui/test/test_fourcc.cpp
+++ b/modules/highgui/test/test_fourcc.cpp
@@ -112,4 +112,4 @@ TEST(Highgui_dshow, fourcc_conversion)
 
         EXPECT_EQ(fourcc, (unsigned long)(unsigned)fourccFromParam);
     }
-}
\ No newline at end of file
+}
diff --git a/modules/highgui/test/test_framecount.cpp b/modules/highgui/test/test_framecount.cpp
index 875ce6aca..30f6e67ce 100644
--- a/modules/highgui/test/test_framecount.cpp
+++ b/modules/highgui/test/test_framecount.cpp
@@ -71,8 +71,8 @@ void CV_FramecountTest::run(int)
     {
         string file_path = src_dir+"video/big_buck_bunny."+ext[i];
 
-        cap = cvCreateFileCapture(file_path.c_str());
-        if (cap.empty())
+        cap.reset(cvCreateFileCapture(file_path.c_str()));
+        if (!cap)
         {
             ts->printf(cvtest::TS::LOG, "\nFile information (video %d): \n\nName: big_buck_bunny.%s\nFAILED\n\n", i+1, ext[i].c_str());
             ts->printf(cvtest::TS::LOG, "Error: cannot read source video file.\n");
diff --git a/modules/highgui/test/test_grfmt.cpp b/modules/highgui/test/test_grfmt.cpp
index 6fa7af831..4db0d46c5 100644
--- a/modules/highgui/test/test_grfmt.cpp
+++ b/modules/highgui/test/test_grfmt.cpp
@@ -284,6 +284,98 @@ TEST(Highgui_ImreadVSCvtColor, regression)
     EXPECT_LT(actual_avg_diff, MAX_MEAN_DIFF);
     EXPECT_LT(actual_maxval, MAX_ABS_DIFF);
 }
+
+//Test OpenCV issue 3075 is solved
+class CV_GrfmtReadPNGColorPaletteWithAlphaTest : public cvtest::BaseTest
+{
+public:
+    void run(int)
+    {
+        try
+        {
+            // First Test : Read PNG with alpha, imread flag -1
+            Mat img = imread(string(ts->get_data_path()) + "readwrite/color_palette_alpha.png",-1);
+            if (img.empty()) ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
+
+            ASSERT_TRUE(img.channels() == 4);
+
+            unsigned char* img_data = (unsigned char*)img.data;
+
+            // Verification first pixel is red in BGRA
+            ASSERT_TRUE(img_data[0] == 0x00);
+            ASSERT_TRUE(img_data[1] == 0x00);
+            ASSERT_TRUE(img_data[2] == 0xFF);
+            ASSERT_TRUE(img_data[3] == 0xFF);
+
+            // Verification second pixel is red in BGRA
+            ASSERT_TRUE(img_data[4] == 0x00);
+            ASSERT_TRUE(img_data[5] == 0x00);
+            ASSERT_TRUE(img_data[6] == 0xFF);
+            ASSERT_TRUE(img_data[7] == 0xFF);
+
+            // Second Test : Read PNG without alpha, imread flag -1
+            img = imread(string(ts->get_data_path()) + "readwrite/color_palette_no_alpha.png",-1);
+            if (img.empty()) ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
+
+            ASSERT_TRUE(img.channels() == 3);
+
+            img_data = (unsigned char*)img.data;
+
+            // Verification first pixel is red in BGR
+            ASSERT_TRUE(img_data[0] == 0x00);
+            ASSERT_TRUE(img_data[1] == 0x00);
+            ASSERT_TRUE(img_data[2] == 0xFF);
+
+            // Verification second pixel is red in BGR
+            ASSERT_TRUE(img_data[3] == 0x00);
+            ASSERT_TRUE(img_data[4] == 0x00);
+            ASSERT_TRUE(img_data[5] == 0xFF);
+
+            // Third Test : Read PNG with alpha, imread flag 1
+            img = imread(string(ts->get_data_path()) + "readwrite/color_palette_alpha.png",1);
+            if (img.empty()) ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
+
+            ASSERT_TRUE(img.channels() == 3);
+
+            img_data = (unsigned char*)img.data;
+
+            // Verification first pixel is red in BGR
+            ASSERT_TRUE(img_data[0] == 0x00);
+            ASSERT_TRUE(img_data[1] == 0x00);
+            ASSERT_TRUE(img_data[2] == 0xFF);
+
+            // Verification second pixel is red in BGR
+            ASSERT_TRUE(img_data[3] == 0x00);
+            ASSERT_TRUE(img_data[4] == 0x00);
+            ASSERT_TRUE(img_data[5] == 0xFF);
+
+            // Fourth Test : Read PNG without alpha, imread flag 1
+            img = imread(string(ts->get_data_path()) + "readwrite/color_palette_no_alpha.png",1);
+            if (img.empty()) ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
+
+            ASSERT_TRUE(img.channels() == 3);
+
+            img_data = (unsigned char*)img.data;
+
+            // Verification first pixel is red in BGR
+            ASSERT_TRUE(img_data[0] == 0x00);
+            ASSERT_TRUE(img_data[1] == 0x00);
+            ASSERT_TRUE(img_data[2] == 0xFF);
+
+            // Verification second pixel is red in BGR
+            ASSERT_TRUE(img_data[3] == 0x00);
+            ASSERT_TRUE(img_data[4] == 0x00);
+            ASSERT_TRUE(img_data[5] == 0xFF);
+        }
+        catch(...)
+        {
+            ts->set_failed_test_info(cvtest::TS::FAIL_EXCEPTION);
+    }
+        ts->set_failed_test_info(cvtest::TS::OK);
+    }
+};
+
+TEST(Highgui_Image, read_png_color_palette_with_alpha) { CV_GrfmtReadPNGColorPaletteWithAlphaTest test; test.safe_run(); }
 #endif
 
 #ifdef HAVE_JPEG
@@ -387,11 +479,15 @@ TEST(Highgui_WebP, encode_decode_lossless_webp)
 TEST(Highgui_WebP, encode_decode_lossy_webp)
 {
     cvtest::TS& ts = *cvtest::TS::ptr();
+<<<<<<< HEAD
     string input = string(ts.get_data_path()) + "/../cv/shared/lena.png";
+=======
+    std::string input = std::string(ts.get_data_path()) + "../cv/shared/lena.png";
+>>>>>>> 99a43257d5912ff215016e1cf5f4e0c2a934b72f
     cv::Mat img = cv::imread(input);
     ASSERT_FALSE(img.empty());
 
-    for(int q = 100; q>=0; q-=10)
+    for(int q = 100; q>=0; q-=20)
     {
         std::vector<int> params;
         params.push_back(IMWRITE_WEBP_QUALITY);
@@ -402,9 +498,36 @@ TEST(Highgui_WebP, encode_decode_lossy_webp)
         cv::Mat img_webp = cv::imread(output);
         remove(output.c_str());
         EXPECT_FALSE(img_webp.empty());
+        EXPECT_EQ(3,   img_webp.channels());
+        EXPECT_EQ(512, img_webp.cols);
+        EXPECT_EQ(512, img_webp.rows);
     }
 }
 
+TEST(Highgui_WebP, encode_decode_with_alpha_webp)
+{
+    cvtest::TS& ts = *cvtest::TS::ptr();
+    std::string input = std::string(ts.get_data_path()) + "../cv/shared/lena.png";
+    cv::Mat img = cv::imread(input);
+    ASSERT_FALSE(img.empty());
+
+    std::vector<cv::Mat> imgs;
+    cv::split(img, imgs);
+    imgs.push_back(cv::Mat(imgs[0]));
+    imgs[imgs.size() - 1] = cv::Scalar::all(128);
+    cv::merge(imgs, img);
+
+    string output = cv::tempfile(".webp");
+
+    EXPECT_NO_THROW(cv::imwrite(output, img));
+    cv::Mat img_webp = cv::imread(output);
+    remove(output.c_str());
+    EXPECT_FALSE(img_webp.empty());
+    EXPECT_EQ(4,   img_webp.channels());
+    EXPECT_EQ(512, img_webp.cols);
+    EXPECT_EQ(512, img_webp.rows);
+}
+
 #endif
 
 TEST(Highgui_Hdr, regression)
diff --git a/modules/highgui/test/test_main.cpp b/modules/highgui/test/test_main.cpp
index 363b54161..4727b9565 100644
--- a/modules/highgui/test/test_main.cpp
+++ b/modules/highgui/test/test_main.cpp
@@ -1,4 +1,3 @@
 #include "test_precomp.hpp"
 
 CV_TEST_MAIN("highgui")
-
diff --git a/modules/highgui/test/test_positioning.cpp b/modules/highgui/test/test_positioning.cpp
index 396839353..edc8dcf42 100644
--- a/modules/highgui/test/test_positioning.cpp
+++ b/modules/highgui/test/test_positioning.cpp
@@ -220,4 +220,4 @@ void CV_VideoRandomPositioningTest::run(int)
 #if BUILD_WITH_VIDEO_INPUT_SUPPORT && defined HAVE_FFMPEG
 TEST (Highgui_Video, seek_progressive) { CV_VideoProgressivePositioningTest test; test.safe_run(); }
 TEST (Highgui_Video, seek_random) { CV_VideoRandomPositioningTest test; test.safe_run(); }
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/highgui/test/test_precomp.cpp b/modules/highgui/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/highgui/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/highgui/test/test_precomp.hpp b/modules/highgui/test/test_precomp.hpp
index d904b4cb4..8468e4618 100644
--- a/modules/highgui/test/test_precomp.hpp
+++ b/modules/highgui/test/test_precomp.hpp
@@ -28,6 +28,7 @@
     defined(HAVE_CMU1394)      || \
     defined(HAVE_MIL)          || \
     defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
     defined(HAVE_UNICAP)       || \
     defined(HAVE_PVAPI)        || \
     defined(HAVE_OPENNI)       || \
@@ -44,9 +45,11 @@
 #if defined(HAVE_XINE)         || \
     defined(HAVE_GSTREAMER)    || \
     defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
     defined(HAVE_AVFOUNDATION) || \
     /*defined(HAVE_OPENNI)     || too specialized */ \
-    defined(HAVE_FFMPEG)
+    defined(HAVE_FFMPEG)       || \
+    defined(HAVE_MSMF)
 #  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
 #else
 #  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
@@ -55,8 +58,10 @@
 #if /*defined(HAVE_XINE)       || */\
     defined(HAVE_GSTREAMER)    || \
     defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
     defined(HAVE_AVFOUNDATION) || \
-    defined(HAVE_FFMPEG)
+    defined(HAVE_FFMPEG)       || \
+    defined(HAVE_MSMF)
 #  define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1
 #else
 #  define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 0
diff --git a/modules/highgui/test/test_video_io.cpp b/modules/highgui/test/test_video_io.cpp
index 8738cef3e..059d1e4fb 100644
--- a/modules/highgui/test/test_video_io.cpp
+++ b/modules/highgui/test/test_video_io.cpp
@@ -54,6 +54,35 @@ string fourccToString(int fourcc)
     return format("%c%c%c%c", fourcc & 255, (fourcc >> 8) & 255, (fourcc >> 16) & 255, (fourcc >> 24) & 255);
 }
 
+#ifdef HAVE_MSMF
+const VideoFormat g_specific_fmt_list[] =
+{
+        /*VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', '2', '5')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', '5', '0')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 'c', ' ')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 'h', '1')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 'h', 'd')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 's', 'd')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 's', 'l')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('H', '2', '6', '3')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('M', '4', 'S', '2')),
+        VideoFormat("avi", CV_FOURCC_MACRO('M', 'J', 'P', 'G')),
+        VideoFormat("mp4", CV_FOURCC_MACRO('M', 'P', '4', 'S')),
+        VideoFormat("mp4", CV_FOURCC_MACRO('M', 'P', '4', 'V')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('M', 'P', '4', '3')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('M', 'P', 'G', '1')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('M', 'S', 'S', '1')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('M', 'S', 'S', '2')),*/
+#if !defined(_M_ARM)
+        VideoFormat("wmv", CV_FOURCC_MACRO('W', 'M', 'V', '1')),
+        VideoFormat("wmv", CV_FOURCC_MACRO('W', 'M', 'V', '2')),
+#endif
+        VideoFormat("wmv", CV_FOURCC_MACRO('W', 'M', 'V', '3')),
+        VideoFormat("avi", CV_FOURCC_MACRO('H', '2', '6', '4')),
+        //VideoFormat("wmv", CV_FOURCC_MACRO('W', 'V', 'C', '1')),
+        VideoFormat()
+};
+#else
 const VideoFormat g_specific_fmt_list[] =
 {
     VideoFormat("avi", VideoWriter::fourcc('X', 'V', 'I', 'D')),
@@ -63,17 +92,17 @@ const VideoFormat g_specific_fmt_list[] =
     VideoFormat("mkv", VideoWriter::fourcc('X', 'V', 'I', 'D')),
     VideoFormat("mkv", VideoWriter::fourcc('M', 'P', 'E', 'G')),
     VideoFormat("mkv", VideoWriter::fourcc('M', 'J', 'P', 'G')),
-
     VideoFormat("mov", VideoWriter::fourcc('m', 'p', '4', 'v')),
     VideoFormat()
 };
+#endif
 
 }
 
 class CV_HighGuiTest : public cvtest::BaseTest
 {
 protected:
-    void ImageTest(const string& dir);
+    void ImageTest (const string& dir);
     void VideoTest (const string& dir, const cvtest::VideoFormat& fmt);
     void SpecificImageTest (const string& dir);
     void SpecificVideoTest (const string& dir, const cvtest::VideoFormat& fmt);
@@ -242,19 +271,19 @@ void CV_HighGuiTest::VideoTest(const string& dir, const cvtest::VideoFormat& fmt
 
     for(;;)
     {
-        IplImage * img = cvQueryFrame( cap );
+        IplImage* img = cvQueryFrame( cap );
 
         if (!img)
             break;
 
         frames.push_back(cv::cvarrToMat(img, true));
 
-        if (writer == 0)
+        if (writer == NULL)
         {
             writer = cvCreateVideoWriter(tmp_name.c_str(), fmt.fourcc, 24, cvGetSize(img));
-            if (writer == 0)
+            if (writer == NULL)
             {
-                ts->printf(ts->LOG, "can't create writer (with fourcc : %d)\n",
+                ts->printf(ts->LOG, "can't create writer (with fourcc : %s)\n",
                            cvtest::fourccToString(fmt.fourcc).c_str());
                 cvReleaseCapture( &cap );
                 ts->set_failed_test_info(ts->FAIL_MISMATCH);
@@ -290,15 +319,22 @@ void CV_HighGuiTest::VideoTest(const string& dir, const cvtest::VideoFormat& fmt
         double psnr = PSNR(img1, img);
         if (psnr < thresDbell)
         {
-            printf("Too low psnr = %gdb\n", psnr);
-            // imwrite("img.png", img);
-            // imwrite("img1.png", img1);
+            ts->printf(ts->LOG, "Too low frame %d psnr = %gdb\n", i, psnr);
             ts->set_failed_test_info(ts->FAIL_MISMATCH);
+
+            //imwrite("original.png", img);
+            //imwrite("after_test.png", img1);
+            //Mat diff;
+            //absdiff(img, img1, diff);
+            //imwrite("diff.png", diff);
+
             break;
         }
     }
 
+    printf("Before saved release for %s\n", tmp_name.c_str());
     cvReleaseCapture( &saved );
+    printf("After release\n");
 
     ts->printf(ts->LOG, "end test function : ImagesVideo \n");
 }
diff --git a/modules/imgproc/doc/feature_detection.rst b/modules/imgproc/doc/feature_detection.rst
index c88eaef65..a6d5817dd 100644
--- a/modules/imgproc/doc/feature_detection.rst
+++ b/modules/imgproc/doc/feature_detection.rst
@@ -30,7 +30,11 @@ Finds edges in an image using the [Canny86]_ algorithm.
 The function finds edges in the input image ``image`` and marks them in the output map ``edges`` using the Canny algorithm. The smallest value between ``threshold1`` and ``threshold2`` is used for edge linking. The largest value is used to find initial segments of strong edges. See
 http://en.wikipedia.org/wiki/Canny_edge_detector
 
+.. note::
 
+   * An example on using the canny edge detector can be found at opencv_source_code/samples/cpp/edge.cpp
+
+   * (Python) An example on using the canny edge detector can be found at opencv_source_code/samples/cpp/edge.py
 
 cornerEigenValsAndVecs
 ----------------------
@@ -81,11 +85,13 @@ The output of the function can be used for robust edge or corner detection.
     :ocv:func:`cornerHarris`,
     :ocv:func:`preCornerDetect`
 
+.. note::
 
+   * (Python) An example on how to use eigenvectors and eigenvalues to estimate image texture flow direction can be found at opencv_source_code/samples/python2/texture_flow.py
 
 cornerHarris
 ------------
-Harris edge detector.
+Harris corner detector.
 
 .. ocv:function:: void cornerHarris( InputArray src, OutputArray dst, int blockSize, int ksize, double k, int borderType=BORDER_DEFAULT )
 
@@ -105,7 +111,7 @@ Harris edge detector.
 
     :param borderType: Pixel extrapolation method. See  :ocv:func:`borderInterpolate` .
 
-The function runs the Harris edge detector on the image. Similarly to
+The function runs the Harris corner detector on the image. Similarly to
 :ocv:func:`cornerMinEigenVal` and
 :ocv:func:`cornerEigenValsAndVecs` , for each pixel
 :math:`(x, y)` it calculates a
@@ -344,6 +350,9 @@ Example: ::
     :ocv:func:`fitEllipse`,
     :ocv:func:`minEnclosingCircle`
 
+.. note::
+
+   * An example using the Hough circle detector can be found at opencv_source_code/samples/cpp/houghcircles.cpp
 
 HoughLines
 ----------
@@ -398,6 +407,10 @@ Finds lines in a binary image using the standard Hough transform.
 The function implements the standard or standard multi-scale Hough transform algorithm for line detection.  See http://homepages.inf.ed.ac.uk/rbf/HIPR2/hough.htm for a good explanation of Hough transform.
 See also the example in :ocv:func:`HoughLinesP` description.
 
+.. note::
+
+   * An example using the Hough line detector can be found at opencv_source_code/samples/cpp/houghlines.cpp
+
 HoughLinesP
 -----------
 Finds line segments in a binary image using the probabilistic Hough transform.
@@ -483,6 +496,110 @@ And this is the output of the above program in case of the probabilistic Hough t
 
 .. image:: pics/houghp.png
 
+.. seealso::
+
+    :ocv:class:`LineSegmentDetector`
+
+
+
+LineSegmentDetector
+-------------------
+Line segment detector class, following the algorithm described at [Rafael12]_.
+
+.. ocv:class:: LineSegmentDetector : public Algorithm
+
+
+createLineSegmentDetectorPtr
+----------------------------
+Creates a smart pointer to a LineSegmentDetector object and initializes it.
+
+.. ocv:function:: Ptr<LineSegmentDetector> createLineSegmentDetectorPtr(int _refine = LSD_REFINE_STD, double _scale = 0.8, double _sigma_scale = 0.6, double _quant = 2.0, double _ang_th = 22.5, double _log_eps = 0, double _density_th = 0.7, int _n_bins = 1024)
+
+    :param _refine: The way found lines will be refined:
+
+        * **LSD_REFINE_NONE** - No refinement applied.
+
+        * **LSD_REFINE_STD**  - Standard refinement is applied. E.g. breaking arches into smaller straighter line approximations.
+
+        * **LSD_REFINE_ADV**  - Advanced refinement. Number of false alarms is calculated, lines are refined through increase of precision, decrement in size, etc.
+
+    :param scale: The scale of the image that will be used to find the lines. Range (0..1].
+
+    :param sigma_scale: Sigma for Gaussian filter. It is computed as sigma = _sigma_scale/_scale.
+
+    :param quant: Bound to the quantization error on the gradient norm.
+
+    :param ang_th: Gradient angle tolerance in degrees.
+
+    :param log_eps: Detection threshold: -log10(NFA) > log_eps. Used only when advancent refinement is chosen.
+
+    :param density_th: Minimal density of aligned region points in the enclosing rectangle.
+
+    :param n_bins: Number of bins in pseudo-ordering of gradient modulus.
+
+The LineSegmentDetector algorithm is defined using the standard values. Only advanced users may want to edit those, as to tailor it for their own application.
+
+
+LineSegmentDetector::detect
+---------------------------
+Finds lines in the input image. See the lsd_lines.cpp sample for possible usage.
+
+.. ocv:function:: void LineSegmentDetector::detect(const InputArray _image, OutputArray _lines, OutputArray width = noArray(), OutputArray prec = noArray(), OutputArray nfa = noArray())
+
+    :param _image A grayscale (CV_8UC1) input image.
+        If only a roi needs to be selected, use ::
+        lsd_ptr->detect(image(roi), lines, ...);
+        lines += Scalar(roi.x, roi.y, roi.x, roi.y);
+
+    :param lines: A vector of Vec4i elements specifying the beginning and ending point of a line. Where Vec4i is (x1, y1, x2, y2), point 1 is the start, point 2 - end. Returned lines are strictly oriented depending on the gradient.
+
+    :param width: Vector of widths of the regions, where the lines are found. E.g. Width of line.
+
+    :param prec: Vector of precisions with which the lines are found.
+
+    :param nfa: Vector containing number of false alarms in the line region, with precision of 10%. The bigger the value, logarithmically better the detection.
+
+        * -1 corresponds to 10 mean false alarms
+
+        * 0 corresponds to 1 mean false alarm
+
+        * 1 corresponds to 0.1 mean false alarms
+
+    This vector will be calculated only when the objects type is LSD_REFINE_ADV.
+
+This is the output of the default parameters of the algorithm on the above shown image.
+
+.. image:: pics/building_lsd.png
+
+.. note::
+
+   * An example using the LineSegmentDetector can be found at opencv_source_code/samples/cpp/lsd_lines.cpp
+
+LineSegmentDetector::drawSegments
+---------------------------------
+Draws the line segments on a given image.
+
+.. ocv:function:: void LineSegmentDetector::drawSegments(InputOutputArray _image, InputArray lines)
+
+    :param image: The image, where the liens will be drawn. Should be bigger or equal to the image, where the lines were found.
+
+    :param lines: A vector of the lines that needed to be drawn.
+
+
+LineSegmentDetector::compareSegments
+------------------------------------
+Draws two groups of lines in blue and red, counting the non overlapping (mismatching) pixels.
+
+.. ocv:function:: int LineSegmentDetector::compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray _image = noArray())
+
+    :param size: The size of the image, where lines1 and lines2 were found.
+
+    :param lines1: The first group of lines that needs to be drawn. It is visualized in blue color.
+
+    :param lines2: The second group of lines. They visualized in red color.
+
+    :param image: Optional image, where the lines will be drawn. The image should be color in order for lines1 and lines2 to be drawn in the above mentioned colors.
+
 
 
 preCornerDetect
@@ -529,3 +646,5 @@ The corners can be found as local maximums of the functions, as shown below: ::
 .. [Shi94] J. Shi and C. Tomasi. *Good Features to Track*. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 593-600, June 1994.
 
 .. [Yuen90] Yuen, H. K. and Princen, J. and Illingworth, J. and Kittler, J., *Comparative study of Hough transform methods for circle finding*. Image Vision Comput. 8 1, pp 71–77 (1990)
+
+.. [Rafael12] Rafael Grompone von Gioi, Jérémie Jakubowicz, Jean-Michel Morel, and Gregory Randall, LSD: a Line Segment Detector, Image Processing On Line, vol. 2012. http://dx.doi.org/10.5201/ipol.2012.gjmr-lsd
diff --git a/modules/imgproc/doc/filtering.rst b/modules/imgproc/doc/filtering.rst
index 2465ac9a4..d019136fa 100755
--- a/modules/imgproc/doc/filtering.rst
+++ b/modules/imgproc/doc/filtering.rst
@@ -22,6 +22,10 @@ OpenCV enables you to specify the extrapolation method. For details, see the fun
     * BORDER_CONSTANT:      iiiiii|abcdefgh|iiiiiii  with some specified 'i'
     */
 
+.. note::
+
+   * (Python) A complete example illustrating different morphological operations like erode/dilate, open/close, blackhat/tophat ... can be found at opencv_source_code/samples/python2/morphology.py
+
 BaseColumnFilter
 ----------------
 .. ocv:class:: BaseColumnFilter
@@ -408,6 +412,28 @@ http://www.dai.ed.ac.uk/CVonline/LOCAL\_COPIES/MANDUCHI1/Bilateral\_Filtering.ht
 This filter does not work inplace.
 
 
+adaptiveBilateralFilter
+-----------------------
+Applies the adaptive bilateral filter to an image.
+
+.. ocv:function:: void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize, double sigmaSpace, Point anchor=Point(-1, -1), int borderType=BORDER_DEFAULT )
+
+.. ocv:pyfunction:: cv2.adaptiveBilateralFilter(src, ksize, sigmaSpace[, dst[, anchor[, borderType]]]) -> dst
+
+    :param src: Source 8-bit, 1-channel or 3-channel image.
+
+    :param dst: Destination image of the same size and type as  ``src`` .
+
+    :param ksize: filter kernel size.
+
+    :param sigmaSpace: Filter sigma in the coordinate space. It has similar meaning with ``sigmaSpace`` in ``bilateralFilter``.
+
+    :param anchor: anchor point; default value ``Point(-1,-1)`` means that the anchor is at the kernel center. Only default value is supported now.
+
+    :param borderType: border mode used to extrapolate pixels outside of the image.
+
+The function applies adaptive bilateral filtering to the input image. This filter is similar to ``bilateralFilter``, in that dissimilarity from and distance to the center pixel is punished. Instead of using ``sigmaColor``, we employ the variance of pixel values in the neighbourhood.
+
 
 
 blur
@@ -755,7 +781,7 @@ Dilates an image by using a specific structuring element.
 
     :param dst: output image of the same size and type as ``src``.
 
-    :param element: structuring element used for dilation; if  ``element=Mat()`` , a  ``3 x 3`` rectangular structuring element is used.
+    :param kernel: structuring element used for dilation; if  ``element=Mat()`` , a  ``3 x 3`` rectangular structuring element is used. Kernel can be created using :ocv:func:`getStructuringElement`
 
     :param anchor: position of the anchor within the element; default value ``(-1, -1)`` means that the anchor is at the element center.
 
@@ -778,6 +804,14 @@ The function supports the in-place mode. Dilation can be applied several ( ``ite
     :ocv:func:`erode`,
     :ocv:func:`morphologyEx`,
     :ocv:func:`createMorphologyFilter`
+    :ocv:func:`getStructuringElement`
+
+
+.. note::
+
+   * An example using the morphological dilate operation can be found at opencv_source_code/samples/cpp/morphology2.cpp
+
+
 
 
 erode
@@ -794,7 +828,7 @@ Erodes an image by using a specific structuring element.
 
     :param dst: output image of the same size and type as ``src``.
 
-    :param element: structuring element used for erosion; if  ``element=Mat()`` , a  ``3 x 3``  rectangular structuring element is used.
+    :param kernel: structuring element used for erosion; if  ``element=Mat()`` , a  ``3 x 3``  rectangular structuring element is used. Kernel can be created using :ocv:func:`getStructuringElement`.
 
     :param anchor: position of the anchor within the element; default value  ``(-1, -1)``  means that the anchor is at the element center.
 
@@ -816,9 +850,12 @@ The function supports the in-place mode. Erosion can be applied several ( ``iter
 
     :ocv:func:`dilate`,
     :ocv:func:`morphologyEx`,
-    :ocv:func:`createMorphologyFilter`
+    :ocv:func:`createMorphologyFilter`,
+    :ocv:func:`getStructuringElement`
 
+.. note::
 
+   * An example using the morphological erode operation can be found at opencv_source_code/samples/cpp/morphology2.cpp
 
 filter2D
 --------
@@ -947,7 +984,7 @@ Returns Gaussian filter coefficients.
     :param ksize: Aperture size. It should be odd ( :math:`\texttt{ksize} \mod 2 = 1` ) and positive.
 
     :param sigma: Gaussian standard deviation. If it is non-positive, it is computed from  ``ksize``  as  \ ``sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`` .
-    :param ktype: Type of filter coefficients. It can be  ``CV_32f``  or  ``CV_64F`` .
+    :param ktype: Type of filter coefficients. It can be  ``CV_32F``  or  ``CV_64F`` .
 
 The function computes and returns the
 :math:`\texttt{ksize} \times 1` matrix of Gaussian filter coefficients:
@@ -976,6 +1013,32 @@ Two of such generated kernels can be passed to
 
 
 
+getGaborKernel
+-----------------
+Returns Gabor filter coefficients.
+
+.. ocv:function:: Mat getGaborKernel( Size ksize, double sigma, double theta, double lambd, double gamma, double psi = CV_PI*0.5, int ktype = CV_64F )
+
+.. ocv:pyfunction:: cv2.getGaborKernel(ksize, sigma, theta, lambd, gamma[, psi[, ktype]]) -> retval
+
+    :param ksize: Size of the filter returned.
+
+    :param sigma: Standard deviation of the gaussian envelope.
+
+    :param theta: Orientation of the normal to the parallel stripes of a Gabor function.
+
+    :param lambd: Wavelength of the sinusoidal factor.
+
+    :param gamma: Spatial aspect ratio.
+
+    :param psi: Phase offset.
+
+    :param ktype: Type of filter coefficients. It can be  ``CV_32F``  or  ``CV_64F`` .
+
+For more details about gabor filter equations and parameters, see: `Gabor Filter <http://en.wikipedia.org/wiki/Gabor_filter>`_.
+
+
+
 getKernelType
 -------------
 Returns the kernel type.
@@ -1090,7 +1153,9 @@ Performs advanced morphological transformations.
 
     :param dst: Destination image of the same size and type as  ``src`` .
 
-    :param element: Structuring element.
+    :param kernel: Structuring element. It can be created using :ocv:func:`getStructuringElement`.
+
+    :param anchor: Anchor position with the kernel. Negative values mean that the anchor is at the kernel center.
 
     :param op: Type of a morphological operation that can be one of the following:
 
@@ -1148,8 +1213,12 @@ Any of the operations can be done in-place. In case of multi-channel images, eac
 
     :ocv:func:`dilate`,
     :ocv:func:`erode`,
-    :ocv:func:`createMorphologyFilter`
+    :ocv:func:`createMorphologyFilter`,
+    :ocv:func:`getStructuringElement`
 
+.. note::
+
+   * An example using the morphologyEx function for the morphological opening and closing operations can be found at opencv_source_code/samples/cpp/morphology2.cpp
 
 Laplacian
 ---------
@@ -1193,7 +1262,9 @@ This is done when ``ksize > 1`` . When ``ksize == 1`` , the Laplacian is compute
     :ocv:func:`Sobel`,
     :ocv:func:`Scharr`
 
+.. note::
 
+   * An example using the Laplace transformation for edge detection can be found at opencv_source_code/samples/cpp/laplace.cpp
 
 pyrDown
 -------
@@ -1250,6 +1321,10 @@ Upsamples an image and then blurs it.
 The function performs the upsampling step of the Gaussian pyramid construction, though it can actually be used to construct the Laplacian pyramid. First, it upsamples the source image by injecting even zero rows and columns and then convolves the result with the same kernel as in
 :ocv:func:`pyrDown`  multiplied by 4.
 
+.. note::
+
+   * (Python) An example of Laplacian Pyramid construction and merging can be found at opencv_source_code/samples/python2/lappyr.py
+
 
 pyrMeanShiftFiltering
 ---------------------
@@ -1297,6 +1372,9 @@ After the iterations over, the color components of the initial pixel (that is, t
 
 When ``maxLevel > 0``, the gaussian pyramid of ``maxLevel+1`` levels is built, and the above procedure is run on the smallest layer first. After that, the results are propagated to the larger layer and the iterations are run again only on those pixels where the layer colors differ by more than ``sr`` from the lower-resolution layer of the pyramid. That makes boundaries of color regions sharper. Note that the results will be actually different from the ones obtained by running the meanshift procedure on the whole original image (i.e. when ``maxLevel==0``).
 
+.. note::
+
+   * An example using mean-shift image segmentation can be found at opencv_source_code/samples/cpp/meanshift_segmentation.cpp
 
 sepFilter2D
 -----------
@@ -1322,7 +1400,7 @@ Applies a separable linear filter to an image.
 
     :param kernelY: Coefficients for filtering each column.
 
-    :param anchor: Anchor position within the kernel. The default value  :math:`(-1, 1)`  means that the anchor is at the kernel center.
+    :param anchor: Anchor position within the kernel. The default value  :math:`(-1,-1)`  means that the anchor is at the kernel center.
 
     :param delta: Value added to the filtered results before storing them.
 
@@ -1514,4 +1592,3 @@ is equivalent to
 .. seealso::
 
     :ocv:func:`cartToPolar`
-
diff --git a/modules/imgproc/doc/geometric_transformations.rst b/modules/imgproc/doc/geometric_transformations.rst
index 2eac1fb29..c94178f6c 100644
--- a/modules/imgproc/doc/geometric_transformations.rst
+++ b/modules/imgproc/doc/geometric_transformations.rst
@@ -298,6 +298,9 @@ where
 
 The function emulates the human "foveal" vision and can be used for fast scale and rotation-invariant template matching, for object tracking and so forth. The function can not operate in-place.
 
+.. note::
+
+   * An example using the geometric logpolar operation in 4 applications can be found at opencv_source_code/samples/cpp/logpolar_bsm.cpp
 
 remap
 -----
@@ -679,5 +682,3 @@ The function is similar to
 where ``undistort()`` is an approximate iterative algorithm that estimates the normalized original point coordinates out of the normalized distorted point coordinates ("normalized" means that the coordinates do not depend on the camera matrix).
 
 The function can be used for both a stereo camera head or a monocular camera (when R is empty).
-
-
diff --git a/modules/imgproc/doc/histograms.rst b/modules/imgproc/doc/histograms.rst
index 984bbc136..91199f378 100644
--- a/modules/imgproc/doc/histograms.rst
+++ b/modules/imgproc/doc/histograms.rst
@@ -98,7 +98,12 @@ input arrays at the same location. The sample below shows how to compute a 2D Hu
         waitKey();
     }
 
+.. note::
 
+   * An example for creating histograms of an image can be found at opencv_source_code/samples/cpp/demhist.cpp
+
+   * (Python) An example for creating color histograms can be found at opencv_source/samples/python2/color_histogram.py
+   * (Python) An example illustrating RGB and grayscale histogram plotting can be found at opencv_source/samples/python2/hist.py
 
 
 calcBackProject
@@ -168,6 +173,8 @@ Compares two histograms.
 
             * **CV_COMP_CHISQR**     Chi-Square
 
+            * **CV_COMP_CHISQR_ALT**     Alternative Chi-Square
+
             * **CV_COMP_INTERSECT**     Intersection
 
             * **CV_COMP_BHATTACHARYYA**     Bhattacharyya distance
@@ -197,6 +204,14 @@ The functions ``compareHist`` compare two dense or two sparse histograms using t
 
         d(H_1,H_2) =  \sum _I  \frac{\left(H_1(I)-H_2(I)\right)^2}{H_1(I)}
 
+* Alternative Chi-Square (``method=CV_COMP_CHISQR_ALT``)
+
+    .. math::
+
+        d(H_1,H_2) =  2 * \sum _I  \frac{\left(H_1(I)-H_2(I)\right)^2}{H_1(I)+H_2(I)}
+
+    This alternative formula is regularly used for texture comparison. See e.g. [Puzicha1997]_.
+
 * Intersection (``method=CV_COMP_INTERSECT``)
 
     .. math::
@@ -488,3 +503,4 @@ The function clears histogram bins that are below the specified threshold.
 
 
 .. [RubnerSept98] Y. Rubner. C. Tomasi, L.J. Guibas. *The Earth Mover’s Distance as a Metric for Image Retrieval*. Technical Report STAN-CS-TN-98-86, Department of Computer Science, Stanford University, September 1998.
+.. [Puzicha1997] Puzicha, J., Hofmann, T., and Buhmann, J. *Non-parametric similarity measures for unsupervised texture segmentation and image retrieval.* In Proc. IEEE Conf. Computer Vision and Pattern Recognition, San Juan, Puerto Rico, pp. 267-272, 1997.
diff --git a/modules/imgproc/doc/miscellaneous_transformations.rst b/modules/imgproc/doc/miscellaneous_transformations.rst
index c3ef2f308..47de0b442 100644
--- a/modules/imgproc/doc/miscellaneous_transformations.rst
+++ b/modules/imgproc/doc/miscellaneous_transformations.rst
@@ -476,6 +476,12 @@ In this mode, the complexity is still linear.
 That is, the function provides a very fast way to compute the Voronoi diagram for a binary image.
 Currently, the second variant can use only the approximate distance transform algorithm, i.e. ``maskSize=CV_DIST_MASK_PRECISE`` is not supported yet.
 
+.. note::
+
+   * An example on using the distance transform can be found at opencv_source_code/samples/cpp/distrans.cpp
+
+   * (Python) An example on using the distance transform can be found at opencv_source/samples/python2/distrans.py
+
 floodFill
 ---------
 Fills a connected component with the given color.
@@ -574,11 +580,15 @@ where
 *
     Color/brightness of the seed point in case of a fixed range.
 
-Use these functions to either mark a connected component with the specified color in-place, or build a mask and then extract the contour, or copy the region to another image, and so on. Various modes of the function are demonstrated in the ``floodfill.cpp`` sample.
+Use these functions to either mark a connected component with the specified color in-place, or build a mask and then extract the contour, or copy the region to another image, and so on.
 
 .. seealso:: :ocv:func:`findContours`
 
+.. note::
 
+   * An example using the FloodFill technique can be found at opencv_source_code/samples/cpp/ffilldemo.cpp
+
+   * (Python) An example using the FloodFill technique can be found at opencv_source_code/samples/python2/floodfill.cpp
 
 integral
 --------
@@ -738,6 +748,12 @@ Visual demonstration and usage example of the function can be found in the OpenC
 
 .. seealso:: :ocv:func:`findContours`
 
+.. note::
+
+   * An example using the watershed algorithm can be found at opencv_source_code/samples/cpp/watershed.cpp
+
+   * (Python) An example using the watershed algorithm can be found at opencv_source_code/samples/python2/watershed.py
+
 grabCut
 -------
 Runs the GrabCut algorithm.
@@ -783,4 +799,9 @@ See the sample ``grabcut.cpp`` to learn how to use the function.
 
 .. [Meyer92] Meyer, F. *Color Image Segmentation*, ICIP92, 1992
 
-.. [Telea04] Alexandru Telea, *An Image Inpainting Technique Based on the Fast Marching Method*. Journal of Graphics, GPU, and Game Tools 9 1, pp 23-34 (2004)
+
+.. note::
+
+   * An example using the GrabCut algorithm can be found at opencv_source_code/samples/cpp/grabcut.cpp
+
+   * (Python) An example using the GrabCut algorithm can be found at opencv_source_code/samples/python2/grabcut.py
diff --git a/modules/imgproc/doc/object_detection.rst b/modules/imgproc/doc/object_detection.rst
index 811ea8ff1..12996b5a4 100644
--- a/modules/imgproc/doc/object_detection.rst
+++ b/modules/imgproc/doc/object_detection.rst
@@ -73,3 +73,6 @@ image patch:
 After the function finishes the comparison, the best matches can be found as global minimums (when ``CV_TM_SQDIFF`` was used) or maximums (when ``CV_TM_CCORR`` or ``CV_TM_CCOEFF`` was used) using the
 :ocv:func:`minMaxLoc` function. In case of a color image, template summation in the numerator and each sum in the denominator is done over all of the channels and separate mean values are used for each channel. That is, the function can take a color template and a color image. The result will still be a single-channel image, which is easier to analyze.
 
+.. note::
+
+   * (Python) An example on how to match mouse selected regions in an image can be found at opencv_source_code/samples/python2/mouse_and_match.py
diff --git a/modules/imgproc/doc/pics/building_lsd.png b/modules/imgproc/doc/pics/building_lsd.png
new file mode 100644
index 000000000..747029a65
Binary files /dev/null and b/modules/imgproc/doc/pics/building_lsd.png differ
diff --git a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst
index 6f7cba3a9..d34600669 100644
--- a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst
+++ b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst
@@ -192,6 +192,14 @@ The function retrieves contours from the binary image using the algorithm
 
 .. note:: If you use the new Python interface then the ``CV_`` prefix has to be omitted in contour retrieval mode and contour approximation method parameters (for example, use ``cv2.RETR_LIST`` and ``cv2.CHAIN_APPROX_NONE`` parameters). If you use the old Python interface then these parameters have the ``CV_`` prefix (for example, use ``cv.CV_RETR_LIST`` and ``cv.CV_CHAIN_APPROX_NONE``).
 
+.. note::
+
+   * An example using the findContour functionality can be found at opencv_source_code/samples/cpp/contours2.cpp
+   * An example using findContours to clean up a background segmentation result at opencv_source_code/samples/cpp/segment_objects.cpp
+
+   * (Python) An example using the findContour functionality can be found at opencv_source/samples/python2/contours.py
+   * (Python) An example of detecting squares in an image can be found at opencv_source/samples/python2/squares.py
+
 
 approxPolyDP
 ----------------
@@ -353,6 +361,10 @@ The functions find the convex hull of a 2D point set using the Sklansky's algori
 that has
 *O(N logN)* complexity in the current implementation. See the OpenCV sample ``convexhull.cpp`` that demonstrates the usage of different function variants.
 
+.. note::
+
+   * An example using the convexHull functionality can be found at opencv_source_code/samples/cpp/convexhull.cpp
+
 
 convexityDefects
 ----------------
@@ -406,6 +418,11 @@ Fits an ellipse around a set of 2D points.
 
 The function calculates the ellipse that fits (in a least-squares sense) a set of 2D points best of all. It returns the rotated rectangle in which the ellipse is inscribed. The algorithm [Fitzgibbon95]_ is used.
 
+.. note::
+
+   * An example using the fitEllipse technique can be found at opencv_source_code/samples/cpp/fitellipse.cpp
+
+
 fitLine
 -----------
 Fits a line to a 2D or 3D point set.
@@ -476,6 +493,9 @@ http://en.wikipedia.org/wiki/M-estimator
 :math:`w_i` are adjusted to be inversely proportional to
 :math:`\rho(r_i)` .
 
+.. Sample code:
+
+   * (Python) An example of robust line fitting can be found at opencv_source_code/samples/python2/fitline.py
 
 
 isContourConvex
@@ -522,6 +542,24 @@ The function calculates and returns the minimum-area bounding rectangle (possibl
 
 
 
+boxPoints
+-----------
+Finds the four vertices of a rotated rect. Useful to draw the rotated rectangle.
+
+.. ocv:function:: void boxPoints(RotatedRect box, OutputArray points)
+
+.. ocv:pyfunction:: cv2.boxPoints(box[, points]) -> points
+
+.. ocv:cfunction:: void cvBoxPoints( CvBox2D box, CvPoint2D32f pt[4] )
+
+    :param box: The input rotated rectangle. It may be the output of .. ocv:function:: minAreaRect.
+
+    :param points: The output array of four vertices of rectangles.
+
+The function finds the four vertices of a rotated rectangle. This function is useful to draw the rectangle. In C++, instead of using this function, you can directly use box.points() method. Please visit the `tutorial on bounding rectangle <http://docs.opencv.org/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.html#bounding-rects-circles>`_ for more information.
+
+
+
 minEnclosingCircle
 ----------------------
 Finds a circle of the minimum area enclosing a 2D point set.
diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp
index fcaf6a58e..5ff1b7449 100644
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@@ -191,6 +191,12 @@ enum { HOUGH_STANDARD      = 0,
        HOUGH_GRADIENT      = 3
      };
 
+//! Variants of Line Segment Detector
+enum { LSD_REFINE_NONE = 0,
+       LSD_REFINE_STD  = 1,
+       LSD_REFINE_ADV  = 2
+     };
+
 //! Histogram comparison methods
 enum { HISTCMP_CORREL        = 0,
        HISTCMP_CHISQR        = 1,
@@ -650,7 +656,7 @@ public:
                         Point dstOfs = Point(0,0),
                         bool isolated = false);
     //! returns true if the filter is separable
-    bool isSeparable() const { return (const BaseFilter*)filter2D == 0; }
+    bool isSeparable() const { return !filter2D; }
     //! returns the number
     int remainingInputRows() const;
     int remainingOutputRows() const;
@@ -688,54 +694,119 @@ public:
 
 
 //! finds arbitrary template in the grayscale image using Generalized Hough Transform
-//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
-//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
 class CV_EXPORTS GeneralizedHough : public Algorithm
 {
 public:
-    enum { GHT_POSITION = 0,
-           GHT_SCALE    = 1,
-           GHT_ROTATION = 2
-         };
-
-    static Ptr<GeneralizedHough> create(int method);
-
-    virtual ~GeneralizedHough();
-
     //! set template to search
-    void setTemplate(InputArray templ, int cannyThreshold = 100, Point templCenter = Point(-1, -1));
-    void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter = Point(-1, -1));
+    virtual void setTemplate(InputArray templ, Point templCenter = Point(-1, -1)) = 0;
+    virtual void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter = Point(-1, -1)) = 0;
 
     //! find template on image
-    void detect(InputArray image, OutputArray positions, OutputArray votes = cv::noArray(), int cannyThreshold = 100);
-    void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes = cv::noArray());
+    virtual void detect(InputArray image, OutputArray positions, OutputArray votes = noArray()) = 0;
+    virtual void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes = noArray()) = 0;
 
-    void release();
+    //! Canny low threshold.
+    virtual void setCannyLowThresh(int cannyLowThresh) = 0;
+    virtual int getCannyLowThresh() const = 0;
 
-protected:
-    virtual void setTemplateImpl(const Mat& edges, const Mat& dx, const Mat& dy, Point templCenter) = 0;
-    virtual void detectImpl(const Mat& edges, const Mat& dx, const Mat& dy, OutputArray positions, OutputArray votes) = 0;
-    virtual void releaseImpl() = 0;
+    //! Canny high threshold.
+    virtual void setCannyHighThresh(int cannyHighThresh) = 0;
+    virtual int getCannyHighThresh() const = 0;
 
-private:
-    Mat edges_;
-    Mat dx_;
-    Mat dy_;
+    //! Minimum distance between the centers of the detected objects.
+    virtual void setMinDist(double minDist) = 0;
+    virtual double getMinDist() const = 0;
+
+    //! Inverse ratio of the accumulator resolution to the image resolution.
+    virtual void setDp(double dp) = 0;
+    virtual double getDp() const = 0;
+
+    //! Maximal size of inner buffers.
+    virtual void setMaxBufferSize(int maxBufferSize) = 0;
+    virtual int getMaxBufferSize() const = 0;
+};
+
+//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
+//! Detects position only without traslation and rotation
+class CV_EXPORTS GeneralizedHoughBallard : public GeneralizedHough
+{
+public:
+    //! R-Table levels.
+    virtual void setLevels(int levels) = 0;
+    virtual int getLevels() const = 0;
+
+    //! The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected.
+    virtual void setVotesThreshold(int votesThreshold) = 0;
+    virtual int getVotesThreshold() const = 0;
+};
+
+//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
+//! Detects position, traslation and rotation
+class CV_EXPORTS GeneralizedHoughGuil : public GeneralizedHough
+{
+public:
+    //! Angle difference in degrees between two points in feature.
+    virtual void setXi(double xi) = 0;
+    virtual double getXi() const = 0;
+
+    //! Feature table levels.
+    virtual void setLevels(int levels) = 0;
+    virtual int getLevels() const = 0;
+
+    //! Maximal difference between angles that treated as equal.
+    virtual void setAngleEpsilon(double angleEpsilon) = 0;
+    virtual double getAngleEpsilon() const = 0;
+
+    //! Minimal rotation angle to detect in degrees.
+    virtual void setMinAngle(double minAngle) = 0;
+    virtual double getMinAngle() const = 0;
+
+    //! Maximal rotation angle to detect in degrees.
+    virtual void setMaxAngle(double maxAngle) = 0;
+    virtual double getMaxAngle() const = 0;
+
+    //! Angle step in degrees.
+    virtual void setAngleStep(double angleStep) = 0;
+    virtual double getAngleStep() const = 0;
+
+    //! Angle votes threshold.
+    virtual void setAngleThresh(int angleThresh) = 0;
+    virtual int getAngleThresh() const = 0;
+
+    //! Minimal scale to detect.
+    virtual void setMinScale(double minScale) = 0;
+    virtual double getMinScale() const = 0;
+
+    //! Maximal scale to detect.
+    virtual void setMaxScale(double maxScale) = 0;
+    virtual double getMaxScale() const = 0;
+
+    //! Scale step.
+    virtual void setScaleStep(double scaleStep) = 0;
+    virtual double getScaleStep() const = 0;
+
+    //! Scale votes threshold.
+    virtual void setScaleThresh(int scaleThresh) = 0;
+    virtual int getScaleThresh() const = 0;
+
+    //! Position votes threshold.
+    virtual void setPosThresh(int posThresh) = 0;
+    virtual int getPosThresh() const = 0;
 };
 
 
-class CV_EXPORTS CLAHE : public Algorithm
+class CV_EXPORTS_W CLAHE : public Algorithm
 {
 public:
-    virtual void apply(InputArray src, OutputArray dst) = 0;
+    CV_WRAP virtual void apply(InputArray src, OutputArray dst) = 0;
 
-    virtual void setClipLimit(double clipLimit) = 0;
-    virtual double getClipLimit() const = 0;
+    CV_WRAP virtual void setClipLimit(double clipLimit) = 0;
+    CV_WRAP virtual double getClipLimit() const = 0;
 
-    virtual void setTilesGridSize(Size tileGridSize) = 0;
-    virtual Size getTilesGridSize() const = 0;
+    CV_WRAP virtual void setTilesGridSize(Size tileGridSize) = 0;
+    CV_WRAP virtual Size getTilesGridSize() const = 0;
 
-    virtual void collectGarbage() = 0;
+    CV_WRAP virtual void collectGarbage() = 0;
 };
 
 
@@ -829,7 +900,61 @@ protected:
     Point2f bottomRight;
 };
 
+class LineSegmentDetector : public Algorithm
+{
+public:
+/**
+ * Detect lines in the input image.
+ *
+ * @param _image    A grayscale(CV_8UC1) input image.
+ *                  If only a roi needs to be selected, use
+ *                  lsd_ptr->detect(image(roi), ..., lines);
+ *                  lines += Scalar(roi.x, roi.y, roi.x, roi.y);
+ * @param _lines    Return: A vector of Vec4i elements specifying the beginning and ending point of a line.
+ *                          Where Vec4i is (x1, y1, x2, y2), point 1 is the start, point 2 - end.
+ *                          Returned lines are strictly oriented depending on the gradient.
+ * @param width     Return: Vector of widths of the regions, where the lines are found. E.g. Width of line.
+ * @param prec      Return: Vector of precisions with which the lines are found.
+ * @param nfa       Return: Vector containing number of false alarms in the line region, with precision of 10%.
+ *                          The bigger the value, logarithmically better the detection.
+ *                              * -1 corresponds to 10 mean false alarms
+ *                              * 0 corresponds to 1 mean false alarm
+ *                              * 1 corresponds to 0.1 mean false alarms
+ *                          This vector will be calculated _only_ when the objects type is REFINE_ADV
+ */
+    virtual void detect(InputArray _image, OutputArray _lines,
+                        OutputArray width = noArray(), OutputArray prec = noArray(),
+                        OutputArray nfa = noArray()) = 0;
 
+/**
+ * Draw lines on the given canvas.
+ *
+ * @param image     The image, where lines will be drawn.
+ *                  Should have the size of the image, where the lines were found
+ * @param lines     The lines that need to be drawn
+ */
+    virtual void drawSegments(InputOutputArray _image, InputArray lines) = 0;
+
+/**
+ * Draw both vectors on the image canvas. Uses blue for lines 1 and red for lines 2.
+ *
+ * @param size      The size of the image, where lines were found.
+ * @param lines1    The first lines that need to be drawn. Color - Blue.
+ * @param lines2    The second lines that need to be drawn. Color - Red.
+ * @param image     Optional image, where lines will be drawn.
+ *                  Should have the size of the image, where the lines were found
+ * @return          The number of mismatching pixels between lines1 and lines2.
+ */
+    virtual int compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray _image = noArray()) = 0;
+
+    virtual ~LineSegmentDetector() {};
+};
+
+//! Returns a pointer to a LineSegmentDetector class.
+CV_EXPORTS Ptr<LineSegmentDetector> createLineSegmentDetectorPtr(
+    int _refine = LSD_REFINE_STD, double _scale = 0.8,
+    double _sigma_scale = 0.6, double _quant = 2.0, double _ang_th = 22.5,
+    double _log_eps = 0, double _density_th = 0.7, int _n_bins = 1024);
 
 //! returns type (one of KERNEL_*) of 1D or 2D kernel specified by its coefficients.
 CV_EXPORTS int getKernelType(InputArray kernel, Point anchor);
@@ -935,6 +1060,11 @@ CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d,
                                    double sigmaColor, double sigmaSpace,
                                    int borderType = BORDER_DEFAULT );
 
+//! smooths the image using adaptive bilateral filter
+CV_EXPORTS_W void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize,
+                                           double sigmaSpace, Point anchor=Point(-1, -1),
+                                           int borderType=BORDER_DEFAULT );
+
 //! smooths the image using the box filter. Each pixel is processed in O(1) time
 CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
                              Size ksize, Point anchor = Point(-1,-1),
@@ -1318,6 +1448,9 @@ CV_EXPORTS_W double contourArea( InputArray contour, bool oriented = false );
 //! computes the minimal rotated rectangle for a set of points
 CV_EXPORTS_W RotatedRect minAreaRect( InputArray points );
 
+//! computes boxpoints
+CV_EXPORTS_W void boxPoints(RotatedRect box, OutputArray points);
+
 //! computes the minimal enclosing circle for a set of points
 CV_EXPORTS_W void minEnclosingCircle( InputArray points,
                                       CV_OUT Point2f& center, CV_OUT float& radius );
@@ -1350,7 +1483,15 @@ CV_EXPORTS_W void fitLine( InputArray points, OutputArray line, int distType,
 //! checks if the point is inside the contour. Optionally computes the signed distance from the point to the contour boundary
 CV_EXPORTS_W double pointPolygonTest( InputArray contour, Point2f pt, bool measureDist );
 
-CV_EXPORTS Ptr<CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+CV_EXPORTS_W Ptr<CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+
+//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
+//! Detects position only without traslation and rotation
+CV_EXPORTS Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
+
+//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
+//! Detects position, traslation and rotation
+CV_EXPORTS Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
 
 } // cv
 
diff --git a/modules/imgproc/include/opencv2/imgproc/types_c.h b/modules/imgproc/include/opencv2/imgproc/types_c.h
index 2b1d0722e..dd0d8b8a6 100644
--- a/modules/imgproc/include/opencv2/imgproc/types_c.h
+++ b/modules/imgproc/include/opencv2/imgproc/types_c.h
@@ -508,7 +508,8 @@ enum
     CV_COMP_CHISQR        =1,
     CV_COMP_INTERSECT     =2,
     CV_COMP_BHATTACHARYYA =3,
-    CV_COMP_HELLINGER     =CV_COMP_BHATTACHARYYA
+    CV_COMP_HELLINGER     =CV_COMP_BHATTACHARYYA,
+    CV_COMP_CHISQR_ALT    =4
 };
 
 /* Mask size for distance transform */
diff --git a/modules/imgproc/perf/perf_cornerEigenValsAndVecs.cpp b/modules/imgproc/perf/perf_cornerEigenValsAndVecs.cpp
index 0e19da53a..5a323cc2a 100644
--- a/modules/imgproc/perf/perf_cornerEigenValsAndVecs.cpp
+++ b/modules/imgproc/perf/perf_cornerEigenValsAndVecs.cpp
@@ -37,4 +37,4 @@ PERF_TEST_P(Img_BlockSize_ApertureSize_BorderType, cornerEigenValsAndVecs,
     extractChannel(dst, l1, 0);
 
     SANITY_CHECK(l1, 2e-5);
-}
\ No newline at end of file
+}
diff --git a/modules/imgproc/perf/perf_cornerHarris.cpp b/modules/imgproc/perf/perf_cornerHarris.cpp
index 1568100cd..832845e7e 100644
--- a/modules/imgproc/perf/perf_cornerHarris.cpp
+++ b/modules/imgproc/perf/perf_cornerHarris.cpp
@@ -36,4 +36,4 @@ PERF_TEST_P(Img_BlockSize_ApertureSize_k_BorderType, cornerHarris,
     TEST_CYCLE() cornerHarris(src, dst, blockSize, apertureSize, k, borderType);
 
     SANITY_CHECK(dst, 2e-5);
-}
\ No newline at end of file
+}
diff --git a/modules/imgproc/perf/perf_cvt_color.cpp b/modules/imgproc/perf/perf_cvt_color.cpp
index a8932dd73..4ec464717 100644
--- a/modules/imgproc/perf/perf_cvt_color.cpp
+++ b/modules/imgproc/perf/perf_cvt_color.cpp
@@ -258,7 +258,7 @@ PERF_TEST_P(Size_CvtMode, cvtColor8u,
     declare.time(100);
     declare.in(src, WARMUP_RNG).out(dst);
 
-    int runs = sz.width <= 320 ? 70 : 1;
+    int runs = sz.width <= 320 ? 100 : 5;
     TEST_CYCLE_MULTIRUN(runs) cvtColor(src, dst, mode, ch.dcn);
 
     SANITY_CHECK(dst, 1);
diff --git a/modules/imgproc/perf/perf_filter2d.cpp b/modules/imgproc/perf/perf_filter2d.cpp
index e228e3385..cfce852b7 100644
--- a/modules/imgproc/perf/perf_filter2d.cpp
+++ b/modules/imgproc/perf/perf_filter2d.cpp
@@ -72,5 +72,3 @@ PERF_TEST_P( Image_KernelSize, GaborFilter2d,
 
     SANITY_CHECK(filteredImage, 1e-3);
 }
-
-
diff --git a/modules/imgproc/perf/perf_histogram.cpp b/modules/imgproc/perf/perf_histogram.cpp
index 92db3be34..1789470c2 100644
--- a/modules/imgproc/perf/perf_histogram.cpp
+++ b/modules/imgproc/perf/perf_histogram.cpp
@@ -28,14 +28,14 @@ PERF_TEST_P(Size_Source, calcHist1d,
     int dims = 1;
     int numberOfImages = 1;
 
-    const float r[] = {rangeLow, rangeHight};
-    const float* ranges[] = {r};
+    const float range[] = {rangeLow, rangeHight};
+    const float* ranges[] = {range};
 
     randu(source, rangeLow, rangeHight);
 
     declare.in(source);
 
-    TEST_CYCLE()
+    TEST_CYCLE_MULTIRUN(3)
     {
         calcHist(&source, numberOfImages, channels, Mat(), hist, dims, histSize, ranges);
     }
diff --git a/modules/imgproc/perf/perf_precomp.cpp b/modules/imgproc/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/imgproc/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/imgproc/perf/perf_resize.cpp b/modules/imgproc/perf/perf_resize.cpp
index 3e6ecbf1f..221288017 100644
--- a/modules/imgproc/perf/perf_resize.cpp
+++ b/modules/imgproc/perf/perf_resize.cpp
@@ -25,7 +25,7 @@ PERF_TEST_P(MatInfo_Size_Size, resizeUpLinear,
     cvtest::fillGradient(src);
     declare.in(src).out(dst);
 
-    TEST_CYCLE() resize(src, dst, to);
+    TEST_CYCLE_MULTIRUN(10) resize(src, dst, to);
 
 #ifdef ANDROID
     SANITY_CHECK(dst, 5);
@@ -52,7 +52,7 @@ PERF_TEST_P(MatInfo_Size_Size, resizeDownLinear,
     cvtest::fillGradient(src);
     declare.in(src).out(dst);
 
-    TEST_CYCLE() resize(src, dst, to);
+    TEST_CYCLE_MULTIRUN(10) resize(src, dst, to);
 
 #ifdef ANDROID
     SANITY_CHECK(dst, 5);
@@ -85,7 +85,8 @@ PERF_TEST_P(MatInfo_Size_Scale, ResizeAreaFast,
 
     declare.in(src, WARMUP_RNG).out(dst);
 
-    TEST_CYCLE() resize(src, dst, dst.size(), 0, 0, INTER_AREA);
+    int runs = 15;
+    TEST_CYCLE_MULTIRUN(runs) resize(src, dst, dst.size(), 0, 0, INTER_AREA);
 
     //difference equal to 1 is allowed because of different possible rounding modes: round-to-nearest vs bankers' rounding
     SANITY_CHECK(dst, 1);
diff --git a/modules/imgproc/perf/perf_threshold.cpp b/modules/imgproc/perf/perf_threshold.cpp
index 01fff2e8c..9ccafd6b5 100644
--- a/modules/imgproc/perf/perf_threshold.cpp
+++ b/modules/imgproc/perf/perf_threshold.cpp
@@ -51,7 +51,8 @@ PERF_TEST_P(Size_Only, threshold_otsu, testing::Values(TYPICAL_MAT_SIZES))
 
     declare.in(src, WARMUP_RNG).out(dst);
 
-    TEST_CYCLE() threshold(src, dst, 0, maxval, THRESH_BINARY|THRESH_OTSU);
+    int runs = 15;
+    TEST_CYCLE_MULTIRUN(runs) threshold(src, dst, 0, maxval, THRESH_BINARY|THRESH_OTSU);
 
     SANITY_CHECK(dst);
 }
diff --git a/modules/imgproc/perf/perf_warp.cpp b/modules/imgproc/perf/perf_warp.cpp
index e958a7e96..146528b6b 100644
--- a/modules/imgproc/perf/perf_warp.cpp
+++ b/modules/imgproc/perf/perf_warp.cpp
@@ -221,4 +221,3 @@ PERF_TEST(Transform, getPerspectiveTransform)
 
     SANITY_CHECK(transformCoefficient, 1e-5);
 }
-
diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp
index 4dae01343..fb6afaf2b 100644
--- a/modules/imgproc/src/canny.cpp
+++ b/modules/imgproc/src/canny.cpp
@@ -41,6 +41,50 @@
 
 #include "precomp.hpp"
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+#define USE_IPP_CANNY 1
+#else
+#undef USE_IPP_CANNY
+#endif
+
+#ifdef USE_IPP_CANNY
+namespace cv
+{
+static bool ippCanny(const Mat& _src, Mat& _dst, float low,  float high)
+{
+    int size = 0, size1 = 0;
+    IppiSize roi = { _src.cols, _src.rows };
+
+    ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size);
+    ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1);
+    size = std::max(size, size1);
+    ippiCannyGetSize(roi, &size1);
+    size = std::max(size, size1);
+
+    AutoBuffer<uchar> buf(size + 64);
+    uchar* buffer = alignPtr((uchar*)buf, 32);
+
+    Mat _dx(_src.rows, _src.cols, CV_16S);
+    if( ippiFilterSobelNegVertBorder_8u16s_C1R(_src.data, (int)_src.step,
+                    _dx.ptr<short>(), (int)_dx.step, roi,
+                    ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
+        return false;
+
+    Mat _dy(_src.rows, _src.cols, CV_16S);
+    if( ippiFilterSobelHorizBorder_8u16s_C1R(_src.data, (int)_src.step,
+                    _dy.ptr<short>(), (int)_dy.step, roi,
+                    ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
+        return false;
+
+    if( ippiCanny_16s8u_C1R(_dx.ptr<short>(), (int)_dx.step,
+                            _dy.ptr<short>(), (int)_dy.step,
+                            _dst.data, (int)_dst.step, roi, low, high, buffer) < 0 )
+        return false;
+    return true;
+}
+}
+#endif
+
 void cv::Canny( InputArray _src, OutputArray _dst,
                 double low_thresh, double high_thresh,
                 int aperture_size, bool L2gradient )
@@ -61,20 +105,26 @@ void cv::Canny( InputArray _src, OutputArray _dst,
     if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
         CV_Error(CV_StsBadFlag, "");
 
+    if (low_thresh > high_thresh)
+        std::swap(low_thresh, high_thresh);
+
 #ifdef HAVE_TEGRA_OPTIMIZATION
     if (tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient))
         return;
 #endif
 
+#ifdef USE_IPP_CANNY
+    if( aperture_size == 3 && !L2gradient &&
+        ippCanny(src, dst, low_thresh, high_thresh) >= 0 )
+        return;
+#endif
+
     const int cn = src.channels();
-    cv::Mat dx(src.rows, src.cols, CV_16SC(cn));
-    cv::Mat dy(src.rows, src.cols, CV_16SC(cn));
+    Mat dx(src.rows, src.cols, CV_16SC(cn));
+    Mat dy(src.rows, src.cols, CV_16SC(cn));
 
-    cv::Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, cv::BORDER_REPLICATE);
-    cv::Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, cv::BORDER_REPLICATE);
-
-    if (low_thresh > high_thresh)
-        std::swap(low_thresh, high_thresh);
+    Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, cv::BORDER_REPLICATE);
+    Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, cv::BORDER_REPLICATE);
 
     if (L2gradient)
     {
@@ -88,7 +138,7 @@ void cv::Canny( InputArray _src, OutputArray _dst,
     int high = cvFloor(high_thresh);
 
     ptrdiff_t mapstep = src.cols + 2;
-    cv::AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int));
+    AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int));
 
     int* mag_buf[3];
     mag_buf[0] = (int*)(uchar*)buffer;
diff --git a/modules/imgproc/src/clahe.cpp b/modules/imgproc/src/clahe.cpp
index 4ce479713..89fb62bd0 100644
--- a/modules/imgproc/src/clahe.cpp
+++ b/modules/imgproc/src/clahe.cpp
@@ -330,5 +330,5 @@ namespace
 
 cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
 {
-    return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
+    return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height);
 }
diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp
index e07902cad..14703cce2 100644
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -94,6 +94,13 @@
 
 #define  CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+#define MAX_IPP8u   255
+#define MAX_IPP16u  65535
+#define MAX_IPP32f  1.0
+static IppStatus sts = ippInit();
+#endif
+
 namespace cv
 {
 
@@ -191,6 +198,301 @@ void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
     parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker<Cvt>(src, dst, cvt), src.total()/(double)(1<<16) );
 }
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *);
+typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize);
+typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *);
+
+template <typename Cvt>
+class CvtColorIPPLoop_Invoker : public ParallelLoopBody
+{
+public:
+
+    CvtColorIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt, bool *_ok) :
+        ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt), ok(_ok)
+    {
+        *ok = true;
+    }
+
+    virtual void operator()(const Range& range) const
+    {
+        const void *yS = src.ptr<uchar>(range.start);
+        void *yD = dst.ptr<uchar>(range.start);
+        if( cvt(yS, (int)src.step[0], yD, (int)dst.step[0], src.cols, range.end - range.start) < 0 )
+            *ok = false;
+    }
+
+private:
+    const Mat& src;
+    Mat& dst;
+    const Cvt& cvt;
+    bool *ok;
+
+    const CvtColorIPPLoop_Invoker& operator= (const CvtColorIPPLoop_Invoker&);
+};
+
+template <typename Cvt>
+bool CvtColorIPPLoop(const Mat& src, Mat& dst, const Cvt& cvt)
+{
+    bool ok;
+    parallel_for_(Range(0, src.rows), CvtColorIPPLoop_Invoker<Cvt>(src, dst, cvt, &ok), src.total()/(double)(1<<16) );
+    return ok;
+}
+
+template <typename Cvt>
+bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
+{
+    Mat temp;
+    Mat &source = src;
+    if( src.data == dst.data )
+    {
+        src.copyTo(temp);
+        source = temp;
+    }
+    bool ok;
+    parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok), source.total()/(double)(1<<16) );
+    return ok;
+}
+
+static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
+         IppiSize roiSize, const int *dstOrder)
+{
+    return ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u);
+}
+
+static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
+         IppiSize roiSize, const int *dstOrder)
+{
+    return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u);
+}
+
+static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
+         IppiSize roiSize, const int *dstOrder)
+{
+    return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f);
+}
+
+static ippiReorderFunc ippiSwapChannelsC3C4RTab[] =
+{
+    (ippiReorderFunc)ippiSwapChannels_8u_C3C4Rf, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3C4Rf, 0,
+    0, (ippiReorderFunc)ippiSwapChannels_32f_C3C4Rf, 0, 0
+};
+
+static ippiGeneralFunc ippiCopyAC4C3RTab[] =
+{
+    (ippiGeneralFunc)ippiCopy_8u_AC4C3R, 0, (ippiGeneralFunc)ippiCopy_16u_AC4C3R, 0,
+    0, (ippiGeneralFunc)ippiCopy_32f_AC4C3R, 0, 0
+};
+
+static ippiReorderFunc ippiSwapChannelsC4C3RTab[] =
+{
+    (ippiReorderFunc)ippiSwapChannels_8u_C4C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4C3R, 0,
+    0, (ippiReorderFunc)ippiSwapChannels_32f_C4C3R, 0, 0
+};
+
+static ippiReorderFunc ippiSwapChannelsC3RTab[] =
+{
+    (ippiReorderFunc)ippiSwapChannels_8u_C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3R, 0,
+    0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
+};
+
+static ippiReorderFunc ippiSwapChannelsC4RTab[] =
+{
+    (ippiReorderFunc)ippiSwapChannels_8u_AC4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_AC4R, 0,
+    0, (ippiReorderFunc)ippiSwapChannels_32f_AC4R, 0, 0
+};
+
+static ippiColor2GrayFunc ippiColor2GrayC3Tab[] =
+{
+    (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0,
+    0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0
+};
+
+static ippiColor2GrayFunc ippiColor2GrayC4Tab[] =
+{
+    (ippiColor2GrayFunc)ippiColorToGray_8u_AC4C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_AC4C1R, 0,
+    0, (ippiColor2GrayFunc)ippiColorToGray_32f_AC4C1R, 0, 0
+};
+
+static ippiGeneralFunc ippiRGB2GrayC3Tab[] =
+{
+    (ippiGeneralFunc)ippiRGBToGray_8u_C3C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_C3C1R, 0,
+    0, (ippiGeneralFunc)ippiRGBToGray_32f_C3C1R, 0, 0
+};
+
+static ippiGeneralFunc ippiRGB2GrayC4Tab[] =
+{
+    (ippiGeneralFunc)ippiRGBToGray_8u_AC4C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_AC4C1R, 0,
+    0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
+};
+
+static ippiGeneralFunc ippiCopyP3C3RTab[] =
+{
+    (ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0,
+    0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0
+};
+
+static ippiGeneralFunc ippiRGB2XYZTab[] =
+{
+    (ippiGeneralFunc)ippiRGBToXYZ_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToXYZ_16u_C3R, 0,
+    0, (ippiGeneralFunc)ippiRGBToXYZ_32f_C3R, 0, 0
+};
+
+static ippiGeneralFunc ippiXYZ2RGBTab[] =
+{
+    (ippiGeneralFunc)ippiXYZToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiXYZToRGB_16u_C3R, 0,
+    0, (ippiGeneralFunc)ippiXYZToRGB_32f_C3R, 0, 0
+};
+
+static ippiGeneralFunc ippiRGB2HSVTab[] =
+{
+    (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0,
+    0, 0, 0, 0
+};
+
+static ippiGeneralFunc ippiHSV2RGBTab[] =
+{
+    (ippiGeneralFunc)ippiHSVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHSVToRGB_16u_C3R, 0,
+    0, 0, 0, 0
+};
+
+static ippiGeneralFunc ippiRGB2HLSTab[] =
+{
+    (ippiGeneralFunc)ippiRGBToHLS_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHLS_16u_C3R, 0,
+    0, (ippiGeneralFunc)ippiRGBToHLS_32f_C3R, 0, 0
+};
+
+static ippiGeneralFunc ippiHLS2RGBTab[] =
+{
+    (ippiGeneralFunc)ippiHLSToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHLSToRGB_16u_C3R, 0,
+    0, (ippiGeneralFunc)ippiHLSToRGB_32f_C3R, 0, 0
+};
+
+struct IPPGeneralFunctor
+{
+    IPPGeneralFunctor(ippiGeneralFunc _func) : func(_func){}
+    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
+    {
+        return func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
+    }
+private:
+    ippiGeneralFunc func;
+};
+
+struct IPPReorderFunctor
+{
+    IPPReorderFunctor(ippiReorderFunc _func, int _order0, int _order1, int _order2) : func(_func)
+    {
+        order[0] = _order0;
+        order[1] = _order1;
+        order[2] = _order2;
+        order[3] = 3;
+    }
+    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
+    {
+        return func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0;
+    }
+private:
+    ippiReorderFunc func;
+    int order[4];
+};
+
+struct IPPColor2GrayFunctor
+{
+    IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : func(_func)
+    {
+        coeffs[0] = 0.114f;
+        coeffs[1] = 0.587f;
+        coeffs[2] = 0.299f;
+    }
+    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
+    {
+        return func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0;
+    }
+private:
+    ippiColor2GrayFunc func;
+    Ipp32f coeffs[3];
+};
+
+struct IPPGray2BGRFunctor
+{
+    IPPGray2BGRFunctor(ippiGeneralFunc _func) : func(_func){}
+    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
+    {
+        const void* srcarray[3] = { src, src, src };
+        return func(srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
+    }
+private:
+    ippiGeneralFunc func;
+};
+
+struct IPPGray2BGRAFunctor
+{
+    IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) : func1(_func1), func2(_func2), depth(_depth){}
+    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
+    {
+        const void* srcarray[3] = { src, src, src };
+        Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
+        if(func1(srcarray, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0)
+            return false;
+        int order[4] = {0, 1, 2, 3};
+        return func2(temp.data, (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
+    }
+private:
+    ippiGeneralFunc func1;
+    ippiReorderFunc func2;
+    int depth;
+};
+
+struct IPPReorderGeneralFunctor
+{
+    IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) : func1(_func1), func2(_func2), depth(_depth)
+    {
+        order[0] = _order0;
+        order[1] = _order1;
+        order[2] = _order2;
+        order[3] = 3;
+    }
+    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
+    {
+        Mat temp;
+        temp.create(rows, cols, CV_MAKETYPE(depth, 3));
+        if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows), order) < 0)
+            return false;
+        return func2(temp.data, (int)temp.step[0], dst, dstStep, ippiSize(cols, rows)) >= 0;
+    }
+private:
+    ippiReorderFunc func1;
+    ippiGeneralFunc func2;
+    int order[4];
+    int depth;
+};
+
+struct IPPGeneralReorderFunctor
+{
+    IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) : func1(_func1), func2(_func2), depth(_depth)
+    {
+        order[0] = _order0;
+        order[1] = _order1;
+        order[2] = _order2;
+        order[3] = 3;
+    }
+    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
+    {
+        Mat temp;
+        temp.create(rows, cols, CV_MAKETYPE(depth, 3));
+        if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0)
+            return false;
+        return func2(temp.data, (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
+    }
+private:
+    ippiGeneralFunc func1;
+    ippiReorderFunc func2;
+    int order[4];
+    int depth;
+};
+#endif
+
 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
 
 template<typename _Tp> struct RGB2RGB
@@ -2410,6 +2712,39 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             _dst.create( sz, CV_MAKETYPE(depth, dcn));
             dst = _dst.getMat();
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+            if( code == CV_BGR2BGRA || code == CV_RGB2RGBA)
+            {
+                if ( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) )
+                    return;
+            }
+            else if( code == CV_BGRA2BGR )
+            {
+                if ( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiCopyAC4C3RTab[depth])) )
+                    return;
+            }
+            else if( code == CV_BGR2RGBA )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 2, 1, 0)) )
+                    return;
+            }
+            else if( code == CV_RGBA2BGR )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC4C3RTab[depth], 2, 1, 0)) )
+                    return;
+            }
+            else if( code == CV_RGB2BGR )
+            {
+                if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) )
+                    return;
+            }
+            else if( code == CV_RGBA2BGRA )
+            {
+                if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) )
+                    return;
+            }
+#endif
+
             if( depth == CV_8U )
             {
 #ifdef HAVE_TEGRA_OPTIMIZATION
@@ -2463,6 +2798,29 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             _dst.create(sz, CV_MAKETYPE(depth, 1));
             dst = _dst.getMat();
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+            if( code == CV_BGR2GRAY )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) )
+                    return;
+            }
+            else if( code == CV_RGB2GRAY )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) )
+                    return;
+            }
+            else if( code == CV_BGRA2GRAY )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC4Tab[depth])) )
+                    return;
+            }
+            else if( code == CV_RGBA2GRAY )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC4Tab[depth])) )
+                    return;
+            }
+#endif
+
             bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
 
             if( depth == CV_8U )
@@ -2492,6 +2850,20 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             _dst.create(sz, CV_MAKETYPE(depth, dcn));
             dst = _dst.getMat();
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+            if( code == CV_GRAY2BGR )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) )
+                    return;
+            }
+            else if( code == CV_GRAY2BGRA )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) )
+                    return;
+            }
+#endif
+
+
             if( depth == CV_8U )
             {
 #ifdef HAVE_TEGRA_OPTIMIZATION
@@ -2517,7 +2889,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
         case CV_BGR2YUV: case CV_RGB2YUV:
             {
             CV_Assert( scn == 3 || scn == 4 );
-            bidx = code == CV_BGR2YCrCb || code == CV_RGB2YUV ? 0 : 2;
+            bidx = code == CV_BGR2YCrCb || code == CV_BGR2YUV ? 0 : 2;
             static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
             static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };
             const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f;
@@ -2546,7 +2918,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             {
             if( dcn <= 0 ) dcn = 3;
             CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
-            bidx = code == CV_YCrCb2BGR || code == CV_YUV2RGB ? 0 : 2;
+            bidx = code == CV_YCrCb2BGR || code == CV_YUV2BGR ? 0 : 2;
             static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };
             static const int yuv_i[] = { 33292, -6472, -9519, 18678 };
             const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f;
@@ -2571,6 +2943,29 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             _dst.create(sz, CV_MAKETYPE(depth, 3));
             dst = _dst.getMat();
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+            if( code == CV_BGR2XYZ && scn == 3 )
+            {
+                if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) )
+                    return;
+            }
+            else if( code == CV_BGR2XYZ && scn == 4 )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) )
+                    return;
+            }
+            else if( code == CV_RGB2XYZ && scn == 3 )
+            {
+                if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2XYZTab[depth])) )
+                    return;
+            }
+            else if( code == CV_RGB2XYZ && scn == 4 )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 0, 1, 2, depth)) )
+                    return;
+            }
+#endif
+
             if( depth == CV_8U )
                 CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));
             else if( depth == CV_16U )
@@ -2587,6 +2982,29 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             _dst.create(sz, CV_MAKETYPE(depth, dcn));
             dst = _dst.getMat();
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+            if( code == CV_XYZ2BGR && dcn == 3 )
+            {
+                if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
+                    return;
+            }
+            else if( code == CV_XYZ2BGR && dcn == 4 )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
+                    return;
+            }
+            if( code == CV_XYZ2RGB && dcn == 3 )
+            {
+                if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiXYZ2RGBTab[depth])) )
+                    return;
+            }
+            else if( code == CV_XYZ2RGB && dcn == 4 )
+            {
+                if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
+                    return;
+            }
+#endif
+
             if( depth == CV_8U )
                 CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0));
             else if( depth == CV_16U )
@@ -2607,6 +3025,52 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             _dst.create(sz, CV_MAKETYPE(depth, 3));
             dst = _dst.getMat();
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+            if( depth == CV_8U || depth == CV_16U )
+            {
+                if( code == CV_BGR2HSV_FULL && scn == 3 )
+                {
+                    if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) )
+                        return;
+                }
+                else if( code == CV_BGR2HSV_FULL && scn == 4 )
+                {
+                    if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) )
+                        return;
+                }
+                else if( code == CV_RGB2HSV_FULL && scn == 3 )
+                {
+                    if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HSVTab[depth])) )
+                        return;
+                }
+                else if( code == CV_RGB2HSV_FULL && scn == 4 )
+                {
+                    if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) )
+                        return;
+                }
+                else if( code == CV_BGR2HLS_FULL && scn == 3 )
+                {
+                    if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) )
+                        return;
+                }
+                else if( code == CV_BGR2HLS_FULL && scn == 4 )
+                {
+                    if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) )
+                        return;
+                }
+                else if( code == CV_RGB2HLS_FULL && scn == 3 )
+                {
+                    if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HLSTab[depth])) )
+                        return;
+                }
+                else if( code == CV_RGB2HLS_FULL && scn == 4 )
+                {
+                    if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) )
+                        return;
+                }
+            }
+#endif
+
             if( code == CV_BGR2HSV || code == CV_RGB2HSV ||
                 code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL )
             {
@@ -2642,6 +3106,52 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             _dst.create(sz, CV_MAKETYPE(depth, dcn));
             dst = _dst.getMat();
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+            if( depth == CV_8U || depth == CV_16U )
+            {
+                if( code == CV_HSV2BGR_FULL && dcn == 3 )
+                {
+                    if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
+                        return;
+                }
+                else if( code == CV_HSV2BGR_FULL && dcn == 4 )
+                {
+                    if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
+                        return;
+                }
+                else if( code == CV_HSV2RGB_FULL && dcn == 3 )
+                {
+                    if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHSV2RGBTab[depth])) )
+                        return;
+                }
+                else if( code == CV_HSV2RGB_FULL && dcn == 4 )
+                {
+                    if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
+                        return;
+                }
+                else if( code == CV_HLS2BGR_FULL && dcn == 3 )
+                {
+                    if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
+                        return;
+                }
+                else if( code == CV_HLS2BGR_FULL && dcn == 4 )
+                {
+                    if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
+                        return;
+                }
+                else if( code == CV_HLS2RGB_FULL && dcn == 3 )
+                {
+                    if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHLS2RGBTab[depth])) )
+                        return;
+                }
+                else if( code == CV_HLS2RGB_FULL && dcn == 4 )
+                {
+                    if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
+                        return;
+                }
+            }
+#endif
+
             if( code == CV_HSV2BGR || code == CV_HSV2RGB ||
                 code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL )
             {
diff --git a/modules/imgproc/src/connectedcomponents.cpp b/modules/imgproc/src/connectedcomponents.cpp
index 4fee0aacd..41cb48094 100644
--- a/modules/imgproc/src/connectedcomponents.cpp
+++ b/modules/imgproc/src/connectedcomponents.cpp
@@ -92,20 +92,10 @@ namespace cv{
         }
         void operator()(int r, int c, int l){
             int *row = &statsv.at<int>(l, 0);
-            if(c > row[CC_STAT_WIDTH]){
-                row[CC_STAT_WIDTH] = c;
-            }else{
-                if(c < row[CC_STAT_LEFT]){
-                    row[CC_STAT_LEFT] = c;
-                }
-            }
-            if(r > row[CC_STAT_HEIGHT]){
-                row[CC_STAT_HEIGHT] = r;
-            }else{
-                if(r < row[CC_STAT_TOP]){
-                    row[CC_STAT_TOP] = r;
-                }
-            }
+            row[CC_STAT_LEFT] = MIN(row[CC_STAT_LEFT], c);
+            row[CC_STAT_WIDTH] = MAX(row[CC_STAT_WIDTH], c);
+            row[CC_STAT_TOP] = MIN(row[CC_STAT_TOP], r);
+            row[CC_STAT_HEIGHT] = MAX(row[CC_STAT_HEIGHT], r);
             row[CC_STAT_AREA]++;
             Point2ui64 &integral = integrals[l];
             integral.x += c;
@@ -114,9 +104,7 @@ namespace cv{
         void finish(){
             for(int l = 0; l < statsv.rows; ++l){
                 int *row = &statsv.at<int>(l, 0);
-                row[CC_STAT_LEFT] = std::min(row[CC_STAT_LEFT], row[CC_STAT_WIDTH]);
                 row[CC_STAT_WIDTH] = row[CC_STAT_WIDTH] - row[CC_STAT_LEFT] + 1;
-                row[CC_STAT_TOP] = std::min(row[CC_STAT_TOP], row[CC_STAT_HEIGHT]);
                 row[CC_STAT_HEIGHT] = row[CC_STAT_HEIGHT] - row[CC_STAT_TOP] + 1;
 
                 Point2ui64 &integral = integrals[l];
@@ -399,7 +387,7 @@ int cv::connectedComponentsWithStats(InputArray _img, OutputArray _labels, Outpu
     const cv::Mat img = _img.getMat();
     _labels.create(img.size(), CV_MAT_DEPTH(ltype));
     cv::Mat labels = _labels.getMat();
-    connectedcomponents::CCStatsOp sop(statsv, centroids); 
+    connectedcomponents::CCStatsOp sop(statsv, centroids);
     if(ltype == CV_16U){
         return connectedComponents_sub1(img, labels, connectivity, sop);
     }else if(ltype == CV_32S){
diff --git a/modules/imgproc/src/contours.cpp b/modules/imgproc/src/contours.cpp
index 6f5b0c8ac..797f8c56c 100644
--- a/modules/imgproc/src/contours.cpp
+++ b/modules/imgproc/src/contours.cpp
@@ -1339,8 +1339,8 @@ icvFindContoursInInterval( const CvArr* src,
     if( contourHeaderSize < (int)sizeof(CvContour))
         CV_Error( CV_StsBadSize, "Contour header size must be >= sizeof(CvContour)" );
 
-    storage00 = cvCreateChildMemStorage(storage);
-    storage01 = cvCreateChildMemStorage(storage);
+    storage00.reset(cvCreateChildMemStorage(storage));
+    storage01.reset(cvCreateChildMemStorage(storage));
 
     CvMat stub, *mat;
 
diff --git a/modules/imgproc/src/distransform.cpp b/modules/imgproc/src/distransform.cpp
index 8ebb206b3..29e3fc10a 100644
--- a/modules/imgproc/src/distransform.cpp
+++ b/modules/imgproc/src/distransform.cpp
@@ -744,6 +744,16 @@ void cv::distanceTransform( InputArray _src, OutputArray _dst, OutputArray _labe
 
         if( labelType == CV_DIST_LABEL_CCOMP )
         {
+        #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+            if( maskSize == CV_DIST_MASK_5 )
+            {
+                IppiSize roi = { src->cols, src->rows };
+                if( ippiDistanceTransform_5x5_8u32f_C1R(
+                        src->data.ptr, src->step,
+                        dst->data.fl, dst->step, roi, _mask) >= 0 )
+                    return;
+            }
+        #endif
             Mat zpix = src == 0;
             connectedComponents(zpix, labels, 8, CV_32S);
         }
diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp
index 86322e4ba..f7e7efa85 100644
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@@ -117,7 +117,7 @@ void FilterEngine::init( const Ptr<BaseFilter>& _filter2D,
 
     if( isSeparable() )
     {
-        CV_Assert( !rowFilter.empty() && !columnFilter.empty() );
+        CV_Assert( rowFilter && columnFilter );
         ksize = Size(rowFilter->ksize, columnFilter->ksize);
         anchor = Point(rowFilter->anchor, columnFilter->anchor);
     }
@@ -244,9 +244,9 @@ int FilterEngine::start(Size _wholeSize, Rect _roi, int _maxBufRows)
     rowCount = dstY = 0;
     startY = startY0 = std::max(roi.y - anchor.y, 0);
     endY = std::min(roi.y + roi.height + ksize.height - anchor.y - 1, wholeSize.height);
-    if( !columnFilter.empty() )
+    if( columnFilter )
         columnFilter->reset();
-    if( !filter2D.empty() )
+    if( filter2D )
         filter2D->reset();
 
     return startY;
@@ -2735,42 +2735,42 @@ cv::Ptr<cv::BaseRowFilter> cv::getLinearRowFilter( int srcType, int bufType,
     if( (symmetryType & (KERNEL_SYMMETRICAL|KERNEL_ASYMMETRICAL)) != 0 && ksize <= 5 )
     {
         if( sdepth == CV_8U && ddepth == CV_32S )
-            return Ptr<BaseRowFilter>(new SymmRowSmallFilter<uchar, int, SymmRowSmallVec_8u32s>
-                (kernel, anchor, symmetryType, SymmRowSmallVec_8u32s(kernel, symmetryType)));
+            return makePtr<SymmRowSmallFilter<uchar, int, SymmRowSmallVec_8u32s> >
+                (kernel, anchor, symmetryType, SymmRowSmallVec_8u32s(kernel, symmetryType));
         if( sdepth == CV_32F && ddepth == CV_32F )
-            return Ptr<BaseRowFilter>(new SymmRowSmallFilter<float, float, SymmRowSmallVec_32f>
-                (kernel, anchor, symmetryType, SymmRowSmallVec_32f(kernel, symmetryType)));
+            return makePtr<SymmRowSmallFilter<float, float, SymmRowSmallVec_32f> >
+                (kernel, anchor, symmetryType, SymmRowSmallVec_32f(kernel, symmetryType));
     }
 
     if( sdepth == CV_8U && ddepth == CV_32S )
-        return Ptr<BaseRowFilter>(new RowFilter<uchar, int, RowVec_8u32s>
-            (kernel, anchor, RowVec_8u32s(kernel)));
+        return makePtr<RowFilter<uchar, int, RowVec_8u32s> >
+            (kernel, anchor, RowVec_8u32s(kernel));
     if( sdepth == CV_8U && ddepth == CV_32F )
-        return Ptr<BaseRowFilter>(new RowFilter<uchar, float, RowNoVec>(kernel, anchor));
+        return makePtr<RowFilter<uchar, float, RowNoVec> >(kernel, anchor);
     if( sdepth == CV_8U && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowFilter<uchar, double, RowNoVec>(kernel, anchor));
+        return makePtr<RowFilter<uchar, double, RowNoVec> >(kernel, anchor);
     if( sdepth == CV_16U && ddepth == CV_32F )
-        return Ptr<BaseRowFilter>(new RowFilter<ushort, float, RowNoVec>(kernel, anchor));
+        return makePtr<RowFilter<ushort, float, RowNoVec> >(kernel, anchor);
     if( sdepth == CV_16U && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowFilter<ushort, double, RowNoVec>(kernel, anchor));
+        return makePtr<RowFilter<ushort, double, RowNoVec> >(kernel, anchor);
     if( sdepth == CV_16S && ddepth == CV_32F )
-        return Ptr<BaseRowFilter>(new RowFilter<short, float, RowVec_16s32f>
-                                  (kernel, anchor, RowVec_16s32f(kernel)));
+        return makePtr<RowFilter<short, float, RowVec_16s32f> >
+                                  (kernel, anchor, RowVec_16s32f(kernel));
     if( sdepth == CV_16S && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowFilter<short, double, RowNoVec>(kernel, anchor));
+        return makePtr<RowFilter<short, double, RowNoVec> >(kernel, anchor);
     if( sdepth == CV_32F && ddepth == CV_32F )
-        return Ptr<BaseRowFilter>(new RowFilter<float, float, RowVec_32f>
-            (kernel, anchor, RowVec_32f(kernel)));
+        return makePtr<RowFilter<float, float, RowVec_32f> >
+            (kernel, anchor, RowVec_32f(kernel));
     if( sdepth == CV_32F && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowFilter<float, double, RowNoVec>(kernel, anchor));
+        return makePtr<RowFilter<float, double, RowNoVec> >(kernel, anchor);
     if( sdepth == CV_64F && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowFilter<double, double, RowNoVec>(kernel, anchor));
+        return makePtr<RowFilter<double, double, RowNoVec> >(kernel, anchor);
 
     CV_Error_( CV_StsNotImplemented,
         ("Unsupported combination of source format (=%d), and buffer format (=%d)",
         srcType, bufType));
 
-    return Ptr<BaseRowFilter>(0);
+    return Ptr<BaseRowFilter>();
 }
 
 
@@ -2789,24 +2789,24 @@ cv::Ptr<cv::BaseColumnFilter> cv::getLinearColumnFilter( int bufType, int dstTyp
     if( !(symmetryType & (KERNEL_SYMMETRICAL|KERNEL_ASYMMETRICAL)) )
     {
         if( ddepth == CV_8U && sdepth == CV_32S )
-            return Ptr<BaseColumnFilter>(new ColumnFilter<FixedPtCastEx<int, uchar>, ColumnNoVec>
-            (kernel, anchor, delta, FixedPtCastEx<int, uchar>(bits)));
+            return makePtr<ColumnFilter<FixedPtCastEx<int, uchar>, ColumnNoVec> >
+            (kernel, anchor, delta, FixedPtCastEx<int, uchar>(bits));
         if( ddepth == CV_8U && sdepth == CV_32F )
-            return Ptr<BaseColumnFilter>(new ColumnFilter<Cast<float, uchar>, ColumnNoVec>(kernel, anchor, delta));
+            return makePtr<ColumnFilter<Cast<float, uchar>, ColumnNoVec> >(kernel, anchor, delta);
         if( ddepth == CV_8U && sdepth == CV_64F )
-            return Ptr<BaseColumnFilter>(new ColumnFilter<Cast<double, uchar>, ColumnNoVec>(kernel, anchor, delta));
+            return makePtr<ColumnFilter<Cast<double, uchar>, ColumnNoVec> >(kernel, anchor, delta);
         if( ddepth == CV_16U && sdepth == CV_32F )
-            return Ptr<BaseColumnFilter>(new ColumnFilter<Cast<float, ushort>, ColumnNoVec>(kernel, anchor, delta));
+            return makePtr<ColumnFilter<Cast<float, ushort>, ColumnNoVec> >(kernel, anchor, delta);
         if( ddepth == CV_16U && sdepth == CV_64F )
-            return Ptr<BaseColumnFilter>(new ColumnFilter<Cast<double, ushort>, ColumnNoVec>(kernel, anchor, delta));
+            return makePtr<ColumnFilter<Cast<double, ushort>, ColumnNoVec> >(kernel, anchor, delta);
         if( ddepth == CV_16S && sdepth == CV_32F )
-            return Ptr<BaseColumnFilter>(new ColumnFilter<Cast<float, short>, ColumnNoVec>(kernel, anchor, delta));
+            return makePtr<ColumnFilter<Cast<float, short>, ColumnNoVec> >(kernel, anchor, delta);
         if( ddepth == CV_16S && sdepth == CV_64F )
-            return Ptr<BaseColumnFilter>(new ColumnFilter<Cast<double, short>, ColumnNoVec>(kernel, anchor, delta));
+            return makePtr<ColumnFilter<Cast<double, short>, ColumnNoVec> >(kernel, anchor, delta);
         if( ddepth == CV_32F && sdepth == CV_32F )
-            return Ptr<BaseColumnFilter>(new ColumnFilter<Cast<float, float>, ColumnNoVec>(kernel, anchor, delta));
+            return makePtr<ColumnFilter<Cast<float, float>, ColumnNoVec> >(kernel, anchor, delta);
         if( ddepth == CV_64F && sdepth == CV_64F )
-            return Ptr<BaseColumnFilter>(new ColumnFilter<Cast<double, double>, ColumnNoVec>(kernel, anchor, delta));
+            return makePtr<ColumnFilter<Cast<double, double>, ColumnNoVec> >(kernel, anchor, delta);
     }
     else
     {
@@ -2814,60 +2814,60 @@ cv::Ptr<cv::BaseColumnFilter> cv::getLinearColumnFilter( int bufType, int dstTyp
         if( ksize == 3 )
         {
             if( ddepth == CV_8U && sdepth == CV_32S )
-                return Ptr<BaseColumnFilter>(new SymmColumnSmallFilter<
-                    FixedPtCastEx<int, uchar>, SymmColumnVec_32s8u>
+                return makePtr<SymmColumnSmallFilter<
+                    FixedPtCastEx<int, uchar>, SymmColumnVec_32s8u> >
                     (kernel, anchor, delta, symmetryType, FixedPtCastEx<int, uchar>(bits),
-                    SymmColumnVec_32s8u(kernel, symmetryType, bits, delta)));
+                    SymmColumnVec_32s8u(kernel, symmetryType, bits, delta));
             if( ddepth == CV_16S && sdepth == CV_32S && bits == 0 )
-                return Ptr<BaseColumnFilter>(new SymmColumnSmallFilter<Cast<int, short>,
-                    SymmColumnSmallVec_32s16s>(kernel, anchor, delta, symmetryType,
-                        Cast<int, short>(), SymmColumnSmallVec_32s16s(kernel, symmetryType, bits, delta)));
+                return makePtr<SymmColumnSmallFilter<Cast<int, short>,
+                    SymmColumnSmallVec_32s16s> >(kernel, anchor, delta, symmetryType,
+                        Cast<int, short>(), SymmColumnSmallVec_32s16s(kernel, symmetryType, bits, delta));
             if( ddepth == CV_32F && sdepth == CV_32F )
-                return Ptr<BaseColumnFilter>(new SymmColumnSmallFilter<
-                    Cast<float, float>,SymmColumnSmallVec_32f>
+                return makePtr<SymmColumnSmallFilter<
+                    Cast<float, float>,SymmColumnSmallVec_32f> >
                     (kernel, anchor, delta, symmetryType, Cast<float, float>(),
-                    SymmColumnSmallVec_32f(kernel, symmetryType, 0, delta)));
+                    SymmColumnSmallVec_32f(kernel, symmetryType, 0, delta));
         }
         if( ddepth == CV_8U && sdepth == CV_32S )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<FixedPtCastEx<int, uchar>, SymmColumnVec_32s8u>
+            return makePtr<SymmColumnFilter<FixedPtCastEx<int, uchar>, SymmColumnVec_32s8u> >
                 (kernel, anchor, delta, symmetryType, FixedPtCastEx<int, uchar>(bits),
-                SymmColumnVec_32s8u(kernel, symmetryType, bits, delta)));
+                SymmColumnVec_32s8u(kernel, symmetryType, bits, delta));
         if( ddepth == CV_8U && sdepth == CV_32F )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<Cast<float, uchar>, ColumnNoVec>
-                (kernel, anchor, delta, symmetryType));
+            return makePtr<SymmColumnFilter<Cast<float, uchar>, ColumnNoVec> >
+                (kernel, anchor, delta, symmetryType);
         if( ddepth == CV_8U && sdepth == CV_64F )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<Cast<double, uchar>, ColumnNoVec>
-                (kernel, anchor, delta, symmetryType));
+            return makePtr<SymmColumnFilter<Cast<double, uchar>, ColumnNoVec> >
+                (kernel, anchor, delta, symmetryType);
         if( ddepth == CV_16U && sdepth == CV_32F )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<Cast<float, ushort>, ColumnNoVec>
-                (kernel, anchor, delta, symmetryType));
+            return makePtr<SymmColumnFilter<Cast<float, ushort>, ColumnNoVec> >
+                (kernel, anchor, delta, symmetryType);
         if( ddepth == CV_16U && sdepth == CV_64F )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<Cast<double, ushort>, ColumnNoVec>
-                (kernel, anchor, delta, symmetryType));
+            return makePtr<SymmColumnFilter<Cast<double, ushort>, ColumnNoVec> >
+                (kernel, anchor, delta, symmetryType);
         if( ddepth == CV_16S && sdepth == CV_32S )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<Cast<int, short>, ColumnNoVec>
-                (kernel, anchor, delta, symmetryType));
+            return makePtr<SymmColumnFilter<Cast<int, short>, ColumnNoVec> >
+                (kernel, anchor, delta, symmetryType);
         if( ddepth == CV_16S && sdepth == CV_32F )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<Cast<float, short>, SymmColumnVec_32f16s>
+            return makePtr<SymmColumnFilter<Cast<float, short>, SymmColumnVec_32f16s> >
                  (kernel, anchor, delta, symmetryType, Cast<float, short>(),
-                  SymmColumnVec_32f16s(kernel, symmetryType, 0, delta)));
+                  SymmColumnVec_32f16s(kernel, symmetryType, 0, delta));
         if( ddepth == CV_16S && sdepth == CV_64F )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<Cast<double, short>, ColumnNoVec>
-                (kernel, anchor, delta, symmetryType));
+            return makePtr<SymmColumnFilter<Cast<double, short>, ColumnNoVec> >
+                (kernel, anchor, delta, symmetryType);
         if( ddepth == CV_32F && sdepth == CV_32F )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<Cast<float, float>, SymmColumnVec_32f>
+            return makePtr<SymmColumnFilter<Cast<float, float>, SymmColumnVec_32f> >
                 (kernel, anchor, delta, symmetryType, Cast<float, float>(),
-                SymmColumnVec_32f(kernel, symmetryType, 0, delta)));
+                SymmColumnVec_32f(kernel, symmetryType, 0, delta));
         if( ddepth == CV_64F && sdepth == CV_64F )
-            return Ptr<BaseColumnFilter>(new SymmColumnFilter<Cast<double, double>, ColumnNoVec>
-                (kernel, anchor, delta, symmetryType));
+            return makePtr<SymmColumnFilter<Cast<double, double>, ColumnNoVec> >
+                (kernel, anchor, delta, symmetryType);
     }
 
     CV_Error_( CV_StsNotImplemented,
         ("Unsupported combination of buffer format (=%d), and destination format (=%d)",
         bufType, dstType));
 
-    return Ptr<BaseColumnFilter>(0);
+    return Ptr<BaseColumnFilter>();
 }
 
 
@@ -2933,7 +2933,7 @@ cv::Ptr<cv::FilterEngine> cv::createSeparableLinearFilter(
     Ptr<BaseColumnFilter> _columnFilter = getLinearColumnFilter(
         _bufType, _dstType, columnKernel, _anchor.y, ctype, _delta, bits );
 
-    return Ptr<FilterEngine>( new FilterEngine(Ptr<BaseFilter>(0), _rowFilter, _columnFilter,
+    return Ptr<FilterEngine>( new FilterEngine(Ptr<BaseFilter>(), _rowFilter, _columnFilter,
         _srcType, _dstType, _bufType, _rowBorderType, _columnBorderType, _borderValue ));
 }
 
@@ -3085,13 +3085,13 @@ cv::Ptr<cv::BaseFilter> cv::getLinearFilter(int srcType, int dstType,
     anchor = normalizeAnchor(anchor, _kernel.size());
 
     /*if( sdepth == CV_8U && ddepth == CV_8U && kdepth == CV_32S )
-        return Ptr<BaseFilter>(new Filter2D<uchar, FixedPtCastEx<int, uchar>, FilterVec_8u>
+        return makePtr<Filter2D<uchar, FixedPtCastEx<int, uchar>, FilterVec_8u> >
             (_kernel, anchor, delta, FixedPtCastEx<int, uchar>(bits),
-            FilterVec_8u(_kernel, bits, delta)));
+            FilterVec_8u(_kernel, bits, delta));
     if( sdepth == CV_8U && ddepth == CV_16S && kdepth == CV_32S )
-        return Ptr<BaseFilter>(new Filter2D<uchar, FixedPtCastEx<int, short>, FilterVec_8u16s>
+        return makePtr<Filter2D<uchar, FixedPtCastEx<int, short>, FilterVec_8u16s> >
             (_kernel, anchor, delta, FixedPtCastEx<int, short>(bits),
-            FilterVec_8u16s(_kernel, bits, delta)));*/
+            FilterVec_8u16s(_kernel, bits, delta));*/
 
     kdepth = sdepth == CV_64F || ddepth == CV_64F ? CV_64F : CV_32F;
     Mat kernel;
@@ -3101,53 +3101,53 @@ cv::Ptr<cv::BaseFilter> cv::getLinearFilter(int srcType, int dstType,
         _kernel.convertTo(kernel, kdepth, _kernel.type() == CV_32S ? 1./(1 << bits) : 1.);
 
     if( sdepth == CV_8U && ddepth == CV_8U )
-        return Ptr<BaseFilter>(new Filter2D<uchar, Cast<float, uchar>, FilterVec_8u>
-            (kernel, anchor, delta, Cast<float, uchar>(), FilterVec_8u(kernel, 0, delta)));
+        return makePtr<Filter2D<uchar, Cast<float, uchar>, FilterVec_8u> >
+            (kernel, anchor, delta, Cast<float, uchar>(), FilterVec_8u(kernel, 0, delta));
     if( sdepth == CV_8U && ddepth == CV_16U )
-        return Ptr<BaseFilter>(new Filter2D<uchar,
-            Cast<float, ushort>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<uchar,
+            Cast<float, ushort>, FilterNoVec> >(kernel, anchor, delta);
     if( sdepth == CV_8U && ddepth == CV_16S )
-        return Ptr<BaseFilter>(new Filter2D<uchar, Cast<float, short>, FilterVec_8u16s>
-            (kernel, anchor, delta, Cast<float, short>(), FilterVec_8u16s(kernel, 0, delta)));
+        return makePtr<Filter2D<uchar, Cast<float, short>, FilterVec_8u16s> >
+            (kernel, anchor, delta, Cast<float, short>(), FilterVec_8u16s(kernel, 0, delta));
     if( sdepth == CV_8U && ddepth == CV_32F )
-        return Ptr<BaseFilter>(new Filter2D<uchar,
-            Cast<float, float>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<uchar,
+            Cast<float, float>, FilterNoVec> >(kernel, anchor, delta);
     if( sdepth == CV_8U && ddepth == CV_64F )
-        return Ptr<BaseFilter>(new Filter2D<uchar,
-            Cast<double, double>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<uchar,
+            Cast<double, double>, FilterNoVec> >(kernel, anchor, delta);
 
     if( sdepth == CV_16U && ddepth == CV_16U )
-        return Ptr<BaseFilter>(new Filter2D<ushort,
-            Cast<float, ushort>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<ushort,
+            Cast<float, ushort>, FilterNoVec> >(kernel, anchor, delta);
     if( sdepth == CV_16U && ddepth == CV_32F )
-        return Ptr<BaseFilter>(new Filter2D<ushort,
-            Cast<float, float>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<ushort,
+            Cast<float, float>, FilterNoVec> >(kernel, anchor, delta);
     if( sdepth == CV_16U && ddepth == CV_64F )
-        return Ptr<BaseFilter>(new Filter2D<ushort,
-            Cast<double, double>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<ushort,
+            Cast<double, double>, FilterNoVec> >(kernel, anchor, delta);
 
     if( sdepth == CV_16S && ddepth == CV_16S )
-        return Ptr<BaseFilter>(new Filter2D<short,
-            Cast<float, short>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<short,
+            Cast<float, short>, FilterNoVec> >(kernel, anchor, delta);
     if( sdepth == CV_16S && ddepth == CV_32F )
-        return Ptr<BaseFilter>(new Filter2D<short,
-            Cast<float, float>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<short,
+            Cast<float, float>, FilterNoVec> >(kernel, anchor, delta);
     if( sdepth == CV_16S && ddepth == CV_64F )
-        return Ptr<BaseFilter>(new Filter2D<short,
-            Cast<double, double>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<short,
+            Cast<double, double>, FilterNoVec> >(kernel, anchor, delta);
 
     if( sdepth == CV_32F && ddepth == CV_32F )
-        return Ptr<BaseFilter>(new Filter2D<float, Cast<float, float>, FilterVec_32f>
-            (kernel, anchor, delta, Cast<float, float>(), FilterVec_32f(kernel, 0, delta)));
+        return makePtr<Filter2D<float, Cast<float, float>, FilterVec_32f> >
+            (kernel, anchor, delta, Cast<float, float>(), FilterVec_32f(kernel, 0, delta));
     if( sdepth == CV_64F && ddepth == CV_64F )
-        return Ptr<BaseFilter>(new Filter2D<double,
-            Cast<double, double>, FilterNoVec>(kernel, anchor, delta));
+        return makePtr<Filter2D<double,
+            Cast<double, double>, FilterNoVec> >(kernel, anchor, delta);
 
     CV_Error_( CV_StsNotImplemented,
         ("Unsupported combination of source format (=%d), and destination format (=%d)",
         srcType, dstType));
 
-    return Ptr<BaseFilter>(0);
+    return Ptr<BaseFilter>();
 }
 
 
@@ -3178,9 +3178,9 @@ cv::Ptr<cv::FilterEngine> cv::createLinearFilter( int _srcType, int _dstType,
     Ptr<BaseFilter> _filter2D = getLinearFilter(_srcType, _dstType,
         kernel, _anchor, _delta, bits);
 
-    return Ptr<FilterEngine>(new FilterEngine(_filter2D, Ptr<BaseRowFilter>(0),
-        Ptr<BaseColumnFilter>(0), _srcType, _dstType, _srcType,
-        _rowBorderType, _columnBorderType, _borderValue ));
+    return makePtr<FilterEngine>(_filter2D, Ptr<BaseRowFilter>(),
+        Ptr<BaseColumnFilter>(), _srcType, _dstType, _srcType,
+        _rowBorderType, _columnBorderType, _borderValue );
 }
 
 
diff --git a/modules/imgproc/src/generalized_hough.cpp b/modules/imgproc/src/generalized_hough.cpp
index 8eadff200..a261d644b 100644
--- a/modules/imgproc/src/generalized_hough.cpp
+++ b/modules/imgproc/src/generalized_hough.cpp
@@ -45,17 +45,10 @@
 
 using namespace cv;
 
+// common
+
 namespace
 {
-    /////////////////////////////////////
-    // Common
-
-    template <typename T, class A> void releaseVector(std::vector<T, A>& v)
-    {
-        std::vector<T, A> empty;
-        empty.swap(v);
-    }
-
     double toRad(double a)
     {
         return a * CV_PI / 180.0;
@@ -66,70 +59,112 @@ namespace
         return fabs(v) > std::numeric_limits<float>::epsilon();
     }
 
-    class GHT_Pos : public GeneralizedHough
+    class GeneralizedHoughBase
     {
-    public:
-        GHT_Pos();
-
     protected:
-        void setTemplateImpl(const Mat& edges, const Mat& dx, const Mat& dy, Point templCenter);
-        void detectImpl(const Mat& edges, const Mat& dx, const Mat& dy, OutputArray positions, OutputArray votes);
-        void releaseImpl();
+        GeneralizedHoughBase();
+        virtual ~GeneralizedHoughBase() {}
+
+        void setTemplateImpl(InputArray templ, Point templCenter);
+        void setTemplateImpl(InputArray edges, InputArray dx, InputArray dy, Point templCenter);
+
+        void detectImpl(InputArray image, OutputArray positions, OutputArray votes);
+        void detectImpl(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes);
 
         virtual void processTempl() = 0;
         virtual void processImage() = 0;
 
+        int cannyLowThresh_;
+        int cannyHighThresh_;
+        double minDist_;
+        double dp_;
+
+        Size templSize_;
+        Point templCenter_;
+        Mat templEdges_;
+        Mat templDx_;
+        Mat templDy_;
+
+        Size imageSize_;
+        Mat imageEdges_;
+        Mat imageDx_;
+        Mat imageDy_;
+
+        std::vector<Vec4f> posOutBuf_;
+        std::vector<Vec3i> voteOutBuf_;
+
+    private:
+        void calcEdges(InputArray src, Mat& edges, Mat& dx, Mat& dy);
         void filterMinDist();
         void convertTo(OutputArray positions, OutputArray votes);
-
-        double minDist;
-
-        Size templSize;
-        Point templCenter;
-        Mat templEdges;
-        Mat templDx;
-        Mat templDy;
-
-        Size imageSize;
-        Mat imageEdges;
-        Mat imageDx;
-        Mat imageDy;
-
-        std::vector<Vec4f> posOutBuf;
-        std::vector<Vec3i> voteOutBuf;
     };
 
-    GHT_Pos::GHT_Pos()
+    GeneralizedHoughBase::GeneralizedHoughBase()
     {
-        minDist = 1.0;
+        cannyLowThresh_ = 50;
+        cannyHighThresh_ = 100;
+        minDist_ = 1.0;
+        dp_ = 1.0;
     }
 
-    void GHT_Pos::setTemplateImpl(const Mat& edges, const Mat& dx, const Mat& dy, Point templCenter_)
+    void GeneralizedHoughBase::calcEdges(InputArray _src, Mat& edges, Mat& dx, Mat& dy)
     {
-        templSize = edges.size();
-        templCenter = templCenter_;
-        edges.copyTo(templEdges);
-        dx.copyTo(templDx);
-        dy.copyTo(templDy);
+        Mat src = _src.getMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+        CV_Assert( cannyLowThresh_ > 0 && cannyLowThresh_ < cannyHighThresh_ );
+
+        Canny(src, edges, cannyLowThresh_, cannyHighThresh_);
+        Sobel(src, dx, CV_32F, 1, 0);
+        Sobel(src, dy, CV_32F, 0, 1);
+    }
+
+    void GeneralizedHoughBase::setTemplateImpl(InputArray templ, Point templCenter)
+    {
+        calcEdges(templ, templEdges_, templDx_, templDy_);
+
+        if (templCenter == Point(-1, -1))
+            templCenter = Point(templEdges_.cols / 2, templEdges_.rows / 2);
+
+        templSize_ = templEdges_.size();
+        templCenter_ = templCenter;
 
         processTempl();
     }
 
-    void GHT_Pos::detectImpl(const Mat& edges, const Mat& dx, const Mat& dy, OutputArray positions, OutputArray votes)
+    void GeneralizedHoughBase::setTemplateImpl(InputArray edges, InputArray dx, InputArray dy, Point templCenter)
     {
-        imageSize = edges.size();
-        edges.copyTo(imageEdges);
-        dx.copyTo(imageDx);
-        dy.copyTo(imageDy);
+        edges.getMat().copyTo(templEdges_);
+        dx.getMat().copyTo(templDx_);
+        dy.getMat().copyTo(templDy_);
 
-        posOutBuf.clear();
-        voteOutBuf.clear();
+        CV_Assert( templEdges_.type() == CV_8UC1 );
+        CV_Assert( templDx_.type() == CV_32FC1 && templDx_.size() == templEdges_.size() );
+        CV_Assert( templDy_.type() == templDx_.type() && templDy_.size() == templEdges_.size() );
+
+        if (templCenter == Point(-1, -1))
+            templCenter = Point(templEdges_.cols / 2, templEdges_.rows / 2);
+
+        templSize_ = templEdges_.size();
+        templCenter_ = templCenter;
+
+        processTempl();
+    }
+
+    void GeneralizedHoughBase::detectImpl(InputArray image, OutputArray positions, OutputArray votes)
+    {
+        calcEdges(image, imageEdges_, imageDx_, imageDy_);
+
+        imageSize_ = imageEdges_.size();
+
+        posOutBuf_.clear();
+        voteOutBuf_.clear();
 
         processImage();
 
-        if (!posOutBuf.empty())
+        if (!posOutBuf_.empty())
         {
-            if (minDist > 1)
+            if (minDist_ > 1)
                 filterMinDist();
             convertTo(positions, votes);
         }
@@ -141,21 +176,35 @@ namespace
         }
     }
 
-    void GHT_Pos::releaseImpl()
+    void GeneralizedHoughBase::detectImpl(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes)
     {
-        templSize = Size();
-        templCenter = Point(-1, -1);
-        templEdges.release();
-        templDx.release();
-        templDy.release();
+        edges.getMat().copyTo(imageEdges_);
+        dx.getMat().copyTo(imageDx_);
+        dy.getMat().copyTo(imageDy_);
 
-        imageSize = Size();
-        imageEdges.release();
-        imageDx.release();
-        imageDy.release();
+        CV_Assert( imageEdges_.type() == CV_8UC1 );
+        CV_Assert( imageDx_.type() == CV_32FC1 && imageDx_.size() == imageEdges_.size() );
+        CV_Assert( imageDy_.type() == imageDx_.type() && imageDy_.size() == imageEdges_.size() );
 
-        releaseVector(posOutBuf);
-        releaseVector(voteOutBuf);
+        imageSize_ = imageEdges_.size();
+
+        posOutBuf_.clear();
+        voteOutBuf_.clear();
+
+        processImage();
+
+        if (!posOutBuf_.empty())
+        {
+            if (minDist_ > 1)
+                filterMinDist();
+            convertTo(positions, votes);
+        }
+        else
+        {
+            positions.release();
+            if (votes.needed())
+                votes.release();
+        }
     }
 
     class Vec3iGreaterThanIdx
@@ -166,31 +215,31 @@ namespace
         const Vec3i* arr;
     };
 
-    void GHT_Pos::filterMinDist()
+    void GeneralizedHoughBase::filterMinDist()
     {
-        size_t oldSize = posOutBuf.size();
-        const bool hasVotes = !voteOutBuf.empty();
+        size_t oldSize = posOutBuf_.size();
+        const bool hasVotes = !voteOutBuf_.empty();
 
-        CV_Assert(!hasVotes || voteOutBuf.size() == oldSize);
+        CV_Assert( !hasVotes || voteOutBuf_.size() == oldSize );
 
-        std::vector<Vec4f> oldPosBuf(posOutBuf);
-        std::vector<Vec3i> oldVoteBuf(voteOutBuf);
+        std::vector<Vec4f> oldPosBuf(posOutBuf_);
+        std::vector<Vec3i> oldVoteBuf(voteOutBuf_);
 
         std::vector<size_t> indexies(oldSize);
         for (size_t i = 0; i < oldSize; ++i)
             indexies[i] = i;
         std::sort(indexies.begin(), indexies.end(), Vec3iGreaterThanIdx(&oldVoteBuf[0]));
 
-        posOutBuf.clear();
-        voteOutBuf.clear();
+        posOutBuf_.clear();
+        voteOutBuf_.clear();
 
-        const int cellSize = cvRound(minDist);
-        const int gridWidth = (imageSize.width + cellSize - 1) / cellSize;
-        const int gridHeight = (imageSize.height + cellSize - 1) / cellSize;
+        const int cellSize = cvRound(minDist_);
+        const int gridWidth = (imageSize_.width + cellSize - 1) / cellSize;
+        const int gridHeight = (imageSize_.height + cellSize - 1) / cellSize;
 
         std::vector< std::vector<Point2f> > grid(gridWidth * gridHeight);
 
-        const double minDist2 = minDist * minDist;
+        const double minDist2 = minDist_ * minDist_;
 
         for (size_t i = 0; i < oldSize; ++i)
         {
@@ -239,108 +288,112 @@ namespace
             {
                 grid[yCell * gridWidth + xCell].push_back(p);
 
-                posOutBuf.push_back(oldPosBuf[ind]);
+                posOutBuf_.push_back(oldPosBuf[ind]);
                 if (hasVotes)
-                    voteOutBuf.push_back(oldVoteBuf[ind]);
+                    voteOutBuf_.push_back(oldVoteBuf[ind]);
             }
         }
     }
 
-    void GHT_Pos::convertTo(OutputArray _positions, OutputArray _votes)
+    void GeneralizedHoughBase::convertTo(OutputArray _positions, OutputArray _votes)
     {
-        const int total = static_cast<int>(posOutBuf.size());
-        const bool hasVotes = !voteOutBuf.empty();
+        const int total = static_cast<int>(posOutBuf_.size());
+        const bool hasVotes = !voteOutBuf_.empty();
 
-        CV_Assert(!hasVotes || voteOutBuf.size() == posOutBuf.size());
+        CV_Assert( !hasVotes || voteOutBuf_.size() == posOutBuf_.size() );
 
         _positions.create(1, total, CV_32FC4);
         Mat positions = _positions.getMat();
-        Mat(1, total, CV_32FC4, &posOutBuf[0]).copyTo(positions);
+        Mat(1, total, CV_32FC4, &posOutBuf_[0]).copyTo(positions);
 
         if (_votes.needed())
         {
             if (!hasVotes)
+            {
                 _votes.release();
+            }
             else
             {
                 _votes.create(1, total, CV_32SC3);
                 Mat votes = _votes.getMat();
-                Mat(1, total, CV_32SC3, &voteOutBuf[0]).copyTo(votes);
+                Mat(1, total, CV_32SC3, &voteOutBuf_[0]).copyTo(votes);
             }
         }
     }
+}
 
-    /////////////////////////////////////
-    // POSITION Ballard
+// GeneralizedHoughBallard
 
-    class GHT_Ballard_Pos : public GHT_Pos
+namespace
+{
+    class GeneralizedHoughBallardImpl : public GeneralizedHoughBallard, private GeneralizedHoughBase
     {
     public:
-        AlgorithmInfo* info() const;
+        GeneralizedHoughBallardImpl();
 
-        GHT_Ballard_Pos();
+        void setTemplate(InputArray templ, Point templCenter) { setTemplateImpl(templ, templCenter); }
+        void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter) { setTemplateImpl(edges, dx, dy, templCenter); }
 
-    protected:
-        void releaseImpl();
+        void detect(InputArray image, OutputArray positions, OutputArray votes) { detectImpl(image, positions, votes); }
+        void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes) { detectImpl(edges, dx, dy, positions, votes); }
 
+        void setCannyLowThresh(int cannyLowThresh) { cannyLowThresh_ = cannyLowThresh; }
+        int getCannyLowThresh() const { return cannyLowThresh_; }
+
+        void setCannyHighThresh(int cannyHighThresh) { cannyHighThresh_ = cannyHighThresh; }
+        int getCannyHighThresh() const { return cannyHighThresh_; }
+
+        void setMinDist(double minDist) { minDist_ = minDist; }
+        double getMinDist() const { return minDist_; }
+
+        void setDp(double dp) { dp_ = dp; }
+        double getDp() const { return dp_; }
+
+        void setMaxBufferSize(int) {  }
+        int getMaxBufferSize() const { return 0; }
+
+        void setLevels(int levels) { levels_ = levels; }
+        int getLevels() const { return levels_; }
+
+        void setVotesThreshold(int votesThreshold) { votesThreshold_ = votesThreshold; }
+        int getVotesThreshold() const { return votesThreshold_; }
+
+    private:
         void processTempl();
         void processImage();
 
-        virtual void calcHist();
-        virtual void findPosInHist();
+        void calcHist();
+        void findPosInHist();
 
-        int levels;
-        int votesThreshold;
-        double dp;
+        int levels_;
+        int votesThreshold_;
 
-        std::vector< std::vector<Point> > r_table;
-        Mat hist;
+        std::vector< std::vector<Point> > r_table_;
+        Mat hist_;
     };
 
-    CV_INIT_ALGORITHM(GHT_Ballard_Pos, "GeneralizedHough.POSITION",
-                      obj.info()->addParam(obj, "minDist", obj.minDist, false, 0, 0,
-                                           "Minimum distance between the centers of the detected objects.");
-                      obj.info()->addParam(obj, "levels", obj.levels, false, 0, 0,
-                                           "R-Table levels.");
-                      obj.info()->addParam(obj, "votesThreshold", obj.votesThreshold, false, 0, 0,
-                                           "The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected.");
-                      obj.info()->addParam(obj, "dp", obj.dp, false, 0, 0,
-                                           "Inverse ratio of the accumulator resolution to the image resolution."));
-
-    GHT_Ballard_Pos::GHT_Ballard_Pos()
+    GeneralizedHoughBallardImpl::GeneralizedHoughBallardImpl()
     {
-        levels = 360;
-        votesThreshold = 100;
-        dp = 1.0;
+        levels_ = 360;
+        votesThreshold_ = 100;
     }
 
-    void GHT_Ballard_Pos::releaseImpl()
+    void GeneralizedHoughBallardImpl::processTempl()
     {
-        GHT_Pos::releaseImpl();
+        CV_Assert( levels_ > 0 );
 
-        releaseVector(r_table);
-        hist.release();
-    }
+        const double thetaScale = levels_ / 360.0;
 
-    void GHT_Ballard_Pos::processTempl()
-    {
-        CV_Assert(templEdges.type() == CV_8UC1);
-        CV_Assert(templDx.type() == CV_32FC1 && templDx.size() == templSize);
-        CV_Assert(templDy.type() == templDx.type() && templDy.size() == templSize);
-        CV_Assert(levels > 0);
+        r_table_.resize(levels_ + 1);
+        std::for_each(r_table_.begin(), r_table_.end(), std::mem_fun_ref(&std::vector<Point>::clear));
 
-        const double thetaScale = levels / 360.0;
-
-        r_table.resize(levels + 1);
-        for_each(r_table.begin(), r_table.end(), mem_fun_ref(&std::vector<Point>::clear));
-
-        for (int y = 0; y < templSize.height; ++y)
+        for (int y = 0; y < templSize_.height; ++y)
         {
-            const uchar* edgesRow = templEdges.ptr(y);
-            const float* dxRow = templDx.ptr<float>(y);
-            const float* dyRow = templDy.ptr<float>(y);
+            const uchar* edgesRow = templEdges_.ptr(y);
+            const float* dxRow = templDx_.ptr<float>(y);
+            const float* dyRow = templDy_.ptr<float>(y);
 
-            for (int x = 0; x < templSize.width; ++x)
+            for (int x = 0; x < templSize_.width; ++x)
             {
                 const Point p(x, y);
 
@@ -348,42 +401,42 @@ namespace
                 {
                     const float theta = fastAtan2(dyRow[x], dxRow[x]);
                     const int n = cvRound(theta * thetaScale);
-                    r_table[n].push_back(p - templCenter);
+                    r_table_[n].push_back(p - templCenter_);
                 }
             }
         }
     }
 
-    void GHT_Ballard_Pos::processImage()
+    void GeneralizedHoughBallardImpl::processImage()
     {
         calcHist();
         findPosInHist();
     }
 
-    void GHT_Ballard_Pos::calcHist()
+    void GeneralizedHoughBallardImpl::calcHist()
     {
-        CV_Assert(imageEdges.type() == CV_8UC1);
-        CV_Assert(imageDx.type() == CV_32FC1 && imageDx.size() == imageSize);
-        CV_Assert(imageDy.type() == imageDx.type() && imageDy.size() == imageSize);
-        CV_Assert(levels > 0 && r_table.size() == static_cast<size_t>(levels + 1));
-        CV_Assert(dp > 0.0);
+        CV_Assert( imageEdges_.type() == CV_8UC1 );
+        CV_Assert( imageDx_.type() == CV_32FC1 && imageDx_.size() == imageSize_);
+        CV_Assert( imageDy_.type() == imageDx_.type() && imageDy_.size() == imageSize_);
+        CV_Assert( levels_ > 0 && r_table_.size() == static_cast<size_t>(levels_ + 1) );
+        CV_Assert( dp_ > 0.0 );
 
-        const double thetaScale = levels / 360.0;
-        const double idp = 1.0 / dp;
+        const double thetaScale = levels_ / 360.0;
+        const double idp = 1.0 / dp_;
 
-        hist.create(cvCeil(imageSize.height * idp) + 2, cvCeil(imageSize.width * idp) + 2, CV_32SC1);
-        hist.setTo(0);
+        hist_.create(cvCeil(imageSize_.height * idp) + 2, cvCeil(imageSize_.width * idp) + 2, CV_32SC1);
+        hist_.setTo(0);
 
-        const int rows = hist.rows - 2;
-        const int cols = hist.cols - 2;
+        const int rows = hist_.rows - 2;
+        const int cols = hist_.cols - 2;
 
-        for (int y = 0; y < imageSize.height; ++y)
+        for (int y = 0; y < imageSize_.height; ++y)
         {
-            const uchar* edgesRow = imageEdges.ptr(y);
-            const float* dxRow = imageDx.ptr<float>(y);
-            const float* dyRow = imageDy.ptr<float>(y);
+            const uchar* edgesRow = imageEdges_.ptr(y);
+            const float* dxRow = imageDx_.ptr<float>(y);
+            const float* dyRow = imageDy_.ptr<float>(y);
 
-            for (int x = 0; x < imageSize.width; ++x)
+            for (int x = 0; x < imageSize_.width; ++x)
             {
                 const Point p(x, y);
 
@@ -392,7 +445,7 @@ namespace
                     const float theta = fastAtan2(dyRow[x], dxRow[x]);
                     const int n = cvRound(theta * thetaScale);
 
-                    const std::vector<Point>& r_row = r_table[n];
+                    const std::vector<Point>& r_row = r_table_[n];
 
                     for (size_t j = 0; j < r_row.size(); ++j)
                     {
@@ -402,407 +455,132 @@ namespace
                         c.y = cvRound(c.y * idp);
 
                         if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows)
-                            ++hist.at<int>(c.y + 1, c.x + 1);
+                            ++hist_.at<int>(c.y + 1, c.x + 1);
                     }
                 }
             }
         }
     }
 
-    void GHT_Ballard_Pos::findPosInHist()
+    void GeneralizedHoughBallardImpl::findPosInHist()
     {
-        CV_Assert(votesThreshold > 0);
+        CV_Assert( votesThreshold_ > 0 );
 
-        const int histRows = hist.rows - 2;
-        const int histCols = hist.cols - 2;
+        const int histRows = hist_.rows - 2;
+        const int histCols = hist_.cols - 2;
 
         for(int y = 0; y < histRows; ++y)
         {
-            const int* prevRow = hist.ptr<int>(y);
-            const int* curRow = hist.ptr<int>(y + 1);
-            const int* nextRow = hist.ptr<int>(y + 2);
+            const int* prevRow = hist_.ptr<int>(y);
+            const int* curRow = hist_.ptr<int>(y + 1);
+            const int* nextRow = hist_.ptr<int>(y + 2);
 
             for(int x = 0; x < histCols; ++x)
             {
                 const int votes = curRow[x + 1];
 
-                if (votes > votesThreshold && votes > curRow[x] && votes >= curRow[x + 2] && votes > prevRow[x + 1] && votes >= nextRow[x + 1])
+                if (votes > votesThreshold_ && votes > curRow[x] && votes >= curRow[x + 2] && votes > prevRow[x + 1] && votes >= nextRow[x + 1])
                 {
-                    posOutBuf.push_back(Vec4f(static_cast<float>(x * dp), static_cast<float>(y * dp), 1.0f, 0.0f));
-                    voteOutBuf.push_back(Vec3i(votes, 0, 0));
+                    posOutBuf_.push_back(Vec4f(static_cast<float>(x * dp_), static_cast<float>(y * dp_), 1.0f, 0.0f));
+                    voteOutBuf_.push_back(Vec3i(votes, 0, 0));
                 }
             }
         }
     }
+}
 
-    /////////////////////////////////////
-    // POSITION & SCALE
+Ptr<GeneralizedHoughBallard> cv::createGeneralizedHoughBallard()
+{
+    return makePtr<GeneralizedHoughBallardImpl>();
+}
 
-    class GHT_Ballard_PosScale : public GHT_Ballard_Pos
+// GeneralizedHoughGuil
+
+namespace
+{
+    class GeneralizedHoughGuilImpl : public GeneralizedHoughGuil, private GeneralizedHoughBase
     {
     public:
-        AlgorithmInfo* info() const;
+        GeneralizedHoughGuilImpl();
 
-        GHT_Ballard_PosScale();
+        void setTemplate(InputArray templ, Point templCenter) { setTemplateImpl(templ, templCenter); }
+        void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter) { setTemplateImpl(edges, dx, dy, templCenter); }
 
-    protected:
-        void calcHist();
-        void findPosInHist();
+        void detect(InputArray image, OutputArray positions, OutputArray votes) { detectImpl(image, positions, votes); }
+        void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes) { detectImpl(edges, dx, dy, positions, votes); }
 
-        double minScale;
-        double maxScale;
-        double scaleStep;
+        void setCannyLowThresh(int cannyLowThresh) { cannyLowThresh_ = cannyLowThresh; }
+        int getCannyLowThresh() const { return cannyLowThresh_; }
 
-        class Worker;
-        friend class Worker;
-    };
+        void setCannyHighThresh(int cannyHighThresh) { cannyHighThresh_ = cannyHighThresh; }
+        int getCannyHighThresh() const { return cannyHighThresh_; }
 
-    CV_INIT_ALGORITHM(GHT_Ballard_PosScale, "GeneralizedHough.POSITION_SCALE",
-                      obj.info()->addParam(obj, "minDist", obj.minDist, false, 0, 0,
-                                           "Minimum distance between the centers of the detected objects.");
-                      obj.info()->addParam(obj, "levels", obj.levels, false, 0, 0,
-                                           "R-Table levels.");
-                      obj.info()->addParam(obj, "votesThreshold", obj.votesThreshold, false, 0, 0,
-                                           "The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected.");
-                      obj.info()->addParam(obj, "dp", obj.dp, false, 0, 0,
-                                           "Inverse ratio of the accumulator resolution to the image resolution.");
-                      obj.info()->addParam(obj, "minScale", obj.minScale, false, 0, 0,
-                                           "Minimal scale to detect.");
-                      obj.info()->addParam(obj, "maxScale", obj.maxScale, false, 0, 0,
-                                           "Maximal scale to detect.");
-                      obj.info()->addParam(obj, "scaleStep", obj.scaleStep, false, 0, 0,
-                                           "Scale step."));
+        void setMinDist(double minDist) { minDist_ = minDist; }
+        double getMinDist() const { return minDist_; }
 
-    GHT_Ballard_PosScale::GHT_Ballard_PosScale()
-    {
-        minScale = 0.5;
-        maxScale = 2.0;
-        scaleStep = 0.05;
-    }
+        void setDp(double dp) { dp_ = dp; }
+        double getDp() const { return dp_; }
 
-    class GHT_Ballard_PosScale::Worker : public ParallelLoopBody
-    {
-    public:
-        explicit Worker(GHT_Ballard_PosScale* base_) : base(base_) {}
+        void setMaxBufferSize(int maxBufferSize) { maxBufferSize_ = maxBufferSize; }
+        int getMaxBufferSize() const { return maxBufferSize_; }
 
-        void operator ()(const Range& range) const;
+        void setXi(double xi) { xi_ = xi; }
+        double getXi() const { return xi_; }
+
+        void setLevels(int levels) { levels_ = levels; }
+        int getLevels() const { return levels_; }
+
+        void setAngleEpsilon(double angleEpsilon) { angleEpsilon_ = angleEpsilon; }
+        double getAngleEpsilon() const { return angleEpsilon_; }
+
+        void setMinAngle(double minAngle) { minAngle_ = minAngle; }
+        double getMinAngle() const { return minAngle_; }
+
+        void setMaxAngle(double maxAngle) { maxAngle_ = maxAngle; }
+        double getMaxAngle() const { return maxAngle_; }
+
+        void setAngleStep(double angleStep) { angleStep_ = angleStep; }
+        double getAngleStep() const { return angleStep_; }
+
+        void setAngleThresh(int angleThresh) { angleThresh_ = angleThresh; }
+        int getAngleThresh() const { return angleThresh_; }
+
+        void setMinScale(double minScale) { minScale_ = minScale; }
+        double getMinScale() const { return minScale_; }
+
+        void setMaxScale(double maxScale) { maxScale_ = maxScale; }
+        double getMaxScale() const { return maxScale_; }
+
+        void setScaleStep(double scaleStep) { scaleStep_ = scaleStep; }
+        double getScaleStep() const { return scaleStep_; }
+
+        void setScaleThresh(int scaleThresh) { scaleThresh_ = scaleThresh; }
+        int getScaleThresh() const { return scaleThresh_; }
+
+        void setPosThresh(int posThresh) { posThresh_ = posThresh; }
+        int getPosThresh() const { return posThresh_; }
 
     private:
-        GHT_Ballard_PosScale* base;
-    };
-
-    void GHT_Ballard_PosScale::Worker::operator ()(const Range& range) const
-    {
-        const double thetaScale = base->levels / 360.0;
-        const double idp = 1.0 / base->dp;
-
-        for (int s = range.start; s < range.end; ++s)
-        {
-            const double scale = base->minScale + s * base->scaleStep;
-
-            Mat curHist(base->hist.size[1], base->hist.size[2], CV_32SC1, base->hist.ptr(s + 1), base->hist.step[1]);
-
-            for (int y = 0; y < base->imageSize.height; ++y)
-            {
-                const uchar* edgesRow = base->imageEdges.ptr(y);
-                const float* dxRow = base->imageDx.ptr<float>(y);
-                const float* dyRow = base->imageDy.ptr<float>(y);
-
-                for (int x = 0; x < base->imageSize.width; ++x)
-                {
-                    const Point2d p(x, y);
-
-                    if (edgesRow[x] && (notNull(dyRow[x]) || notNull(dxRow[x])))
-                    {
-                        const float theta = fastAtan2(dyRow[x], dxRow[x]);
-                        const int n = cvRound(theta * thetaScale);
-
-                        const std::vector<Point>& r_row = base->r_table[n];
-
-                        for (size_t j = 0; j < r_row.size(); ++j)
-                        {
-                            Point2d d = r_row[j];
-                            Point2d c = p - d * scale;
-
-                            c.x *= idp;
-                            c.y *= idp;
-
-                            if (c.x >= 0 && c.x < base->hist.size[2] - 2 && c.y >= 0 && c.y < base->hist.size[1] - 2)
-                                ++curHist.at<int>(cvRound(c.y + 1), cvRound(c.x + 1));
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    void GHT_Ballard_PosScale::calcHist()
-    {
-        CV_Assert(imageEdges.type() == CV_8UC1);
-        CV_Assert(imageDx.type() == CV_32FC1 && imageDx.size() == imageSize);
-        CV_Assert(imageDy.type() == imageDx.type() && imageDy.size() == imageSize);
-        CV_Assert(levels > 0 && r_table.size() == static_cast<size_t>(levels + 1));
-        CV_Assert(dp > 0.0);
-        CV_Assert(minScale > 0.0 && minScale < maxScale);
-        CV_Assert(scaleStep > 0.0);
-
-        const double idp = 1.0 / dp;
-        const int scaleRange = cvCeil((maxScale - minScale) / scaleStep);
-
-        const int sizes[] = {scaleRange + 2, cvCeil(imageSize.height * idp) + 2, cvCeil(imageSize.width * idp) + 2};
-        hist.create(3, sizes, CV_32SC1);
-        hist.setTo(0);
-
-        parallel_for_(Range(0, scaleRange), Worker(this));
-    }
-
-    void GHT_Ballard_PosScale::findPosInHist()
-    {
-        CV_Assert(votesThreshold > 0);
-
-        const int scaleRange = hist.size[0] - 2;
-        const int histRows = hist.size[1] - 2;
-        const int histCols = hist.size[2] - 2;
-
-        for (int s = 0; s < scaleRange; ++s)
-        {
-            const float scale = static_cast<float>(minScale + s * scaleStep);
-
-            const Mat prevHist(histRows + 2, histCols + 2, CV_32SC1, hist.ptr(s), hist.step[1]);
-            const Mat curHist(histRows + 2, histCols + 2, CV_32SC1, hist.ptr(s + 1), hist.step[1]);
-            const Mat nextHist(histRows + 2, histCols + 2, CV_32SC1, hist.ptr(s + 2), hist.step[1]);
-
-            for(int y = 0; y < histRows; ++y)
-            {
-                const int* prevHistRow = prevHist.ptr<int>(y + 1);
-                const int* prevRow = curHist.ptr<int>(y);
-                const int* curRow = curHist.ptr<int>(y + 1);
-                const int* nextRow = curHist.ptr<int>(y + 2);
-                const int* nextHistRow = nextHist.ptr<int>(y + 1);
-
-                for(int x = 0; x < histCols; ++x)
-                {
-                    const int votes = curRow[x + 1];
-
-                    if (votes > votesThreshold &&
-                        votes > curRow[x] &&
-                        votes >= curRow[x + 2] &&
-                        votes > prevRow[x + 1] &&
-                        votes >= nextRow[x + 1] &&
-                        votes > prevHistRow[x + 1] &&
-                        votes >= nextHistRow[x + 1])
-                    {
-                        posOutBuf.push_back(Vec4f(static_cast<float>(x * dp), static_cast<float>(y * dp), scale, 0.0f));
-                        voteOutBuf.push_back(Vec3i(votes, votes, 0));
-                    }
-                }
-            }
-        }
-    }
-
-    /////////////////////////////////////
-    // POSITION & ROTATION
-
-    class GHT_Ballard_PosRotation : public GHT_Ballard_Pos
-    {
-    public:
-        AlgorithmInfo* info() const;
-
-        GHT_Ballard_PosRotation();
-
-    protected:
-        void calcHist();
-        void findPosInHist();
-
-        double minAngle;
-        double maxAngle;
-        double angleStep;
-
-        class Worker;
-        friend class Worker;
-    };
-
-    CV_INIT_ALGORITHM(GHT_Ballard_PosRotation, "GeneralizedHough.POSITION_ROTATION",
-                      obj.info()->addParam(obj, "minDist", obj.minDist, false, 0, 0,
-                                           "Minimum distance between the centers of the detected objects.");
-                      obj.info()->addParam(obj, "levels", obj.levels, false, 0, 0,
-                                           "R-Table levels.");
-                      obj.info()->addParam(obj, "votesThreshold", obj.votesThreshold, false, 0, 0,
-                                           "The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected.");
-                      obj.info()->addParam(obj, "dp", obj.dp, false, 0, 0,
-                                           "Inverse ratio of the accumulator resolution to the image resolution.");
-                      obj.info()->addParam(obj, "minAngle", obj.minAngle, false, 0, 0,
-                                           "Minimal rotation angle to detect in degrees.");
-                      obj.info()->addParam(obj, "maxAngle", obj.maxAngle, false, 0, 0,
-                                           "Maximal rotation angle to detect in degrees.");
-                      obj.info()->addParam(obj, "angleStep", obj.angleStep, false, 0, 0,
-                                           "Angle step in degrees."));
-
-    GHT_Ballard_PosRotation::GHT_Ballard_PosRotation()
-    {
-        minAngle = 0.0;
-        maxAngle = 360.0;
-        angleStep = 1.0;
-    }
-
-    class GHT_Ballard_PosRotation::Worker : public ParallelLoopBody
-    {
-    public:
-        explicit Worker(GHT_Ballard_PosRotation* base_) : base(base_) {}
-
-        void operator ()(const Range& range) const;
-
-    private:
-        GHT_Ballard_PosRotation* base;
-    };
-
-    void GHT_Ballard_PosRotation::Worker::operator ()(const Range& range) const
-    {
-        const double thetaScale = base->levels / 360.0;
-        const double idp = 1.0 / base->dp;
-
-        for (int a = range.start; a < range.end; ++a)
-        {
-            const double angle = base->minAngle + a * base->angleStep;
-
-            const double sinA = ::sin(toRad(angle));
-            const double cosA = ::cos(toRad(angle));
-
-            Mat curHist(base->hist.size[1], base->hist.size[2], CV_32SC1, base->hist.ptr(a + 1), base->hist.step[1]);
-
-            for (int y = 0; y < base->imageSize.height; ++y)
-            {
-                const uchar* edgesRow = base->imageEdges.ptr(y);
-                const float* dxRow = base->imageDx.ptr<float>(y);
-                const float* dyRow = base->imageDy.ptr<float>(y);
-
-                for (int x = 0; x < base->imageSize.width; ++x)
-                {
-                    const Point2d p(x, y);
-
-                    if (edgesRow[x] && (notNull(dyRow[x]) || notNull(dxRow[x])))
-                    {
-                        double theta = fastAtan2(dyRow[x], dxRow[x]) - angle;
-                        if (theta < 0)
-                            theta += 360.0;
-                        const int n = cvRound(theta * thetaScale);
-
-                        const std::vector<Point>& r_row = base->r_table[n];
-
-                        for (size_t j = 0; j < r_row.size(); ++j)
-                        {
-                            Point2d d = r_row[j];
-                            Point2d c = p - Point2d(d.x * cosA - d.y * sinA, d.x * sinA + d.y * cosA);
-
-                            c.x *= idp;
-                            c.y *= idp;
-
-                            if (c.x >= 0 && c.x < base->hist.size[2] - 2 && c.y >= 0 && c.y < base->hist.size[1] - 2)
-                                ++curHist.at<int>(cvRound(c.y + 1), cvRound(c.x + 1));
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    void GHT_Ballard_PosRotation::calcHist()
-    {
-        CV_Assert(imageEdges.type() == CV_8UC1);
-        CV_Assert(imageDx.type() == CV_32FC1 && imageDx.size() == imageSize);
-        CV_Assert(imageDy.type() == imageDx.type() && imageDy.size() == imageSize);
-        CV_Assert(levels > 0 && r_table.size() == static_cast<size_t>(levels + 1));
-        CV_Assert(dp > 0.0);
-        CV_Assert(minAngle >= 0.0 && minAngle < maxAngle && maxAngle <= 360.0);
-        CV_Assert(angleStep > 0.0 && angleStep < 360.0);
-
-        const double idp = 1.0 / dp;
-        const int angleRange = cvCeil((maxAngle - minAngle) / angleStep);
-
-        const int sizes[] = {angleRange + 2, cvCeil(imageSize.height * idp) + 2, cvCeil(imageSize.width * idp) + 2};
-        hist.create(3, sizes, CV_32SC1);
-        hist.setTo(0);
-
-        parallel_for_(Range(0, angleRange), Worker(this));
-    }
-
-    void GHT_Ballard_PosRotation::findPosInHist()
-    {
-        CV_Assert(votesThreshold > 0);
-
-        const int angleRange = hist.size[0] - 2;
-        const int histRows = hist.size[1] - 2;
-        const int histCols = hist.size[2] - 2;
-
-        for (int a = 0; a < angleRange; ++a)
-        {
-            const float angle = static_cast<float>(minAngle + a * angleStep);
-
-            const Mat prevHist(histRows + 2, histCols + 2, CV_32SC1, hist.ptr(a), hist.step[1]);
-            const Mat curHist(histRows + 2, histCols + 2, CV_32SC1, hist.ptr(a + 1), hist.step[1]);
-            const Mat nextHist(histRows + 2, histCols + 2, CV_32SC1, hist.ptr(a + 2), hist.step[1]);
-
-            for(int y = 0; y < histRows; ++y)
-            {
-                const int* prevHistRow = prevHist.ptr<int>(y + 1);
-                const int* prevRow = curHist.ptr<int>(y);
-                const int* curRow = curHist.ptr<int>(y + 1);
-                const int* nextRow = curHist.ptr<int>(y + 2);
-                const int* nextHistRow = nextHist.ptr<int>(y + 1);
-
-                for(int x = 0; x < histCols; ++x)
-                {
-                    const int votes = curRow[x + 1];
-
-                    if (votes > votesThreshold &&
-                        votes > curRow[x] &&
-                        votes >= curRow[x + 2] &&
-                        votes > prevRow[x + 1] &&
-                        votes >= nextRow[x + 1] &&
-                        votes > prevHistRow[x + 1] &&
-                        votes >= nextHistRow[x + 1])
-                    {
-                        posOutBuf.push_back(Vec4f(static_cast<float>(x * dp), static_cast<float>(y * dp), 1.0f, angle));
-                        voteOutBuf.push_back(Vec3i(votes, 0, votes));
-                    }
-                }
-            }
-        }
-    }
-
-    /////////////////////////////////////////
-    // POSITION & SCALE & ROTATION
-
-    double clampAngle(double a)
-    {
-        double res = a;
-
-        while (res > 360.0)
-            res -= 360.0;
-        while (res < 0)
-            res += 360.0;
-
-        return res;
-    }
-
-    bool angleEq(double a, double b, double eps = 1.0)
-    {
-        return (fabs(clampAngle(a - b)) <= eps);
-    }
-
-    class GHT_Guil_Full : public GHT_Pos
-    {
-    public:
-        AlgorithmInfo* info() const;
-
-        GHT_Guil_Full();
-
-    protected:
-        void releaseImpl();
-
         void processTempl();
         void processImage();
 
+        int maxBufferSize_;
+        double xi_;
+        int levels_;
+        double angleEpsilon_;
+
+        double minAngle_;
+        double maxAngle_;
+        double angleStep_;
+        int angleThresh_;
+
+        double minScale_;
+        double maxScale_;
+        double scaleStep_;
+        int scaleThresh_;
+
+        int posThresh_;
+
         struct ContourPoint
         {
             Point2d pos;
@@ -828,137 +606,92 @@ namespace
         void calcScale(double angle);
         void calcPosition(double angle, int angleVotes, double scale, int scaleVotes);
 
-        int maxSize;
-        double xi;
-        int levels;
-        double angleEpsilon;
+        std::vector< std::vector<Feature> > templFeatures_;
+        std::vector< std::vector<Feature> > imageFeatures_;
 
-        double minAngle;
-        double maxAngle;
-        double angleStep;
-        int angleThresh;
-
-        double minScale;
-        double maxScale;
-        double scaleStep;
-        int scaleThresh;
-
-        double dp;
-        int posThresh;
-
-        std::vector< std::vector<Feature> > templFeatures;
-        std::vector< std::vector<Feature> > imageFeatures;
-
-        std::vector< std::pair<double, int> > angles;
-        std::vector< std::pair<double, int> > scales;
+        std::vector< std::pair<double, int> > angles_;
+        std::vector< std::pair<double, int> > scales_;
     };
 
-    CV_INIT_ALGORITHM(GHT_Guil_Full, "GeneralizedHough.POSITION_SCALE_ROTATION",
-                      obj.info()->addParam(obj, "minDist", obj.minDist, false, 0, 0,
-                                           "Minimum distance between the centers of the detected objects.");
-                      obj.info()->addParam(obj, "maxSize", obj.maxSize, false, 0, 0,
-                                           "Maximal size of inner buffers.");
-                      obj.info()->addParam(obj, "xi", obj.xi, false, 0, 0,
-                                           "Angle difference in degrees between two points in feature.");
-                      obj.info()->addParam(obj, "levels", obj.levels, false, 0, 0,
-                                           "Feature table levels.");
-                      obj.info()->addParam(obj, "angleEpsilon", obj.angleEpsilon, false, 0, 0,
-                                           "Maximal difference between angles that treated as equal.");
-                      obj.info()->addParam(obj, "minAngle", obj.minAngle, false, 0, 0,
-                                           "Minimal rotation angle to detect in degrees.");
-                      obj.info()->addParam(obj, "maxAngle", obj.maxAngle, false, 0, 0,
-                                           "Maximal rotation angle to detect in degrees.");
-                      obj.info()->addParam(obj, "angleStep", obj.angleStep, false, 0, 0,
-                                           "Angle step in degrees.");
-                      obj.info()->addParam(obj, "angleThresh", obj.angleThresh, false, 0, 0,
-                                           "Angle threshold.");
-                      obj.info()->addParam(obj, "minScale", obj.minScale, false, 0, 0,
-                                           "Minimal scale to detect.");
-                      obj.info()->addParam(obj, "maxScale", obj.maxScale, false, 0, 0,
-                                           "Maximal scale to detect.");
-                      obj.info()->addParam(obj, "scaleStep", obj.scaleStep, false, 0, 0,
-                                           "Scale step.");
-                      obj.info()->addParam(obj, "scaleThresh", obj.scaleThresh, false, 0, 0,
-                                           "Scale threshold.");
-                      obj.info()->addParam(obj, "dp", obj.dp, false, 0, 0,
-                                           "Inverse ratio of the accumulator resolution to the image resolution.");
-                      obj.info()->addParam(obj, "posThresh", obj.posThresh, false, 0, 0,
-                                           "Position threshold."));
-
-    GHT_Guil_Full::GHT_Guil_Full()
+    double clampAngle(double a)
     {
-        maxSize = 1000;
-        xi = 90.0;
-        levels = 360;
-        angleEpsilon = 1.0;
+        double res = a;
 
-        minAngle = 0.0;
-        maxAngle = 360.0;
-        angleStep = 1.0;
-        angleThresh = 15000;
+        while (res > 360.0)
+            res -= 360.0;
+        while (res < 0)
+            res += 360.0;
 
-        minScale = 0.5;
-        maxScale = 2.0;
-        scaleStep = 0.05;
-        scaleThresh = 1000;
-
-        dp = 1.0;
-        posThresh = 100;
+        return res;
     }
 
-    void GHT_Guil_Full::releaseImpl()
+    bool angleEq(double a, double b, double eps = 1.0)
     {
-        GHT_Pos::releaseImpl();
-
-        releaseVector(templFeatures);
-        releaseVector(imageFeatures);
-
-        releaseVector(angles);
-        releaseVector(scales);
+        return (fabs(clampAngle(a - b)) <= eps);
     }
 
-    void GHT_Guil_Full::processTempl()
+    GeneralizedHoughGuilImpl::GeneralizedHoughGuilImpl()
     {
-        buildFeatureList(templEdges, templDx, templDy, templFeatures, templCenter);
+        maxBufferSize_ = 1000;
+        xi_ = 90.0;
+        levels_ = 360;
+        angleEpsilon_ = 1.0;
+
+        minAngle_ = 0.0;
+        maxAngle_ = 360.0;
+        angleStep_ = 1.0;
+        angleThresh_ = 15000;
+
+        minScale_ = 0.5;
+        maxScale_ = 2.0;
+        scaleStep_ = 0.05;
+        scaleThresh_ = 1000;
+
+        posThresh_ = 100;
     }
 
-    void GHT_Guil_Full::processImage()
+    void GeneralizedHoughGuilImpl::processTempl()
     {
-        buildFeatureList(imageEdges, imageDx, imageDy, imageFeatures);
+        buildFeatureList(templEdges_, templDx_, templDy_, templFeatures_, templCenter_);
+    }
+
+    void GeneralizedHoughGuilImpl::processImage()
+    {
+        buildFeatureList(imageEdges_, imageDx_, imageDy_, imageFeatures_);
 
         calcOrientation();
 
-        for (size_t i = 0; i < angles.size(); ++i)
+        for (size_t i = 0; i < angles_.size(); ++i)
         {
-            const double angle = angles[i].first;
-            const int angleVotes = angles[i].second;
+            const double angle = angles_[i].first;
+            const int angleVotes = angles_[i].second;
 
             calcScale(angle);
 
-            for (size_t j = 0; j < scales.size(); ++j)
+            for (size_t j = 0; j < scales_.size(); ++j)
             {
-                const double scale = scales[j].first;
-                const int scaleVotes = scales[j].second;
+                const double scale = scales_[j].first;
+                const int scaleVotes = scales_[j].second;
 
                 calcPosition(angle, angleVotes, scale, scaleVotes);
             }
         }
     }
 
-    void GHT_Guil_Full::buildFeatureList(const Mat& edges, const Mat& dx, const Mat& dy, std::vector< std::vector<Feature> >& features, Point2d center)
+    void GeneralizedHoughGuilImpl::buildFeatureList(const Mat& edges, const Mat& dx, const Mat& dy, std::vector< std::vector<Feature> >& features, Point2d center)
     {
-        CV_Assert(levels > 0);
+        CV_Assert( levels_ > 0 );
 
-        const double maxDist = sqrt((double) templSize.width * templSize.width + templSize.height * templSize.height) * maxScale;
+        const double maxDist = sqrt((double) templSize_.width * templSize_.width + templSize_.height * templSize_.height) * maxScale_;
 
-        const double alphaScale = levels / 360.0;
+        const double alphaScale = levels_ / 360.0;
 
         std::vector<ContourPoint> points;
         getContourPoints(edges, dx, dy, points);
 
-        features.resize(levels + 1);
-        for_each(features.begin(), features.end(), mem_fun_ref(&std::vector<Feature>::clear));
-        for_each(features.begin(), features.end(), bind2nd(mem_fun_ref(&std::vector<Feature>::reserve), maxSize));
+        features.resize(levels_ + 1);
+        std::for_each(features.begin(), features.end(), std::mem_fun_ref(&std::vector<Feature>::clear));
+        std::for_each(features.begin(), features.end(), std::bind2nd(std::mem_fun_ref(&std::vector<Feature>::reserve), maxBufferSize_));
 
         for (size_t i = 0; i < points.size(); ++i)
         {
@@ -968,7 +701,7 @@ namespace
             {
                 ContourPoint p2 = points[j];
 
-                if (angleEq(p1.theta - p2.theta, xi, angleEpsilon))
+                if (angleEq(p1.theta - p2.theta, xi_, angleEpsilon_))
                 {
                     const Point2d d = p1.pos - p2.pos;
 
@@ -988,18 +721,18 @@ namespace
 
                     const int n = cvRound(f.alpha12 * alphaScale);
 
-                    if (features[n].size() < static_cast<size_t>(maxSize))
+                    if (features[n].size() < static_cast<size_t>(maxBufferSize_))
                         features[n].push_back(f);
                 }
             }
         }
     }
 
-    void GHT_Guil_Full::getContourPoints(const Mat& edges, const Mat& dx, const Mat& dy, std::vector<ContourPoint>& points)
+    void GeneralizedHoughGuilImpl::getContourPoints(const Mat& edges, const Mat& dx, const Mat& dy, std::vector<ContourPoint>& points)
     {
-        CV_Assert(edges.type() == CV_8UC1);
-        CV_Assert(dx.type() == CV_32FC1 && dx.size == edges.size);
-        CV_Assert(dy.type() == dx.type() && dy.size == edges.size);
+        CV_Assert( edges.type() == CV_8UC1 );
+        CV_Assert( dx.type() == CV_32FC1 && dx.size == edges.size );
+        CV_Assert( dy.type() == dx.type() && dy.size == edges.size );
 
         points.clear();
         points.reserve(edges.size().area());
@@ -1025,23 +758,23 @@ namespace
         }
     }
 
-    void GHT_Guil_Full::calcOrientation()
+    void GeneralizedHoughGuilImpl::calcOrientation()
     {
-        CV_Assert(levels > 0);
-        CV_Assert(templFeatures.size() == static_cast<size_t>(levels + 1));
-        CV_Assert(imageFeatures.size() == templFeatures.size());
-        CV_Assert(minAngle >= 0.0 && minAngle < maxAngle && maxAngle <= 360.0);
-        CV_Assert(angleStep > 0.0 && angleStep < 360.0);
-        CV_Assert(angleThresh > 0);
+        CV_Assert( levels_ > 0 );
+        CV_Assert( templFeatures_.size() == static_cast<size_t>(levels_ + 1) );
+        CV_Assert( imageFeatures_.size() == templFeatures_.size() );
+        CV_Assert( minAngle_ >= 0.0 && minAngle_ < maxAngle_ && maxAngle_ <= 360.0 );
+        CV_Assert( angleStep_ > 0.0 && angleStep_ < 360.0 );
+        CV_Assert( angleThresh_ > 0 );
 
-        const double iAngleStep = 1.0 / angleStep;
-        const int angleRange = cvCeil((maxAngle - minAngle) * iAngleStep);
+        const double iAngleStep = 1.0 / angleStep_;
+        const int angleRange = cvCeil((maxAngle_ - minAngle_) * iAngleStep);
 
         std::vector<int> OHist(angleRange + 1, 0);
-        for (int i = 0; i <= levels; ++i)
+        for (int i = 0; i <= levels_; ++i)
         {
-            const std::vector<Feature>& templRow = templFeatures[i];
-            const std::vector<Feature>& imageRow = imageFeatures[i];
+            const std::vector<Feature>& templRow = templFeatures_[i];
+            const std::vector<Feature>& imageRow = imageFeatures_[i];
 
             for (size_t j = 0; j < templRow.size(); ++j)
             {
@@ -1052,45 +785,45 @@ namespace
                     Feature imF = imageRow[k];
 
                     const double angle = clampAngle(imF.p1.theta - templF.p1.theta);
-                    if (angle >= minAngle && angle <= maxAngle)
+                    if (angle >= minAngle_ && angle <= maxAngle_)
                     {
-                        const int n = cvRound((angle - minAngle) * iAngleStep);
+                        const int n = cvRound((angle - minAngle_) * iAngleStep);
                         ++OHist[n];
                     }
                 }
             }
         }
 
-        angles.clear();
+        angles_.clear();
 
         for (int n = 0; n < angleRange; ++n)
         {
-            if (OHist[n] >= angleThresh)
+            if (OHist[n] >= angleThresh_)
             {
-                const double angle = minAngle + n * angleStep;
-                angles.push_back(std::make_pair(angle, OHist[n]));
+                const double angle = minAngle_ + n * angleStep_;
+                angles_.push_back(std::make_pair(angle, OHist[n]));
             }
         }
     }
 
-    void GHT_Guil_Full::calcScale(double angle)
+    void GeneralizedHoughGuilImpl::calcScale(double angle)
     {
-        CV_Assert(levels > 0);
-        CV_Assert(templFeatures.size() == static_cast<size_t>(levels + 1));
-        CV_Assert(imageFeatures.size() == templFeatures.size());
-        CV_Assert(minScale > 0.0 && minScale < maxScale);
-        CV_Assert(scaleStep > 0.0);
-        CV_Assert(scaleThresh > 0);
+        CV_Assert( levels_ > 0 );
+        CV_Assert( templFeatures_.size() == static_cast<size_t>(levels_ + 1) );
+        CV_Assert( imageFeatures_.size() == templFeatures_.size() );
+        CV_Assert( minScale_ > 0.0 && minScale_ < maxScale_ );
+        CV_Assert( scaleStep_ > 0.0 );
+        CV_Assert( scaleThresh_ > 0 );
 
-        const double iScaleStep = 1.0 / scaleStep;
-        const int scaleRange = cvCeil((maxScale - minScale) * iScaleStep);
+        const double iScaleStep = 1.0 / scaleStep_;
+        const int scaleRange = cvCeil((maxScale_ - minScale_) * iScaleStep);
 
         std::vector<int> SHist(scaleRange + 1, 0);
 
-        for (int i = 0; i <= levels; ++i)
+        for (int i = 0; i <= levels_; ++i)
         {
-            const std::vector<Feature>& templRow = templFeatures[i];
-            const std::vector<Feature>& imageRow = imageFeatures[i];
+            const std::vector<Feature>& templRow = templFeatures_[i];
+            const std::vector<Feature>& imageRow = imageFeatures_[i];
 
             for (size_t j = 0; j < templRow.size(); ++j)
             {
@@ -1102,12 +835,12 @@ namespace
                 {
                     Feature imF = imageRow[k];
 
-                    if (angleEq(imF.p1.theta, templF.p1.theta, angleEpsilon))
+                    if (angleEq(imF.p1.theta, templF.p1.theta, angleEpsilon_))
                     {
                         const double scale = imF.d12 / templF.d12;
-                        if (scale >= minScale && scale <= maxScale)
+                        if (scale >= minScale_ && scale <= maxScale_)
                         {
-                            const int s = cvRound((scale - minScale) * iScaleStep);
+                            const int s = cvRound((scale - minScale_) * iScaleStep);
                             ++SHist[s];
                         }
                     }
@@ -1115,39 +848,39 @@ namespace
             }
         }
 
-        scales.clear();
+        scales_.clear();
 
         for (int s = 0; s < scaleRange; ++s)
         {
-            if (SHist[s] >= scaleThresh)
+            if (SHist[s] >= scaleThresh_)
             {
-                const double scale = minScale + s * scaleStep;
-                scales.push_back(std::make_pair(scale, SHist[s]));
+                const double scale = minScale_ + s * scaleStep_;
+                scales_.push_back(std::make_pair(scale, SHist[s]));
             }
         }
     }
 
-    void GHT_Guil_Full::calcPosition(double angle, int angleVotes, double scale, int scaleVotes)
+    void GeneralizedHoughGuilImpl::calcPosition(double angle, int angleVotes, double scale, int scaleVotes)
     {
-        CV_Assert(levels > 0);
-        CV_Assert(templFeatures.size() == static_cast<size_t>(levels + 1));
-        CV_Assert(imageFeatures.size() == templFeatures.size());
-        CV_Assert(dp > 0.0);
-        CV_Assert(posThresh > 0);
+        CV_Assert( levels_ > 0 );
+        CV_Assert( templFeatures_.size() == static_cast<size_t>(levels_ + 1) );
+        CV_Assert( imageFeatures_.size() == templFeatures_.size() );
+        CV_Assert( dp_ > 0.0 );
+        CV_Assert( posThresh_ > 0 );
 
         const double sinVal = sin(toRad(angle));
         const double cosVal = cos(toRad(angle));
-        const double idp = 1.0 / dp;
+        const double idp = 1.0 / dp_;
 
-        const int histRows = cvCeil(imageSize.height * idp);
-        const int histCols = cvCeil(imageSize.width * idp);
+        const int histRows = cvCeil(imageSize_.height * idp);
+        const int histCols = cvCeil(imageSize_.width * idp);
 
         Mat DHist(histRows + 2, histCols + 2, CV_32SC1, Scalar::all(0));
 
-        for (int i = 0; i <= levels; ++i)
+        for (int i = 0; i <= levels_; ++i)
         {
-            const std::vector<Feature>& templRow = templFeatures[i];
-            const std::vector<Feature>& imageRow = imageFeatures[i];
+            const std::vector<Feature>& templRow = templFeatures_[i];
+            const std::vector<Feature>& imageRow = imageFeatures_[i];
 
             for (size_t j = 0; j < templRow.size(); ++j)
             {
@@ -1165,7 +898,7 @@ namespace
                 {
                     Feature imF = imageRow[k];
 
-                    if (angleEq(imF.p1.theta, templF.p1.theta, angleEpsilon))
+                    if (angleEq(imF.p1.theta, templF.p1.theta, angleEpsilon_))
                     {
                         Point2d c1, c2;
 
@@ -1195,101 +928,17 @@ namespace
             {
                 const int votes = curRow[x + 1];
 
-                if (votes > posThresh && votes > curRow[x] && votes >= curRow[x + 2] && votes > prevRow[x + 1] && votes >= nextRow[x + 1])
+                if (votes > posThresh_ && votes > curRow[x] && votes >= curRow[x + 2] && votes > prevRow[x + 1] && votes >= nextRow[x + 1])
                 {
-                    posOutBuf.push_back(Vec4f(static_cast<float>(x * dp), static_cast<float>(y * dp), static_cast<float>(scale), static_cast<float>(angle)));
-                    voteOutBuf.push_back(Vec3i(votes, scaleVotes, angleVotes));
+                    posOutBuf_.push_back(Vec4f(static_cast<float>(x * dp_), static_cast<float>(y * dp_), static_cast<float>(scale), static_cast<float>(angle)));
+                    voteOutBuf_.push_back(Vec3i(votes, scaleVotes, angleVotes));
                 }
             }
         }
     }
 }
 
-Ptr<GeneralizedHough> cv::GeneralizedHough::create(int method)
+Ptr<GeneralizedHoughGuil> cv::createGeneralizedHoughGuil()
 {
-    switch (method)
-    {
-    case GHT_POSITION:
-        CV_Assert( !GHT_Ballard_Pos_info_auto.name().empty() );
-        return new GHT_Ballard_Pos();
-
-    case (GHT_POSITION | GHT_SCALE):
-        CV_Assert( !GHT_Ballard_PosScale_info_auto.name().empty() );
-        return new GHT_Ballard_PosScale();
-
-    case (GHT_POSITION | GHT_ROTATION):
-        CV_Assert( !GHT_Ballard_PosRotation_info_auto.name().empty() );
-        return new GHT_Ballard_PosRotation();
-
-    case (GHT_POSITION | GHT_SCALE | GHT_ROTATION):
-        CV_Assert( !GHT_Guil_Full_info_auto.name().empty() );
-        return new GHT_Guil_Full();
-    }
-
-    CV_Error(CV_StsBadArg, "Unsupported method");
-    return Ptr<GeneralizedHough>();
-}
-
-cv::GeneralizedHough::~GeneralizedHough()
-{
-}
-
-void cv::GeneralizedHough::setTemplate(InputArray _templ, int cannyThreshold, Point templCenter)
-{
-    Mat templ = _templ.getMat();
-
-    CV_Assert(templ.type() == CV_8UC1);
-    CV_Assert(cannyThreshold > 0);
-
-    Canny(templ, edges_, cannyThreshold / 2, cannyThreshold);
-    Sobel(templ, dx_, CV_32F, 1, 0);
-    Sobel(templ, dy_, CV_32F, 0, 1);
-
-    if (templCenter == Point(-1, -1))
-        templCenter = Point(templ.cols / 2, templ.rows / 2);
-
-    setTemplateImpl(edges_, dx_, dy_, templCenter);
-}
-
-void cv::GeneralizedHough::setTemplate(InputArray _edges, InputArray _dx, InputArray _dy, Point templCenter)
-{
-    Mat edges = _edges.getMat();
-    Mat dx = _dx.getMat();
-    Mat dy = _dy.getMat();
-
-    if (templCenter == Point(-1, -1))
-        templCenter = Point(edges.cols / 2, edges.rows / 2);
-
-    setTemplateImpl(edges, dx, dy, templCenter);
-}
-
-void cv::GeneralizedHough::detect(InputArray _image, OutputArray positions, OutputArray votes, int cannyThreshold)
-{
-    Mat image = _image.getMat();
-
-    CV_Assert(image.type() == CV_8UC1);
-    CV_Assert(cannyThreshold > 0);
-
-    Canny(image, edges_, cannyThreshold / 2, cannyThreshold);
-    Sobel(image, dx_, CV_32F, 1, 0);
-    Sobel(image, dy_, CV_32F, 0, 1);
-
-    detectImpl(edges_, dx_, dy_, positions, votes);
-}
-
-void cv::GeneralizedHough::detect(InputArray _edges, InputArray _dx, InputArray _dy, OutputArray positions, OutputArray votes)
-{
-    cv::Mat edges = _edges.getMat();
-    cv::Mat dx = _dx.getMat();
-    cv::Mat dy = _dy.getMat();
-
-    detectImpl(edges, dx, dy, positions, votes);
-}
-
-void cv::GeneralizedHough::release()
-{
-    edges_.release();
-    dx_.release();
-    dy_.release();
-    releaseImpl();
+    return makePtr<GeneralizedHoughGuilImpl>();
 }
diff --git a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp
index 554e59fe6..f0a78187d 100644
--- a/modules/imgproc/src/histogram.cpp
+++ b/modules/imgproc/src/histogram.cpp
@@ -266,6 +266,8 @@ public:
     }
 
 private:
+    calcHist1D_Invoker operator=(const calcHist1D_Invoker&);
+
     T* p_[one];
     uchar* mask_;
     int step_[one];
@@ -338,6 +340,8 @@ public:
     }
 
 private:
+    calcHist2D_Invoker operator=(const calcHist2D_Invoker&);
+
     T* p_[two];
     uchar* mask_;
     int step_[two];
@@ -428,6 +432,8 @@ public:
     }
 
 private:
+    calcHist3D_Invoker operator=(const calcHist3D_Invoker&);
+
     T* p_[three];
     uchar* mask_;
     int step_[three];
@@ -767,8 +773,7 @@ calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
 #ifdef HAVE_TBB
             calcHist1D_Invoker<T> body(_ptrs, _deltas, hist, _uniranges, size[0], dims, imsize);
             parallel_for(BlockedRange(0, imsize.height), body);
-            return;
-#endif
+#else
             double a = uniranges[0], b = uniranges[1];
             int sz = size[0], d0 = deltas[0], step0 = deltas[1];
             const T* p0 = (const T*)ptrs[0];
@@ -791,14 +796,15 @@ calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
                                 ((int*)H)[idx]++;
                         }
             }
+#endif //HAVE_TBB
+            return;
         }
         else if( dims == 2 )
         {
 #ifdef HAVE_TBB
             calcHist2D_Invoker<T> body(_ptrs, _deltas, hist, _uniranges, size, dims, imsize, hstep);
             parallel_for(BlockedRange(0, imsize.height), body);
-            return;
-#endif
+#else
             double a0 = uniranges[0], b0 = uniranges[1], a1 = uniranges[2], b1 = uniranges[3];
             int sz0 = size[0], sz1 = size[1];
             int d0 = deltas[0], step0 = deltas[1],
@@ -827,6 +833,8 @@ calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
                                 ((int*)(H + hstep0*idx0))[idx1]++;
                         }
             }
+#endif //HAVE_TBB
+            return;
         }
         else if( dims == 3 )
         {
@@ -1982,12 +1990,12 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
         const float* h2 = (const float*)it.planes[1].data;
         len = it.planes[0].rows*it.planes[0].cols*H1.channels();
 
-        if( method == CV_COMP_CHISQR )
+        if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT))
         {
             for( j = 0; j < len; j++ )
             {
                 double a = h1[j] - h2[j];
-                double b = h1[j];
+                double b = (method == CV_COMP_CHISQR) ? h1[j] : h1[j] + h2[j];
                 if( fabs(b) > DBL_EPSILON )
                     result += a*a/b;
             }
@@ -2026,7 +2034,9 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
             CV_Error( CV_StsBadArg, "Unknown comparison method" );
     }
 
-    if( method == CV_COMP_CORREL )
+    if( method == CV_COMP_CHISQR_ALT )
+        result *= 2;
+    else if( method == CV_COMP_CORREL )
     {
         size_t total = H1.total();
         double scale = 1./total;
@@ -2055,13 +2065,13 @@ double cv::compareHist( const SparseMat& H1, const SparseMat& H2, int method )
         CV_Assert( H1.size(i) == H2.size(i) );
 
     const SparseMat *PH1 = &H1, *PH2 = &H2;
-    if( PH1->nzcount() > PH2->nzcount() && method != CV_COMP_CHISQR )
+    if( PH1->nzcount() > PH2->nzcount() && method != CV_COMP_CHISQR && method != CV_COMP_CHISQR_ALT)
         std::swap(PH1, PH2);
 
     SparseMatConstIterator it = PH1->begin();
     int N1 = (int)PH1->nzcount(), N2 = (int)PH2->nzcount();
 
-    if( method == CV_COMP_CHISQR )
+    if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT) )
     {
         for( i = 0; i < N1; i++, ++it )
         {
@@ -2069,7 +2079,7 @@ double cv::compareHist( const SparseMat& H1, const SparseMat& H2, int method )
             const SparseMat::Node* node = it.node();
             float v2 = PH2->value<float>(node->idx, (size_t*)&node->hashval);
             double a = v1 - v2;
-            double b = v1;
+            double b = (method == CV_COMP_CHISQR) ? v1 : v1 + v2;
             if( fabs(b) > DBL_EPSILON )
                 result += a*a/b;
         }
@@ -2138,6 +2148,9 @@ double cv::compareHist( const SparseMat& H1, const SparseMat& H2, int method )
     else
         CV_Error( CV_StsBadArg, "Unknown comparison method" );
 
+    if( method == CV_COMP_CHISQR_ALT )
+        result *= 2;
+
     return result;
 }
 
@@ -2477,13 +2490,13 @@ cvCompareHist( const CvHistogram* hist1,
     CvSparseMatIterator iterator;
     CvSparseNode *node1, *node2;
 
-    if( mat1->heap->active_count > mat2->heap->active_count && method != CV_COMP_CHISQR )
+    if( mat1->heap->active_count > mat2->heap->active_count && method != CV_COMP_CHISQR && method != CV_COMP_CHISQR_ALT)
     {
         CvSparseMat* t;
         CV_SWAP( mat1, mat2, t );
     }
 
-    if( method == CV_COMP_CHISQR )
+    if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT) )
     {
         for( node1 = cvInitSparseMatIterator( mat1, &iterator );
              node1 != 0; node1 = cvGetNextSparseNode( &iterator ))
@@ -2492,7 +2505,7 @@ cvCompareHist( const CvHistogram* hist1,
             uchar* node2_data = cvPtrND( mat2, CV_NODE_IDX(mat1,node1), 0, 0, &node1->hashval );
             double v2 = node2_data ? *(float*)node2_data : 0.f;
             double a = v1 - v2;
-            double b = v1;
+            double b = (method == CV_COMP_CHISQR) ? v1 : v1 + v2;
             if( fabs(b) > DBL_EPSILON )
                 result += a*a/b;
         }
@@ -2582,6 +2595,9 @@ cvCompareHist( const CvHistogram* hist1,
     else
         CV_Error( CV_StsBadArg, "Unknown comparison method" );
 
+    if( method == CV_COMP_CHISQR_ALT )
+        result *= 2;
+
     return result;
 }
 
diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp
index f33c6d203..9c8eaca8f 100644
--- a/modules/imgproc/src/hough.cpp
+++ b/modules/imgproc/src/hough.cpp
@@ -766,21 +766,21 @@ icvHoughCirclesGradient( CvMat* img, float dp, float min_dist,
     float idp, dr;
     CvSeqReader reader;
 
-    edges = cvCreateMat( img->rows, img->cols, CV_8UC1 );
+    edges.reset(cvCreateMat( img->rows, img->cols, CV_8UC1 ));
     cvCanny( img, edges, MAX(canny_threshold/2,1), canny_threshold, 3 );
 
-    dx = cvCreateMat( img->rows, img->cols, CV_16SC1 );
-    dy = cvCreateMat( img->rows, img->cols, CV_16SC1 );
+    dx.reset(cvCreateMat( img->rows, img->cols, CV_16SC1 ));
+    dy.reset(cvCreateMat( img->rows, img->cols, CV_16SC1 ));
     cvSobel( img, dx, 1, 0, 3 );
     cvSobel( img, dy, 0, 1, 3 );
 
     if( dp < 1.f )
         dp = 1.f;
     idp = 1.f/dp;
-    accum = cvCreateMat( cvCeil(img->rows*idp)+2, cvCeil(img->cols*idp)+2, CV_32SC1 );
+    accum.reset(cvCreateMat( cvCeil(img->rows*idp)+2, cvCeil(img->cols*idp)+2, CV_32SC1 ));
     cvZero(accum);
 
-    storage = cvCreateMemStorage();
+    storage.reset(cvCreateMemStorage());
     nz = cvCreateSeq( CV_32SC2, sizeof(CvSeq), sizeof(CvPoint), storage );
     centers = cvCreateSeq( CV_32SC1, sizeof(CvSeq), sizeof(int), storage );
 
@@ -866,7 +866,7 @@ icvHoughCirclesGradient( CvMat* img, float dp, float min_dist,
     cvClearSeq( centers );
     cvSeqPushMulti( centers, &sort_buf[0], center_count );
 
-    dist_buf = cvCreateMat( 1, nz_count, CV_32FC1 );
+    dist_buf.reset(cvCreateMat( 1, nz_count, CV_32FC1 ));
     ddata = dist_buf->data.fl;
 
     dr = dp;
@@ -1060,7 +1060,7 @@ void cv::HoughCircles( InputArray _image, OutputArray _circles,
                        double param1, double param2,
                        int minRadius, int maxRadius )
 {
-    Ptr<CvMemStorage> storage = cvCreateMemStorage(STORAGE_SIZE);
+    Ptr<CvMemStorage> storage(cvCreateMemStorage(STORAGE_SIZE));
     Mat image = _image.getMat();
     CvMat c_image = image;
     CvSeq* seq = cvHoughCircles( &c_image, storage, method,
diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index 05a96300b..9e2048407 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -50,9 +50,73 @@
 #include <iostream>
 #include <vector>
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+static IppStatus sts = ippInit();
+#endif
+
 namespace cv
 {
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
+    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
+    typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);
+    typedef IppStatus (CV_STDCALL* ippiResizeSqrPixelFunc)(const void*, IppiSize, int, IppiRect, void*, int, IppiRect, double, double, double, double, int, Ipp8u *);
+
+    template <int channels, typename Type>
+    bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
+    {
+        Type values[channels];
+        for( int i = 0; i < channels; i++ )
+            values[i] = (Type)value[i];
+        return func(values, dataPointer, step, size) >= 0;
+    }
+
+    bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
+    {
+        if( channels == 1 )
+        {
+            switch( depth )
+            {
+            case CV_8U:
+                return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0;
+            case CV_16U:
+                return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0;
+            case CV_32F:
+                return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0;
+            }
+        }
+        else
+        {
+            if( channels == 3 )
+            {
+                switch( depth )
+                {
+                case CV_8U:
+                    return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
+                case CV_16U:
+                    return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
+                case CV_32F:
+                    return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
+                }
+            }
+            else if( channels == 4 )
+            {
+                switch( depth )
+                {
+                case CV_8U:
+                    return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
+                case CV_16U:
+                    return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
+                case CV_32F:
+                    return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
+                }
+            }
+        }
+        return false;
+    }
+#endif
+
 /************** interpolation formulas and tables ***************/
 
 const int INTER_RESIZE_COEF_BITS=11;
@@ -1795,6 +1859,45 @@ static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, Dec
     return k;
 }
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+class IPPresizeInvoker :
+    public ParallelLoopBody
+{
+public:
+    IPPresizeInvoker(Mat &_src, Mat &_dst, double &_inv_scale_x, double &_inv_scale_y, int _mode, ippiResizeSqrPixelFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          int cn = src.channels();
+          IppiRect srcroi = { 0, range.start, src.cols, range.end - range.start };
+          int dsty = CV_IMIN(cvRound(range.start * inv_scale_y), dst.rows);
+          int dstwidth = CV_IMIN(cvRound(src.cols * inv_scale_x), dst.cols);
+          int dstheight = CV_IMIN(cvRound(range.end * inv_scale_y), dst.rows);
+          IppiRect dstroi = { 0, dsty, dstwidth, dstheight - dsty };
+          int bufsize;
+          ippiResizeGetBufSize( srcroi, dstroi, cn, mode, &bufsize );
+          Ipp8u *buf;
+          buf = ippsMalloc_8u( bufsize );
+          IppStatus sts;
+          if( func( src.data, ippiSize(src.cols, src.rows), (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, inv_scale_x, inv_scale_y, 0, 0, mode, buf ) < 0 )
+              *ok = false;
+          ippsFree(buf);
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double inv_scale_x;
+    double inv_scale_y;
+    int mode;
+    ippiResizeSqrPixelFunc func;
+    bool *ok;
+    const IPPresizeInvoker& operator= (const IPPresizeInvoker&);
+};
+#endif
 
 }
 
@@ -1937,6 +2040,34 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
     double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
     int k, sx, sy, dx, dy;
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int mode = interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : 0;
+    int type = src.type();
+    ippiResizeSqrPixelFunc ippFunc =
+        type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R :
+        type == CV_8UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C3R :
+        type == CV_8UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C4R :
+        type == CV_16UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C1R :
+        type == CV_16UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C3R :
+        type == CV_16UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C4R :
+        type == CV_16SC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C1R :
+        type == CV_16SC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C3R :
+        type == CV_16SC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C4R :
+        type == CV_32FC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C1R :
+        type == CV_32FC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C3R :
+        type == CV_32FC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R :
+        0;
+    if( ippFunc && mode != 0 )
+    {
+        bool ok;
+        Range range(0, src.rows);
+        IPPresizeInvoker invoker(src, dst, inv_scale_x, inv_scale_y, mode, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
+    }
+#endif
+
     if( interpolation == INTER_NEAREST )
     {
         resizeNN( src, dst, inv_scale_x, inv_scale_y );
@@ -3446,6 +3577,49 @@ private:
     double *M;
 };
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+class IPPwarpAffineInvoker :
+    public ParallelLoopBody
+{
+public:
+    IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          IppiSize srcsize = { src.cols, src.rows };
+          IppiRect srcroi = { 0, 0, src.cols, src.rows };
+          IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
+          int cnn = src.channels();
+          if( borderType == BORDER_CONSTANT )
+          {
+              IppiSize setSize = { dst.cols, range.end - range.start };
+              void *dataPointer = dst.data + dst.step[0] * range.start;
+              if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
+              {
+                  *ok = false;
+                  return;
+              }
+          }
+          if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double (&coeffs)[2][3];
+    int mode;
+    int borderType;
+    Scalar borderValue;
+    ippiWarpAffineBackFunc func;
+    bool *ok;
+    const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
+};
+#endif
+
 }
 
 
@@ -3492,6 +3666,50 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
     const int AB_BITS = MAX(10, (int)INTER_BITS);
     const int AB_SCALE = 1 << AB_BITS;
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int depth = src.depth();
+    int channels = src.channels();
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
+        ( channels == 1 || channels == 3 || channels == 4 ) &&
+        ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
+    {
+        int type = src.type();
+        ippiWarpAffineBackFunc ippFunc =
+            type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
+            0;
+        int mode =
+            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            flags == INTER_NEAREST ? IPPI_INTER_NN :
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
+            0;
+        if( mode && ippFunc )
+        {
+            double coeffs[2][3];
+            for( int i = 0; i < 2; i++ )
+            {
+                for( int j = 0; j < 3; j++ )
+                {
+                    coeffs[i][j] = matM.at<double>(i, j);
+                }
+            }
+            bool ok;
+            Range range(0, dst.rows);
+            IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+            if( ok )
+                return;
+        }
+    }
+#endif
+
     for( x = 0; x < dst.cols; x++ )
     {
         adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
@@ -3599,6 +3817,50 @@ private:
     Scalar borderValue;
 };
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+class IPPwarpPerspectiveInvoker :
+    public ParallelLoopBody
+{
+public:
+    IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          IppiSize srcsize = {src.cols, src.rows};
+          IppiRect srcroi = {0, 0, src.cols, src.rows};
+          IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
+          int cnn = src.channels();
+
+          if( borderType == BORDER_CONSTANT )
+          {
+              IppiSize setSize = {dst.cols, range.end - range.start};
+              void *dataPointer = dst.data + dst.step[0] * range.start;
+              if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
+              {
+                  *ok = false;
+                  return;
+              }
+          }
+          if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double (&coeffs)[3][3];
+    int mode;
+    int borderType;
+    const Scalar borderValue;
+    ippiWarpPerspectiveBackFunc func;
+    bool *ok;
+    const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
+};
+#endif
+
 }
 
 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
@@ -3629,6 +3891,50 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
     if( !(flags & WARP_INVERSE_MAP) )
          invert(matM, matM);
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int depth = src.depth();
+    int channels = src.channels();
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
+        ( channels == 1 || channels == 3 || channels == 4 ) &&
+        ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
+    {
+        int type = src.type();
+        ippiWarpPerspectiveBackFunc ippFunc =
+            type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
+            0;
+        int mode =
+            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            flags == INTER_NEAREST ? IPPI_INTER_NN :
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
+            0;
+        if( mode && ippFunc )
+        {
+            double coeffs[3][3];
+            for( int i = 0; i < 3; i++ )
+            {
+                for( int j = 0; j < 3; j++ )
+                {
+                    coeffs[i][j] = matM.at<double>(i, j);
+                }
+            }
+            bool ok;
+            Range range(0, dst.rows);
+            IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+            if( ok )
+                return;
+        }
+    }
+#endif
+
     Range range(0, dst.rows);
     warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
     parallel_for_(range, invoker, dst.total()/(double)(1<<16));
@@ -3931,8 +4237,8 @@ cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
     ssize = cvGetMatSize(src);
     dsize = cvGetMatSize(dst);
 
-    mapx = cvCreateMat( dsize.height, dsize.width, CV_32F );
-    mapy = cvCreateMat( dsize.height, dsize.width, CV_32F );
+    mapx.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
+    mapy.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
 
     if( !(flags & CV_WARP_INVERSE_MAP) )
     {
@@ -4049,8 +4355,8 @@ void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
     dsize.width = dst->cols;
     dsize.height = dst->rows;
 
-    mapx = cvCreateMat( dsize.height, dsize.width, CV_32F );
-    mapy = cvCreateMat( dsize.height, dsize.width, CV_32F );
+    mapx.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
+    mapy.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
 
     if( !(flags & CV_WARP_INVERSE_MAP) )
     {
diff --git a/modules/imgproc/src/linefit.cpp b/modules/imgproc/src/linefit.cpp
index 61969b502..dc71d888a 100644
--- a/modules/imgproc/src/linefit.cpp
+++ b/modules/imgproc/src/linefit.cpp
@@ -571,14 +571,14 @@ static void fitLine3D( Point3f * points, int count, int dist,
                 for( j = 0; j < count; j++ )
                     w[j] = 1.f;
             }
-            
+
             /* save the line parameters */
             memcpy( _lineprev, _line, 6 * sizeof( float ));
-            
+
             /* Run again... */
             fitLine3D_wods( points, count, w, _line );
         }
-        
+
         if( err < min_err )
         {
             min_err = err;
@@ -595,27 +595,27 @@ void cv::fitLine( InputArray _points, OutputArray _line, int distType,
                  double param, double reps, double aeps )
 {
     Mat points = _points.getMat();
-    
+
     float linebuf[6]={0.f};
     int npoints2 = points.checkVector(2, -1, false);
     int npoints3 = points.checkVector(3, -1, false);
-    
+
     CV_Assert( npoints2 >= 0 || npoints3 >= 0 );
-    
+
     if( points.depth() != CV_32F || !points.isContinuous() )
     {
         Mat temp;
         points.convertTo(temp, CV_32F);
         points = temp;
     }
-    
+
     if( npoints2 >= 0 )
         fitLine2D( points.ptr<Point2f>(), npoints2, distType,
                    (float)param, (float)reps, (float)aeps, linebuf);
     else
         fitLine3D( points.ptr<Point3f>(), npoints3, distType,
                    (float)param, (float)reps, (float)aeps, linebuf);
-    
+
     Mat(npoints2 >= 0 ? 4 : 6, 1, CV_32F, linebuf).copyTo(_line);
 }
 
diff --git a/modules/imgproc/src/lsd.cpp b/modules/imgproc/src/lsd.cpp
new file mode 100644
index 000000000..bb3895448
--- /dev/null
+++ b/modules/imgproc/src/lsd.cpp
@@ -0,0 +1,1246 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <vector>
+
+/////////////////////////////////////////////////////////////////////////////////////////
+// Default LSD parameters
+// SIGMA_SCALE 0.6    - Sigma for Gaussian filter is computed as sigma = sigma_scale/scale.
+// QUANT       2.0    - Bound to the quantization error on the gradient norm.
+// ANG_TH      22.5   - Gradient angle tolerance in degrees.
+// LOG_EPS     0.0    - Detection threshold: -log10(NFA) > log_eps
+// DENSITY_TH  0.7    - Minimal density of region points in rectangle.
+// N_BINS      1024   - Number of bins in pseudo-ordering of gradient modulus.
+
+#define M_3_2_PI    (3 * CV_PI) / 2   // 3/2 pi
+#define M_2__PI     (2 * CV_PI)         // 2 pi
+
+#ifndef M_LN10
+#define M_LN10      2.30258509299404568402
+#endif
+
+#define NOTDEF      double(-1024.0) // Label for pixels with undefined gradient.
+
+#define NOTUSED     0   // Label for pixels not used in yet.
+#define USED        1   // Label for pixels already used in detection.
+
+#define RELATIVE_ERROR_FACTOR 100.0
+
+const double DEG_TO_RADS = CV_PI / 180;
+
+#define log_gamma(x) ((x)>15.0?log_gamma_windschitl(x):log_gamma_lanczos(x))
+
+struct edge
+{
+    cv::Point p;
+    bool taken;
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+inline double distSq(const double x1, const double y1,
+                     const double x2, const double y2)
+{
+    return (x2 - x1)*(x2 - x1) + (y2 - y1)*(y2 - y1);
+}
+
+inline double dist(const double x1, const double y1,
+                   const double x2, const double y2)
+{
+    return sqrt(distSq(x1, y1, x2, y2));
+}
+
+// Signed angle difference
+inline double angle_diff_signed(const double& a, const double& b)
+{
+    double diff = a - b;
+    while(diff <= -CV_PI) diff += M_2__PI;
+    while(diff >   CV_PI) diff -= M_2__PI;
+    return diff;
+}
+
+// Absolute value angle difference
+inline double angle_diff(const double& a, const double& b)
+{
+    return std::fabs(angle_diff_signed(a, b));
+}
+
+// Compare doubles by relative error.
+inline bool double_equal(const double& a, const double& b)
+{
+    // trivial case
+    if(a == b) return true;
+
+    double abs_diff = fabs(a - b);
+    double aa = fabs(a);
+    double bb = fabs(b);
+    double abs_max = (aa > bb)? aa : bb;
+
+    if(abs_max < DBL_MIN) abs_max = DBL_MIN;
+
+    return (abs_diff / abs_max) <= (RELATIVE_ERROR_FACTOR * DBL_EPSILON);
+}
+
+inline bool AsmallerB_XoverY(const edge& a, const edge& b)
+{
+    if (a.p.x == b.p.x) return a.p.y < b.p.y;
+    else return a.p.x < b.p.x;
+}
+
+/**
+ *   Computes the natural logarithm of the absolute value of
+ *   the gamma function of x using Windschitl method.
+ *   See http://www.rskey.org/gamma.htm
+ */
+inline double log_gamma_windschitl(const double& x)
+{
+    return 0.918938533204673 + (x-0.5)*log(x) - x
+         + 0.5*x*log(x*sinh(1/x) + 1/(810.0*pow(x, 6.0)));
+}
+
+/**
+ *   Computes the natural logarithm of the absolute value of
+ *   the gamma function of x using the Lanczos approximation.
+ *   See http://www.rskey.org/gamma.htm
+ */
+inline double log_gamma_lanczos(const double& x)
+{
+    static double q[7] = { 75122.6331530, 80916.6278952, 36308.2951477,
+                         8687.24529705, 1168.92649479, 83.8676043424,
+                         2.50662827511 };
+    double a = (x + 0.5) * log(x + 5.5) - (x + 5.5);
+    double b = 0;
+    for(int n = 0; n < 7; ++n)
+    {
+        a -= log(x + double(n));
+        b += q[n] * pow(x, double(n));
+    }
+    return a + log(b);
+}
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace cv{
+
+class LineSegmentDetectorImpl : public LineSegmentDetector
+{
+public:
+
+/**
+ * Create a LineSegmentDetectorImpl object. Specifying scale, number of subdivisions for the image, should the lines be refined and other constants as follows:
+ *
+ * @param _refine       How should the lines found be refined?
+ *                      LSD_REFINE_NONE - No refinement applied.
+ *                      LSD_REFINE_STD  - Standard refinement is applied. E.g. breaking arches into smaller line approximations.
+ *                      LSD_REFINE_ADV  - Advanced refinement. Number of false alarms is calculated,
+ *                                    lines are refined through increase of precision, decrement in size, etc.
+ * @param _scale        The scale of the image that will be used to find the lines. Range (0..1].
+ * @param _sigma_scale  Sigma for Gaussian filter is computed as sigma = _sigma_scale/_scale.
+ * @param _quant        Bound to the quantization error on the gradient norm.
+ * @param _ang_th       Gradient angle tolerance in degrees.
+ * @param _log_eps      Detection threshold: -log10(NFA) > _log_eps
+ * @param _density_th   Minimal density of aligned region points in rectangle.
+ * @param _n_bins       Number of bins in pseudo-ordering of gradient modulus.
+ */
+    LineSegmentDetectorImpl(int _refine = LSD_REFINE_STD, double _scale = 0.8,
+        double _sigma_scale = 0.6, double _quant = 2.0, double _ang_th = 22.5,
+        double _log_eps = 0, double _density_th = 0.7, int _n_bins = 1024);
+
+/**
+ * Detect lines in the input image.
+ *
+ * @param _image    A grayscale(CV_8UC1) input image.
+ *                  If only a roi needs to be selected, use
+ *                  lsd_ptr->detect(image(roi), ..., lines);
+ *                  lines += Scalar(roi.x, roi.y, roi.x, roi.y);
+ * @param _lines    Return: A vector of Vec4i elements specifying the beginning and ending point of a line.
+ *                          Where Vec4i is (x1, y1, x2, y2), point 1 is the start, point 2 - end.
+ *                          Returned lines are strictly oriented depending on the gradient.
+ * @param width     Return: Vector of widths of the regions, where the lines are found. E.g. Width of line.
+ * @param prec      Return: Vector of precisions with which the lines are found.
+ * @param nfa       Return: Vector containing number of false alarms in the line region, with precision of 10%.
+ *                          The bigger the value, logarithmically better the detection.
+ *                              * -1 corresponds to 10 mean false alarms
+ *                              * 0 corresponds to 1 mean false alarm
+ *                              * 1 corresponds to 0.1 mean false alarms
+ *                          This vector will be calculated _only_ when the objects type is REFINE_ADV
+ */
+    void detect(InputArray _image, OutputArray _lines,
+                OutputArray width = noArray(), OutputArray prec = noArray(),
+                OutputArray nfa = noArray());
+
+/**
+ * Draw lines on the given canvas.
+ *
+ * @param image     The image, where lines will be drawn.
+ *                  Should have the size of the image, where the lines were found
+ * @param lines     The lines that need to be drawn
+ */
+    void drawSegments(InputOutputArray _image, InputArray lines);
+
+/**
+ * Draw both vectors on the image canvas. Uses blue for lines 1 and red for lines 2.
+ *
+ * @param size      The size of the image, where lines1 and lines2 were found.
+ * @param lines1    The first lines that need to be drawn. Color - Blue.
+ * @param lines2    The second lines that need to be drawn. Color - Red.
+ * @param image     An optional image, where lines will be drawn.
+ *                  Should have the size of the image, where the lines were found
+ * @return          The number of mismatching pixels between lines1 and lines2.
+ */
+    int compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray _image = noArray());
+
+private:
+    Mat image;
+    Mat_<double> scaled_image;
+    double *scaled_image_data;
+    Mat_<double> angles;     // in rads
+    double *angles_data;
+    Mat_<double> modgrad;
+    double *modgrad_data;
+    Mat_<uchar> used;
+
+    int img_width;
+    int img_height;
+    double LOG_NT;
+
+    bool w_needed;
+    bool p_needed;
+    bool n_needed;
+
+    const double SCALE;
+    const int doRefine;
+    const double SIGMA_SCALE;
+    const double QUANT;
+    const double ANG_TH;
+    const double LOG_EPS;
+    const double DENSITY_TH;
+    const int N_BINS;
+
+    struct RegionPoint {
+        int x;
+        int y;
+        uchar* used;
+        double angle;
+        double modgrad;
+    };
+
+
+    struct coorlist
+    {
+        Point2i p;
+        struct coorlist* next;
+    };
+
+    struct rect
+    {
+        double x1, y1, x2, y2;    // first and second point of the line segment
+        double width;             // rectangle width
+        double x, y;              // center of the rectangle
+        double theta;             // angle
+        double dx,dy;             // (dx,dy) is vector oriented as the line segment
+        double prec;              // tolerance angle
+        double p;                 // probability of a point with angle within 'prec'
+    };
+
+    LineSegmentDetectorImpl& operator= (const LineSegmentDetectorImpl&); // to quiet MSVC
+
+/**
+ * Detect lines in the whole input image.
+ *
+ * @param lines         Return: A vector of Vec4i elements specifying the beginning and ending point of a line.
+ *                              Where Vec4i is (x1, y1, x2, y2), point 1 is the start, point 2 - end.
+ *                              Returned lines are strictly oriented depending on the gradient.
+ * @param widths        Return: Vector of widths of the regions, where the lines are found. E.g. Width of line.
+ * @param precisions    Return: Vector of precisions with which the lines are found.
+ * @param nfas          Return: Vector containing number of false alarms in the line region, with precision of 10%.
+ *                              The bigger the value, logarithmically better the detection.
+ *                                  * -1 corresponds to 10 mean false alarms
+ *                                  * 0 corresponds to 1 mean false alarm
+ *                                  * 1 corresponds to 0.1 mean false alarms
+ */
+    void flsd(std::vector<Vec4i>& lines,
+              std::vector<double>& widths, std::vector<double>& precisions,
+              std::vector<double>& nfas);
+
+/**
+ * Finds the angles and the gradients of the image. Generates a list of pseudo ordered points.
+ *
+ * @param threshold The minimum value of the angle that is considered defined, otherwise NOTDEF
+ * @param n_bins    The number of bins with which gradients are ordered by, using bucket sort.
+ * @param list      Return: Vector of coordinate points that are pseudo ordered by magnitude.
+ *                  Pixels would be ordered by norm value, up to a precision given by max_grad/n_bins.
+ */
+    void ll_angle(const double& threshold, const unsigned int& n_bins, std::vector<coorlist>& list);
+
+/**
+ * Grow a region starting from point s with a defined precision,
+ * returning the containing points size and the angle of the gradients.
+ *
+ * @param s         Starting point for the region.
+ * @param reg       Return: Vector of points, that are part of the region
+ * @param reg_size  Return: The size of the region.
+ * @param reg_angle Return: The mean angle of the region.
+ * @param prec      The precision by which each region angle should be aligned to the mean.
+ */
+    void region_grow(const Point2i& s, std::vector<RegionPoint>& reg,
+                     int& reg_size, double& reg_angle, const double& prec);
+
+/**
+ * Finds the bounding rotated rectangle of a region.
+ *
+ * @param reg       The region of points, from which the rectangle to be constructed from.
+ * @param reg_size  The number of points in the region.
+ * @param reg_angle The mean angle of the region.
+ * @param prec      The precision by which points were found.
+ * @param p         Probability of a point with angle within 'prec'.
+ * @param rec       Return: The generated rectangle.
+ */
+    void region2rect(const std::vector<RegionPoint>& reg, const int reg_size, const double reg_angle,
+                     const double prec, const double p, rect& rec) const;
+
+/**
+ * Compute region's angle as the principal inertia axis of the region.
+ * @return          Regions angle.
+ */
+    double get_theta(const std::vector<RegionPoint>& reg, const int& reg_size, const double& x,
+                     const double& y, const double& reg_angle, const double& prec) const;
+
+/**
+ * An estimation of the angle tolerance is performed by the standard deviation of the angle at points
+ * near the region's starting point. Then, a new region is grown starting from the same point, but using the
+ * estimated angle tolerance. If this fails to produce a rectangle with the right density of region points,
+ * 'reduce_region_radius' is called to try to satisfy this condition.
+ */
+    bool refine(std::vector<RegionPoint>& reg, int& reg_size, double reg_angle,
+                const double prec, double p, rect& rec, const double& density_th);
+
+/**
+ * Reduce the region size, by elimination the points far from the starting point, until that leads to
+ * rectangle with the right density of region points or to discard the region if too small.
+ */
+    bool reduce_region_radius(std::vector<RegionPoint>& reg, int& reg_size, double reg_angle,
+                const double prec, double p, rect& rec, double density, const double& density_th);
+
+/**
+ * Try some rectangles variations to improve NFA value. Only if the rectangle is not meaningful (i.e., log_nfa <= log_eps).
+ * @return      The new NFA value.
+ */
+    double rect_improve(rect& rec) const;
+
+/**
+ * Calculates the number of correctly aligned points within the rectangle.
+ * @return      The new NFA value.
+ */
+    double rect_nfa(const rect& rec) const;
+
+/**
+ * Computes the NFA values based on the total number of points, points that agree.
+ * n, k, p are the binomial parameters.
+ * @return      The new NFA value.
+ */
+    double nfa(const int& n, const int& k, const double& p) const;
+
+/**
+ * Is the point at place 'address' aligned to angle theta, up to precision 'prec'?
+ * @return      Whether the point is aligned.
+ */
+    bool isAligned(const int& address, const double& theta, const double& prec) const;
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+CV_EXPORTS Ptr<LineSegmentDetector> createLineSegmentDetectorPtr(
+        int _refine, double _scale, double _sigma_scale, double _quant, double _ang_th,
+        double _log_eps, double _density_th, int _n_bins)
+{
+    return makePtr<LineSegmentDetectorImpl>(
+            _refine, _scale, _sigma_scale, _quant, _ang_th,
+            _log_eps, _density_th, _n_bins);
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+LineSegmentDetectorImpl::LineSegmentDetectorImpl(int _refine, double _scale, double _sigma_scale, double _quant,
+        double _ang_th, double _log_eps, double _density_th, int _n_bins)
+        :SCALE(_scale), doRefine(_refine), SIGMA_SCALE(_sigma_scale), QUANT(_quant),
+        ANG_TH(_ang_th), LOG_EPS(_log_eps), DENSITY_TH(_density_th), N_BINS(_n_bins)
+{
+    CV_Assert(_scale > 0 && _sigma_scale > 0 && _quant >= 0 &&
+              _ang_th > 0 && _ang_th < 180 && _density_th >= 0 && _density_th < 1 &&
+              _n_bins > 0);
+}
+
+void LineSegmentDetectorImpl::detect(InputArray _image, OutputArray _lines,
+                OutputArray _width, OutputArray _prec, OutputArray _nfa)
+{
+    Mat_<double> img = _image.getMat();
+    CV_Assert(!img.empty() && img.channels() == 1);
+
+    // Convert image to double
+    img.convertTo(image, CV_64FC1);
+
+    std::vector<Vec4i> lines;
+    std::vector<double> w, p, n;
+    w_needed = _width.needed();
+    p_needed = _prec.needed();
+    n_needed = _nfa.needed();
+
+    CV_Assert((!_nfa.needed()) ||                              // NFA InputArray will be filled _only_ when
+              (_nfa.needed() && doRefine >= LSD_REFINE_ADV));  // REFINE_ADV type LineSegmentDetectorImpl object is created.
+
+    flsd(lines, w, p, n);
+
+    Mat(lines).copyTo(_lines);
+    if(w_needed) Mat(w).copyTo(_width);
+    if(p_needed) Mat(p).copyTo(_prec);
+    if(n_needed) Mat(n).copyTo(_nfa);
+}
+
+void LineSegmentDetectorImpl::flsd(std::vector<Vec4i>& lines,
+    std::vector<double>& widths, std::vector<double>& precisions,
+    std::vector<double>& nfas)
+{
+    // Angle tolerance
+    const double prec = CV_PI * ANG_TH / 180;
+    const double p = ANG_TH / 180;
+    const double rho = QUANT / sin(prec);    // gradient magnitude threshold
+
+    std::vector<coorlist> list;
+    if(SCALE != 1)
+    {
+        Mat gaussian_img;
+        const double sigma = (SCALE < 1)?(SIGMA_SCALE / SCALE):(SIGMA_SCALE);
+        const double sprec = 3;
+        const unsigned int h =  (unsigned int)(ceil(sigma * sqrt(2 * sprec * log(10.0))));
+        Size ksize(1 + 2 * h, 1 + 2 * h); // kernel size
+        GaussianBlur(image, gaussian_img, ksize, sigma);
+        // Scale image to needed size
+        resize(gaussian_img, scaled_image, Size(), SCALE, SCALE);
+        ll_angle(rho, N_BINS, list);
+    }
+    else
+    {
+        scaled_image = image;
+        ll_angle(rho, N_BINS, list);
+    }
+
+    LOG_NT = 5 * (log10(double(img_width)) + log10(double(img_height))) / 2 + log10(11.0);
+    const int min_reg_size = int(-LOG_NT/log10(p)); // minimal number of points in region that can give a meaningful event
+
+    // // Initialize region only when needed
+    // Mat region = Mat::zeros(scaled_image.size(), CV_8UC1);
+    used = Mat_<uchar>::zeros(scaled_image.size()); // zeros = NOTUSED
+    std::vector<RegionPoint> reg(img_width * img_height);
+
+    // Search for line segments
+    unsigned int ls_count = 0;
+    unsigned int list_size = list.size();
+    for(unsigned int i = 0; i < list_size; ++i)
+    {
+        unsigned int adx = list[i].p.x + list[i].p.y * img_width;
+        if((used.data[adx] == NOTUSED) && (angles_data[adx] != NOTDEF))
+        {
+            int reg_size;
+            double reg_angle;
+            region_grow(list[i].p, reg, reg_size, reg_angle, prec);
+
+            // Ignore small regions
+            if(reg_size < min_reg_size) { continue; }
+
+            // Construct rectangular approximation for the region
+            rect rec;
+            region2rect(reg, reg_size, reg_angle, prec, p, rec);
+
+            double log_nfa = -1;
+            if(doRefine > LSD_REFINE_NONE)
+            {
+                // At least REFINE_STANDARD lvl.
+                if(!refine(reg, reg_size, reg_angle, prec, p, rec, DENSITY_TH)) { continue; }
+
+                if(doRefine >= LSD_REFINE_ADV)
+                {
+                    // Compute NFA
+                    log_nfa = rect_improve(rec);
+                    if(log_nfa <= LOG_EPS) { continue; }
+                }
+            }
+            // Found new line
+            ++ls_count;
+
+            // Add the offset
+            rec.x1 += 0.5; rec.y1 += 0.5;
+            rec.x2 += 0.5; rec.y2 += 0.5;
+
+            // scale the result values if a sub-sampling was performed
+            if(SCALE != 1)
+            {
+                rec.x1 /= SCALE; rec.y1 /= SCALE;
+                rec.x2 /= SCALE; rec.y2 /= SCALE;
+                rec.width /= SCALE;
+            }
+
+            //Store the relevant data
+            lines.push_back(Vec4i(int(rec.x1), int(rec.y1), int(rec.x2), int(rec.y2)));
+            if(w_needed) widths.push_back(rec.width);
+            if(p_needed) precisions.push_back(rec.p);
+            if(n_needed && doRefine >= LSD_REFINE_ADV) nfas.push_back(log_nfa);
+
+
+            // //Add the linesID to the region on the image
+            // for(unsigned int el = 0; el < reg_size; el++)
+            // {
+            //     region.data[reg[i].x + reg[i].y * width] = ls_count;
+            // }
+        }
+    }
+}
+
+void LineSegmentDetectorImpl::ll_angle(const double& threshold,
+                                   const unsigned int& n_bins,
+                                   std::vector<coorlist>& list)
+{
+    //Initialize data
+    angles = Mat_<double>(scaled_image.size());
+    modgrad = Mat_<double>(scaled_image.size());
+
+    angles_data = angles.ptr<double>(0);
+    modgrad_data = modgrad.ptr<double>(0);
+    scaled_image_data = scaled_image.ptr<double>(0);
+
+    img_width = scaled_image.cols;
+    img_height = scaled_image.rows;
+
+    // Undefined the down and right boundaries
+    angles.row(img_height - 1).setTo(NOTDEF);
+    angles.col(img_width - 1).setTo(NOTDEF);
+
+    // Computing gradient for remaining pixels
+    CV_Assert(scaled_image.isContinuous() &&
+              modgrad.isContinuous() &&
+              angles.isContinuous());   // Accessing image data linearly
+
+    double max_grad = -1;
+    for(int y = 0; y < img_height - 1; ++y)
+    {
+        for(int addr = y * img_width, addr_end = addr + img_width - 1; addr < addr_end; ++addr)
+        {
+            double DA = scaled_image_data[addr + img_width + 1] - scaled_image_data[addr];
+            double BC = scaled_image_data[addr + 1] - scaled_image_data[addr + img_width];
+            double gx = DA + BC;    // gradient x component
+            double gy = DA - BC;    // gradient y component
+            double norm = std::sqrt((gx * gx + gy * gy) / 4); // gradient norm
+
+            modgrad_data[addr] = norm;    // store gradient
+
+            if (norm <= threshold)  // norm too small, gradient no defined
+            {
+                angles_data[addr] = NOTDEF;
+            }
+            else
+            {
+                angles_data[addr] = fastAtan2(float(gx), float(-gy)) * DEG_TO_RADS;  // gradient angle computation
+                if (norm > max_grad) { max_grad = norm; }
+            }
+
+        }
+    }
+
+    // Compute histogram of gradient values
+    list = std::vector<coorlist>(img_width * img_height);
+    std::vector<coorlist*> range_s(n_bins);
+    std::vector<coorlist*> range_e(n_bins);
+    unsigned int count = 0;
+    double bin_coef = (max_grad > 0) ? double(n_bins - 1) / max_grad : 0; // If all image is smooth, max_grad <= 0
+
+    for(int y = 0; y < img_height - 1; ++y)
+    {
+        const double* norm = modgrad_data + y * img_width;
+        for(int x = 0; x < img_width - 1; ++x, ++norm)
+        {
+            // Store the point in the right bin according to its norm
+            int i = int((*norm) * bin_coef);
+            if(!range_e[i])
+            {
+                range_e[i] = range_s[i] = &list[count];
+                ++count;
+            }
+            else
+            {
+                range_e[i]->next = &list[count];
+                range_e[i] = &list[count];
+                ++count;
+            }
+            range_e[i]->p = Point(x, y);
+            range_e[i]->next = 0;
+        }
+    }
+
+    // Sort
+    int idx = n_bins - 1;
+    for(;idx > 0 && !range_s[idx]; --idx);
+    coorlist* start = range_s[idx];
+    coorlist* end = range_e[idx];
+    if(start)
+    {
+        while(idx > 0)
+        {
+            --idx;
+            if(range_s[idx])
+            {
+                end->next = range_s[idx];
+                end = range_e[idx];
+            }
+        }
+    }
+}
+
+void LineSegmentDetectorImpl::region_grow(const Point2i& s, std::vector<RegionPoint>& reg,
+                                      int& reg_size, double& reg_angle, const double& prec)
+{
+    // Point to this region
+    reg_size = 1;
+    reg[0].x = s.x;
+    reg[0].y = s.y;
+    int addr = s.x + s.y * img_width;
+    reg[0].used = used.data + addr;
+    reg_angle = angles_data[addr];
+    reg[0].angle = reg_angle;
+    reg[0].modgrad = modgrad_data[addr];
+
+    float sumdx = float(std::cos(reg_angle));
+    float sumdy = float(std::sin(reg_angle));
+    *reg[0].used = USED;
+
+    //Try neighboring regions
+    for(int i = 0; i < reg_size; ++i)
+    {
+        const RegionPoint& rpoint = reg[i];
+        int xx_min = std::max(rpoint.x - 1, 0), xx_max = std::min(rpoint.x + 1, img_width - 1);
+        int yy_min = std::max(rpoint.y - 1, 0), yy_max = std::min(rpoint.y + 1, img_height - 1);
+        for(int yy = yy_min; yy <= yy_max; ++yy)
+        {
+            int c_addr = xx_min + yy * img_width;
+            for(int xx = xx_min; xx <= xx_max; ++xx, ++c_addr)
+            {
+                if((used.data[c_addr] != USED) &&
+                   (isAligned(c_addr, reg_angle, prec)))
+                {
+                    // Add point
+                    used.data[c_addr] = USED;
+                    RegionPoint& region_point = reg[reg_size];
+                    region_point.x = xx;
+                    region_point.y = yy;
+                    region_point.used = &(used.data[c_addr]);
+                    region_point.modgrad = modgrad_data[c_addr];
+                    const double& angle = angles_data[c_addr];
+                    region_point.angle = angle;
+                    ++reg_size;
+
+                    // Update region's angle
+                    sumdx += cos(float(angle));
+                    sumdy += sin(float(angle));
+                    // reg_angle is used in the isAligned, so it needs to be updates?
+                    reg_angle = fastAtan2(sumdy, sumdx) * DEG_TO_RADS;
+                }
+            }
+        }
+    }
+}
+
+void LineSegmentDetectorImpl::region2rect(const std::vector<RegionPoint>& reg, const int reg_size,
+                                      const double reg_angle, const double prec, const double p, rect& rec) const
+{
+    double x = 0, y = 0, sum = 0;
+    for(int i = 0; i < reg_size; ++i)
+    {
+        const RegionPoint& pnt = reg[i];
+        const double& weight = pnt.modgrad;
+        x += double(pnt.x) * weight;
+        y += double(pnt.y) * weight;
+        sum += weight;
+    }
+
+    // Weighted sum must differ from 0
+    CV_Assert(sum > 0);
+
+    x /= sum;
+    y /= sum;
+
+    double theta = get_theta(reg, reg_size, x, y, reg_angle, prec);
+
+    // Find length and width
+    double dx = cos(theta);
+    double dy = sin(theta);
+    double l_min = 0, l_max = 0, w_min = 0, w_max = 0;
+
+    for(int i = 0; i < reg_size; ++i)
+    {
+        double regdx = double(reg[i].x) - x;
+        double regdy = double(reg[i].y) - y;
+
+        double l = regdx * dx + regdy * dy;
+        double w = -regdx * dy + regdy * dx;
+
+        if(l > l_max) l_max = l;
+        else if(l < l_min) l_min = l;
+        if(w > w_max) w_max = w;
+        else if(w < w_min) w_min = w;
+    }
+
+    // Store values
+    rec.x1 = x + l_min * dx;
+    rec.y1 = y + l_min * dy;
+    rec.x2 = x + l_max * dx;
+    rec.y2 = y + l_max * dy;
+    rec.width = w_max - w_min;
+    rec.x = x;
+    rec.y = y;
+    rec.theta = theta;
+    rec.dx = dx;
+    rec.dy = dy;
+    rec.prec = prec;
+    rec.p = p;
+
+    // Min width of 1 pixel
+    if(rec.width < 1.0) rec.width = 1.0;
+}
+
+double LineSegmentDetectorImpl::get_theta(const std::vector<RegionPoint>& reg, const int& reg_size, const double& x,
+                                      const double& y, const double& reg_angle, const double& prec) const
+{
+    double Ixx = 0.0;
+    double Iyy = 0.0;
+    double Ixy = 0.0;
+
+    // Compute inertia matrix
+    for(int i = 0; i < reg_size; ++i)
+    {
+        const double& regx = reg[i].x;
+        const double& regy = reg[i].y;
+        const double& weight = reg[i].modgrad;
+        double dx = regx - x;
+        double dy = regy - y;
+        Ixx += dy * dy * weight;
+        Iyy += dx * dx * weight;
+        Ixy -= dx * dy * weight;
+    }
+
+    // Check if inertia matrix is null
+    CV_Assert(!(double_equal(Ixx, 0) && double_equal(Iyy, 0) && double_equal(Ixy, 0)));
+
+    // Compute smallest eigenvalue
+    double lambda = 0.5 * (Ixx + Iyy - sqrt((Ixx - Iyy) * (Ixx - Iyy) + 4.0 * Ixy * Ixy));
+
+    // Compute angle
+    double theta = (fabs(Ixx)>fabs(Iyy))?
+                    double(fastAtan2(float(lambda - Ixx), float(Ixy))):
+                    double(fastAtan2(float(Ixy), float(lambda - Iyy))); // in degs
+    theta *= DEG_TO_RADS;
+
+    // Correct angle by 180 deg if necessary
+    if(angle_diff(theta, reg_angle) > prec) { theta += CV_PI; }
+
+    return theta;
+}
+
+bool LineSegmentDetectorImpl::refine(std::vector<RegionPoint>& reg, int& reg_size, double reg_angle,
+                                 const double prec, double p, rect& rec, const double& density_th)
+{
+    double density = double(reg_size) / (dist(rec.x1, rec.y1, rec.x2, rec.y2) * rec.width);
+
+    if (density >= density_th) { return true; }
+
+    // Try to reduce angle tolerance
+    double xc = double(reg[0].x);
+    double yc = double(reg[0].y);
+    const double& ang_c = reg[0].angle;
+    double sum = 0, s_sum = 0;
+    int n = 0;
+
+    for (int i = 0; i < reg_size; ++i)
+    {
+        *(reg[i].used) = NOTUSED;
+        if (dist(xc, yc, reg[i].x, reg[i].y) < rec.width)
+        {
+            const double& angle = reg[i].angle;
+            double ang_d = angle_diff_signed(angle, ang_c);
+            sum += ang_d;
+            s_sum += ang_d * ang_d;
+            ++n;
+        }
+    }
+    double mean_angle = sum / double(n);
+    // 2 * standard deviation
+    double tau = 2.0 * sqrt((s_sum - 2.0 * mean_angle * sum) / double(n) + mean_angle * mean_angle);
+
+    // Try new region
+    region_grow(Point(reg[0].x, reg[0].y), reg, reg_size, reg_angle, tau);
+
+    if (reg_size < 2) { return false; }
+
+    region2rect(reg, reg_size, reg_angle, prec, p, rec);
+    density = double(reg_size) / (dist(rec.x1, rec.y1, rec.x2, rec.y2) * rec.width);
+
+    if (density < density_th)
+    {
+        return reduce_region_radius(reg, reg_size, reg_angle, prec, p, rec, density, density_th);
+    }
+    else
+    {
+        return true;
+    }
+}
+
+bool LineSegmentDetectorImpl::reduce_region_radius(std::vector<RegionPoint>& reg, int& reg_size, double reg_angle,
+                const double prec, double p, rect& rec, double density, const double& density_th)
+{
+    // Compute region's radius
+    double xc = double(reg[0].x);
+    double yc = double(reg[0].y);
+    double radSq1 = distSq(xc, yc, rec.x1, rec.y1);
+    double radSq2 = distSq(xc, yc, rec.x2, rec.y2);
+    double radSq = radSq1 > radSq2 ? radSq1 : radSq2;
+
+    while(density < density_th)
+    {
+        radSq *= 0.75*0.75; // Reduce region's radius to 75% of its value
+        // Remove points from the region and update 'used' map
+        for(int i = 0; i < reg_size; ++i)
+        {
+            if(distSq(xc, yc, double(reg[i].x), double(reg[i].y)) > radSq)
+            {
+                // Remove point from the region
+                *(reg[i].used) = NOTUSED;
+                std::swap(reg[i], reg[reg_size - 1]);
+                --reg_size;
+                --i; // To avoid skipping one point
+            }
+        }
+
+        if(reg_size < 2) { return false; }
+
+        // Re-compute rectangle
+        region2rect(reg, reg_size ,reg_angle, prec, p, rec);
+
+        // Re-compute region points density
+        density = double(reg_size) /
+                  (dist(rec.x1, rec.y1, rec.x2, rec.y2) * rec.width);
+    }
+
+    return true;
+}
+
+double LineSegmentDetectorImpl::rect_improve(rect& rec) const
+{
+    double delta = 0.5;
+    double delta_2 = delta / 2.0;
+
+    double log_nfa = rect_nfa(rec);
+
+    if(log_nfa > LOG_EPS) return log_nfa; // Good rectangle
+
+    // Try to improve
+    // Finer precision
+    rect r = rect(rec); // Copy
+    for(int n = 0; n < 5; ++n)
+    {
+        r.p /= 2;
+        r.prec = r.p * CV_PI;
+        double log_nfa_new = rect_nfa(r);
+        if(log_nfa_new > log_nfa)
+        {
+            log_nfa = log_nfa_new;
+            rec = rect(r);
+        }
+    }
+    if(log_nfa > LOG_EPS) return log_nfa;
+
+    // Try to reduce width
+    r = rect(rec);
+    for(unsigned int n = 0; n < 5; ++n)
+    {
+        if((r.width - delta) >= 0.5)
+        {
+            r.width -= delta;
+            double log_nfa_new = rect_nfa(r);
+            if(log_nfa_new > log_nfa)
+            {
+                rec = rect(r);
+                log_nfa = log_nfa_new;
+            }
+        }
+    }
+    if(log_nfa > LOG_EPS) return log_nfa;
+
+    // Try to reduce one side of rectangle
+    r = rect(rec);
+    for(unsigned int n = 0; n < 5; ++n)
+    {
+        if((r.width - delta) >= 0.5)
+        {
+            r.x1 += -r.dy * delta_2;
+            r.y1 +=  r.dx * delta_2;
+            r.x2 += -r.dy * delta_2;
+            r.y2 +=  r.dx * delta_2;
+            r.width -= delta;
+            double log_nfa_new = rect_nfa(r);
+            if(log_nfa_new > log_nfa)
+            {
+                rec = rect(r);
+                log_nfa = log_nfa_new;
+            }
+        }
+    }
+    if(log_nfa > LOG_EPS) return log_nfa;
+
+    // Try to reduce other side of rectangle
+    r = rect(rec);
+    for(unsigned int n = 0; n < 5; ++n)
+    {
+        if((r.width - delta) >= 0.5)
+        {
+            r.x1 -= -r.dy * delta_2;
+            r.y1 -=  r.dx * delta_2;
+            r.x2 -= -r.dy * delta_2;
+            r.y2 -=  r.dx * delta_2;
+            r.width -= delta;
+            double log_nfa_new = rect_nfa(r);
+            if(log_nfa_new > log_nfa)
+            {
+                rec = rect(r);
+                log_nfa = log_nfa_new;
+            }
+        }
+    }
+    if(log_nfa > LOG_EPS) return log_nfa;
+
+    // Try finer precision
+    r = rect(rec);
+    for(unsigned int n = 0; n < 5; ++n)
+    {
+        if((r.width - delta) >= 0.5)
+        {
+            r.p /= 2;
+            r.prec = r.p * CV_PI;
+            double log_nfa_new = rect_nfa(r);
+            if(log_nfa_new > log_nfa)
+            {
+                rec = rect(r);
+                log_nfa = log_nfa_new;
+            }
+        }
+    }
+
+    return log_nfa;
+}
+
+double LineSegmentDetectorImpl::rect_nfa(const rect& rec) const
+{
+    int total_pts = 0, alg_pts = 0;
+    double half_width = rec.width / 2.0;
+    double dyhw = rec.dy * half_width;
+    double dxhw = rec.dx * half_width;
+
+    std::vector<edge> ordered_x(4);
+    edge* min_y = &ordered_x[0];
+    edge* max_y = &ordered_x[0]; // Will be used for loop range
+
+    ordered_x[0].p.x = int(rec.x1 - dyhw); ordered_x[0].p.y = int(rec.y1 + dxhw); ordered_x[0].taken = false;
+    ordered_x[1].p.x = int(rec.x2 - dyhw); ordered_x[1].p.y = int(rec.y2 + dxhw); ordered_x[1].taken = false;
+    ordered_x[2].p.x = int(rec.x2 + dyhw); ordered_x[2].p.y = int(rec.y2 - dxhw); ordered_x[2].taken = false;
+    ordered_x[3].p.x = int(rec.x1 + dyhw); ordered_x[3].p.y = int(rec.y1 - dxhw); ordered_x[3].taken = false;
+
+    std::sort(ordered_x.begin(), ordered_x.end(), AsmallerB_XoverY);
+
+    // Find min y. And mark as taken. find max y.
+    for(unsigned int i = 1; i < 4; ++i)
+    {
+        if(min_y->p.y > ordered_x[i].p.y) {min_y = &ordered_x[i]; }
+        if(max_y->p.y < ordered_x[i].p.y) {max_y = &ordered_x[i]; }
+    }
+    min_y->taken = true;
+
+    // Find leftmost untaken point;
+    edge* leftmost = 0;
+    for(unsigned int i = 0; i < 4; ++i)
+    {
+        if(!ordered_x[i].taken)
+        {
+            if(!leftmost) // if uninitialized
+            {
+                leftmost = &ordered_x[i];
+            }
+            else if (leftmost->p.x > ordered_x[i].p.x)
+            {
+                leftmost = &ordered_x[i];
+            }
+        }
+    }
+    leftmost->taken = true;
+
+    // Find rightmost untaken point;
+    edge* rightmost = 0;
+    for(unsigned int i = 0; i < 4; ++i)
+    {
+        if(!ordered_x[i].taken)
+        {
+            if(!rightmost) // if uninitialized
+            {
+                rightmost = &ordered_x[i];
+            }
+            else if (rightmost->p.x < ordered_x[i].p.x)
+            {
+                rightmost = &ordered_x[i];
+            }
+        }
+    }
+    rightmost->taken = true;
+
+    // Find last untaken point;
+    edge* tailp = 0;
+    for(unsigned int i = 0; i < 4; ++i)
+    {
+        if(!ordered_x[i].taken)
+        {
+            if(!tailp) // if uninitialized
+            {
+                tailp = &ordered_x[i];
+            }
+            else if (tailp->p.x > ordered_x[i].p.x)
+            {
+                tailp = &ordered_x[i];
+            }
+        }
+    }
+    tailp->taken = true;
+
+    double flstep = (min_y->p.y != leftmost->p.y) ?
+                    (min_y->p.x - leftmost->p.x) / (min_y->p.y - leftmost->p.y) : 0; //first left step
+    double slstep = (leftmost->p.y != tailp->p.x) ?
+                    (leftmost->p.x - tailp->p.x) / (leftmost->p.y - tailp->p.x) : 0; //second left step
+
+    double frstep = (min_y->p.y != rightmost->p.y) ?
+                    (min_y->p.x - rightmost->p.x) / (min_y->p.y - rightmost->p.y) : 0; //first right step
+    double srstep = (rightmost->p.y != tailp->p.x) ?
+                    (rightmost->p.x - tailp->p.x) / (rightmost->p.y - tailp->p.x) : 0; //second right step
+
+    double lstep = flstep, rstep = frstep;
+
+    double left_x = min_y->p.x, right_x = min_y->p.x;
+
+    // Loop around all points in the region and count those that are aligned.
+    int min_iter = std::max(min_y->p.y, 0);
+    int max_iter = std::min(max_y->p.y, img_height - 1);
+    for(int y = min_iter; y <= max_iter; ++y)
+    {
+        int adx = y * img_width + int(left_x);
+        for(int x = int(left_x); x <= int(right_x); ++x, ++adx)
+        {
+            ++total_pts;
+            if(isAligned(adx, rec.theta, rec.prec))
+            {
+                ++alg_pts;
+            }
+        }
+
+        if(y >= leftmost->p.y) { lstep = slstep; }
+        if(y >= rightmost->p.y) { rstep = srstep; }
+
+        left_x += lstep;
+        right_x += rstep;
+    }
+
+    return nfa(total_pts, alg_pts, rec.p);
+}
+
+double LineSegmentDetectorImpl::nfa(const int& n, const int& k, const double& p) const
+{
+    // Trivial cases
+    if(n == 0 || k == 0) { return -LOG_NT; }
+    if(n == k) { return -LOG_NT - double(n) * log10(p); }
+
+    double p_term = p / (1 - p);
+
+    double log1term = (double(n) + 1) - log_gamma(double(k) + 1)
+                - log_gamma(double(n-k) + 1)
+                + double(k) * log(p) + double(n-k) * log(1.0 - p);
+    double term = exp(log1term);
+
+    if(double_equal(term, 0))
+    {
+        if(k > n * p) return -log1term / M_LN10 - LOG_NT;
+        else return -LOG_NT;
+    }
+
+    // Compute more terms if needed
+    double bin_tail = term;
+    double tolerance = 0.1; // an error of 10% in the result is accepted
+    for(int i = k + 1; i <= n; ++i)
+    {
+        double bin_term = double(n - i + 1) / double(i);
+        double mult_term = bin_term * p_term;
+        term *= mult_term;
+        bin_tail += term;
+        if(bin_term < 1)
+        {
+            double err = term * ((1 - pow(mult_term, double(n-i+1))) / (1 - mult_term) - 1);
+            if(err < tolerance * fabs(-log10(bin_tail) - LOG_NT) * bin_tail) break;
+        }
+
+    }
+    return -log10(bin_tail) - LOG_NT;
+}
+
+inline bool LineSegmentDetectorImpl::isAligned(const int& address, const double& theta, const double& prec) const
+{
+    if(address < 0) { return false; }
+    const double& a = angles_data[address];
+    if(a == NOTDEF) { return false; }
+
+    // It is assumed that 'theta' and 'a' are in the range [-pi,pi]
+    double n_theta = theta - a;
+    if(n_theta < 0) { n_theta = -n_theta; }
+    if(n_theta > M_3_2_PI)
+    {
+        n_theta -= M_2__PI;
+        if(n_theta < 0) n_theta = -n_theta;
+    }
+
+    return n_theta <= prec;
+}
+
+
+void LineSegmentDetectorImpl::drawSegments(InputOutputArray _image, InputArray lines)
+{
+    CV_Assert(!_image.empty() && (_image.channels() == 1 || _image.channels() == 3));
+
+    Mat gray;
+    if (_image.channels() == 1)
+    {
+        gray = _image.getMatRef();
+    }
+    else if (_image.channels() == 3)
+    {
+        cvtColor(_image, gray, CV_BGR2GRAY);
+    }
+
+    // Create a 3 channel image in order to draw colored lines
+    std::vector<Mat> planes;
+    planes.push_back(gray);
+    planes.push_back(gray);
+    planes.push_back(gray);
+
+    merge(planes, _image);
+
+    Mat _lines;
+    _lines = lines.getMat();
+
+    // Draw segments
+    for(int i = 0; i < _lines.size().width; ++i)
+    {
+        const Vec4i& v = _lines.at<Vec4i>(i);
+        Point b(v[0], v[1]);
+        Point e(v[2], v[3]);
+        line(_image.getMatRef(), b, e, Scalar(0, 0, 255), 1);
+    }
+}
+
+
+int LineSegmentDetectorImpl::compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray _image)
+{
+    Size sz = size;
+    if (_image.needed() && _image.size() != size) sz = _image.size();
+    CV_Assert(sz.area());
+
+    Mat_<uchar> I1 = Mat_<uchar>::zeros(sz);
+    Mat_<uchar> I2 = Mat_<uchar>::zeros(sz);
+
+    Mat _lines1;
+    Mat _lines2;
+    _lines1 = lines1.getMat();
+    _lines2 = lines2.getMat();
+    // Draw segments
+    std::vector<Mat> _lines;
+    for(int i = 0; i < _lines1.size().width; ++i)
+    {
+        Point b(_lines1.at<Vec4i>(i)[0], _lines1.at<Vec4i>(i)[1]);
+        Point e(_lines1.at<Vec4i>(i)[2], _lines1.at<Vec4i>(i)[3]);
+        line(I1, b, e, Scalar::all(255), 1);
+    }
+    for(int i = 0; i < _lines2.size().width; ++i)
+    {
+        Point b(_lines2.at<Vec4i>(i)[0], _lines2.at<Vec4i>(i)[1]);
+        Point e(_lines2.at<Vec4i>(i)[2], _lines2.at<Vec4i>(i)[3]);
+        line(I2, b, e, Scalar::all(255), 1);
+    }
+
+    // Count the pixels that don't agree
+    Mat Ixor;
+    bitwise_xor(I1, I2, Ixor);
+    int N = countNonZero(Ixor);
+
+    if (_image.needed())
+    {
+        CV_Assert(_image.channels() == 3);
+        Mat img = _image.getMatRef();
+        CV_Assert(img.isContinuous() && I1.isContinuous() && I2.isContinuous());
+
+        for (unsigned int i = 0; i < I1.total(); ++i)
+        {
+            uchar i1 = I1.data[i];
+            uchar i2 = I2.data[i];
+            if (i1 || i2)
+            {
+                unsigned int base_idx = i * 3;
+                if (i1) img.data[base_idx] = 255;
+                else img.data[base_idx] = 0;
+                img.data[base_idx + 1] = 0;
+                if (i2) img.data[base_idx + 2] = 255;
+                else img.data[base_idx + 2] = 0;
+            }
+        }
+    }
+
+    return N;
+}
+
+} // namespace cv
diff --git a/modules/imgproc/src/matchcontours.cpp b/modules/imgproc/src/matchcontours.cpp
index eca385900..1ac6c1662 100644
--- a/modules/imgproc/src/matchcontours.cpp
+++ b/modules/imgproc/src/matchcontours.cpp
@@ -142,7 +142,7 @@ double cv::matchShapes(InputArray contour1, InputArray contour2, int method, dou
     default:
         CV_Error( CV_StsBadArg, "Unknown comparison method" );
     }
-    
+
     return result;
 }
 
diff --git a/modules/imgproc/src/moments.cpp b/modules/imgproc/src/moments.cpp
index 40b44df7c..14e672abd 100644
--- a/modules/imgproc/src/moments.cpp
+++ b/modules/imgproc/src/moments.cpp
@@ -159,7 +159,7 @@ static Moments contourMoments( const Mat& contour )
     if( fabs(a00) > FLT_EPSILON )
     {
         double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60;
-        
+
         if( a00 > 0 )
         {
             db1_2 = 0.5;
@@ -464,7 +464,7 @@ cv::Moments cv::moments( InputArray _src, bool binary )
             m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
         }
     }
-    
+
     completeMomentState( &m );
     return m;
 }
diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp
index e0e27cc2b..845e00124 100644
--- a/modules/imgproc/src/morph.cpp
+++ b/modules/imgproc/src/morph.cpp
@@ -857,42 +857,42 @@ cv::Ptr<cv::BaseRowFilter> cv::getMorphologyRowFilter(int op, int type, int ksiz
     if( op == MORPH_ERODE )
     {
         if( depth == CV_8U )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MinOp<uchar>,
-                                      ErodeRowVec8u>(ksize, anchor));
+            return makePtr<MorphRowFilter<MinOp<uchar>,
+                                      ErodeRowVec8u> >(ksize, anchor);
         if( depth == CV_16U )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MinOp<ushort>,
-                                      ErodeRowVec16u>(ksize, anchor));
+            return makePtr<MorphRowFilter<MinOp<ushort>,
+                                      ErodeRowVec16u> >(ksize, anchor);
         if( depth == CV_16S )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MinOp<short>,
-                                      ErodeRowVec16s>(ksize, anchor));
+            return makePtr<MorphRowFilter<MinOp<short>,
+                                      ErodeRowVec16s> >(ksize, anchor);
         if( depth == CV_32F )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MinOp<float>,
-                                      ErodeRowVec32f>(ksize, anchor));
+            return makePtr<MorphRowFilter<MinOp<float>,
+                                      ErodeRowVec32f> >(ksize, anchor);
         if( depth == CV_64F )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MinOp<double>,
-                                      ErodeRowVec64f>(ksize, anchor));
+            return makePtr<MorphRowFilter<MinOp<double>,
+                                      ErodeRowVec64f> >(ksize, anchor);
     }
     else
     {
         if( depth == CV_8U )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MaxOp<uchar>,
-                                      DilateRowVec8u>(ksize, anchor));
+            return makePtr<MorphRowFilter<MaxOp<uchar>,
+                                      DilateRowVec8u> >(ksize, anchor);
         if( depth == CV_16U )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MaxOp<ushort>,
-                                      DilateRowVec16u>(ksize, anchor));
+            return makePtr<MorphRowFilter<MaxOp<ushort>,
+                                      DilateRowVec16u> >(ksize, anchor);
         if( depth == CV_16S )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MaxOp<short>,
-                                      DilateRowVec16s>(ksize, anchor));
+            return makePtr<MorphRowFilter<MaxOp<short>,
+                                      DilateRowVec16s> >(ksize, anchor);
         if( depth == CV_32F )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MaxOp<float>,
-                                      DilateRowVec32f>(ksize, anchor));
+            return makePtr<MorphRowFilter<MaxOp<float>,
+                                      DilateRowVec32f> >(ksize, anchor);
         if( depth == CV_64F )
-            return Ptr<BaseRowFilter>(new MorphRowFilter<MaxOp<double>,
-                                      DilateRowVec64f>(ksize, anchor));
+            return makePtr<MorphRowFilter<MaxOp<double>,
+                                      DilateRowVec64f> >(ksize, anchor);
     }
 
     CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type));
-    return Ptr<BaseRowFilter>(0);
+    return Ptr<BaseRowFilter>();
 }
 
 cv::Ptr<cv::BaseColumnFilter> cv::getMorphologyColumnFilter(int op, int type, int ksize, int anchor)
@@ -904,42 +904,42 @@ cv::Ptr<cv::BaseColumnFilter> cv::getMorphologyColumnFilter(int op, int type, in
     if( op == MORPH_ERODE )
     {
         if( depth == CV_8U )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MinOp<uchar>,
-                                         ErodeColumnVec8u>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MinOp<uchar>,
+                                         ErodeColumnVec8u> >(ksize, anchor);
         if( depth == CV_16U )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MinOp<ushort>,
-                                         ErodeColumnVec16u>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MinOp<ushort>,
+                                         ErodeColumnVec16u> >(ksize, anchor);
         if( depth == CV_16S )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MinOp<short>,
-                                         ErodeColumnVec16s>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MinOp<short>,
+                                         ErodeColumnVec16s> >(ksize, anchor);
         if( depth == CV_32F )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MinOp<float>,
-                                         ErodeColumnVec32f>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MinOp<float>,
+                                         ErodeColumnVec32f> >(ksize, anchor);
         if( depth == CV_64F )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MinOp<double>,
-                                         ErodeColumnVec64f>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MinOp<double>,
+                                         ErodeColumnVec64f> >(ksize, anchor);
     }
     else
     {
         if( depth == CV_8U )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MaxOp<uchar>,
-                                         DilateColumnVec8u>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MaxOp<uchar>,
+                                         DilateColumnVec8u> >(ksize, anchor);
         if( depth == CV_16U )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MaxOp<ushort>,
-                                         DilateColumnVec16u>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MaxOp<ushort>,
+                                         DilateColumnVec16u> >(ksize, anchor);
         if( depth == CV_16S )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MaxOp<short>,
-                                         DilateColumnVec16s>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MaxOp<short>,
+                                         DilateColumnVec16s> >(ksize, anchor);
         if( depth == CV_32F )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MaxOp<float>,
-                                         DilateColumnVec32f>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MaxOp<float>,
+                                         DilateColumnVec32f> >(ksize, anchor);
         if( depth == CV_64F )
-            return Ptr<BaseColumnFilter>(new MorphColumnFilter<MaxOp<double>,
-                                         DilateColumnVec64f>(ksize, anchor));
+            return makePtr<MorphColumnFilter<MaxOp<double>,
+                                         DilateColumnVec64f> >(ksize, anchor);
     }
 
     CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type));
-    return Ptr<BaseColumnFilter>(0);
+    return Ptr<BaseColumnFilter>();
 }
 
 
@@ -952,32 +952,32 @@ cv::Ptr<cv::BaseFilter> cv::getMorphologyFilter(int op, int type, InputArray _ke
     if( op == MORPH_ERODE )
     {
         if( depth == CV_8U )
-            return Ptr<BaseFilter>(new MorphFilter<MinOp<uchar>, ErodeVec8u>(kernel, anchor));
+            return makePtr<MorphFilter<MinOp<uchar>, ErodeVec8u> >(kernel, anchor);
         if( depth == CV_16U )
-            return Ptr<BaseFilter>(new MorphFilter<MinOp<ushort>, ErodeVec16u>(kernel, anchor));
+            return makePtr<MorphFilter<MinOp<ushort>, ErodeVec16u> >(kernel, anchor);
         if( depth == CV_16S )
-            return Ptr<BaseFilter>(new MorphFilter<MinOp<short>, ErodeVec16s>(kernel, anchor));
+            return makePtr<MorphFilter<MinOp<short>, ErodeVec16s> >(kernel, anchor);
         if( depth == CV_32F )
-            return Ptr<BaseFilter>(new MorphFilter<MinOp<float>, ErodeVec32f>(kernel, anchor));
+            return makePtr<MorphFilter<MinOp<float>, ErodeVec32f> >(kernel, anchor);
         if( depth == CV_64F )
-            return Ptr<BaseFilter>(new MorphFilter<MinOp<double>, ErodeVec64f>(kernel, anchor));
+            return makePtr<MorphFilter<MinOp<double>, ErodeVec64f> >(kernel, anchor);
     }
     else
     {
         if( depth == CV_8U )
-            return Ptr<BaseFilter>(new MorphFilter<MaxOp<uchar>, DilateVec8u>(kernel, anchor));
+            return makePtr<MorphFilter<MaxOp<uchar>, DilateVec8u> >(kernel, anchor);
         if( depth == CV_16U )
-            return Ptr<BaseFilter>(new MorphFilter<MaxOp<ushort>, DilateVec16u>(kernel, anchor));
+            return makePtr<MorphFilter<MaxOp<ushort>, DilateVec16u> >(kernel, anchor);
         if( depth == CV_16S )
-            return Ptr<BaseFilter>(new MorphFilter<MaxOp<short>, DilateVec16s>(kernel, anchor));
+            return makePtr<MorphFilter<MaxOp<short>, DilateVec16s> >(kernel, anchor);
         if( depth == CV_32F )
-            return Ptr<BaseFilter>(new MorphFilter<MaxOp<float>, DilateVec32f>(kernel, anchor));
+            return makePtr<MorphFilter<MaxOp<float>, DilateVec32f> >(kernel, anchor);
         if( depth == CV_64F )
-            return Ptr<BaseFilter>(new MorphFilter<MaxOp<double>, DilateVec64f>(kernel, anchor));
+            return makePtr<MorphFilter<MaxOp<double>, DilateVec64f> >(kernel, anchor);
     }
 
     CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type));
-    return Ptr<BaseFilter>(0);
+    return Ptr<BaseFilter>();
 }
 
 
@@ -1020,8 +1020,8 @@ cv::Ptr<cv::FilterEngine> cv::createMorphologyFilter( int op, int type, InputArr
                                        depth == CV_32F ? (double)-FLT_MAX : -DBL_MAX);
     }
 
-    return Ptr<FilterEngine>(new FilterEngine(filter2D, rowFilter, columnFilter,
-                                              type, type, type, _rowBorderType, _columnBorderType, borderValue ));
+    return makePtr<FilterEngine>(filter2D, rowFilter, columnFilter,
+                                 type, type, type, _rowBorderType, _columnBorderType, borderValue );
 }
 
 
@@ -1136,17 +1136,171 @@ private:
     Scalar borderValue;
 };
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel,
+                              const Size& ksize, const Point &anchor, bool rectKernel)
+{
+    int type = src.type();
+    const Mat* _src = &src;
+    Mat temp;
+    if( src.data == dst.data )
+    {
+        src.copyTo(temp);
+        _src = &temp;
+    }
+    //DEPRECATED. Allocates and initializes morphology state structure for erosion or dilation operation.
+    typedef IppStatus (CV_STDCALL* ippiMorphologyInitAllocFunc)(int, const void*, IppiSize, IppiPoint, IppiMorphState **);
+    typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int,
+                                                                      IppiSize, IppiBorderType, IppiMorphState *);
+    typedef IppStatus (CV_STDCALL* ippiFilterMinMaxGetBufferSizeFunc)(int, IppiSize, int*);
+    typedef IppStatus (CV_STDCALL* ippiFilterMinMaxBorderReplicateFunc)(const void*, int, void*, int,
+                                                                        IppiSize, IppiSize, IppiPoint, void*);
+
+    ippiMorphologyInitAllocFunc initAllocFunc = 0;
+    ippiMorphologyBorderReplicateFunc morphFunc = 0;
+    ippiFilterMinMaxGetBufferSizeFunc getBufSizeFunc = 0;
+    ippiFilterMinMaxBorderReplicateFunc morphRectFunc = 0;
+
+    #define IPP_MORPH_CASE(type, flavor) \
+    case type: \
+        initAllocFunc = (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_##flavor; \
+        morphFunc = op == MORPH_ERODE ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_##flavor : \
+                                        (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_##flavor; \
+        getBufSizeFunc = (ippiFilterMinMaxGetBufferSizeFunc)ippiFilterMinGetBufferSize_##flavor; \
+        morphRectFunc = op == MORPH_ERODE ? (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMinBorderReplicate_##flavor : \
+                                            (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMaxBorderReplicate_##flavor; \
+        break
+
+    switch( type )
+    {
+    IPP_MORPH_CASE(CV_8UC1, 8u_C1R);
+    IPP_MORPH_CASE(CV_8UC3, 8u_C3R);
+    IPP_MORPH_CASE(CV_8UC4, 8u_C4R);
+    IPP_MORPH_CASE(CV_32FC1, 32f_C1R);
+    IPP_MORPH_CASE(CV_32FC3, 32f_C3R);
+    IPP_MORPH_CASE(CV_32FC4, 32f_C4R);
+    default:
+        return false;
+    }
+    #undef IPP_MORPH_CASE
+
+    IppiSize roiSize = {src.cols, src.rows};
+    IppiSize kernelSize = {ksize.width, ksize.height};
+    IppiPoint point = {anchor.x, anchor.y};
+
+    if( !rectKernel && morphFunc && initAllocFunc )
+    {
+        IppiMorphState* pState;
+        if( initAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 )
+            return false;
+        bool is_ok = morphFunc( _src->data, (int)_src->step[0],
+                               dst.data, (int)dst.step[0],
+                               roiSize, ippBorderRepl, pState ) >= 0;
+        ippiMorphologyFree(pState);
+        return is_ok;
+    }
+    else if( rectKernel && morphRectFunc && getBufSizeFunc )
+    {
+        int bufSize = 0;
+        if( getBufSizeFunc( src.cols, kernelSize, &bufSize) < 0 )
+            return false;
+        AutoBuffer<uchar> buf(bufSize + 64);
+        uchar* buffer = alignPtr((uchar*)buf, 32);
+        return morphRectFunc(_src->data, (int)_src->step[0], dst.data, (int)dst.step[0],
+                             roiSize, kernelSize, point, buffer) >= 0;
+    }
+    return false;
+}
+
+static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
+    const Mat& _kernel, Point anchor, int iterations,
+    int borderType, const Scalar &borderValue)
+{
+    Mat src = _src.getMat(), kernel = _kernel;
+    if( !( src.depth() == CV_8U || src.depth() == CV_32F ) || ( iterations > 1 ) ||
+        !( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue()) )
+        || !( op == MORPH_DILATE || op == MORPH_ERODE) )
+        return false;
+    if( borderType == cv::BORDER_CONSTANT && kernel.data )
+    {
+        int x, y;
+        for( y = 0; y < kernel.rows; y++ )
+        {
+            if( kernel.at<uchar>(y, anchor.x) != 0 )
+                continue;
+            for( x = 0; x < kernel.cols; x++ )
+            {
+                if( kernel.at<uchar>(y,x) != 0 )
+                    return false;
+            }
+        }
+        for( x = 0; y < kernel.cols; x++ )
+        {
+            if( kernel.at<uchar>(anchor.y, x) != 0 )
+                continue;
+            for( y = 0; y < kernel.rows; y++ )
+            {
+                if( kernel.at<uchar>(y,x) != 0 )
+                    return false;
+            }
+        }
+
+    }
+    Size ksize = kernel.data ? kernel.size() : Size(3,3);
+
+    _dst.create( src.size(), src.type() );
+    Mat dst = _dst.getMat();
+
+    if( iterations == 0 || kernel.rows*kernel.cols == 1 )
+    {
+        src.copyTo(dst);
+        return true;
+    }
+
+    bool rectKernel = false;
+    if( !kernel.data )
+    {
+        ksize = Size(1+iterations*2,1+iterations*2);
+        anchor = Point(iterations, iterations);
+        rectKernel = true;
+        iterations = 1;
+    }
+    else if( iterations >= 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
+    {
+        ksize = Size(ksize.width + (iterations-1)*(ksize.width-1),
+             ksize.height + (iterations-1)*(ksize.height-1)),
+        anchor = Point(anchor.x*iterations, anchor.y*iterations);
+        kernel = Mat();
+        rectKernel = true;
+        iterations = 1;
+    }
+
+    // TODO: implement the case of iterations > 1.
+    if( iterations > 1 )
+        return false;
+
+    return IPPMorphReplicate( op, src, dst, kernel, ksize, anchor, rectKernel );
+}
+#endif
+
 static void morphOp( int op, InputArray _src, OutputArray _dst,
                      InputArray _kernel,
                      Point anchor, int iterations,
                      int borderType, const Scalar& borderValue )
 {
-    Mat src = _src.getMat(), kernel = _kernel.getMat();
+    Mat kernel = _kernel.getMat();
     Size ksize = kernel.data ? kernel.size() : Size(3,3);
     anchor = normalizeAnchor(anchor, ksize);
 
     CV_Assert( anchor.inside(Rect(0, 0, ksize.width, ksize.height)) );
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) )
+        return;
+#endif
+
+    Mat src = _src.getMat();
+
     _dst.create( src.size(), src.type() );
     Mat dst = _dst.getMat();
 
@@ -1316,7 +1470,7 @@ static void convertConvKernel( const IplConvKernel* src, cv::Mat& dst, cv::Point
 
     int i, size = src->nRows*src->nCols;
     for( i = 0; i < size; i++ )
-        dst.data[i] = (uchar)src->values[i];
+        dst.data[i] = (uchar)(src->values[i] != 0);
 }
 
 
diff --git a/modules/imgproc/src/precomp.cpp b/modules/imgproc/src/precomp.cpp
deleted file mode 100644
index 3e0ec42de..000000000
--- a/modules/imgproc/src/precomp.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-/* End of file. */
diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp
index e7d315cb5..9ebf11ea8 100644
--- a/modules/imgproc/src/pyramids.cpp
+++ b/modules/imgproc/src/pyramids.cpp
@@ -191,6 +191,7 @@ pyrDown_( const Mat& _src, Mat& _dst, int borderType )
     typedef typename CastOp::type1 WT;
     typedef typename CastOp::rtype T;
 
+    CV_Assert( !_src.empty() );
     Size ssize = _src.size(), dsize = _dst.size();
     int cn = _src.channels();
     int bufstep = (int)alignSize(dsize.width*cn, 16);
@@ -203,7 +204,8 @@ pyrDown_( const Mat& _src, Mat& _dst, int borderType )
     CastOp castOp;
     VecOp vecOp;
 
-    CV_Assert( std::abs(dsize.width*2 - ssize.width) <= 2 &&
+    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
+               std::abs(dsize.width*2 - ssize.width) <= 2 &&
                std::abs(dsize.height*2 - ssize.height) <= 2 );
     int k, x, sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);
 
diff --git a/modules/imgproc/src/rotcalipers.cpp b/modules/imgproc/src/rotcalipers.cpp
index cc43732c2..0983d4d35 100644
--- a/modules/imgproc/src/rotcalipers.cpp
+++ b/modules/imgproc/src/rotcalipers.cpp
@@ -398,3 +398,9 @@ cvMinAreaRect2( const CvArr* array, CvMemStorage* /*storage*/ )
     return (CvBox2D)rr;
 }
 
+void cv::boxPoints(cv::RotatedRect box, OutputArray _pts)
+{
+    _pts.create(4, 2, CV_32F);
+    Mat pts = _pts.getMat();
+    box.points((Point2f*)pts.data);
+}
diff --git a/modules/imgproc/src/samplers.cpp b/modules/imgproc/src/samplers.cpp
index 80e2706ba..64b7b6296 100644
--- a/modules/imgproc/src/samplers.cpp
+++ b/modules/imgproc/src/samplers.cpp
@@ -267,6 +267,11 @@ static void getRectSubPix_8u32f
     }
 }
 
+typedef CvStatus (CV_STDCALL *CvIPPGetRectSubPixFunc)( const void* src, int src_step,
+                                                       CvSize src_size, void* dst,
+                                                       int dst_step, CvSize win_size,
+                                                       CvPoint2D32f center,
+                                                       CvPoint* minpt, CvPoint* maxpt );
 
 static void
 getQuadrangleSubPix_8u32f_CnR( const uchar* src, size_t src_step, Size src_size,
@@ -336,12 +341,12 @@ getQuadrangleSubPix_8u32f_CnR( const uchar* src, size_t src_step, Size src_size,
                 float w00 = a1*b1, w01 = a*b1, w10 = a1*b, w11 = a*b;
                 const uchar *ptr0, *ptr1;
                 xs += A11; ys += A21;
-                
+
                 if( (unsigned)iys < (unsigned)(src_size.height-1) )
                     ptr0 = src + src_step*iys, ptr1 = ptr0 + src_step;
                 else
                     ptr0 = ptr1 = src + (iys < 0 ? 0 : src_size.height-1)*src_step;
-                
+
                 if( (unsigned)ixs < (unsigned)(src_size.width-1) )
                 {
                     ptr0 += ixs*cn; ptr1 += ixs*cn;
@@ -374,6 +379,19 @@ void cv::getRectSubPix( InputArray _image, Size patchSize, Point2f center,
     _patch.create(patchSize, CV_MAKETYPE(ddepth, cn));
     Mat patch = _patch.getMat();
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    CvPoint minpt, maxpt;
+    int srctype = CV_MAT_TYPE(src->type), dsttype = CV_MAT_TYPE(dst->type);
+    CvIPPGetRectSubPixFunc ippfunc =
+        srctype == CV_8UC1 && dsttype == CV_8UC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_8u_C1R :
+        srctype == CV_8UC1 && dsttype == CV_32FC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_8u32f_C1R :
+        srctype == CV_32FC1 && dsttype == CV_32FC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_32f_C1R : 0;
+
+    if( ippfunc && ippfunc(src->data.ptr, src->step, src_size, dst->data.ptr,
+                           dst->step, dst_size, center, &minpt, &maxpt) >= 0 )
+        return;
+#endif
+
     if( depth == CV_8U && ddepth == CV_8U )
         getRectSubPix_Cn_<uchar, uchar, int, scale_fixpt, cast_8u>
         (image.data, image.step, image.size(), patch.data, patch.step, patch.size(), center, cn);
@@ -438,7 +456,7 @@ cvSampleLine( const void* _img, CvPoint pt1, CvPoint pt2,
     cv::LineIterator li(img, pt1, pt2, connectivity, false);
     uchar* buffer = (uchar*)_buffer;
     size_t pixsize = img.elemSize();
-    
+
     if( !buffer )
         CV_Error( CV_StsNullPtr, "" );
 
diff --git a/modules/imgproc/src/segmentation.cpp b/modules/imgproc/src/segmentation.cpp
index e4cd43401..320f2273a 100644
--- a/modules/imgproc/src/segmentation.cpp
+++ b/modules/imgproc/src/segmentation.cpp
@@ -327,7 +327,7 @@ void cv::pyrMeanShiftFiltering( InputArray _src, OutputArray _dst,
     double sr2 = sr * sr;
     int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);
     int tab[768];
-    
+
 
     if( src0.type() != CV_8UC3 )
         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 3-channel images are supported" );
@@ -534,4 +534,3 @@ cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,
 
     cv::pyrMeanShiftFiltering(src, dst, sp0, sr, max_level, termcrit);
 }
-
diff --git a/modules/imgproc/src/shapedescr.cpp b/modules/imgproc/src/shapedescr.cpp
index 59868a238..4258d473a 100644
--- a/modules/imgproc/src/shapedescr.cpp
+++ b/modules/imgproc/src/shapedescr.cpp
@@ -381,7 +381,7 @@ cv::RotatedRect cv::fitEllipse( InputArray _points )
     // New fitellipse algorithm, contributed by Dr. Daniel Weiss
     Point2f c(0,0);
     double gfp[5], rp[5], t;
-    const double min_eps = 1e-6;
+    const double min_eps = 1e-8;
     bool is_float = depth == CV_32F;
     const Point* ptsi = (const Point*)points.data;
     const Point2f* ptsf = (const Point2f*)points.data;
diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp
index a56ff27b3..4c53e3345 100644
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -622,29 +622,29 @@ cv::Ptr<cv::BaseRowFilter> cv::getRowSumFilter(int srcType, int sumType, int ksi
         anchor = ksize/2;
 
     if( sdepth == CV_8U && ddepth == CV_32S )
-        return Ptr<BaseRowFilter>(new RowSum<uchar, int>(ksize, anchor));
+        return makePtr<RowSum<uchar, int> >(ksize, anchor);
     if( sdepth == CV_8U && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowSum<uchar, double>(ksize, anchor));
+        return makePtr<RowSum<uchar, double> >(ksize, anchor);
     if( sdepth == CV_16U && ddepth == CV_32S )
-        return Ptr<BaseRowFilter>(new RowSum<ushort, int>(ksize, anchor));
+        return makePtr<RowSum<ushort, int> >(ksize, anchor);
     if( sdepth == CV_16U && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowSum<ushort, double>(ksize, anchor));
+        return makePtr<RowSum<ushort, double> >(ksize, anchor);
     if( sdepth == CV_16S && ddepth == CV_32S )
-        return Ptr<BaseRowFilter>(new RowSum<short, int>(ksize, anchor));
+        return makePtr<RowSum<short, int> >(ksize, anchor);
     if( sdepth == CV_32S && ddepth == CV_32S )
-        return Ptr<BaseRowFilter>(new RowSum<int, int>(ksize, anchor));
+        return makePtr<RowSum<int, int> >(ksize, anchor);
     if( sdepth == CV_16S && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowSum<short, double>(ksize, anchor));
+        return makePtr<RowSum<short, double> >(ksize, anchor);
     if( sdepth == CV_32F && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowSum<float, double>(ksize, anchor));
+        return makePtr<RowSum<float, double> >(ksize, anchor);
     if( sdepth == CV_64F && ddepth == CV_64F )
-        return Ptr<BaseRowFilter>(new RowSum<double, double>(ksize, anchor));
+        return makePtr<RowSum<double, double> >(ksize, anchor);
 
     CV_Error_( CV_StsNotImplemented,
         ("Unsupported combination of source format (=%d), and buffer format (=%d)",
         srcType, sumType));
 
-    return Ptr<BaseRowFilter>(0);
+    return Ptr<BaseRowFilter>();
 }
 
 
@@ -658,33 +658,33 @@ cv::Ptr<cv::BaseColumnFilter> cv::getColumnSumFilter(int sumType, int dstType, i
         anchor = ksize/2;
 
     if( ddepth == CV_8U && sdepth == CV_32S )
-        return Ptr<BaseColumnFilter>(new ColumnSum<int, uchar>(ksize, anchor, scale));
+        return makePtr<ColumnSum<int, uchar> >(ksize, anchor, scale);
     if( ddepth == CV_8U && sdepth == CV_64F )
-        return Ptr<BaseColumnFilter>(new ColumnSum<double, uchar>(ksize, anchor, scale));
+        return makePtr<ColumnSum<double, uchar> >(ksize, anchor, scale);
     if( ddepth == CV_16U && sdepth == CV_32S )
-        return Ptr<BaseColumnFilter>(new ColumnSum<int, ushort>(ksize, anchor, scale));
+        return makePtr<ColumnSum<int, ushort> >(ksize, anchor, scale);
     if( ddepth == CV_16U && sdepth == CV_64F )
-        return Ptr<BaseColumnFilter>(new ColumnSum<double, ushort>(ksize, anchor, scale));
+        return makePtr<ColumnSum<double, ushort> >(ksize, anchor, scale);
     if( ddepth == CV_16S && sdepth == CV_32S )
-        return Ptr<BaseColumnFilter>(new ColumnSum<int, short>(ksize, anchor, scale));
+        return makePtr<ColumnSum<int, short> >(ksize, anchor, scale);
     if( ddepth == CV_16S && sdepth == CV_64F )
-        return Ptr<BaseColumnFilter>(new ColumnSum<double, short>(ksize, anchor, scale));
+        return makePtr<ColumnSum<double, short> >(ksize, anchor, scale);
     if( ddepth == CV_32S && sdepth == CV_32S )
-        return Ptr<BaseColumnFilter>(new ColumnSum<int, int>(ksize, anchor, scale));
+        return makePtr<ColumnSum<int, int> >(ksize, anchor, scale);
     if( ddepth == CV_32F && sdepth == CV_32S )
-        return Ptr<BaseColumnFilter>(new ColumnSum<int, float>(ksize, anchor, scale));
+        return makePtr<ColumnSum<int, float> >(ksize, anchor, scale);
     if( ddepth == CV_32F && sdepth == CV_64F )
-        return Ptr<BaseColumnFilter>(new ColumnSum<double, float>(ksize, anchor, scale));
+        return makePtr<ColumnSum<double, float> >(ksize, anchor, scale);
     if( ddepth == CV_64F && sdepth == CV_32S )
-        return Ptr<BaseColumnFilter>(new ColumnSum<int, double>(ksize, anchor, scale));
+        return makePtr<ColumnSum<int, double> >(ksize, anchor, scale);
     if( ddepth == CV_64F && sdepth == CV_64F )
-        return Ptr<BaseColumnFilter>(new ColumnSum<double, double>(ksize, anchor, scale));
+        return makePtr<ColumnSum<double, double> >(ksize, anchor, scale);
 
     CV_Error_( CV_StsNotImplemented,
         ("Unsupported combination of sum format (=%d), and destination format (=%d)",
         sumType, dstType));
 
-    return Ptr<BaseColumnFilter>(0);
+    return Ptr<BaseColumnFilter>();
 }
 
 
@@ -703,8 +703,8 @@ cv::Ptr<cv::FilterEngine> cv::createBoxFilter( int srcType, int dstType, Size ks
     Ptr<BaseColumnFilter> columnFilter = getColumnSumFilter(sumType,
         dstType, ksize.height, anchor.y, normalize ? 1./(ksize.width*ksize.height) : 1);
 
-    return Ptr<FilterEngine>(new FilterEngine(Ptr<BaseFilter>(0), rowFilter, columnFilter,
-           srcType, dstType, sumType, borderType ));
+    return makePtr<FilterEngine>(Ptr<BaseFilter>(), rowFilter, columnFilter,
+           srcType, dstType, sumType, borderType );
 }
 
 
@@ -856,6 +856,22 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
         return;
 #endif
 
+#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+    if(src.type() == CV_32FC1 && sigma1 == sigma2 && ksize.width == ksize.height && sigma1 != 0.0 )
+    {
+        IppiSize roi = {src.cols, src.rows};
+        int bufSize = 0;
+        ippiFilterGaussGetBufferSize_32f_C1R(roi, ksize.width, &bufSize);
+        AutoBuffer<uchar> buf(bufSize+128);
+        if( ippiFilterGaussBorder_32f_C1R((const Ipp32f *)src.data, (int)src.step,
+                                          (Ipp32f *)dst.data, (int)dst.step,
+                                          roi, ksize.width, (Ipp32f)sigma1,
+                                          (IppiBorderType)borderType, 0.0,
+                                          alignPtr(&buf[0],32)) >= 0 )
+            return;
+    }
+#endif
+
     Ptr<FilterEngine> f = createGaussianFilter( src.type(), ksize, sigma1, sigma2, borderType );
     f->apply( src, dst );
 }
@@ -1863,6 +1879,41 @@ private:
     float *space_weight, *color_weight;
 };
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+class IPPBilateralFilter_8u_Invoker :
+    public ParallelLoopBody
+{
+public:
+    IPPBilateralFilter_8u_Invoker(Mat &_src, Mat &_dst, double _sigma_color, double _sigma_space, int _radius, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), sigma_color(_sigma_color), sigma_space(_sigma_space), radius(_radius), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      virtual void operator() (const Range& range) const
+      {
+          int d = radius * 2 + 1;
+          IppiSize kernel = {d, d};
+          IppiSize roi={dst.cols, range.end - range.start};
+          int bufsize=0;
+          ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize);
+          AutoBuffer<uchar> buf(bufsize);
+          IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
+          ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec );
+          if( ippiFilterBilateral_8u_C1R( src.ptr<uchar>(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr<uchar>(range.start), (int)dst.step[0], roi, kernel, pSpec ) < 0)
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double sigma_color;
+    double sigma_space;
+    int radius;
+    bool *ok;
+    const IPPBilateralFilter_8u_Invoker& operator= (const IPPBilateralFilter_8u_Invoker&);
+};
+#endif
+
 static void
 bilateralFilter_8u( const Mat& src, Mat& dst, int d,
     double sigma_color, double sigma_space,
@@ -1895,6 +1946,16 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
     Mat temp;
     copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );
 
+#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+    if( cn == 1 )
+    {
+        bool ok;
+        IPPBilateralFilter_8u_Invoker body(temp, dst, sigma_color * sigma_color, sigma_space * sigma_space, radius, &ok );
+        parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16));
+        if( ok ) return;
+    }
+#endif
+
     std::vector<float> _color_weight(cn*256);
     std::vector<float> _space_weight(d*d);
     std::vector<int> _space_ofs(d*d);
@@ -2219,6 +2280,236 @@ void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d,
         "Bilateral filtering is only implemented for 8u and 32f images" );
 }
 
+
+/****************************************************************************************\
+                                  Adaptive Bilateral Filtering
+\****************************************************************************************/
+
+namespace cv
+{
+#define CALCVAR 1
+#define FIXED_WEIGHT 0
+
+class adaptiveBilateralFilter_8u_Invoker :
+    public ParallelLoopBody
+{
+public:
+    adaptiveBilateralFilter_8u_Invoker(Mat& _dest, const Mat& _temp, Size _ksize, double _sigma_space, Point _anchor) :
+        temp(&_temp), dest(&_dest), ksize(_ksize), sigma_space(_sigma_space), anchor(_anchor)
+    {
+        if( sigma_space <= 0 )
+            sigma_space = 1;
+        CV_Assert((ksize.width & 1) && (ksize.height & 1));
+        space_weight.resize(ksize.width * ksize.height);
+        double sigma2 = sigma_space * sigma_space;
+        int idx = 0;
+        int w = ksize.width / 2;
+        int h = ksize.height / 2;
+        for(int y=-h; y<=h; y++)
+            for(int x=-w; x<=w; x++)
+        {
+            space_weight[idx++] = (float)(sigma2 / (sigma2 + x * x + y * y));
+        }
+    }
+    virtual void operator()(const Range& range) const
+    {
+        int cn = dest->channels();
+        int anX = anchor.x;
+
+        const uchar *tptr;
+
+        for(int i = range.start;i < range.end; i++)
+        {
+            int startY = i;
+            if(cn == 1)
+            {
+                float var;
+                int currVal;
+                int sumVal = 0;
+                int sumValSqr = 0;
+                int currValCenter;
+                int currWRTCenter;
+                float weight;
+                float totalWeight = 0.;
+                float tmpSum = 0.;
+
+                for(int j = 0;j < dest->cols *cn; j+=cn)
+                {
+                    sumVal = 0;
+                    sumValSqr= 0;
+                    totalWeight = 0.;
+                    tmpSum = 0.;
+
+                    // Top row: don't sum the very last element
+                    int startLMJ = 0;
+                    int endLMJ  = ksize.width  - 1;
+                    int howManyAll = (anX *2 +1)*(ksize.width );
+#if CALCVAR
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+                            currVal = tptr[cn*(y+anX)];
+                            sumVal += currVal;
+                            sumValSqr += (currVal *currVal);
+                        }
+                    }
+                    var = ( (sumValSqr * howManyAll)- sumVal * sumVal )  /  ( (float)(howManyAll*howManyAll));
+#else
+                    var = 900.0;
+#endif
+                    startLMJ = 0;
+                    endLMJ = ksize.width;
+                    tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2);
+                    currValCenter =tptr[j+cn*anX];
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+#if FIXED_WEIGHT
+                            weight = 1.0;
+#else
+                            currVal = tptr[cn*(y+anX)];
+                            currWRTCenter = currVal - currValCenter;
+
+                            weight = var / ( var + (currWRTCenter * currWRTCenter) ) * space_weight[x*ksize.width+y+anX];;
+#endif
+                            tmpSum += ((float)tptr[cn*(y+anX)] * weight);
+                            totalWeight += weight;
+                        }
+                    }
+                    tmpSum /= totalWeight;
+
+                   dest->at<uchar>(startY ,j)= static_cast<uchar>(tmpSum);
+                }
+            }
+            else
+            {
+                assert(cn == 3);
+                float var_b, var_g, var_r;
+                int currVal_b, currVal_g, currVal_r;
+                int sumVal_b= 0, sumVal_g= 0, sumVal_r= 0;
+                int sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0;
+                int currValCenter_b= 0, currValCenter_g= 0, currValCenter_r= 0;
+                int currWRTCenter_b, currWRTCenter_g, currWRTCenter_r;
+                float weight_b, weight_g, weight_r;
+                float totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.;
+                float tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.;
+
+                for(int j = 0;j < dest->cols *cn; j+=cn)
+                {
+                    sumVal_b= 0, sumVal_g= 0, sumVal_r= 0;
+                    sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0;
+                    totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.;
+                    tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.;
+
+                    // Top row: don't sum the very last element
+                    int startLMJ = 0;
+                    int endLMJ  = ksize.width - 1;
+                    int howManyAll = (anX *2 +1)*(ksize.width);
+#if CALCVAR
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+                            currVal_b = tptr[cn*(y+anX)], currVal_g = tptr[cn*(y+anX)+1], currVal_r =tptr[cn*(y+anX)+2];
+                            sumVal_b += currVal_b;
+                            sumVal_g += currVal_g;
+                            sumVal_r += currVal_r;
+                            sumValSqr_b += (currVal_b *currVal_b);
+                            sumValSqr_g += (currVal_g *currVal_g);
+                            sumValSqr_r += (currVal_r *currVal_r);
+                        }
+                    }
+                    var_b = ( (sumValSqr_b * howManyAll)- sumVal_b * sumVal_b )  /  ( (float)(howManyAll*howManyAll));
+                    var_g = ( (sumValSqr_g * howManyAll)- sumVal_g * sumVal_g )  /  ( (float)(howManyAll*howManyAll));
+                    var_r = ( (sumValSqr_r * howManyAll)- sumVal_r * sumVal_r )  /  ( (float)(howManyAll*howManyAll));
+#else
+                    var_b = 900.0; var_g = 900.0;var_r = 900.0;
+#endif
+                    startLMJ = 0;
+                    endLMJ = ksize.width;
+                    tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2) + j;
+                    currValCenter_b =tptr[cn*anX], currValCenter_g =tptr[cn*anX+1], currValCenter_r =tptr[cn*anX+2];
+                    for(int x = startLMJ; x< endLMJ; x++)
+                    {
+                        tptr = temp->ptr(startY + x) +j;
+                        for(int y=-anX; y<=anX; y++)
+                        {
+#if FIXED_WEIGHT
+                            weight_b = 1.0;
+                            weight_g = 1.0;
+                            weight_r = 1.0;
+#else
+                            currVal_b = tptr[cn*(y+anX)];currVal_g=tptr[cn*(y+anX)+1];currVal_r=tptr[cn*(y+anX)+2];
+                            currWRTCenter_b = currVal_b - currValCenter_b;
+                            currWRTCenter_g = currVal_g - currValCenter_g;
+                            currWRTCenter_r = currVal_r - currValCenter_r;
+
+                            float cur_spw = space_weight[x*ksize.width+y+anX];
+                            weight_b = var_b / ( var_b + (currWRTCenter_b * currWRTCenter_b) ) * cur_spw;
+                            weight_g = var_g / ( var_g + (currWRTCenter_g * currWRTCenter_g) ) * cur_spw;
+                            weight_r = var_r / ( var_r + (currWRTCenter_r * currWRTCenter_r) ) * cur_spw;
+#endif
+                            tmpSum_b += ((float)tptr[cn*(y+anX)]   * weight_b);
+                            tmpSum_g += ((float)tptr[cn*(y+anX)+1] * weight_g);
+                            tmpSum_r += ((float)tptr[cn*(y+anX)+2] * weight_r);
+                            totalWeight_b += weight_b, totalWeight_g += weight_g, totalWeight_r += weight_r;
+                        }
+                    }
+                    tmpSum_b /= totalWeight_b;
+                    tmpSum_g /= totalWeight_g;
+                    tmpSum_r /= totalWeight_r;
+
+                    dest->at<uchar>(startY,j  )= static_cast<uchar>(tmpSum_b);
+                    dest->at<uchar>(startY,j+1)= static_cast<uchar>(tmpSum_g);
+                    dest->at<uchar>(startY,j+2)= static_cast<uchar>(tmpSum_r);
+                }
+            }
+        }
+    }
+private:
+    const Mat *temp;
+    Mat *dest;
+    Size ksize;
+    double sigma_space;
+    Point anchor;
+    std::vector<float> space_weight;
+};
+static void adaptiveBilateralFilter_8u( const Mat& src, Mat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType )
+{
+    Size size = src.size();
+
+    CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) &&
+              src.type() == dst.type() && src.size() == dst.size() &&
+              src.data != dst.data );
+    Mat temp;
+    copyMakeBorder(src, temp, anchor.x, anchor.y, anchor.x, anchor.y, borderType);
+
+    adaptiveBilateralFilter_8u_Invoker body(dst, temp, ksize, sigmaSpace, anchor);
+    parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16));
+}
+}
+void cv::adaptiveBilateralFilter( InputArray _src, OutputArray _dst, Size ksize,
+                                  double sigmaSpace, Point anchor, int borderType )
+{
+    Mat src = _src.getMat();
+    _dst.create(src.size(), src.type());
+    Mat dst = _dst.getMat();
+
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
+
+    anchor = normalizeAnchor(anchor,ksize);
+    if( src.depth() == CV_8U )
+        adaptiveBilateralFilter_8u( src, dst, ksize, sigmaSpace, anchor, borderType );
+    else
+        CV_Error( CV_StsUnsupportedFormat,
+        "Adaptive Bilateral filtering is only implemented for 8u images" );
+}
+
 //////////////////////////////////////////////////////////////////////////////////////////
 
 CV_IMPL void
diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.cpp
index b44197047..229bbcb8c 100644
--- a/modules/imgproc/src/sumpixels.cpp
+++ b/modules/imgproc/src/sumpixels.cpp
@@ -41,6 +41,9 @@
 //M*/
 
 #include "precomp.hpp"
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+static IppStatus sts = ippInit();
+#endif
 
 namespace cv
 {
@@ -234,6 +237,53 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
     if( sdepth <= 0 )
         sdepth = depth == CV_8U ? CV_32S : CV_64F;
     sdepth = CV_MAT_DEPTH(sdepth);
+
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    if( ( depth == CV_8U ) && ( !_tilted.needed() ) )
+    {
+        if( sdepth == CV_32F )
+        {
+            if( cn == 1 )
+            {
+                IppiSize srcRoiSize = ippiSize( src.cols, src.rows );
+                _sum.create( isize, CV_MAKETYPE( sdepth, cn ) );
+                sum = _sum.getMat();
+                if( _sqsum.needed() )
+                {
+                    _sqsum.create( isize, CV_MAKETYPE( CV_64F, cn ) );
+                    sqsum = _sqsum.getMat();
+                    ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, src.step, (Ipp32f*)sum.data, sum.step, (Ipp64f*)sqsum.data, sqsum.step, srcRoiSize, 0, 0 );
+                }
+                else
+                {
+                    ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, src.step, (Ipp32f*)sum.data, sum.step, srcRoiSize, 0 );
+                }
+                return;
+            }
+        }
+        if( sdepth == CV_32S )
+        {
+            if( cn == 1 )
+            {
+                IppiSize srcRoiSize = ippiSize( src.cols, src.rows );
+                _sum.create( isize, CV_MAKETYPE( sdepth, cn ) );
+                sum = _sum.getMat();
+                if( _sqsum.needed() )
+                {
+                    _sqsum.create( isize, CV_MAKETYPE( CV_64F, cn ) );
+                    sqsum = _sqsum.getMat();
+                    ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, src.step, (Ipp32s*)sum.data, sum.step, (Ipp64f*)sqsum.data, sqsum.step, srcRoiSize, 0, 0 );
+                }
+                else
+                {
+                    ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, src.step, (Ipp32s*)sum.data, sum.step, srcRoiSize, 0 );
+                }
+                return;
+            }
+        }
+    }
+#endif
+
     _sum.create( isize, CV_MAKETYPE(sdepth, cn) );
     sum = _sum.getMat();
 
diff --git a/modules/imgproc/src/templmatch.cpp b/modules/imgproc/src/templmatch.cpp
index 23e740439..bfe7ce600 100644
--- a/modules/imgproc/src/templmatch.cpp
+++ b/modules/imgproc/src/templmatch.cpp
@@ -248,6 +248,8 @@ void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result,
     CV_Assert( (img.depth() == CV_8U || img.depth() == CV_32F) &&
                img.type() == templ.type() );
 
+    CV_Assert( img.rows >= templ.rows && img.cols >= templ.cols);
+
     Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
     _result.create(corrSize, CV_32F);
     Mat result = _result.getMat();
diff --git a/modules/imgproc/test/test_approxpoly.cpp b/modules/imgproc/test/test_approxpoly.cpp
index d087d541e..c4973c31e 100644
--- a/modules/imgproc/test/test_approxpoly.cpp
+++ b/modules/imgproc/test/test_approxpoly.cpp
@@ -356,4 +356,3 @@ _exit_:
 }
 
 TEST(Imgproc_ApproxPoly, accuracy) { CV_ApproxPolyTest test; test.safe_run(); }
-
diff --git a/modules/imgproc/test/test_bilateral_filter.cpp b/modules/imgproc/test/test_bilateral_filter.cpp
index 2d45fdcf7..0bfc3dc4c 100644
--- a/modules/imgproc/test/test_bilateral_filter.cpp
+++ b/modules/imgproc/test/test_bilateral_filter.cpp
@@ -251,7 +251,7 @@ namespace cvtest
 
     int CV_BilateralFilterTest::validate_test_results(int test_case_index)
     {
-        static const double eps = 1;
+        static const double eps = 4;
 
         Mat reference_dst, reference_src;
         if (_src.depth() == CV_32F)
diff --git a/modules/imgproc/test/test_color.cpp b/modules/imgproc/test/test_color.cpp
index 0434c6c13..0c94f8f50 100644
--- a/modules/imgproc/test/test_color.cpp
+++ b/modules/imgproc/test/test_color.cpp
@@ -1690,7 +1690,7 @@ TEST(Imgproc_ColorBayer, regression)
     Mat given = imread(string(ts->get_data_path()) + "/cvtcolor/bayer_input.png", IMREAD_GRAYSCALE);
     Mat gold = imread(string(ts->get_data_path()) + "/cvtcolor/bayer_gold.png", IMREAD_UNCHANGED);
     Mat result;
-    
+
     CV_Assert(given.data != NULL && gold.data != NULL);
 
     cvtColor(given, result, CV_BayerBG2GRAY);
diff --git a/modules/imgproc/test/test_connectedcomponents.cpp b/modules/imgproc/test/test_connectedcomponents.cpp
index c428cc074..a9567da97 100644
--- a/modules/imgproc/test/test_connectedcomponents.cpp
+++ b/modules/imgproc/test/test_connectedcomponents.cpp
@@ -105,4 +105,3 @@ void CV_ConnectedComponentsTest::run( int /* start_from */)
 }
 
 TEST(Imgproc_ConnectedComponents, regression) { CV_ConnectedComponentsTest test; test.safe_run(); }
-
diff --git a/modules/imgproc/test/test_convhull.cpp b/modules/imgproc/test/test_convhull.cpp
index 19f536a94..49456c6cc 100644
--- a/modules/imgproc/test/test_convhull.cpp
+++ b/modules/imgproc/test/test_convhull.cpp
@@ -1626,7 +1626,7 @@ CV_PerimeterAreaSliceTest::~CV_PerimeterAreaSliceTest() {}
 
 void CV_PerimeterAreaSliceTest::run( int )
 {
-    Ptr<CvMemStorage> storage = cvCreateMemStorage();
+    Ptr<CvMemStorage> storage(cvCreateMemStorage());
     RNG& rng = theRNG();
     const double min_r = 90, max_r = 120;
 
@@ -1700,4 +1700,3 @@ TEST(Imgproc_ContourPerimeterSlice, accuracy) { CV_PerimeterAreaSliceTest test;
 TEST(Imgproc_FitEllipse, small) { CV_FitEllipseSmallTest test; test.safe_run(); }
 
 /* End of file. */
-
diff --git a/modules/imgproc/test/test_distancetransform.cpp b/modules/imgproc/test/test_distancetransform.cpp
index 9ce9c6c42..dd3c2e8b4 100644
--- a/modules/imgproc/test/test_distancetransform.cpp
+++ b/modules/imgproc/test/test_distancetransform.cpp
@@ -283,5 +283,3 @@ void CV_DisTransTest::prepare_to_validation( int /*test_case_idx*/ )
 
 
 TEST(Imgproc_DistanceTransform, accuracy) { CV_DisTransTest test; test.safe_run(); }
-
-
diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp
index 6bf529b0e..efbad9974 100644
--- a/modules/imgproc/test/test_filter.cpp
+++ b/modules/imgproc/test/test_filter.cpp
@@ -1886,4 +1886,3 @@ protected:
 };
 
 TEST(Imgproc_Filtering, supportedFormats) { CV_FilterSupportedFormatsTest test; test.safe_run(); }
-
diff --git a/modules/imgproc/test/test_histograms.cpp b/modules/imgproc/test/test_histograms.cpp
index ccdaa74f2..19ccc656b 100644
--- a/modules/imgproc/test/test_histograms.cpp
+++ b/modules/imgproc/test/test_histograms.cpp
@@ -948,7 +948,7 @@ int CV_ThreshHistTest::validate_test_results( int /*test_case_idx*/ )
 class CV_CompareHistTest : public CV_BaseHistTest
 {
 public:
-    enum { MAX_METHOD = 4 };
+    enum { MAX_METHOD = 5 };
 
     CV_CompareHistTest();
 protected:
@@ -1014,6 +1014,8 @@ int CV_CompareHistTest::validate_test_results( int /*test_case_idx*/ )
             result0[CV_COMP_INTERSECT] += MIN(v0,v1);
             if( fabs(v0) > DBL_EPSILON )
                 result0[CV_COMP_CHISQR] += (v0 - v1)*(v0 - v1)/v0;
+            if( fabs(v0 + v1) > DBL_EPSILON )
+                result0[CV_COMP_CHISQR_ALT] += (v0 - v1)*(v0 - v1)/(v0 + v1);
             s0 += v0;
             s1 += v1;
             sq0 += v0*v0;
@@ -1039,6 +1041,8 @@ int CV_CompareHistTest::validate_test_results( int /*test_case_idx*/ )
             result0[CV_COMP_INTERSECT] += MIN(v0,v1);
             if( fabs(v0) > DBL_EPSILON )
                 result0[CV_COMP_CHISQR] += (v0 - v1)*(v0 - v1)/v0;
+            if( fabs(v0 + v1) > DBL_EPSILON )
+                result0[CV_COMP_CHISQR_ALT] += (v0 - v1)*(v0 - v1)/(v0 + v1);
             s0 += v0;
             sq0 += v0*v0;
             result0[CV_COMP_BHATTACHARYYA] += sqrt(v0*v1);
@@ -1053,6 +1057,8 @@ int CV_CompareHistTest::validate_test_results( int /*test_case_idx*/ )
         }
     }
 
+    result0[CV_COMP_CHISQR_ALT] *= 2;
+
     t = (sq0 - s0*s0/total_size)*(sq1 - s1*s1/total_size);
     result0[CV_COMP_CORREL] = fabs(t) > DBL_EPSILON ?
         (result0[CV_COMP_CORREL] - s0*s1/total_size)/sqrt(t) : 1;
@@ -1067,6 +1073,7 @@ int CV_CompareHistTest::validate_test_results( int /*test_case_idx*/ )
         double v = result[i], v0 = result0[i];
         const char* method_name =
             i == CV_COMP_CHISQR ? "Chi-Square" :
+            i == CV_COMP_CHISQR_ALT ? "Alternative Chi-Square" :
             i == CV_COMP_CORREL ? "Correlation" :
             i == CV_COMP_INTERSECT ? "Intersection" :
             i == CV_COMP_BHATTACHARYYA ? "Bhattacharyya" : "Unknown";
diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp
index 4ea3b76ba..53f7d5c2f 100644
--- a/modules/imgproc/test/test_imgwarp.cpp
+++ b/modules/imgproc/test/test_imgwarp.cpp
@@ -1424,7 +1424,7 @@ TEST(Imgproc_fitLine_vector_2d, regression)
 
 TEST(Imgproc_fitLine_Mat_2dC2, regression)
 {
-    cv::Mat mat1(3, 1, CV_32SC2);
+    cv::Mat mat1 = Mat::zeros(3, 1, CV_32SC2);
     std::vector<float> line1;
 
     cv::fitLine(mat1, line1, CV_DIST_L2, 0 ,0 ,0);
@@ -1444,7 +1444,7 @@ TEST(Imgproc_fitLine_Mat_2dC1, regression)
 
 TEST(Imgproc_fitLine_Mat_3dC3, regression)
 {
-    cv::Mat mat1(2, 1, CV_32SC3);
+    cv::Mat mat1 = Mat::zeros(2, 1, CV_32SC3);
     std::vector<float> line1;
 
     cv::fitLine(mat1, line1, CV_DIST_L2, 0 ,0 ,0);
@@ -1454,7 +1454,7 @@ TEST(Imgproc_fitLine_Mat_3dC3, regression)
 
 TEST(Imgproc_fitLine_Mat_3dC1, regression)
 {
-    cv::Mat mat2(2, 3, CV_32SC1);
+    cv::Mat mat2 = Mat::zeros(2, 3, CV_32SC1);
     std::vector<float> line2;
 
     cv::fitLine(mat2, line2, CV_DIST_L2, 0 ,0 ,0);
diff --git a/modules/imgproc/test/test_imgwarp_strict.cpp b/modules/imgproc/test/test_imgwarp_strict.cpp
index e04a65a42..76d65b198 100644
--- a/modules/imgproc/test/test_imgwarp_strict.cpp
+++ b/modules/imgproc/test/test_imgwarp_strict.cpp
@@ -678,8 +678,8 @@ void CV_Remap_Test::generate_test_data()
             MatIterator_<Vec2s> begin_x = mapx.begin<Vec2s>(), end_x = mapx.end<Vec2s>();
             for ( ; begin_x != end_x; ++begin_x)
             {
-                begin_x[0] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.cols + n - 1, 0)));
-                begin_x[1] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.rows + n - 1, 0)));
+                (*begin_x)[0] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.cols + n - 1, 0)));
+                (*begin_x)[1] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.rows + n - 1, 0)));
             }
 
             if (interpolation != INTER_NEAREST)
diff --git a/modules/imgproc/test/test_lsd.cpp b/modules/imgproc/test/test_lsd.cpp
new file mode 100644
index 000000000..7d8b7f243
--- /dev/null
+++ b/modules/imgproc/test/test_lsd.cpp
@@ -0,0 +1,265 @@
+#include "test_precomp.hpp"
+
+#include <vector>
+
+using namespace cv;
+using namespace std;
+
+const Size img_size(640, 480);
+const int LSD_TEST_SEED = 0x134679;
+const int EPOCHS = 20;
+
+class LSDBase : public testing::Test
+{
+public:
+    LSDBase() {};
+
+protected:
+    Mat test_image;
+    vector<Vec4i> lines;
+    RNG rng;
+    int passedtests;
+
+    void GenerateWhiteNoise(Mat& image);
+    void GenerateConstColor(Mat& image);
+    void GenerateLines(Mat& image, const unsigned int numLines);
+    void GenerateRotatedRect(Mat& image);
+    virtual void SetUp();
+};
+
+class Imgproc_LSD_ADV: public LSDBase
+{
+public:
+    Imgproc_LSD_ADV() {};
+protected:
+
+};
+
+class Imgproc_LSD_STD: public LSDBase
+{
+public:
+    Imgproc_LSD_STD() {};
+protected:
+
+};
+
+class Imgproc_LSD_NONE: public LSDBase
+{
+public:
+    Imgproc_LSD_NONE() {};
+protected:
+
+};
+
+void LSDBase::GenerateWhiteNoise(Mat& image)
+{
+    image = Mat(img_size, CV_8UC1);
+    rng.fill(image, RNG::UNIFORM, 0, 256);
+}
+
+void LSDBase::GenerateConstColor(Mat& image)
+{
+    image = Mat(img_size, CV_8UC1, Scalar::all(rng.uniform(0, 256)));
+}
+
+void LSDBase::GenerateLines(Mat& image, const unsigned int numLines)
+{
+    image = Mat(img_size, CV_8UC1, Scalar::all(rng.uniform(0, 128)));
+
+    for(unsigned int i = 0; i < numLines; ++i)
+    {
+        int y = rng.uniform(10, img_size.width - 10);
+        Point p1(y, 10);
+        Point p2(y, img_size.height - 10);
+        line(image, p1, p2, Scalar(255), 3);
+    }
+}
+
+void LSDBase::GenerateRotatedRect(Mat& image)
+{
+    image = Mat::zeros(img_size, CV_8UC1);
+
+    Point center(rng.uniform(img_size.width/4, img_size.width*3/4),
+                 rng.uniform(img_size.height/4, img_size.height*3/4));
+    Size rect_size(rng.uniform(img_size.width/8, img_size.width/6),
+                   rng.uniform(img_size.height/8, img_size.height/6));
+    float angle = rng.uniform(0.f, 360.f);
+
+    Point2f vertices[4];
+
+    RotatedRect rRect = RotatedRect(center, rect_size, angle);
+
+    rRect.points(vertices);
+    for (int i = 0; i < 4; i++)
+    {
+        line(image, vertices[i], vertices[(i + 1) % 4], Scalar(255), 3);
+    }
+}
+
+void LSDBase::SetUp()
+{
+    lines.clear();
+    test_image = Mat();
+    rng = RNG(LSD_TEST_SEED);
+    passedtests = 0;
+}
+
+
+TEST_F(Imgproc_LSD_ADV, whiteNoise)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        GenerateWhiteNoise(test_image);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_ADV);
+        detector->detect(test_image, lines);
+
+        if(40u >= lines.size()) ++passedtests;
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_ADV, constColor)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        GenerateConstColor(test_image);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_ADV);
+        detector->detect(test_image, lines);
+
+        if(0u == lines.size()) ++passedtests;
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_ADV, lines)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        const unsigned int numOfLines = 1;
+        GenerateLines(test_image, numOfLines);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_ADV);
+        detector->detect(test_image, lines);
+
+        if(numOfLines * 2 == lines.size()) ++passedtests;  // * 2 because of Gibbs effect
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_ADV, rotatedRect)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        GenerateRotatedRect(test_image);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_ADV);
+        detector->detect(test_image, lines);
+
+        if(2u <= lines.size())  ++passedtests;
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_STD, whiteNoise)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        GenerateWhiteNoise(test_image);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_STD);
+        detector->detect(test_image, lines);
+
+        if(50u >= lines.size()) ++passedtests;
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_STD, constColor)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        GenerateConstColor(test_image);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_STD);
+        detector->detect(test_image, lines);
+
+        if(0u == lines.size()) ++passedtests;
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_STD, lines)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        const unsigned int numOfLines = 1;
+        GenerateLines(test_image, numOfLines);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_STD);
+        detector->detect(test_image, lines);
+
+        if(numOfLines * 2 == lines.size()) ++passedtests;  // * 2 because of Gibbs effect
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_STD, rotatedRect)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        GenerateRotatedRect(test_image);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_STD);
+        detector->detect(test_image, lines);
+
+        if(4u <= lines.size()) ++passedtests;
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_NONE, whiteNoise)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        GenerateWhiteNoise(test_image);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_STD);
+        detector->detect(test_image, lines);
+
+        if(50u >= lines.size()) ++passedtests;
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_NONE, constColor)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        GenerateConstColor(test_image);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_NONE);
+        detector->detect(test_image, lines);
+
+        if(0u == lines.size()) ++passedtests;
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_NONE, lines)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        const unsigned int numOfLines = 1;
+        GenerateLines(test_image, numOfLines);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_NONE);
+        detector->detect(test_image, lines);
+
+        if(numOfLines * 2 == lines.size()) ++passedtests;  // * 2 because of Gibbs effect
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
+
+TEST_F(Imgproc_LSD_NONE, rotatedRect)
+{
+    for (int i = 0; i < EPOCHS; ++i)
+    {
+        GenerateRotatedRect(test_image);
+        Ptr<LineSegmentDetector> detector = createLineSegmentDetectorPtr(LSD_REFINE_NONE);
+        detector->detect(test_image, lines);
+
+        if(8u <= lines.size()) ++passedtests;
+    }
+    ASSERT_EQ(EPOCHS, passedtests);
+}
diff --git a/modules/imgproc/test/test_precomp.cpp b/modules/imgproc/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/imgproc/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/imgproc/test/test_thresh.cpp b/modules/imgproc/test/test_thresh.cpp
index 30a760da5..f59fec19e 100644
--- a/modules/imgproc/test/test_thresh.cpp
+++ b/modules/imgproc/test/test_thresh.cpp
@@ -318,4 +318,3 @@ void CV_ThreshTest::prepare_to_validation( int /*test_case_idx*/ )
 }
 
 TEST(Imgproc_Threshold, accuracy) { CV_ThreshTest test; test.safe_run(); }
-
diff --git a/modules/imgproc/test/test_watershed.cpp b/modules/imgproc/test/test_watershed.cpp
index 52cb37a4f..0edb18e02 100644
--- a/modules/imgproc/test/test_watershed.cpp
+++ b/modules/imgproc/test/test_watershed.cpp
@@ -130,4 +130,3 @@ void CV_WatershedTest::run( int /* start_from */)
 }
 
 TEST(Imgproc_Watershed, regression) { CV_WatershedTest test; test.safe_run(); }
-
diff --git a/modules/java/android_test/.gitignore b/modules/java/android_test/.gitignore
index 2d406cbbc..a21956836 100644
--- a/modules/java/android_test/.gitignore
+++ b/modules/java/android_test/.gitignore
@@ -4,4 +4,4 @@ build.xml
 local.properties
 proguard-project.txt
 project.properties
-default.properties
\ No newline at end of file
+default.properties
diff --git a/modules/java/android_test/AndroidManifest.xml b/modules/java/android_test/AndroidManifest.xml
index e261e2d70..81f2bc134 100644
--- a/modules/java/android_test/AndroidManifest.xml
+++ b/modules/java/android_test/AndroidManifest.xml
@@ -3,8 +3,8 @@
       package="org.opencv.test"
       android:versionCode="1"
       android:versionName="1.0">
-    
-	<uses-sdk android:minSdkVersion="8" />
+
+    <uses-sdk android:minSdkVersion="8" />
 
     <!-- We add an application tag here just so that we can indicate that
          this package needs to link against the android.test library,
@@ -20,7 +20,7 @@
     <instrumentation android:name="org.opencv.test.OpenCVTestRunner"
                      android:targetPackage="org.opencv.test"
                      android:label="Tests for org.opencv"/>
-    
+
     <uses-permission android:name="android.permission.CAMERA"/>
     <uses-feature android:name="android.hardware.camera" />
     <uses-feature android:name="android.hardware.camera.autofocus" />
diff --git a/modules/java/android_test/res/layout/main.xml b/modules/java/android_test/res/layout/main.xml
index 3a5f117d3..1fb4479c2 100644
--- a/modules/java/android_test/res/layout/main.xml
+++ b/modules/java/android_test/res/layout/main.xml
@@ -4,9 +4,9 @@
     android:layout_width="fill_parent"
     android:layout_height="fill_parent"
     >
-<TextView  
-    android:layout_width="fill_parent" 
-    android:layout_height="wrap_content" 
+<TextView
+    android:layout_width="fill_parent"
+    android:layout_height="wrap_content"
     android:text="@string/hello"
     />
 </LinearLayout>
diff --git a/modules/java/android_test/src/org/opencv/test/OpenCVTestRunner.java b/modules/java/android_test/src/org/opencv/test/OpenCVTestRunner.java
index 2dc4aead8..9425e2644 100644
--- a/modules/java/android_test/src/org/opencv/test/OpenCVTestRunner.java
+++ b/modules/java/android_test/src/org/opencv/test/OpenCVTestRunner.java
@@ -17,8 +17,6 @@ import android.util.Log;
 
 /**
  * This only class is Android specific.
- *
- * @see <a href="http://opencv.itseez.com">OpenCV</a>
  */
 
 public class OpenCVTestRunner extends InstrumentationTestRunner {
diff --git a/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java b/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java
index e6520a43d..69b8d0fcb 100644
--- a/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java
+++ b/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java
@@ -585,4 +585,18 @@ public class Calib3dTest extends OpenCVTestCase {
     public void testValidateDisparityMatMatIntIntInt() {
         fail("Not yet implemented");
     }
+
+    public void testComputeCorrespondEpilines()
+    {
+        Mat fundamental = new Mat(3, 3, CvType.CV_64F);
+        fundamental.put(0, 0, 0, -0.577, 0.288, 0.577, 0, 0.288, -0.288, -0.288, 0);
+        MatOfPoint2f left = new MatOfPoint2f();
+        left.alloc(1);
+        left.put(0, 0, 2, 3); //add(new Point(x, y));
+        Mat lines = new Mat();
+        Mat truth = new Mat(1, 1, CvType.CV_32FC3);
+        truth.put(0, 0, -0.70735186, 0.70686162, -0.70588124);
+        Calib3d.computeCorrespondEpilines(left, 1, fundamental, lines);
+        assertMatEqual(truth, lines, EPS);
+    }
 }
diff --git a/modules/java/android_test/src/org/opencv/test/core/CoreTest.java b/modules/java/android_test/src/org/opencv/test/core/CoreTest.java
index e0a882bbc..c13df193e 100644
--- a/modules/java/android_test/src/org/opencv/test/core/CoreTest.java
+++ b/modules/java/android_test/src/org/opencv/test/core/CoreTest.java
@@ -491,20 +491,6 @@ public class CoreTest extends OpenCVTestCase {
 
         Point truth[] = {
                 new Point(5, 6),
-                new Point(5, 6),
-                new Point(5, 6),
-                new Point(5, 6),
-                new Point(5, 6),
-                new Point(5, 6),
-                new Point(5, 6),
-                new Point(5, 6),
-                new Point(4, 6),
-                new Point(4, 6),
-                new Point(4, 6),
-                new Point(4, 6),
-                new Point(4, 6),
-                new Point(4, 6),
-                new Point(4, 6),
                 new Point(4, 6)
         };
         assertArrayPointsEquals(truth, pts.toArray(), EPS);
diff --git a/modules/java/android_test/src/org/opencv/test/imgproc/ImgprocTest.java b/modules/java/android_test/src/org/opencv/test/imgproc/ImgprocTest.java
index b4221b133..7971ea2f1 100644
--- a/modules/java/android_test/src/org/opencv/test/imgproc/ImgprocTest.java
+++ b/modules/java/android_test/src/org/opencv/test/imgproc/ImgprocTest.java
@@ -1932,4 +1932,4 @@ public class ImgprocTest extends OpenCVTestCase {
         assertMatEqual(truth, markers);
     }
 
-}
\ No newline at end of file
+}
diff --git a/modules/java/check-tests.py b/modules/java/check-tests.py
index c4d34f61e..394b377ef 100755
--- a/modules/java/check-tests.py
+++ b/modules/java/check-tests.py
@@ -162,4 +162,3 @@ if __name__ == '__main__':
     print "Not tested methods found:", len(funcs)
     print "Total tests found:", parser.get_tests_count()
     print "Empty test stubs found:", parser.get_empty_stubs_count()
-
diff --git a/modules/java/generator/gen_java.py b/modules/java/generator/gen_java.py
index ae8c3b497..d0238458e 100755
--- a/modules/java/generator/gen_java.py
+++ b/modules/java/generator/gen_java.py
@@ -12,7 +12,7 @@ class_ignore_list = (
     #core
     "FileNode", "FileStorage", "KDTree", "KeyPoint", "DMatch",
     #highgui
-    "VideoWriter", "VideoCapture",
+    "VideoWriter",
 )
 
 const_ignore_list = (
@@ -510,6 +510,54 @@ JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Core_n_1getTextSize
         "resizeWindow"      : {'j_code' : '', 'jn_code' : '', 'cpp_code' : '' },
     }, # Highgui
 
+    'VideoCapture' :
+    {
+        "getSupportedPreviewSizes" :
+        {
+            'j_code' :
+"""
+    public java.util.List<org.opencv.core.Size> getSupportedPreviewSizes()
+    {
+        String[] sizes_str = getSupportedPreviewSizes_0(nativeObj).split(",");
+        java.util.List<org.opencv.core.Size> sizes = new java.util.ArrayList<org.opencv.core.Size>(sizes_str.length);
+
+        for (String str : sizes_str) {
+            String[] wh = str.split("x");
+            sizes.add(new org.opencv.core.Size(Double.parseDouble(wh[0]), Double.parseDouble(wh[1])));
+        }
+
+        return sizes;
+    }
+
+""",
+            'jn_code' :
+"""\n    private static native String getSupportedPreviewSizes_0(long nativeObj);\n""",
+            'cpp_code' :
+"""
+JNIEXPORT jstring JNICALL Java_org_opencv_highgui_VideoCapture_getSupportedPreviewSizes_10
+  (JNIEnv *env, jclass, jlong self);
+
+JNIEXPORT jstring JNICALL Java_org_opencv_highgui_VideoCapture_getSupportedPreviewSizes_10
+  (JNIEnv *env, jclass, jlong self)
+{
+    static const char method_name[] = "highgui::VideoCapture_getSupportedPreviewSizes_10()";
+    try {
+        LOGD("%s", method_name);
+        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
+        union {double prop; const char* name;} u;
+        u.prop = me->get(CAP_PROP_ANDROID_PREVIEW_SIZES_STRING);
+        return env->NewStringUTF(u.name);
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
+    } catch (...) {
+        throwJavaException(env, 0, method_name);
+    }
+    return env->NewStringUTF("");
+}
+
+""",
+        }, # getSupportedPreviewSizes
+    }, # VideoCapture
 }
 
 # { class : { func : { arg_name : {"ctype" : ctype, "attrib" : [attrib]} } } }
@@ -584,7 +632,8 @@ def getLibVersion(version_hpp_path):
     major = re.search("^W*#\W*define\W+CV_VERSION_MAJOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
     minor = re.search("^W*#\W*define\W+CV_VERSION_MINOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
     revision = re.search("^W*#\W*define\W+CV_VERSION_REVISION\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
-    return (epoch, major, minor, revision)
+    status = re.search("^W*#\W*define\W+CV_VERSION_STATUS\W+\"(.*?)\"\W*$", version_file, re.MULTILINE).group(1)
+    return (epoch, major, minor, revision, status)
 
 class ConstInfo(object):
     def __init__(self, cname, name, val, addedManually=False):
@@ -751,15 +800,19 @@ public class %(jc)s {
 """ % { 'm' : self.module, 'jc' : jname } )
 
         if class_name == 'Core':
-            (epoch, major, minor, revision) = getLibVersion(
+            (epoch, major, minor, revision, status) = getLibVersion(
                 (os.path.dirname(__file__) or '.') + '/../../core/include/opencv2/core/version.hpp')
-            version_str    = '.'.join( (epoch, major, minor, revision) )
+            version_str    = '.'.join( (epoch, major, minor, revision) ) + status
             version_suffix =  ''.join( (epoch, major, minor) )
             self.classes[class_name].imports.add("java.lang.String")
             self.java_code[class_name]["j_code"].write("""
     public static final String VERSION = "%(v)s", NATIVE_LIBRARY_NAME = "opencv_java%(vs)s";
-    public static final int VERSION_EPOCH = %(ep)s, VERSION_MAJOR = %(ma)s, VERSION_MINOR = %(mi)s, VERSION_REVISION = %(re)s;
-""" % { 'v' : version_str, 'vs' : version_suffix, 'ep' : epoch, 'ma' : major, 'mi' : minor, 're' : revision } )
+    public static final int VERSION_EPOCH = %(ep)s;
+    public static final int VERSION_MAJOR = %(ma)s;
+    public static final int VERSION_MINOR = %(mi)s;
+    public static final int VERSION_REVISION = %(re)s;
+    public static final String VERSION_STATUS = "%(st)s";
+""" % { 'v' : version_str, 'vs' : version_suffix, 'ep' : epoch, 'ma' : major, 'mi' : minor, 're' : revision, 'st': status } )
 
 
     def add_class(self, decl):
@@ -898,21 +951,51 @@ public class %(jc)s {
                     self.add_func(decl)
 
         self.cpp_code = StringIO()
-        self.cpp_code.write("""
+        self.cpp_code.write(Template("""
 //
 // This file is auto-generated, please don't edit!
 //
 
-#define LOG_TAG "org.opencv.%(m)s"
+#define LOG_TAG "org.opencv.$m"
 
 #include "common.h"
-#include "opencv2/%(m)s.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+#ifdef HAVE_OPENCV_$M
+
+#include <string>
+
+#include "opencv2/$m.hpp"
 
 using namespace cv;
 
+/// throw java exception
+static void throwJavaException(JNIEnv *env, const std::exception *e, const char *method) {
+  std::string what = "unknown exception";
+  jclass je = 0;
+
+  if(e) {
+    std::string exception_type = "std::exception";
+
+    if(dynamic_cast<const cv::Exception*>(e)) {
+      exception_type = "cv::Exception";
+      je = env->FindClass("org/opencv/core/CvException");
+    }
+
+    what = exception_type + ": " + e->what();
+  }
+
+  if(!je) je = env->FindClass("java/lang/Exception");
+  env->ThrowNew(je, what.c_str());
+
+  LOGE("%s caught %s", method, what.c_str());
+  (void)method;        // avoid "unused" warning
+}
+
+
 extern "C" {
 
-""" % {'m' : module} )
+""").substitute( m = module, M = module.upper() ) )
 
         # generate code for the classes
         for name in self.classes.keys():
@@ -927,7 +1010,7 @@ extern "C" {
             java_code = Template(java_code).substitute(imports = imports)
             self.save("%s/%s+%s.java" % (output_path, module, self.classes[name].jname), java_code)
 
-        self.cpp_code.write( '\n} // extern "C"\n' )
+        self.cpp_code.write( '\n} // extern "C"\n\n#endif // HAVE_OPENCV_%s\n' % module.upper() )
         self.save(output_path+"/"+module+".cpp",  self.cpp_code.getvalue())
 
         # report
@@ -1293,23 +1376,18 @@ JNIEXPORT $rtype JNICALL Java_org_opencv_${module}_${clazz}_$fname ($argst);
 JNIEXPORT $rtype JNICALL Java_org_opencv_${module}_${clazz}_$fname
   ($args)
 {
+    static const char method_name[] = "$module::$fname()";
     try {
-        LOGD("$module::$fname()");
+        LOGD("%s", method_name);
         $prologue
         $retval$cvname( $cvargs );
         $epilogue$ret
-    } catch(cv::Exception e) {
-        LOGD("$module::$fname() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        $default
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("$module::$fname() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {$module::$fname()}");
-        $default
+        throwJavaException(env, 0, method_name);
     }
+    $default
 }
 
 
@@ -1442,4 +1520,3 @@ if __name__ == "__main__":
     #print "Generating module '" + module + "' from headers:\n\t" + "\n\t".join(srcfiles)
     generator = JavaWrapperGenerator()
     generator.gen(srcfiles, module, dstdir)
-
diff --git a/modules/java/generator/src/cpp/Mat.cpp b/modules/java/generator/src/cpp/Mat.cpp
index a07f0671b..b3b0f66e7 100644
--- a/modules/java/generator/src/cpp/Mat.cpp
+++ b/modules/java/generator/src/cpp/Mat.cpp
@@ -1,10 +1,36 @@
 #define LOG_TAG "org.opencv.core.Mat"
 
+#include <stdexcept>
+#include <string>
+
 #include "common.h"
 #include "opencv2/core.hpp"
 
 using namespace cv;
 
+/// throw java exception
+static void throwJavaException(JNIEnv *env, const std::exception *e, const char *method) {
+  std::string what = "unknown exception";
+  jclass je = 0;
+
+  if(e) {
+    std::string exception_type = "std::exception";
+
+    if(dynamic_cast<const cv::Exception*>(e)) {
+      exception_type = "cv::Exception";
+      je = env->FindClass("org/opencv/core/CvException");
+    }
+
+    what = exception_type + ": " + e->what();
+  }
+
+  if(!je) je = env->FindClass("java/lang/Exception");
+  env->ThrowNew(je, what.c_str());
+
+  LOGE("%s caught %s", method, what.c_str());
+  (void)method;        // avoid "unused" warning
+}
+
 extern "C" {
 
 
@@ -35,24 +61,17 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__III
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__III
   (JNIEnv* env, jclass, jint rows, jint cols, jint type)
 {
+    static const char method_name[] = "Mat::n_1Mat__III()";
     try {
-        LOGD("Mat::n_1Mat__III()");
-
-        Mat* _retval_ = new Mat( rows, cols, type );
-
-        return (jlong) _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1Mat__III() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        LOGD("%s", method_name);
+        return (jlong) new Mat( rows, cols, type );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1Mat__III() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1Mat__III()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -67,24 +86,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__DDI
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__DDI
   (JNIEnv* env, jclass, jdouble size_width, jdouble size_height, jint type)
 {
+    static const char method_name[] = "Mat::n_1Mat__DDI()";
     try {
-        LOGD("Mat::n_1Mat__DDI()");
+        LOGD("%s", method_name);
         Size size((int)size_width, (int)size_height);
-        Mat* _retval_ = new Mat( size, type );
-
-        return (jlong) _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1Mat__DDI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return (jlong) new Mat( size, type );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1Mat__DDI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1Mat__DDI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -100,24 +113,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__IIIDDDD
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__IIIDDDD
   (JNIEnv* env, jclass, jint rows, jint cols, jint type, jdouble s_val0, jdouble s_val1, jdouble s_val2, jdouble s_val3)
 {
+    static const char method_name[] = "Mat::n_1Mat__IIIDDDD()";
     try {
-        LOGD("Mat::n_1Mat__IIIDDDD()");
+        LOGD("%s", method_name);
         Scalar s(s_val0, s_val1, s_val2, s_val3);
-        Mat* _retval_ = new Mat( rows, cols, type, s );
-
-        return (jlong) _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1Mat__IIIDDDD() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return (jlong) new Mat( rows, cols, type, s );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1Mat__IIIDDDD() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1Mat__IIIDDDD()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -132,25 +139,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__DDIDDDD
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__DDIDDDD
   (JNIEnv* env, jclass, jdouble size_width, jdouble size_height, jint type, jdouble s_val0, jdouble s_val1, jdouble s_val2, jdouble s_val3)
 {
+    static const char method_name[] = "Mat::n_1Mat__DDIDDDD()";
     try {
-        LOGD("Mat::n_1Mat__DDIDDDD()");
+        LOGD("%s", method_name);
         Size size((int)size_width, (int)size_height);
         Scalar s(s_val0, s_val1, s_val2, s_val3);
-        Mat* _retval_ = new Mat( size, type, s );
-
-        return (jlong) _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1Mat__DDIDDDD() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return (jlong) new Mat( size, type, s );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1Mat__DDIDDDD() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1Mat__DDIDDDD()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -165,25 +166,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__JIIII
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__JIIII
   (JNIEnv* env, jclass, jlong m_nativeObj, jint rowRange_start, jint rowRange_end, jint colRange_start, jint colRange_end)
 {
+    static const char method_name[] = "Mat::n_1Mat__JIIII()";
     try {
-        LOGD("Mat::n_1Mat__JIIII()");
+        LOGD("%s", method_name);
         Range rowRange(rowRange_start, rowRange_end);
         Range colRange(colRange_start, colRange_end);
-        Mat* _retval_ = new Mat( (*(Mat*)m_nativeObj), rowRange, colRange );
-
-        return (jlong) _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1Mat__JIIII() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return (jlong) new Mat( (*(Mat*)m_nativeObj), rowRange, colRange );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1Mat__JIIII() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1Mat__JIIII()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -194,24 +189,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__JII
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__JII
   (JNIEnv* env, jclass, jlong m_nativeObj, jint rowRange_start, jint rowRange_end)
 {
+    static const char method_name[] = "Mat::n_1Mat__JII()";
     try {
-        LOGD("Mat::n_1Mat__JII()");
+        LOGD("%s", method_name);
         Range rowRange(rowRange_start, rowRange_end);
-        Mat* _retval_ = new Mat( (*(Mat*)m_nativeObj), rowRange );
-
-        return (jlong) _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1Mat__JII() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return (jlong) new Mat( (*(Mat*)m_nativeObj), rowRange );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1Mat__JII() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1Mat__JII()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -225,24 +214,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1adjustROI
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1adjustROI
   (JNIEnv* env, jclass, jlong self, jint dtop, jint dbottom, jint dleft, jint dright)
 {
+    static const char method_name[] = "Mat::n_1adjustROI()";
     try {
-        LOGD("Mat::n_1adjustROI()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->adjustROI( dtop, dbottom, dleft, dright );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1adjustROI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1adjustROI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1adjustROI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -257,23 +241,15 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1assignTo__JJI
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1assignTo__JJI
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj, jint type)
 {
+    static const char method_name[] = "Mat::n_1assignTo__JJI()";
     try {
-        LOGD("Mat::n_1assignTo__JJI()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         me->assignTo( (*(Mat*)m_nativeObj), type );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1assignTo__JJI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1assignTo__JJI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1assignTo__JJI()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -284,23 +260,15 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1assignTo__JJ
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1assignTo__JJ
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1assignTo__JJ()";
     try {
-        LOGD("Mat::n_1assignTo__JJ()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         me->assignTo( (*(Mat*)m_nativeObj) );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1assignTo__JJ() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1assignTo__JJ() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1assignTo__JJ()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -316,24 +284,18 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1channels
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1channels
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1channels()";
     try {
-        LOGD("Mat::n_1channels()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        int _retval_ = me->channels(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1channels() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->channels(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1channels() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1channels()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -348,24 +310,18 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1checkVector__JIIZ
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1checkVector__JIIZ
   (JNIEnv* env, jclass, jlong self, jint elemChannels, jint depth, jboolean requireContinuous)
 {
+    static const char method_name[] = "Mat::n_1checkVector__JIIZ()";
     try {
-        LOGD("Mat::n_1checkVector__JIIZ()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        int _retval_ = me->checkVector( elemChannels, depth, requireContinuous );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1checkVector__JIIZ() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->checkVector( elemChannels, depth, requireContinuous );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1checkVector__JIIZ() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1checkVector__JIIZ()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -376,24 +332,18 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1checkVector__JII
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1checkVector__JII
   (JNIEnv* env, jclass, jlong self, jint elemChannels, jint depth)
 {
+    static const char method_name[] = "Mat::n_1checkVector__JII()";
     try {
-        LOGD("Mat::n_1checkVector__JII()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        int _retval_ = me->checkVector( elemChannels, depth );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1checkVector__JII() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->checkVector( elemChannels, depth );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1checkVector__JII() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1checkVector__JII()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -404,24 +354,18 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1checkVector__JI
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1checkVector__JI
   (JNIEnv* env, jclass, jlong self, jint elemChannels)
 {
+    static const char method_name[] = "Mat::n_1checkVector__JI()";
     try {
-        LOGD("Mat::n_1checkVector__JI()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        int _retval_ = me->checkVector( elemChannels );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1checkVector__JI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->checkVector( elemChannels );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1checkVector__JI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1checkVector__JI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -437,24 +381,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1clone
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1clone
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1clone()";
     try {
-        LOGD("Mat::n_1clone()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->clone(  );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1clone() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1clone() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1clone()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -469,24 +408,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1col
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1col
   (JNIEnv* env, jclass, jlong self, jint x)
 {
+    static const char method_name[] = "Mat::n_1col()";
     try {
-        LOGD("Mat::n_1col()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->col( x );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1col() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1col() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1col()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -501,24 +435,45 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1colRange
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1colRange
   (JNIEnv* env, jclass, jlong self, jint startcol, jint endcol)
 {
+    static const char method_name[] = "Mat::n_1colRange()";
     try {
-        LOGD("Mat::n_1colRange()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->colRange( startcol, endcol );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1colRange() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1colRange() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1colRange()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
+}
+
+
+
+//
+//  int Mat::dims()
+//
+
+JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1dims
+  (JNIEnv* env, jclass, jlong self);
+
+JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1dims
+  (JNIEnv* env, jclass, jlong self)
+{
+    static const char method_name[] = "Mat::n_1dims()";
+    try {
+        LOGD("%s", method_name);
+        Mat* me = (Mat*) self; //TODO: check for NULL
+        return me->dims;
+    } catch(cv::Exception e) {
+        throwJavaException(env, &e, method_name);
+    } catch (...) {
+        throwJavaException(env, 0, method_name);
+    }
+
+    return 0;
 }
 
 
@@ -533,24 +488,18 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1cols
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1cols
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1cols()";
     try {
-        LOGD("Mat::n_1cols()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        int _retval_ = me->cols;
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1cols() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->cols;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1cols() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1cols()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -565,24 +514,16 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1convertTo__JJIDD
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1convertTo__JJIDD
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj, jint rtype, jdouble alpha, jdouble beta)
 {
+    static const char method_name[] = "Mat::n_1convertTo__JJIDD()";
     try {
-        LOGD("Mat::n_1convertTo__JJIDD()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& m = *((Mat*)m_nativeObj);
         me->convertTo( m, rtype, alpha, beta );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1convertTo__JJIDD() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1convertTo__JJIDD() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1convertTo__JJIDD()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -593,24 +534,16 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1convertTo__JJID
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1convertTo__JJID
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj, jint rtype, jdouble alpha)
 {
+    static const char method_name[] = "Mat::n_1convertTo__JJID()";
     try {
-        LOGD("Mat::n_1convertTo__JJID()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& m = *((Mat*)m_nativeObj);
         me->convertTo( m, rtype, alpha );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1convertTo__JJID() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1convertTo__JJID() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1convertTo__JJID()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -621,24 +554,16 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1convertTo__JJI
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1convertTo__JJI
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj, jint rtype)
 {
+    static const char method_name[] = "Mat::n_1convertTo__JJI()";
     try {
-        LOGD("Mat::n_1convertTo__JJI()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& m = *((Mat*)m_nativeObj);
         me->convertTo( m, rtype );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1convertTo__JJI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1convertTo__JJI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1convertTo__JJI()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -654,24 +579,16 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1copyTo__JJ
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1copyTo__JJ
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1copyTo__JJ()";
     try {
-        LOGD("Mat::n_1copyTo__JJ()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& m = *((Mat*)m_nativeObj);
         me->copyTo( m );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1copyTo__JJ() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1copyTo__JJ() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1copyTo__JJ()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -687,25 +604,17 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1copyTo__JJJ
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1copyTo__JJJ
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj, jlong mask_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1copyTo__JJJ()";
     try {
-        LOGD("Mat::n_1copyTo__JJJ()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& m = *((Mat*)m_nativeObj);
         Mat& mask = *((Mat*)mask_nativeObj);
         me->copyTo( m, mask );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1copyTo__JJJ() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1copyTo__JJJ() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1copyTo__JJJ()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -721,23 +630,15 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1create__JIII
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1create__JIII
   (JNIEnv* env, jclass, jlong self, jint rows, jint cols, jint type)
 {
+    static const char method_name[] = "Mat::n_1create__JIII()";
     try {
-        LOGD("Mat::n_1create__JIII()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         me->create( rows, cols, type );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1create__JIII() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1create__JIII() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1create__JIII()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -753,24 +654,16 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1create__JDDI
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1create__JDDI
   (JNIEnv* env, jclass, jlong self, jdouble size_width, jdouble size_height, jint type)
 {
+    static const char method_name[] = "Mat::n_1create__JDDI()";
     try {
-        LOGD("Mat::n_1create__JDDI()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Size size((int)size_width, (int)size_height);
         me->create( size, type );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1create__JDDI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1create__JDDI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1create__JDDI()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -786,25 +679,20 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1cross
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1cross
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1cross()";
     try {
-        LOGD("Mat::n_1cross()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& m = *((Mat*)m_nativeObj);
         Mat _retval_ = me->cross( m );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1cross() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1cross() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1cross()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -836,24 +724,18 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1depth
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1depth
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1depth()";
     try {
-        LOGD("Mat::n_1depth()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        int _retval_ = me->depth(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1depth() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->depth(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1depth() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1depth()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -868,24 +750,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1diag__JI
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1diag__JI
   (JNIEnv* env, jclass, jlong self, jint d)
 {
+    static const char method_name[] = "Mat::n_1diag__JI()";
     try {
-        LOGD("Mat::n_1diag__JI()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->diag( d );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1diag__JI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1diag__JI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1diag__JI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -901,24 +778,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1diag__J
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1diag__J
   (JNIEnv* env, jclass, jlong d_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1diag__J()";
     try {
-        LOGD("Mat::n_1diag__J()");
-
+        LOGD("%s", method_name);
         Mat _retval_ = Mat::diag( (*(Mat*)d_nativeObj) );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1diag__J() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1diag__J() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1diag__J()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -933,25 +804,19 @@ JNIEXPORT jdouble JNICALL Java_org_opencv_core_Mat_n_1dot
 JNIEXPORT jdouble JNICALL Java_org_opencv_core_Mat_n_1dot
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1dot()";
     try {
-        LOGD("Mat::n_1dot()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& m = *((Mat*)m_nativeObj);
-        double _retval_ = me->dot( m );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1dot() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->dot( m );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1dot() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1dot()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -966,24 +831,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1elemSize
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1elemSize
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1elemSize()";
     try {
-        LOGD("Mat::n_1elemSize()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        size_t _retval_ = me->elemSize(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1elemSize() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->elemSize(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1elemSize() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1elemSize()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -998,24 +857,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1elemSize1
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1elemSize1
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1elemSize1()";
     try {
-        LOGD("Mat::n_1elemSize1()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        size_t _retval_ = me->elemSize1(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1elemSize1() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->elemSize1(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1elemSize1() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1elemSize1()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1030,24 +883,18 @@ JNIEXPORT jboolean JNICALL Java_org_opencv_core_Mat_n_1empty
 JNIEXPORT jboolean JNICALL Java_org_opencv_core_Mat_n_1empty
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1empty()";
     try {
-        LOGD("Mat::n_1empty()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        bool _retval_ = me->empty(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1empty() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->empty(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1empty() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1empty()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1062,24 +909,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1eye__III
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1eye__III
   (JNIEnv* env, jclass, jint rows, jint cols, jint type)
 {
+    static const char method_name[] = "Mat::n_1eye__III()";
     try {
-        LOGD("Mat::n_1eye__III()");
-
+        LOGD("%s", method_name);
         Mat _retval_ = Mat::eye( rows, cols, type );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1eye__III() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1eye__III() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1eye__III()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1094,24 +935,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1eye__DDI
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1eye__DDI
   (JNIEnv* env, jclass, jdouble size_width, jdouble size_height, jint type)
 {
+    static const char method_name[] = "Mat::n_1eye__DDI()";
     try {
-        LOGD("Mat::n_1eye__DDI()");
+        LOGD("%s", method_name);
         Size size((int)size_width, (int)size_height);
         Mat _retval_ = Mat::eye( size, type );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1eye__DDI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1eye__DDI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1eye__DDI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1126,24 +962,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1inv__JI
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1inv__JI
   (JNIEnv* env, jclass, jlong self, jint method)
 {
+    static const char method_name[] = "Mat::n_1inv__JI()";
     try {
-        LOGD("Mat::n_1inv__JI()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->inv( method );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1inv__JI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1inv__JI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1inv__JI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1153,24 +984,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1inv__J
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1inv__J
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1inv__J()";
     try {
-        LOGD("Mat::n_1inv__J()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->inv(  );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1inv__J() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1inv__J() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1inv__J()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1185,24 +1011,18 @@ JNIEXPORT jboolean JNICALL Java_org_opencv_core_Mat_n_1isContinuous
 JNIEXPORT jboolean JNICALL Java_org_opencv_core_Mat_n_1isContinuous
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1isContinuous()";
     try {
-        LOGD("Mat::n_1isContinuous()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        bool _retval_ = me->isContinuous(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1isContinuous() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->isContinuous(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1isContinuous() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1isContinuous()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1217,24 +1037,18 @@ JNIEXPORT jboolean JNICALL Java_org_opencv_core_Mat_n_1isSubmatrix
 JNIEXPORT jboolean JNICALL Java_org_opencv_core_Mat_n_1isSubmatrix
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1isSubmatrix()";
     try {
-        LOGD("Mat::n_1isSubmatrix()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        bool _retval_ = me->isSubmatrix(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1isSubmatrix() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->isSubmatrix(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1isSubmatrix() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1isSubmatrix()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1249,25 +1063,18 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_locateROI_10
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_locateROI_10
   (JNIEnv* env, jclass, jlong self, jdoubleArray wholeSize_out, jdoubleArray ofs_out)
 {
+    static const char method_name[] = "core::locateROI_10()";
     try {
-        LOGD("core::locateROI_10()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Size wholeSize;
         Point ofs;
         me->locateROI( wholeSize, ofs );
         jdouble tmp_wholeSize[2] = {wholeSize.width, wholeSize.height}; env->SetDoubleArrayRegion(wholeSize_out, 0, 2, tmp_wholeSize);  jdouble tmp_ofs[2] = {ofs.x, ofs.y}; env->SetDoubleArrayRegion(ofs_out, 0, 2, tmp_ofs);
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::locateROI_10() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::locateROI_10() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::locateROI_10()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -1283,25 +1090,20 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1mul__JJD
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1mul__JJD
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj, jdouble scale)
 {
+    static const char method_name[] = "Mat::n_1mul__JJD()";
     try {
-        LOGD("Mat::n_1mul__JJD()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& m = *((Mat*)m_nativeObj);
         Mat _retval_ = me->mul( m, scale );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1mul__JJD() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1mul__JJD() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1mul__JJD()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1312,25 +1114,20 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1mul__JJ
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1mul__JJ
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1mul__JJ()";
     try {
-        LOGD("Mat::n_1mul__JJ()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& m = *((Mat*)m_nativeObj);
         Mat _retval_ = me->mul( m );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1mul__JJ() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1mul__JJ() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1mul__JJ()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1345,24 +1142,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1ones__III
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1ones__III
   (JNIEnv* env, jclass, jint rows, jint cols, jint type)
 {
+    static const char method_name[] = "Mat::n_1ones__III()";
     try {
-        LOGD("Mat::n_1ones__III()");
-
+        LOGD("%s", method_name);
         Mat _retval_ = Mat::ones( rows, cols, type );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1ones__III() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1ones__III() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1ones__III()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1377,24 +1168,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1ones__DDI
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1ones__DDI
   (JNIEnv* env, jclass, jdouble size_width, jdouble size_height, jint type)
 {
+    static const char method_name[] = "Mat::n_1ones__DDI()";
     try {
-        LOGD("Mat::n_1ones__DDI()");
+        LOGD("%s", method_name);
         Size size((int)size_width, (int)size_height);
         Mat _retval_ = Mat::ones( size, type );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1ones__DDI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1ones__DDI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1ones__DDI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1409,23 +1195,15 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1push_1back
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1push_1back
   (JNIEnv* env, jclass, jlong self, jlong m_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1push_1back()";
     try {
-        LOGD("Mat::n_1push_1back()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         me->push_back( (*(Mat*)m_nativeObj) );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1push_1back() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1push_1back() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1push_1back()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -1441,23 +1219,15 @@ JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1release
 JNIEXPORT void JNICALL Java_org_opencv_core_Mat_n_1release
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1release()";
     try {
-        LOGD("Mat::n_1release()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         me->release(  );
-
-        return;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1release() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1release() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1release()}");
-        return;
+        throwJavaException(env, 0, method_name);
     }
 }
 
@@ -1473,24 +1243,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1reshape__JII
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1reshape__JII
   (JNIEnv* env, jclass, jlong self, jint cn, jint rows)
 {
+    static const char method_name[] = "Mat::n_1reshape__JII()";
     try {
-        LOGD("Mat::n_1reshape__JII()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->reshape( cn, rows );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1reshape__JII() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1reshape__JII() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1reshape__JII()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1501,24 +1266,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1reshape__JI
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1reshape__JI
   (JNIEnv* env, jclass, jlong self, jint cn)
 {
+    static const char method_name[] = "Mat::n_1reshape__JI()";
     try {
-        LOGD("Mat::n_1reshape__JI()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->reshape( cn );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1reshape__JI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1reshape__JI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1reshape__JI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1533,24 +1293,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1row
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1row
   (JNIEnv* env, jclass, jlong self, jint y)
 {
+    static const char method_name[] = "Mat::n_1row()";
     try {
-        LOGD("Mat::n_1row()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->row( y );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1row() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1row() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1row()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1565,24 +1320,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1rowRange
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1rowRange
   (JNIEnv* env, jclass, jlong self, jint startrow, jint endrow)
 {
+    static const char method_name[] = "Mat::n_1rowRange()";
     try {
-        LOGD("Mat::n_1rowRange()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->rowRange( startrow, endrow );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1rowRange() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1rowRange() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1rowRange()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1597,24 +1347,18 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1rows
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1rows
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1rows()";
     try {
-        LOGD("Mat::n_1rows()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        int _retval_ = me->rows;
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1rows() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->rows;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1rows() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1rows()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1629,25 +1373,20 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1setTo__JDDDD
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1setTo__JDDDD
   (JNIEnv* env, jclass, jlong self, jdouble s_val0, jdouble s_val1, jdouble s_val2, jdouble s_val3)
 {
+    static const char method_name[] = "Mat::n_1setTo__JDDDD()";
     try {
-        LOGD("Mat::n_1setTo__JDDDD()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Scalar s(s_val0, s_val1, s_val2, s_val3);
         Mat _retval_ = me->operator =( s );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1setTo__JDDDD() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1setTo__JDDDD() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1setTo__JDDDD()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1662,26 +1401,21 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1setTo__JDDDDJ
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1setTo__JDDDDJ
   (JNIEnv* env, jclass, jlong self, jdouble s_val0, jdouble s_val1, jdouble s_val2, jdouble s_val3, jlong mask_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1setTo__JDDDDJ()";
     try {
-        LOGD("Mat::n_1setTo__JDDDDJ()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Scalar s(s_val0, s_val1, s_val2, s_val3);
         Mat& mask = *((Mat*)mask_nativeObj);
         Mat _retval_ = me->setTo( s, mask );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1setTo__JDDDDJ() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1setTo__JDDDDJ() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1setTo__JDDDDJ()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1696,26 +1430,21 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1setTo__JJJ
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1setTo__JJJ
   (JNIEnv* env, jclass, jlong self, jlong value_nativeObj, jlong mask_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1setTo__JJJ()";
     try {
-        LOGD("Mat::n_1setTo__JJJ()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& value = *((Mat*)value_nativeObj);
         Mat& mask = *((Mat*)mask_nativeObj);
         Mat _retval_ = me->setTo( value, mask );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1setTo__JJJ() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1setTo__JJJ() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1setTo__JJJ()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1726,25 +1455,20 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1setTo__JJ
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1setTo__JJ
   (JNIEnv* env, jclass, jlong self, jlong value_nativeObj)
 {
+    static const char method_name[] = "Mat::n_1setTo__JJ()";
     try {
-        LOGD("Mat::n_1setTo__JJ()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat& value = *((Mat*)value_nativeObj);
         Mat _retval_ = me->setTo( value );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1setTo__JJ() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1setTo__JJ() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1setTo__JJ()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1759,24 +1483,22 @@ JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_n_1size
 JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_n_1size
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1size()";
     try {
-        LOGD("Mat::n_1size()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Size _retval_ = me->size(  );
-        jdoubleArray _da_retval_ = env->NewDoubleArray(2);  jdouble _tmp_retval_[2] = {_retval_.width, _retval_.height}; env->SetDoubleArrayRegion(_da_retval_, 0, 2, _tmp_retval_);
+        jdoubleArray _da_retval_ = env->NewDoubleArray(2);
+        jdouble _tmp_retval_[2] = {_retval_.width, _retval_.height};
+        env->SetDoubleArrayRegion(_da_retval_, 0, 2, _tmp_retval_);
         return _da_retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1size() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1size() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1size()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1791,24 +1513,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1step1__JI
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1step1__JI
   (JNIEnv* env, jclass, jlong self, jint i)
 {
+    static const char method_name[] = "Mat::n_1step1__JI()";
     try {
-        LOGD("Mat::n_1step1__JI()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        size_t _retval_ = me->step1( i );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1step1__JI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->step1( i );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1step1__JI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1step1__JI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1819,24 +1535,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1step1__J
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1step1__J
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1step1__J()";
     try {
-        LOGD("Mat::n_1step1__J()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        size_t _retval_ = me->step1(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1step1__J() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->step1(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1step1__J() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1step1__J()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 //
@@ -1849,26 +1559,21 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1submat_1rr
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1submat_1rr
   (JNIEnv* env, jclass, jlong self, jint rowRange_start, jint rowRange_end, jint colRange_start, jint colRange_end)
 {
+    static const char method_name[] = "Mat::n_1submat_1rr()";
     try {
-        LOGD("Mat::n_1submat_1rr()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Range rowRange(rowRange_start, rowRange_end);
         Range colRange(colRange_start, colRange_end);
         Mat _retval_ = me->operator()( rowRange, colRange );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1submat_1rr() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1submat_1rr() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1submat_1rr()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1883,25 +1588,20 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1submat
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1submat
   (JNIEnv* env, jclass, jlong self, jint roi_x, jint roi_y, jint roi_width, jint roi_height)
 {
+    static const char method_name[] = "Mat::n_1submat()";
     try {
-        LOGD("Mat::n_1submat()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Rect roi(roi_x, roi_y, roi_width, roi_height);
         Mat _retval_ = me->operator()( roi );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1submat() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1submat() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1submat()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1916,24 +1616,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1t
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1t
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1t()";
     try {
-        LOGD("Mat::n_1t()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
         Mat _retval_ = me->t(  );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1t() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1t() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1t()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1948,24 +1643,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1total
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1total
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1total()";
     try {
-        LOGD("Mat::n_1total()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        size_t _retval_ = me->total(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1total() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->total(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1total() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1total()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -1980,24 +1669,18 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1type
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_n_1type
   (JNIEnv* env, jclass, jlong self)
 {
+    static const char method_name[] = "Mat::n_1type()";
     try {
-        LOGD("Mat::n_1type()");
+        LOGD("%s", method_name);
         Mat* me = (Mat*) self; //TODO: check for NULL
-        int _retval_ = me->type(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1type() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+        return me->type(  );
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1type() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1type()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -2012,24 +1695,18 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1zeros__III
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1zeros__III
   (JNIEnv* env, jclass, jint rows, jint cols, jint type)
 {
+    static const char method_name[] = "Mat::n_1zeros__III()";
     try {
-        LOGD("Mat::n_1zeros__III()");
-
+        LOGD("%s", method_name);
         Mat _retval_ = Mat::zeros( rows, cols, type );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1zeros__III() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1zeros__III() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1zeros__III()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -2044,24 +1721,19 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1zeros__DDI
 JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1zeros__DDI
   (JNIEnv* env, jclass, jdouble size_width, jdouble size_height, jint type)
 {
+    static const char method_name[] = "Mat::n_1zeros__DDI()";
     try {
-        LOGD("Mat::n_1zeros__DDI()");
+        LOGD("%s", method_name);
         Size size((int)size_width, (int)size_height);
         Mat _retval_ = Mat::zeros( size, type );
-
         return (jlong) new Mat(_retval_);
-    } catch(cv::Exception e) {
-        LOGD("Mat::n_1zeros__DDI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::n_1zeros__DDI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::n_1zeros__DDI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -2089,8 +1761,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutD
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutD
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jdoubleArray vals)
 {
+    static const char method_name[] = "Mat::nPutD()";
     try {
-        LOGD("Mat::nPutD()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(!me || !me->data) return 0;  // no native object behind
         if(me->rows<=row || me->cols<=col) return 0; // indexes out of range
@@ -2130,18 +1803,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutD
 
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nPutD() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nPutD() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nPutD()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -2186,8 +1854,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutB
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutB
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jbyteArray vals)
 {
+    static const char method_name[] = "Mat::nPutB()";
     try {
-        LOGD("Mat::nPutB()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->depth() != CV_8U && me->depth() != CV_8S) return 0; // incompatible type
@@ -2197,18 +1866,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutB
         int res = mat_put<char>(me, row, col, count, values);
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nPutB() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nPutB() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nPutB()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutS
@@ -2217,8 +1881,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutS
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutS
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jshortArray vals)
 {
+    static const char method_name[] = "Mat::nPutS()";
     try {
-        LOGD("Mat::nPutS()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->depth() != CV_16U && me->depth() != CV_16S) return 0; // incompatible type
@@ -2228,18 +1893,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutS
         int res = mat_put<short>(me, row, col, count, values);
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nPutS() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nPutS() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nPutS()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutI
@@ -2248,8 +1908,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutI
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutI
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jintArray vals)
 {
+    static const char method_name[] = "Mat::nPutI()";
     try {
-        LOGD("Mat::nPutI()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->depth() != CV_32S) return 0; // incompatible type
@@ -2259,18 +1920,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutI
         int res = mat_put<int>(me, row, col, count, values);
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nPutI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nPutI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nPutI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutF
@@ -2279,8 +1935,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutF
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutF
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jfloatArray vals)
 {
+    static const char method_name[] = "Mat::nPutF()";
     try {
-        LOGD("Mat::nPutF()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->depth() != CV_32F) return 0; // incompatible type
@@ -2290,18 +1947,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nPutF
         int res = mat_put<float>(me, row, col, count, values);
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nPutF() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nPutF() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nPutF()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 
@@ -2345,8 +1997,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetB
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetB
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jbyteArray vals)
 {
+    static const char method_name[] = "Mat::nGetB()";
     try {
-        LOGD("Mat::nGetB()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->depth() != CV_8U && me->depth() != CV_8S) return 0; // incompatible type
@@ -2356,18 +2009,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetB
         int res = mat_get<char>(me, row, col, count, values);
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nGetB() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nGetB() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nGetB()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetS
@@ -2376,8 +2024,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetS
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetS
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jshortArray vals)
 {
+    static const char method_name[] = "Mat::nGetS()";
     try {
-        LOGD("Mat::nGetS()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->depth() != CV_16U && me->depth() != CV_16S) return 0; // incompatible type
@@ -2387,18 +2036,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetS
         int res = mat_get<short>(me, row, col, count, values);
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nGetS() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nGetS() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nGetS()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetI
@@ -2407,8 +2051,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetI
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetI
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jintArray vals)
 {
+    static const char method_name[] = "Mat::nGetI()";
     try {
-        LOGD("Mat::nGetI()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->depth() != CV_32S) return 0; // incompatible type
@@ -2418,18 +2063,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetI
         int res = mat_get<int>(me, row, col, count, values);
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nGetI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nGetI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nGetI()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetF
@@ -2438,8 +2078,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetF
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetF
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jfloatArray vals)
 {
+    static const char method_name[] = "Mat::nGetF()";
     try {
-        LOGD("Mat::nGetF()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->depth() != CV_32F) return 0; // incompatible type
@@ -2449,18 +2090,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetF
         int res = mat_get<float>(me, row, col, count, values);
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nGetF() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nGetF() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nGetF()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetD
@@ -2469,8 +2105,9 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetD
 JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetD
     (JNIEnv* env, jclass, jlong self, jint row, jint col, jint count, jdoubleArray vals)
 {
+    static const char method_name[] = "Mat::nGetD()";
     try {
-        LOGD("Mat::nGetD()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->depth() != CV_64F) return 0; // incompatible type
@@ -2480,18 +2117,13 @@ JNIEXPORT jint JNICALL Java_org_opencv_core_Mat_nGetD
         int res = mat_get<double>(me, row, col, count, values);
         env->ReleasePrimitiveArrayCritical(vals, values, 0);
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nGetD() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nGetD() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nGetD()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_nGet
@@ -2500,8 +2132,9 @@ JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_nGet
 JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_nGet
     (JNIEnv* env, jclass, jlong self, jint row, jint col)
 {
+    static const char method_name[] = "Mat::nGet()";
     try {
-        LOGD("Mat::nGet()");
+        LOGD("%s", method_name);
         cv::Mat* me = (cv::Mat*) self;
         if(! self) return 0; // no native object behind
         if(me->rows<=row || me->cols<=col) return 0; // indexes out of range
@@ -2522,18 +2155,13 @@ JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_nGet
             env->SetDoubleArrayRegion(res, 0, me->channels(), buff);
         }
         return res;
-    } catch(cv::Exception e) {
-        LOGD("Mat::nGet() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
     } catch (...) {
-        LOGD("Mat::nGet() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {Mat::nGet()}");
-        return 0;
+        throwJavaException(env, 0, method_name);
     }
+
+    return 0;
 }
 
 JNIEXPORT jstring JNICALL Java_org_opencv_core_Mat_nDump
@@ -2542,28 +2170,24 @@ JNIEXPORT jstring JNICALL Java_org_opencv_core_Mat_nDump
 JNIEXPORT jstring JNICALL Java_org_opencv_core_Mat_nDump
   (JNIEnv *env, jclass, jlong self)
 {
-    cv::Mat* me = (cv::Mat*) self; //TODO: check for NULL
+    static const char method_name[] = "Mat::nDump()";
     try {
-            LOGD("Mat::nDump()");
-            String s;
-            Ptr<Formatted> fmtd = Formatter::get()->format(*me);
-            for(const char* str = fmtd->next(); str; str = fmtd->next())
-            {
-                s = s + String(str);
-            }
-            return env->NewStringUTF(s.c_str());
-        } catch(cv::Exception e) {
-            LOGE("Mat::nDump() catched cv::Exception: %s", e.what());
-            jclass je = env->FindClass("org/opencv/core/CvException");
-            if(!je) je = env->FindClass("java/lang/Exception");
-            env->ThrowNew(je, e.what());
-            return env->NewStringUTF("ERROR");
-        } catch (...) {
-            LOGE("Mat::nDump() catched unknown exception (...)");
-            jclass je = env->FindClass("java/lang/Exception");
-            env->ThrowNew(je, "Unknown exception in JNI code {Mat::nDump()}");
-            return env->NewStringUTF("ERROR");
+        LOGD("%s", method_name);
+        cv::Mat* me = (cv::Mat*) self; //TODO: check for NULL
+        String s;
+        Ptr<Formatted> fmtd = Formatter::get()->format(*me);
+        for(const char* str = fmtd->next(); str; str = fmtd->next())
+        {
+            s = s + String(str);
         }
+        return env->NewStringUTF(s.c_str());
+    } catch(const std::exception &e) {
+        throwJavaException(env, &e, method_name);
+    } catch (...) {
+        throwJavaException(env, 0, method_name);
+    }
+
+    return 0;
 }
 
 
diff --git a/modules/java/generator/src/cpp/VideoCapture.cpp b/modules/java/generator/src/cpp/VideoCapture.cpp
deleted file mode 100644
index 312d71020..000000000
--- a/modules/java/generator/src/cpp/VideoCapture.cpp
+++ /dev/null
@@ -1,434 +0,0 @@
-#define LOG_TAG "org.opencv.highgui.VideoCapture"
-#include "common.h"
-
-#include "opencv2/opencv_modules.hpp"
-#ifdef HAVE_OPENCV_HIGHGUI
-
-#include "opencv2/highgui.hpp"
-using namespace cv;
-
-
-extern "C" {
-
-//
-//   VideoCapture::VideoCapture()
-//
-
-JNIEXPORT jlong JNICALL Java_org_opencv_highgui_VideoCapture_n_1VideoCapture__
-  (JNIEnv* env, jclass);
-
-JNIEXPORT jlong JNICALL Java_org_opencv_highgui_VideoCapture_n_1VideoCapture__
-  (JNIEnv* env, jclass)
-{
-    try {
-        LOGD("highgui::VideoCapture_n_1VideoCapture__()");
-
-        VideoCapture* _retval_ = new VideoCapture(  );
-
-        return (jlong) _retval_;
-    } catch(cv::Exception e) {
-        LOGD("highgui::VideoCapture_n_1VideoCapture__() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-        LOGD("highgui::VideoCapture_n_1VideoCapture__() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1VideoCapture__()}");
-        return 0;
-    }
-}
-
-
-//
-//   VideoCapture::VideoCapture(int device)
-//
-
-JNIEXPORT jlong JNICALL Java_org_opencv_highgui_VideoCapture_n_1VideoCapture__I
-  (JNIEnv* env, jclass, jint device);
-
-JNIEXPORT jlong JNICALL Java_org_opencv_highgui_VideoCapture_n_1VideoCapture__I
-  (JNIEnv* env, jclass, jint device)
-{
-    try {
-        LOGD("highgui::VideoCapture_n_1VideoCapture__I()");
-
-        VideoCapture* _retval_ = new VideoCapture( device );
-
-        return (jlong) _retval_;
-    } catch(cv::Exception e) {
-        LOGD("highgui::VideoCapture_n_1VideoCapture__I() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-        LOGD("highgui::VideoCapture_n_1VideoCapture__I() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1VideoCapture__I()}");
-        return 0;
-    }
-}
-
-
-
-//
-//  double VideoCapture::get(int propId)
-//
-
-JNIEXPORT jdouble JNICALL Java_org_opencv_highgui_VideoCapture_n_1get
-  (JNIEnv* env, jclass, jlong self, jint propId);
-
-JNIEXPORT jdouble JNICALL Java_org_opencv_highgui_VideoCapture_n_1get
-  (JNIEnv* env, jclass, jlong self, jint propId)
-{
-    try {
-        LOGD("highgui::VideoCapture_n_1get()");
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        double _retval_ = me->get( propId );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("highgui::VideoCapture_n_1get() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-        LOGD("highgui::VideoCapture_n_1get() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1get()}");
-        return 0;
-    }
-}
-
-
-
-//
-//  bool VideoCapture::grab()
-//
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1grab
-  (JNIEnv* env, jclass, jlong self);
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1grab
-  (JNIEnv* env, jclass, jlong self)
-{
-    try {
-        LOGD("highgui::VideoCapture_n_1grab()");
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        bool _retval_ = me->grab(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("highgui::VideoCapture_n_1grab() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-        LOGD("highgui::VideoCapture_n_1grab() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1grab()}");
-        return 0;
-    }
-}
-
-
-
-//
-//  bool VideoCapture::isOpened()
-//
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1isOpened
-  (JNIEnv* env, jclass, jlong self);
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1isOpened
-  (JNIEnv* env, jclass, jlong self)
-{
-    try {
-        LOGD("highgui::VideoCapture_n_1isOpened()");
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        bool _retval_ = me->isOpened(  );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("highgui::VideoCapture_n_1isOpened() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-        LOGD("highgui::VideoCapture_n_1isOpened() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1isOpened()}");
-        return 0;
-    }
-}
-
-
-//
-//  bool VideoCapture::open(int device)
-//
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1open__JI
-  (JNIEnv* env, jclass, jlong self, jint device);
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1open__JI
-  (JNIEnv* env, jclass, jlong self, jint device)
-{
-    try {
-        LOGD("highgui::VideoCapture_n_1open__JI()");
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        bool _retval_ = me->open( device );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("highgui::VideoCapture_n_1open__JI() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-        LOGD("highgui::VideoCapture_n_1open__JI() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1open__JI()}");
-        return 0;
-    }
-}
-
-
-
-//
-//  bool VideoCapture::read(Mat image)
-//
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1read
-  (JNIEnv* env, jclass, jlong self, jlong image_nativeObj);
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1read
-  (JNIEnv* env, jclass, jlong self, jlong image_nativeObj)
-{
-    try {
-        LOGD("highgui::VideoCapture_n_1read()");
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        Mat& image = *((Mat*)image_nativeObj);
-        bool _retval_ = me->read( image );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-        LOGD("highgui::VideoCapture_n_1read() catched cv::Exception: %s", e.what());
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-        LOGD("highgui::VideoCapture_n_1read() catched unknown exception (...)");
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1read()}");
-        return 0;
-    }
-}
-
-
-
-//
-//  void VideoCapture::release()
-//
-
-JNIEXPORT void JNICALL Java_org_opencv_highgui_VideoCapture_n_1release
-  (JNIEnv* env, jclass, jlong self);
-
-JNIEXPORT void JNICALL Java_org_opencv_highgui_VideoCapture_n_1release
-  (JNIEnv* env, jclass, jlong self)
-{
-    try {
-
-        LOGD("highgui::VideoCapture_n_1release()");
-
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        me->release(  );
-
-        return;
-    } catch(cv::Exception e) {
-
-        LOGD("highgui::VideoCapture_n_1release() catched cv::Exception: %s", e.what());
-
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return;
-    } catch (...) {
-
-        LOGD("highgui::VideoCapture_n_1release() catched unknown exception (...)");
-
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1release()}");
-        return;
-    }
-}
-
-
-
-//
-//  bool VideoCapture::retrieve(Mat image, int channel = 0)
-//
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1retrieve__JJI
-  (JNIEnv* env, jclass, jlong self, jlong image_nativeObj, jint channel);
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1retrieve__JJI
-  (JNIEnv* env, jclass, jlong self, jlong image_nativeObj, jint channel)
-{
-    try {
-
-        LOGD("highgui::VideoCapture_n_1retrieve__JJI()");
-
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        Mat& image = *((Mat*)image_nativeObj);
-        bool _retval_ = me->retrieve( image, channel );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-
-        LOGD("highgui::VideoCapture_n_1retrieve__JJI() catched cv::Exception: %s", e.what());
-
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-
-        LOGD("highgui::VideoCapture_n_1retrieve__JJI() catched unknown exception (...)");
-
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1retrieve__JJI()}");
-        return 0;
-    }
-}
-
-
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1retrieve__JJ
-  (JNIEnv* env, jclass, jlong self, jlong image_nativeObj);
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1retrieve__JJ
-  (JNIEnv* env, jclass, jlong self, jlong image_nativeObj)
-{
-    try {
-
-        LOGD("highgui::VideoCapture_n_1retrieve__JJ()");
-
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        Mat& image = *((Mat*)image_nativeObj);
-        bool _retval_ = me->retrieve( image );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-
-        LOGD("highgui::VideoCapture_n_1retrieve__JJ() catched cv::Exception: %s", e.what());
-
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-
-        LOGD("highgui::VideoCapture_n_1retrieve__JJ() catched unknown exception (...)");
-
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1retrieve__JJ()}");
-        return 0;
-    }
-}
-
-
-
-//
-//  bool VideoCapture::set(int propId, double value)
-//
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1set
-  (JNIEnv* env, jclass, jlong self, jint propId, jdouble value);
-
-JNIEXPORT jboolean JNICALL Java_org_opencv_highgui_VideoCapture_n_1set
-  (JNIEnv* env, jclass, jlong self, jint propId, jdouble value)
-{
-    try {
-
-        LOGD("highgui::VideoCapture_n_1set()");
-
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        bool _retval_ = me->set( propId, value );
-
-        return _retval_;
-    } catch(cv::Exception e) {
-
-        LOGD("highgui::VideoCapture_n_1set() catched cv::Exception: %s", e.what());
-
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return 0;
-    } catch (...) {
-
-        LOGD("highgui::VideoCapture_n_1set() catched unknown exception (...)");
-
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1set()}");
-        return 0;
-    }
-}
-
-JNIEXPORT jstring JNICALL Java_org_opencv_highgui_VideoCapture_n_1getSupportedPreviewSizes
-  (JNIEnv *env, jclass, jlong self);
-
-JNIEXPORT jstring JNICALL Java_org_opencv_highgui_VideoCapture_n_1getSupportedPreviewSizes
-  (JNIEnv *env, jclass, jlong self)
-{
-    try {
-
-        LOGD("highgui::VideoCapture_n_1set()");
-
-        VideoCapture* me = (VideoCapture*) self; //TODO: check for NULL
-        union {double prop; const char* name;} u;
-        u.prop = me->get(CAP_PROP_ANDROID_PREVIEW_SIZES_STRING);
-        return env->NewStringUTF(u.name);
-    } catch(cv::Exception e) {
-
-        LOGD("highgui::VideoCapture_n_1getSupportedPreviewSizes() catched cv::Exception: %s", e.what());
-
-        jclass je = env->FindClass("org/opencv/core/CvException");
-        if(!je) je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, e.what());
-        return env->NewStringUTF("");
-    } catch (...) {
-
-        LOGD("highgui::VideoCapture_n_1getSupportedPreviewSizes() catched unknown exception (...)");
-
-        jclass je = env->FindClass("java/lang/Exception");
-        env->ThrowNew(je, "Unknown exception in JNI code {highgui::VideoCapture_n_1getSupportedPreviewSizes()}");
-        return env->NewStringUTF("");
-    }
-}
-
-
-
-//
-//  native support for java finalize()
-//  static void VideoCapture::n_delete( __int64 self )
-//
-
-JNIEXPORT void JNICALL Java_org_opencv_highgui_VideoCapture_n_1delete
-  (JNIEnv*, jclass, jlong self);
-
-JNIEXPORT void JNICALL Java_org_opencv_highgui_VideoCapture_n_1delete
-  (JNIEnv*, jclass, jlong self)
-{
-    delete (VideoCapture*) self;
-}
-
-} // extern "C"
-
-#endif // HAVE_OPENCV_HIGHGUI
diff --git a/modules/java/generator/src/cpp/common.h b/modules/java/generator/src/cpp/common.h
index db343823f..0c0b025f5 100644
--- a/modules/java/generator/src/cpp/common.h
+++ b/modules/java/generator/src/cpp/common.h
@@ -32,4 +32,4 @@
 #  pragma warning(disable:4800 4244)
 #endif
 
-#endif //__JAVA_COMMON_H__
\ No newline at end of file
+#endif //__JAVA_COMMON_H__
diff --git a/modules/java/generator/src/cpp/core_manual.cpp b/modules/java/generator/src/cpp/core_manual.cpp
index 8a45528fc..97ce27ee3 100644
--- a/modules/java/generator/src/cpp/core_manual.cpp
+++ b/modules/java/generator/src/cpp/core_manual.cpp
@@ -14,4 +14,4 @@ void cv::setErrorVerbosity(bool verbose)
         cv::redirectError(0);
     else
         cv::redirectError((cv::ErrorCallback)quietCallback);
-}
\ No newline at end of file
+}
diff --git a/modules/java/generator/src/cpp/features2d_manual.hpp b/modules/java/generator/src/cpp/features2d_manual.hpp
index a51b1eaa3..723f820e0 100644
--- a/modules/java/generator/src/cpp/features2d_manual.hpp
+++ b/modules/java/generator/src/cpp/features2d_manual.hpp
@@ -11,15 +11,17 @@
 namespace cv
 {
 
-class CV_EXPORTS_AS(FeatureDetector) javaFeatureDetector : public FeatureDetector
+class CV_EXPORTS_AS(FeatureDetector) javaFeatureDetector
 {
 public:
-#if 0
-    //DO NOT REMOVE! The block is required for sources parser
-    CV_WRAP void detect( const Mat& image, CV_OUT std::vector<KeyPoint>& keypoints, const Mat& mask=Mat() ) const;
-    CV_WRAP void detect( const std::vector<Mat>& images, CV_OUT std::vector<std::vector<KeyPoint> >& keypoints, const std::vector<Mat>& masks=std::vector<Mat>() ) const;
-    CV_WRAP virtual bool empty() const;
-#endif
+    CV_WRAP void detect( const Mat& image, CV_OUT std::vector<KeyPoint>& keypoints, const Mat& mask=Mat() ) const
+    { return wrapped->detect(image, keypoints, mask); }
+
+    CV_WRAP void detect( const std::vector<Mat>& images, CV_OUT std::vector<std::vector<KeyPoint> >& keypoints, const std::vector<Mat>& masks=std::vector<Mat>() ) const
+    { return wrapped->detect(images, keypoints, masks); }
+
+    CV_WRAP bool empty() const
+    { return wrapped->empty(); }
 
     enum
     {
@@ -141,52 +143,74 @@ public:
             break;
         }
 
-        Ptr<FeatureDetector> detector = FeatureDetector::create(name);
-        detector.addref();
-        return (javaFeatureDetector*)((FeatureDetector*) detector);
+        return new javaFeatureDetector(FeatureDetector::create(name));
     }
 
     CV_WRAP void write( const String& fileName ) const
     {
         FileStorage fs(fileName, FileStorage::WRITE);
-        ((FeatureDetector*)this)->write(fs);
-        fs.release();
+        wrapped->write(fs);
     }
 
     CV_WRAP void read( const String& fileName )
     {
         FileStorage fs(fileName, FileStorage::READ);
-        ((FeatureDetector*)this)->read(fs.root());
-        fs.release();
+        wrapped->read(fs.root());
     }
+
+private:
+    javaFeatureDetector(Ptr<FeatureDetector> _wrapped) : wrapped(_wrapped)
+    {}
+
+    Ptr<FeatureDetector> wrapped;
 };
 
-class CV_EXPORTS_AS(DescriptorMatcher) javaDescriptorMatcher : public DescriptorMatcher
+class CV_EXPORTS_AS(DescriptorMatcher) javaDescriptorMatcher
 {
 public:
-#if 0
-    //DO NOT REMOVE! The block is required for sources parser
-    CV_WRAP virtual bool isMaskSupported() const;
-    CV_WRAP virtual void add( const std::vector<Mat>& descriptors );
-    CV_WRAP const std::vector<Mat>& getTrainDescriptors() const;
-    CV_WRAP virtual void clear();
-    CV_WRAP virtual bool empty() const;
-    CV_WRAP virtual void train();
+    CV_WRAP bool isMaskSupported() const
+    { return wrapped->isMaskSupported(); }
+
+    CV_WRAP void add( const std::vector<Mat>& descriptors )
+    { return wrapped->add(descriptors); }
+
+    CV_WRAP const std::vector<Mat>& getTrainDescriptors() const
+    { return wrapped->getTrainDescriptors(); }
+
+    CV_WRAP void clear()
+    { return wrapped->clear(); }
+
+    CV_WRAP bool empty() const
+    { return wrapped->empty(); }
+
+    CV_WRAP void train()
+    { return wrapped->train(); }
+
     CV_WRAP void match( const Mat& queryDescriptors, const Mat& trainDescriptors,
-                CV_OUT std::vector<DMatch>& matches, const Mat& mask=Mat() ) const;
+                CV_OUT std::vector<DMatch>& matches, const Mat& mask=Mat() ) const
+    { return wrapped->match(queryDescriptors, trainDescriptors, matches, mask); }
+
     CV_WRAP void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors,
                    CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
-                   const Mat& mask=Mat(), bool compactResult=false ) const;
+                   const Mat& mask=Mat(), bool compactResult=false ) const
+    { return wrapped->knnMatch(queryDescriptors, trainDescriptors, matches, k, mask, compactResult); }
+
     CV_WRAP void radiusMatch( const Mat& queryDescriptors, const Mat& trainDescriptors,
                       CV_OUT std::vector<std::vector<DMatch> >& matches, float maxDistance,
-                      const Mat& mask=Mat(), bool compactResult=false ) const;
+                      const Mat& mask=Mat(), bool compactResult=false ) const
+    { return wrapped->radiusMatch(queryDescriptors, trainDescriptors, matches, maxDistance, mask, compactResult); }
+
     CV_WRAP void match( const Mat& queryDescriptors, CV_OUT std::vector<DMatch>& matches,
-                const std::vector<Mat>& masks=std::vector<Mat>() );
+                const std::vector<Mat>& masks=std::vector<Mat>() )
+    { return wrapped->match(queryDescriptors, matches, masks); }
+
     CV_WRAP void knnMatch( const Mat& queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
-           const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
+           const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false )
+    { return wrapped->knnMatch(queryDescriptors, matches, k, masks, compactResult); }
+
     CV_WRAP void radiusMatch( const Mat& queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, float maxDistance,
-                   const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
-#endif
+                   const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false )
+    { return wrapped->radiusMatch(queryDescriptors, matches, maxDistance, masks, compactResult); }
 
     enum
     {
@@ -200,9 +224,7 @@ public:
 
     CV_WRAP_AS(clone) javaDescriptorMatcher* jclone( bool emptyTrainData=false ) const
     {
-        Ptr<DescriptorMatcher> matcher = this->clone(emptyTrainData);
-        matcher.addref();
-        return (javaDescriptorMatcher*)((DescriptorMatcher*) matcher);
+        return new javaDescriptorMatcher(wrapped->clone(emptyTrainData));
     }
 
     //supported: FlannBased, BruteForce, BruteForce-L1, BruteForce-Hamming, BruteForce-HammingLUT
@@ -235,38 +257,45 @@ public:
             break;
         }
 
-        Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create(name);
-        matcher.addref();
-        return (javaDescriptorMatcher*)((DescriptorMatcher*) matcher);
+        return new javaDescriptorMatcher(DescriptorMatcher::create(name));
     }
 
     CV_WRAP void write( const String& fileName ) const
     {
         FileStorage fs(fileName, FileStorage::WRITE);
-        ((DescriptorMatcher*)this)->write(fs);
-        fs.release();
+        wrapped->write(fs);
     }
 
     CV_WRAP void read( const String& fileName )
     {
         FileStorage fs(fileName, FileStorage::READ);
-        ((DescriptorMatcher*)this)->read(fs.root());
-        fs.release();
+        wrapped->read(fs.root());
     }
+
+private:
+    javaDescriptorMatcher(Ptr<DescriptorMatcher> _wrapped) : wrapped(_wrapped)
+    {}
+
+    Ptr<DescriptorMatcher> wrapped;
 };
 
-class CV_EXPORTS_AS(DescriptorExtractor) javaDescriptorExtractor : public DescriptorExtractor
+class CV_EXPORTS_AS(DescriptorExtractor) javaDescriptorExtractor
 {
 public:
-#if 0
-    //DO NOT REMOVE! The block is required for sources parser
-    CV_WRAP void compute( const Mat& image, CV_IN_OUT std::vector<KeyPoint>& keypoints, Mat& descriptors ) const;
-    CV_WRAP void compute( const std::vector<Mat>& images, CV_IN_OUT std::vector<std::vector<KeyPoint> >& keypoints, CV_OUT std::vector<Mat>& descriptors ) const;
-    CV_WRAP virtual int descriptorSize() const;
-    CV_WRAP virtual int descriptorType() const;
+    CV_WRAP void compute( const Mat& image, CV_IN_OUT std::vector<KeyPoint>& keypoints, Mat& descriptors ) const
+    { return wrapped->compute(image, keypoints, descriptors); }
 
-    CV_WRAP virtual bool empty() const;
-#endif
+    CV_WRAP void compute( const std::vector<Mat>& images, CV_IN_OUT std::vector<std::vector<KeyPoint> >& keypoints, CV_OUT std::vector<Mat>& descriptors ) const
+    { return wrapped->compute(images, keypoints, descriptors); }
+
+    CV_WRAP int descriptorSize() const
+    { return wrapped->descriptorSize(); }
+
+    CV_WRAP int descriptorType() const
+    { return wrapped->descriptorType(); }
+
+    CV_WRAP bool empty() const
+    { return wrapped->empty(); }
 
     enum
     {
@@ -327,62 +356,93 @@ public:
             break;
         }
 
-        Ptr<DescriptorExtractor> extractor = DescriptorExtractor::create(name);
-        extractor.addref();
-        return (javaDescriptorExtractor*)((DescriptorExtractor*) extractor);
+        return new javaDescriptorExtractor(DescriptorExtractor::create(name));
     }
 
     CV_WRAP void write( const String& fileName ) const
     {
         FileStorage fs(fileName, FileStorage::WRITE);
-        ((DescriptorExtractor*)this)->write(fs);
-        fs.release();
+        wrapped->write(fs);
     }
 
     CV_WRAP void read( const String& fileName )
     {
         FileStorage fs(fileName, FileStorage::READ);
-        ((DescriptorExtractor*)this)->read(fs.root());
-        fs.release();
+        wrapped->read(fs.root());
     }
+
+private:
+    javaDescriptorExtractor(Ptr<DescriptorExtractor> _wrapped) : wrapped(_wrapped)
+    {}
+
+    Ptr<DescriptorExtractor> wrapped;
 };
 
-class CV_EXPORTS_AS(GenericDescriptorMatcher) javaGenericDescriptorMatcher : public GenericDescriptorMatcher
+class CV_EXPORTS_AS(GenericDescriptorMatcher) javaGenericDescriptorMatcher
 {
 public:
-#if 0
-    //DO NOT REMOVE! The block is required for sources parser
-    CV_WRAP virtual void add( const std::vector<Mat>& images,
-                      std::vector<std::vector<KeyPoint> >& keypoints );
-    CV_WRAP const std::vector<Mat>& getTrainImages() const;
-    CV_WRAP const std::vector<std::vector<KeyPoint> >& getTrainKeypoints() const;
-    CV_WRAP virtual void clear();
-    CV_WRAP virtual bool isMaskSupported();
-    CV_WRAP virtual void train();
+    CV_WRAP void add( const std::vector<Mat>& images,
+                      std::vector<std::vector<KeyPoint> >& keypoints )
+    { return wrapped->add(images, keypoints); }
+
+    CV_WRAP const std::vector<Mat>& getTrainImages() const
+    { return wrapped->getTrainImages(); }
+
+    CV_WRAP const std::vector<std::vector<KeyPoint> >& getTrainKeypoints() const
+    { return wrapped->getTrainKeypoints(); }
+
+    CV_WRAP void clear()
+    { return wrapped->clear(); }
+
+    CV_WRAP bool isMaskSupported()
+    { return wrapped->isMaskSupported(); }
+
+    CV_WRAP void train()
+    { return wrapped->train(); }
+
     CV_WRAP void classify( const Mat& queryImage, CV_IN_OUT std::vector<KeyPoint>& queryKeypoints,
-                           const Mat& trainImage, std::vector<KeyPoint>& trainKeypoints ) const;
-    CV_WRAP void classify( const Mat& queryImage, CV_IN_OUT std::vector<KeyPoint>& queryKeypoints );
+                           const Mat& trainImage, std::vector<KeyPoint>& trainKeypoints ) const
+    { return wrapped->classify(queryImage, queryKeypoints, trainImage, trainKeypoints); }
+
+    CV_WRAP void classify( const Mat& queryImage, CV_IN_OUT std::vector<KeyPoint>& queryKeypoints )
+    { return wrapped->classify(queryImage, queryKeypoints); }
+
     CV_WRAP void match( const Mat& queryImage, std::vector<KeyPoint>& queryKeypoints,
                 const Mat& trainImage, std::vector<KeyPoint>& trainKeypoints,
-                CV_OUT std::vector<DMatch>& matches, const Mat& mask=Mat() ) const;
+                CV_OUT std::vector<DMatch>& matches, const Mat& mask=Mat() ) const
+    { return wrapped->match(queryImage, queryKeypoints, trainImage, trainKeypoints, matches, mask); }
+
     CV_WRAP void knnMatch( const Mat& queryImage, std::vector<KeyPoint>& queryKeypoints,
                    const Mat& trainImage, std::vector<KeyPoint>& trainKeypoints,
                    CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
-                   const Mat& mask=Mat(), bool compactResult=false ) const;
+                   const Mat& mask=Mat(), bool compactResult=false ) const
+    { return wrapped->knnMatch(queryImage, queryKeypoints, trainImage, trainKeypoints,
+                               matches, k, mask, compactResult); }
+
     CV_WRAP void radiusMatch( const Mat& queryImage, std::vector<KeyPoint>& queryKeypoints,
                       const Mat& trainImage, std::vector<KeyPoint>& trainKeypoints,
                       CV_OUT std::vector<std::vector<DMatch> >& matches, float maxDistance,
-                      const Mat& mask=Mat(), bool compactResult=false ) const;
+                      const Mat& mask=Mat(), bool compactResult=false ) const
+    { return wrapped->radiusMatch(queryImage, queryKeypoints, trainImage, trainKeypoints,
+                                   matches, maxDistance, mask, compactResult); }
+
     CV_WRAP void match( const Mat& queryImage, std::vector<KeyPoint>& queryKeypoints,
-                CV_OUT std::vector<DMatch>& matches, const std::vector<Mat>& masks=std::vector<Mat>() );
+                CV_OUT std::vector<DMatch>& matches, const std::vector<Mat>& masks=std::vector<Mat>() )
+    { return wrapped->match(queryImage, queryKeypoints, matches, masks); }
+
     CV_WRAP void knnMatch( const Mat& queryImage, std::vector<KeyPoint>& queryKeypoints,
                    CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
-                   const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
+                   const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false )
+    { return wrapped->knnMatch(queryImage, queryKeypoints, matches, k, masks, compactResult); }
+
     CV_WRAP void radiusMatch( const Mat& queryImage, std::vector<KeyPoint>& queryKeypoints,
                       CV_OUT std::vector<std::vector<DMatch> >& matches, float maxDistance,
-                      const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
-    CV_WRAP virtual bool empty() const;
-#endif
+                      const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false )
+    { return wrapped->radiusMatch(queryImage, queryKeypoints, matches, maxDistance, masks, compactResult); }
+
+    CV_WRAP bool empty() const
+    { return wrapped->empty(); }
+
 
     enum
     {
@@ -392,9 +452,7 @@ public:
 
     CV_WRAP_AS(clone) javaGenericDescriptorMatcher* jclone( bool emptyTrainData=false ) const
     {
-        Ptr<GenericDescriptorMatcher> matcher = this->clone(emptyTrainData);
-        matcher.addref();
-        return (javaGenericDescriptorMatcher*)((GenericDescriptorMatcher*) matcher);
+        return new javaGenericDescriptorMatcher(wrapped->clone(emptyTrainData));
     }
 
     //supported: OneWay, Fern
@@ -416,24 +474,26 @@ public:
             break;
         }
 
-        Ptr<GenericDescriptorMatcher> matcher = GenericDescriptorMatcher::create(name);
-        matcher.addref();
-        return (javaGenericDescriptorMatcher*)((GenericDescriptorMatcher*) matcher);
+        return new javaGenericDescriptorMatcher(GenericDescriptorMatcher::create(name));
     }
 
     CV_WRAP void write( const String& fileName ) const
     {
         FileStorage fs(fileName, FileStorage::WRITE);
-        ((GenericDescriptorMatcher*)this)->write(fs);
-        fs.release();
+        wrapped->write(fs);
     }
 
     CV_WRAP void read( const String& fileName )
     {
         FileStorage fs(fileName, FileStorage::READ);
-        ((GenericDescriptorMatcher*)this)->read(fs.root());
-        fs.release();
+        wrapped->read(fs.root());
     }
+
+private:
+    javaGenericDescriptorMatcher(Ptr<GenericDescriptorMatcher> _wrapped) : wrapped(_wrapped)
+    {}
+
+    Ptr<GenericDescriptorMatcher> wrapped;
 };
 
 #if 0
diff --git a/modules/java/generator/src/cpp/jni_part.cpp b/modules/java/generator/src/cpp/jni_part.cpp
index 8f6eb8161..ccd870cdf 100644
--- a/modules/java/generator/src/cpp/jni_part.cpp
+++ b/modules/java/generator/src/cpp/jni_part.cpp
@@ -63,4 +63,4 @@ JNI_OnUnload(JavaVM*, void*)
   //do nothing
 }
 
-} // extern "C"
\ No newline at end of file
+} // extern "C"
diff --git a/modules/java/generator/src/cpp/utils.cpp b/modules/java/generator/src/cpp/utils.cpp
index 90abfd9f5..40811e8f9 100644
--- a/modules/java/generator/src/cpp/utils.cpp
+++ b/modules/java/generator/src/cpp/utils.cpp
@@ -158,4 +158,4 @@ JNIEXPORT void JNICALL Java_org_opencv_android_Utils_nMatToBitmap
 
 } // extern "C"
 
-#endif //__ANDROID__
\ No newline at end of file
+#endif //__ANDROID__
diff --git a/modules/java/generator/src/java/android+JavaCameraView.java b/modules/java/generator/src/java/android+JavaCameraView.java
index 0dcdad2fb..f864e5370 100644
--- a/modules/java/generator/src/java/android+JavaCameraView.java
+++ b/modules/java/generator/src/java/android+JavaCameraView.java
@@ -6,7 +6,6 @@ import android.content.Context;
 import android.graphics.ImageFormat;
 import android.graphics.SurfaceTexture;
 import android.hardware.Camera;
-import android.hardware.Camera.CameraInfo;
 import android.hardware.Camera.PreviewCallback;
 import android.os.Build;
 import android.util.AttributeSet;
@@ -39,7 +38,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
     private boolean mStopThread;
 
     protected Camera mCamera;
-    protected JavaCameraFrame mCameraFrame;
+    protected JavaCameraFrame[] mCameraFrame;
     private SurfaceTexture mSurfaceTexture;
 
     public static class JavaCameraSizeAccessor implements ListItemAccessor {
@@ -181,7 +180,9 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
 
                     AllocateCache();
 
-                    mCameraFrame = new JavaCameraFrame(mFrameChain[mChainIdx], mFrameWidth, mFrameHeight);
+                    mCameraFrame = new JavaCameraFrame[2];
+                    mCameraFrame[0] = new JavaCameraFrame(mFrameChain[0], mFrameWidth, mFrameHeight);
+                    mCameraFrame[1] = new JavaCameraFrame(mFrameChain[1], mFrameWidth, mFrameHeight);
 
                     if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.HONEYCOMB) {
                         mSurfaceTexture = new SurfaceTexture(MAGIC_TEXTURE_ID);
@@ -217,8 +218,10 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
                 mFrameChain[0].release();
                 mFrameChain[1].release();
             }
-            if (mCameraFrame != null)
-                mCameraFrame.release();
+            if (mCameraFrame != null) {
+                mCameraFrame[0].release();
+                mCameraFrame[1].release();
+            }
         }
     }
 
@@ -319,7 +322,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
 
                 if (!mStopThread) {
                     if (!mFrameChain[mChainIdx].empty())
-                        deliverAndDrawFrame(mCameraFrame);
+                        deliverAndDrawFrame(mCameraFrame[mChainIdx]);
                     mChainIdx = 1 - mChainIdx;
                 }
             } while (!mStopThread);
diff --git a/modules/java/generator/src/java/android+OpenCVLoader.java b/modules/java/generator/src/java/android+OpenCVLoader.java
index fb05b826c..a76471eac 100644
--- a/modules/java/generator/src/java/android+OpenCVLoader.java
+++ b/modules/java/generator/src/java/android+OpenCVLoader.java
@@ -27,6 +27,11 @@ public class OpenCVLoader
      */
     public static final String OPENCV_VERSION_2_4_5 = "2.4.5";
 
+    /**
+     * OpenCV Library version 2.4.6.
+     */
+    public static final String OPENCV_VERSION_2_4_6 = "2.4.6";
+
 
     /**
      * Loads and initializes OpenCV library from current application package. Roughly, it's an analog of system.loadLibrary("opencv_java").
diff --git a/modules/java/generator/src/java/android+sync.py b/modules/java/generator/src/java/android+sync.py
index baf95cb6e..ed78537f0 100755
--- a/modules/java/generator/src/java/android+sync.py
+++ b/modules/java/generator/src/java/android+sync.py
@@ -4,4 +4,4 @@ import os
 import shutil
 
 for f in os.listdir("."):
-    shutil.copyfile(f, os.path.join("../../../../../../modules/java/generator/src/java/", "android+" + f));
\ No newline at end of file
+    shutil.copyfile(f, os.path.join("../../../../../../modules/java/generator/src/java/", "android+" + f));
diff --git a/modules/java/generator/src/java/core+Mat.java b/modules/java/generator/src/java/core+Mat.java
index 8dd72fb68..6db255417 100644
--- a/modules/java/generator/src/java/core+Mat.java
+++ b/modules/java/generator/src/java/core+Mat.java
@@ -245,6 +245,19 @@ public class Mat {
         return retVal;
     }
 
+    //
+    // C++: int Mat::dims()
+    //
+
+    // javadoc: Mat::dims()
+    public int dims()
+    {
+
+        int retVal = n_dims(nativeObj);
+
+        return retVal;
+    }
+
     //
     // C++: int Mat::cols()
     //
@@ -1130,6 +1143,9 @@ public class Mat {
     // C++: Mat Mat::colRange(int startcol, int endcol)
     private static native long n_colRange(long nativeObj, int startcol, int endcol);
 
+    // C++: int Mat::dims()
+    private static native int n_dims(long nativeObj);
+
     // C++: int Mat::cols()
     private static native int n_cols(long nativeObj);
 
diff --git a/modules/java/generator/src/java/core+MatOfByte.java b/modules/java/generator/src/java/core+MatOfByte.java
index b3fe5691e..7756eb94f 100644
--- a/modules/java/generator/src/java/core+MatOfByte.java
+++ b/modules/java/generator/src/java/core+MatOfByte.java
@@ -15,7 +15,7 @@ public class MatOfByte extends Mat {
     protected MatOfByte(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -26,7 +26,7 @@ public class MatOfByte extends Mat {
     public MatOfByte(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfDMatch.java b/modules/java/generator/src/java/core+MatOfDMatch.java
index aec3b99e4..2c99e14d1 100644
--- a/modules/java/generator/src/java/core+MatOfDMatch.java
+++ b/modules/java/generator/src/java/core+MatOfDMatch.java
@@ -17,7 +17,7 @@ public class MatOfDMatch extends Mat {
     protected MatOfDMatch(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat: " + toString());
+            throw new IllegalArgumentException("Incompatible Mat: " + toString());
         //FIXME: do we need release() here?
     }
 
@@ -28,7 +28,7 @@ public class MatOfDMatch extends Mat {
     public MatOfDMatch(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat: " + toString());
+            throw new IllegalArgumentException("Incompatible Mat: " + toString());
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfDouble.java b/modules/java/generator/src/java/core+MatOfDouble.java
index 4eb7cbc28..1a8e23ca3 100644
--- a/modules/java/generator/src/java/core+MatOfDouble.java
+++ b/modules/java/generator/src/java/core+MatOfDouble.java
@@ -15,7 +15,7 @@ public class MatOfDouble extends Mat {
     protected MatOfDouble(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -26,7 +26,7 @@ public class MatOfDouble extends Mat {
     public MatOfDouble(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfFloat.java b/modules/java/generator/src/java/core+MatOfFloat.java
index 96bbeab9f..dfd6b8404 100644
--- a/modules/java/generator/src/java/core+MatOfFloat.java
+++ b/modules/java/generator/src/java/core+MatOfFloat.java
@@ -15,7 +15,7 @@ public class MatOfFloat extends Mat {
     protected MatOfFloat(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -26,7 +26,7 @@ public class MatOfFloat extends Mat {
     public MatOfFloat(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfFloat4.java b/modules/java/generator/src/java/core+MatOfFloat4.java
index aaa97b799..96f9e5cde 100644
--- a/modules/java/generator/src/java/core+MatOfFloat4.java
+++ b/modules/java/generator/src/java/core+MatOfFloat4.java
@@ -15,7 +15,7 @@ public class MatOfFloat4 extends Mat {
     protected MatOfFloat4(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -26,7 +26,7 @@ public class MatOfFloat4 extends Mat {
     public MatOfFloat4(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfFloat6.java b/modules/java/generator/src/java/core+MatOfFloat6.java
index 68e6249b6..140bb57a9 100644
--- a/modules/java/generator/src/java/core+MatOfFloat6.java
+++ b/modules/java/generator/src/java/core+MatOfFloat6.java
@@ -15,7 +15,7 @@ public class MatOfFloat6 extends Mat {
     protected MatOfFloat6(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -26,7 +26,7 @@ public class MatOfFloat6 extends Mat {
     public MatOfFloat6(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfInt.java b/modules/java/generator/src/java/core+MatOfInt.java
index 33e5124e4..7d076878f 100644
--- a/modules/java/generator/src/java/core+MatOfInt.java
+++ b/modules/java/generator/src/java/core+MatOfInt.java
@@ -16,7 +16,7 @@ public class MatOfInt extends Mat {
     protected MatOfInt(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -27,7 +27,7 @@ public class MatOfInt extends Mat {
     public MatOfInt(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfInt4.java b/modules/java/generator/src/java/core+MatOfInt4.java
index c924233a6..4ca4d9e31 100644
--- a/modules/java/generator/src/java/core+MatOfInt4.java
+++ b/modules/java/generator/src/java/core+MatOfInt4.java
@@ -16,7 +16,7 @@ public class MatOfInt4 extends Mat {
     protected MatOfInt4(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -27,7 +27,7 @@ public class MatOfInt4 extends Mat {
     public MatOfInt4(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfKeyPoint.java b/modules/java/generator/src/java/core+MatOfKeyPoint.java
index 48ad3ca65..24b9a817f 100644
--- a/modules/java/generator/src/java/core+MatOfKeyPoint.java
+++ b/modules/java/generator/src/java/core+MatOfKeyPoint.java
@@ -17,7 +17,7 @@ public class MatOfKeyPoint extends Mat {
     protected MatOfKeyPoint(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -28,7 +28,7 @@ public class MatOfKeyPoint extends Mat {
     public MatOfKeyPoint(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfPoint.java b/modules/java/generator/src/java/core+MatOfPoint.java
index 6d23ed116..f4d573bb7 100644
--- a/modules/java/generator/src/java/core+MatOfPoint.java
+++ b/modules/java/generator/src/java/core+MatOfPoint.java
@@ -15,7 +15,7 @@ public class MatOfPoint extends Mat {
     protected MatOfPoint(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -26,7 +26,7 @@ public class MatOfPoint extends Mat {
     public MatOfPoint(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfPoint2f.java b/modules/java/generator/src/java/core+MatOfPoint2f.java
index 0c6960730..4b8c926ff 100644
--- a/modules/java/generator/src/java/core+MatOfPoint2f.java
+++ b/modules/java/generator/src/java/core+MatOfPoint2f.java
@@ -15,7 +15,7 @@ public class MatOfPoint2f extends Mat {
     protected MatOfPoint2f(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -26,7 +26,7 @@ public class MatOfPoint2f extends Mat {
     public MatOfPoint2f(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfPoint3.java b/modules/java/generator/src/java/core+MatOfPoint3.java
index 0c8374f25..3b50561e9 100644
--- a/modules/java/generator/src/java/core+MatOfPoint3.java
+++ b/modules/java/generator/src/java/core+MatOfPoint3.java
@@ -15,7 +15,7 @@ public class MatOfPoint3 extends Mat {
     protected MatOfPoint3(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -26,7 +26,7 @@ public class MatOfPoint3 extends Mat {
     public MatOfPoint3(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfPoint3f.java b/modules/java/generator/src/java/core+MatOfPoint3f.java
index b0d50d450..fc5fee436 100644
--- a/modules/java/generator/src/java/core+MatOfPoint3f.java
+++ b/modules/java/generator/src/java/core+MatOfPoint3f.java
@@ -15,7 +15,7 @@ public class MatOfPoint3f extends Mat {
     protected MatOfPoint3f(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -26,7 +26,7 @@ public class MatOfPoint3f extends Mat {
     public MatOfPoint3f(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/core+MatOfRect.java b/modules/java/generator/src/java/core+MatOfRect.java
index 3844d9dfb..ec0fb01e4 100644
--- a/modules/java/generator/src/java/core+MatOfRect.java
+++ b/modules/java/generator/src/java/core+MatOfRect.java
@@ -16,7 +16,7 @@ public class MatOfRect extends Mat {
     protected MatOfRect(long addr) {
         super(addr);
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
@@ -27,7 +27,7 @@ public class MatOfRect extends Mat {
     public MatOfRect(Mat m) {
         super(m, Range.all());
         if( !empty() && checkVector(_channels, _depth) < 0 )
-            throw new IllegalArgumentException("Incomatible Mat");
+            throw new IllegalArgumentException("Incompatible Mat");
         //FIXME: do we need release() here?
     }
 
diff --git a/modules/java/generator/src/java/engine+OpenCVEngineInterface.aidl b/modules/java/generator/src/java/engine+OpenCVEngineInterface.aidl
index 7a47a9073..21fe5f716 100644
--- a/modules/java/generator/src/java/engine+OpenCVEngineInterface.aidl
+++ b/modules/java/generator/src/java/engine+OpenCVEngineInterface.aidl
@@ -30,4 +30,4 @@ interface OpenCVEngineInterface
     * @return Returns names of OpenCV libraries, separated by semicolon.
     */
     String getLibraryList(String version);
-}
\ No newline at end of file
+}
diff --git a/modules/java/generator/src/java/highgui+VideoCapture.java b/modules/java/generator/src/java/highgui+VideoCapture.java
deleted file mode 100644
index b8569bb9c..000000000
--- a/modules/java/generator/src/java/highgui+VideoCapture.java
+++ /dev/null
@@ -1,240 +0,0 @@
-package org.opencv.highgui;
-
-import java.util.List;
-import java.util.LinkedList;
-
-import org.opencv.core.Mat;
-import org.opencv.core.Size;
-
-// C++: class VideoCapture
-//javadoc: VideoCapture
-public class VideoCapture {
-
-    protected final long nativeObj;
-
-    protected VideoCapture(long addr) {
-        nativeObj = addr;
-    }
-
-    //
-    // C++: VideoCapture::VideoCapture()
-    //
-
-    // javadoc: VideoCapture::VideoCapture()
-    public VideoCapture()
-    {
-
-        nativeObj = n_VideoCapture();
-
-        return;
-    }
-
-    //
-    // C++: VideoCapture::VideoCapture(int device)
-    //
-
-    // javadoc: VideoCapture::VideoCapture(device)
-    public VideoCapture(int device)
-    {
-
-        nativeObj = n_VideoCapture(device);
-
-        return;
-    }
-
-    //
-    // C++: double VideoCapture::get(int propId)
-    //
-
-/**
- * Returns the specified "VideoCapture" property.
- *
- * Note: When querying a property that is not supported by the backend used by
- * the "VideoCapture" class, value 0 is returned.
- *
- * @param propId property identifier; it can be one of the following:
- *   * CV_CAP_PROP_FRAME_WIDTH width of the frames in the video stream.
- *   * CV_CAP_PROP_FRAME_HEIGHT height of the frames in the video stream.
- *
- * @see <a href="http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#videocapture-get">org.opencv.highgui.VideoCapture.get</a>
- */
-    public double get(int propId)
-    {
-
-        double retVal = n_get(nativeObj, propId);
-
-        return retVal;
-    }
-
-    public List<Size> getSupportedPreviewSizes()
-    {
-        String[] sizes_str = n_getSupportedPreviewSizes(nativeObj).split(",");
-        List<Size> sizes = new LinkedList<Size>();
-
-        for (String str : sizes_str) {
-            String[] wh = str.split("x");
-            sizes.add(new Size(Double.parseDouble(wh[0]), Double.parseDouble(wh[1])));
-        }
-
-        return sizes;
-    }
-
-    //
-    // C++: bool VideoCapture::grab()
-    //
-
-    // javadoc: VideoCapture::grab()
-    public boolean grab()
-    {
-
-        boolean retVal = n_grab(nativeObj);
-
-        return retVal;
-    }
-
-    //
-    // C++: bool VideoCapture::isOpened()
-    //
-
-    // javadoc: VideoCapture::isOpened()
-    public boolean isOpened()
-    {
-
-        boolean retVal = n_isOpened(nativeObj);
-
-        return retVal;
-    }
-
-    //
-    // C++: bool VideoCapture::open(int device)
-    //
-
-    // javadoc: VideoCapture::open(device)
-    public boolean open(int device)
-    {
-
-        boolean retVal = n_open(nativeObj, device);
-
-        return retVal;
-    }
-
-    //
-    // C++: bool VideoCapture::read(Mat image)
-    //
-
-    // javadoc: VideoCapture::read(image)
-    public boolean read(Mat image)
-    {
-
-        boolean retVal = n_read(nativeObj, image.nativeObj);
-
-        return retVal;
-    }
-
-    //
-    // C++: void VideoCapture::release()
-    //
-
-    // javadoc: VideoCapture::release()
-    public void release()
-    {
-
-        n_release(nativeObj);
-
-        return;
-    }
-
-    //
-    // C++: bool VideoCapture::retrieve(Mat image, int channel = 0)
-    //
-
-    // javadoc: VideoCapture::retrieve(image, channel)
-    public boolean retrieve(Mat image, int channel)
-    {
-
-        boolean retVal = n_retrieve(nativeObj, image.nativeObj, channel);
-
-        return retVal;
-    }
-
-    // javadoc: VideoCapture::retrieve(image)
-    public boolean retrieve(Mat image)
-    {
-
-        boolean retVal = n_retrieve(nativeObj, image.nativeObj);
-
-        return retVal;
-    }
-
-    //
-    // C++: bool VideoCapture::set(int propId, double value)
-    //
-
-/**
- * Sets a property in the "VideoCapture".
- *
- * @param propId property identifier; it can be one of the following:
- *   * CV_CAP_PROP_FRAME_WIDTH width of the frames in the video stream.
- *   * CV_CAP_PROP_FRAME_HEIGHT height of the frames in the video stream.
- * @param value value of the property.
- *
- * @see <a href="http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#videocapture-set">org.opencv.highgui.VideoCapture.set</a>
- */
-    public boolean set(int propId, double value)
-    {
-
-        boolean retVal = n_set(nativeObj, propId, value);
-
-        return retVal;
-    }
-
-    @Override
-    protected void finalize() throws Throwable {
-        n_delete(nativeObj);
-        super.finalize();
-    }
-
-    // C++: VideoCapture::VideoCapture()
-    private static native long n_VideoCapture();
-
-    // C++: VideoCapture::VideoCapture(string filename)
-    private static native long n_VideoCapture(java.lang.String filename);
-
-    // C++: VideoCapture::VideoCapture(int device)
-    private static native long n_VideoCapture(int device);
-
-    // C++: double VideoCapture::get(int propId)
-    private static native double n_get(long nativeObj, int propId);
-
-    // C++: bool VideoCapture::grab()
-    private static native boolean n_grab(long nativeObj);
-
-    // C++: bool VideoCapture::isOpened()
-    private static native boolean n_isOpened(long nativeObj);
-
-    // C++: bool VideoCapture::open(string filename)
-    private static native boolean n_open(long nativeObj, java.lang.String filename);
-
-    // C++: bool VideoCapture::open(int device)
-    private static native boolean n_open(long nativeObj, int device);
-
-    // C++: bool VideoCapture::read(Mat image)
-    private static native boolean n_read(long nativeObj, long image_nativeObj);
-
-    // C++: void VideoCapture::release()
-    private static native void n_release(long nativeObj);
-
-    // C++: bool VideoCapture::retrieve(Mat image, int channel = 0)
-    private static native boolean n_retrieve(long nativeObj, long image_nativeObj, int channel);
-
-    private static native boolean n_retrieve(long nativeObj, long image_nativeObj);
-
-    // C++: bool VideoCapture::set(int propId, double value)
-    private static native boolean n_set(long nativeObj, int propId, double value);
-
-    private static native String n_getSupportedPreviewSizes(long nativeObj);
-
-    // native support for java finalize()
-    private static native void n_delete(long nativeObj);
-
-}
diff --git a/modules/java/test/src/org/opencv/test/OpenCVTestCase.java b/modules/java/test/src/org/opencv/test/OpenCVTestCase.java
index fd2909664..496f96242 100644
--- a/modules/java/test/src/org/opencv/test/OpenCVTestCase.java
+++ b/modules/java/test/src/org/opencv/test/OpenCVTestCase.java
@@ -97,7 +97,7 @@ public class OpenCVTestCase extends TestCase {
         super.setUp();
 
         try {
-            System.loadLibrary("opencv_java");
+            System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
         } catch (SecurityException e) {
             System.out.println(e.toString());
             System.exit(-1);
diff --git a/modules/legacy/doc/common_interfaces_of_descriptor_extractors.rst b/modules/legacy/doc/common_interfaces_of_descriptor_extractors.rst
index f13f742f1..4f13385dc 100644
--- a/modules/legacy/doc/common_interfaces_of_descriptor_extractors.rst
+++ b/modules/legacy/doc/common_interfaces_of_descriptor_extractors.rst
@@ -30,4 +30,4 @@ Wrapping class for computing descriptors by using the
         virtual int descriptorType() const;
     protected:
         ...
-    }
\ No newline at end of file
+    }
diff --git a/modules/legacy/doc/common_interfaces_of_generic_descriptor_matchers.rst b/modules/legacy/doc/common_interfaces_of_generic_descriptor_matchers.rst
index 3c3835937..e959bbf69 100644
--- a/modules/legacy/doc/common_interfaces_of_generic_descriptor_matchers.rst
+++ b/modules/legacy/doc/common_interfaces_of_generic_descriptor_matchers.rst
@@ -361,4 +361,3 @@ Wrapping class for computing, matching, and classifying descriptors using the
     protected:
             ...
     };
-
diff --git a/modules/legacy/doc/expectation_maximization.rst b/modules/legacy/doc/expectation_maximization.rst
index d97d152e7..6a5418f1f 100644
--- a/modules/legacy/doc/expectation_maximization.rst
+++ b/modules/legacy/doc/expectation_maximization.rst
@@ -5,6 +5,12 @@ This section describes obsolete ``C`` interface of EM algorithm. Details of the
 
 .. highlight:: cpp
 
+.. note::
+
+   * An example on using the Expectation Maximalization algorithm can be found at opencv_source_code/samples/cpp/em.cpp
+
+   * (Python) An example using Expectation Maximalization for Gaussian Mixing can be found at opencv_source_code/samples/python2/gaussian_mix.py
+
 
 CvEMParams
 ----------
@@ -185,4 +191,3 @@ Reads the trained Gaussian mixture model from the file storage.
     :param fs: A file storage with the trained model.
 
     :param node: The parent map. If it is NULL, the function searches a node with parameters in all the top-level nodes (streams), starting with the first one.
-
diff --git a/modules/legacy/doc/feature_detection_and_description.rst b/modules/legacy/doc/feature_detection_and_description.rst
index 222d7a03a..3dc1ae579 100644
--- a/modules/legacy/doc/feature_detection_and_description.rst
+++ b/modules/legacy/doc/feature_detection_and_description.rst
@@ -75,7 +75,9 @@ Class containing a base structure for ``RTreeClassifier``. ::
             void estimateQuantPercForPosteriors(float perc[2]);
     };
 
+.. note::
 
+   * : PYTHON : An example using Randomized Tree training for letter recognition can be found at opencv_source_code/samples/python2/letter_recog.py
 
 RandomizedTree::train
 -------------------------
@@ -99,7 +101,9 @@ Trains a randomized tree using an input set of keypoints.
 
     :param num_quant_bits: Number of bits used for quantization.
 
+.. note::
 
+   * : An example on training a Random Tree Classifier for letter recognition can be found at opencv_source_code\samples\cpp\letter_recog.cpp
 
 RandomizedTree::read
 ------------------------
diff --git a/modules/legacy/doc/histograms.rst b/modules/legacy/doc/histograms.rst
index c4359e9e8..fba348c3e 100644
--- a/modules/legacy/doc/histograms.rst
+++ b/modules/legacy/doc/histograms.rst
@@ -87,4 +87,3 @@ Returns a pointer to the histogram bin.
 ..
 
 The macros ``GetHistValue`` return a pointer to the specified bin of the 1D, 2D, 3D, or N-D histogram. In case of a sparse histogram, the function creates a new bin and sets it to 0, unless it exists already.
-
diff --git a/modules/legacy/doc/motion_analysis.rst b/modules/legacy/doc/motion_analysis.rst
index cd419b800..8c046bf24 100644
--- a/modules/legacy/doc/motion_analysis.rst
+++ b/modules/legacy/doc/motion_analysis.rst
@@ -78,5 +78,3 @@ Calculates the optical flow for two images using Lucas-Kanade algorithm.
     :param vely: Vertical component of the optical flow of the same size as input images, 32-bit floating-point, single-channel
 
 The function computes the flow for every pixel of the first input image using the Lucas and Kanade algorithm [Lucas81]_. The function is obsolete. To track sparse features, use :ocv:func:`calcOpticalFlowPyrLK`. To track all the pixels, use :ocv:func:`calcOpticalFlowFarneback`.
-
-
diff --git a/modules/legacy/doc/planar_subdivisions.rst b/modules/legacy/doc/planar_subdivisions.rst
index 096309e93..f4ce8cbb5 100644
--- a/modules/legacy/doc/planar_subdivisions.rst
+++ b/modules/legacy/doc/planar_subdivisions.rst
@@ -312,4 +312,3 @@ Inserts a single point into a Delaunay triangulation.
     :param pt: Inserted point.
 
 The function inserts a single point into a subdivision and modifies the subdivision topology appropriately. If a point with the same coordinates exists already, no new point is added. The function returns a pointer to the allocated point. No virtual point coordinates are calculated at this stage.
-
diff --git a/modules/legacy/include/opencv2/legacy/streams.hpp b/modules/legacy/include/opencv2/legacy/streams.hpp
index 6935b00c3..e164bf4a8 100644
--- a/modules/legacy/include/opencv2/legacy/streams.hpp
+++ b/modules/legacy/include/opencv2/legacy/streams.hpp
@@ -90,4 +90,3 @@ and DirectX SDK (2006 April or later).
 #endif
 
 #endif
-
diff --git a/modules/legacy/src/_vm.h b/modules/legacy/src/_vm.h
index df247fffb..7964cd35b 100644
--- a/modules/legacy/src/_vm.h
+++ b/modules/legacy/src/_vm.h
@@ -293,4 +293,3 @@ int icvSingularValueDecomposition(	int		M,
 
 /*======================================================================================*/
 #endif/*_CV_VM_H_*/
-
diff --git a/modules/legacy/src/bgfg_codebook.cpp b/modules/legacy/src/bgfg_codebook.cpp
index 03e7e0978..2a53b29bf 100644
--- a/modules/legacy/src/bgfg_codebook.cpp
+++ b/modules/legacy/src/bgfg_codebook.cpp
@@ -359,4 +359,3 @@ void cvBGCodeBookClearStale( CvBGCodeBookModel* model, int staleThresh,
 }
 
 /* End of file. */
-
diff --git a/modules/legacy/src/bgfg_common.cpp b/modules/legacy/src/bgfg_common.cpp
index 5431c2703..2bcea5573 100644
--- a/modules/legacy/src/bgfg_common.cpp
+++ b/modules/legacy/src/bgfg_common.cpp
@@ -135,4 +135,3 @@ cvSegmentFGMask( CvArr* _mask, int poly1Hull0, float perimScale,
 }
 
 /* End of file. */
-
diff --git a/modules/legacy/src/bgfg_gaussmix.cpp b/modules/legacy/src/bgfg_gaussmix.cpp
index e678bdfd6..ae75e40c2 100644
--- a/modules/legacy/src/bgfg_gaussmix.cpp
+++ b/modules/legacy/src/bgfg_gaussmix.cpp
@@ -1317,4 +1317,3 @@ icvUpdateGaussianBGModel2( IplImage* curr_frame, CvGaussBGModel2*  bg_model )
 }
 
 /* End of file. */
-
diff --git a/modules/legacy/src/blobtrack.cpp b/modules/legacy/src/blobtrack.cpp
index 6d0a5e3f9..48b83ef91 100644
--- a/modules/legacy/src/blobtrack.cpp
+++ b/modules/legacy/src/blobtrack.cpp
@@ -637,4 +637,3 @@ void cvReleaseBlobTracker(CvBlobTracker**ppT )
     ppT[0]->Release();
     ppT[0] = 0;
 }
-
diff --git a/modules/legacy/src/blobtrackanalysis.cpp b/modules/legacy/src/blobtrackanalysis.cpp
index 5095d3aee..6416ae688 100644
--- a/modules/legacy/src/blobtrackanalysis.cpp
+++ b/modules/legacy/src/blobtrackanalysis.cpp
@@ -125,4 +125,3 @@ CvBlobTrackAnalysis* cvCreateBlobTrackAnalysisList(CvBlobTrackAnalysisOne* (*cre
 }
 
 /* ======================== Analyser modules ============================= */
-
diff --git a/modules/legacy/src/blobtrackanalysisior.cpp b/modules/legacy/src/blobtrackanalysisior.cpp
index 571ca9ef0..c5b2222e6 100644
--- a/modules/legacy/src/blobtrackanalysisior.cpp
+++ b/modules/legacy/src/blobtrackanalysisior.cpp
@@ -171,4 +171,3 @@ CvBlobTrackAnalysis* cvCreateModuleBlobTrackAnalysisIOR()
     return (CvBlobTrackAnalysis*)pIOR;
 }/* cvCreateCvBlobTrackAnalysisIOR */
 /* ======================== Analyser modules ============================= */
-
diff --git a/modules/legacy/src/blobtrackanalysistrackdist.cpp b/modules/legacy/src/blobtrackanalysistrackdist.cpp
index 9e93ca27a..2adba7e3a 100644
--- a/modules/legacy/src/blobtrackanalysistrackdist.cpp
+++ b/modules/legacy/src/blobtrackanalysistrackdist.cpp
@@ -578,4 +578,3 @@ public:
 
 CvBlobTrackAnalysis* cvCreateModuleBlobTrackAnalysisTrackDist()
 {return (CvBlobTrackAnalysis*) new CvBlobTrackAnalysisTrackDist;}
-
diff --git a/modules/legacy/src/blobtrackgen1.cpp b/modules/legacy/src/blobtrackgen1.cpp
index b76086e3e..7114f2b4c 100644
--- a/modules/legacy/src/blobtrackgen1.cpp
+++ b/modules/legacy/src/blobtrackgen1.cpp
@@ -181,5 +181,3 @@ CvBlobTrackGen* cvCreateModuleBlobTrackGen1()
 {
     return (CvBlobTrackGen*) new CvBlobTrackGen1(0);
 }
-
-
diff --git a/modules/legacy/src/blobtrackgenyml.cpp b/modules/legacy/src/blobtrackgenyml.cpp
index 8a414915c..0d9de45ee 100644
--- a/modules/legacy/src/blobtrackgenyml.cpp
+++ b/modules/legacy/src/blobtrackgenyml.cpp
@@ -215,5 +215,3 @@ CvBlobTrackGen* cvCreateModuleBlobTrackGenYML()
 {
     return (CvBlobTrackGen*) new CvBlobTrackGenYML;
 }
-
-
diff --git a/modules/legacy/src/blobtrackingauto.cpp b/modules/legacy/src/blobtrackingauto.cpp
index 59e0ee60f..ada0bdff4 100644
--- a/modules/legacy/src/blobtrackingauto.cpp
+++ b/modules/legacy/src/blobtrackingauto.cpp
@@ -479,4 +479,3 @@ void CvBlobTrackerAuto1::Process(IplImage* pImg, IplImage* pMask)
     TIME_END("TrackAnalysis",m_BlobList.GetBlobNum())
 
 } /* CvBlobTrackerAuto1::Process */
-
diff --git a/modules/legacy/src/blobtrackingmsfg.cpp b/modules/legacy/src/blobtrackingmsfg.cpp
index 844496444..5fd9634d4 100644
--- a/modules/legacy/src/blobtrackingmsfg.cpp
+++ b/modules/legacy/src/blobtrackingmsfg.cpp
@@ -1179,4 +1179,3 @@ CvBlobTracker* cvCreateBlobTrackerMSPF()
 {
     return cvCreateBlobTrackerList(cvCreateBlobTrackerOneMSPF);
 }
-
diff --git a/modules/legacy/src/blobtrackingmsfgs.cpp b/modules/legacy/src/blobtrackingmsfgs.cpp
index 3e1c7a066..f3b3cf375 100644
--- a/modules/legacy/src/blobtrackingmsfgs.cpp
+++ b/modules/legacy/src/blobtrackingmsfgs.cpp
@@ -454,4 +454,3 @@ CvBlobTracker* cvCreateBlobTrackerMSFGS()
 {
     return cvCreateBlobTrackerList(cvCreateBlobTrackerOneMSFGS);
 }
-
diff --git a/modules/legacy/src/blobtrackpostprockalman.cpp b/modules/legacy/src/blobtrackpostprockalman.cpp
index fb6f8dee9..378e1e78f 100644
--- a/modules/legacy/src/blobtrackpostprockalman.cpp
+++ b/modules/legacy/src/blobtrackpostprockalman.cpp
@@ -325,4 +325,3 @@ CvBlobTrackPredictor* cvCreateModuleBlobTrackPredictKalman()
     return (CvBlobTrackPredictor*) new CvBlobTrackPredictKalman;
 }
 /*======================= KALMAN PREDICTOR =========================*/
-
diff --git a/modules/legacy/src/calcimagehomography.cpp b/modules/legacy/src/calcimagehomography.cpp
index 502c24d98..928e056b4 100644
--- a/modules/legacy/src/calcimagehomography.cpp
+++ b/modules/legacy/src/calcimagehomography.cpp
@@ -111,4 +111,3 @@ cvCalcImageHomography( float* line, CvPoint3D32f* _center,
 }
 
 /* End of file. */
-
diff --git a/modules/legacy/src/calibfilter.cpp b/modules/legacy/src/calibfilter.cpp
index 7db27a162..99e9ec7c1 100644
--- a/modules/legacy/src/calibfilter.cpp
+++ b/modules/legacy/src/calibfilter.cpp
@@ -919,5 +919,3 @@ bool CvCalibFilter::Undistort( CvMat** srcarr, CvMat** dstarr )
 
     return true;
 }
-
-
diff --git a/modules/legacy/src/clique.cpp b/modules/legacy/src/clique.cpp
index 9280cb17b..d8f2f59da 100644
--- a/modules/legacy/src/clique.cpp
+++ b/modules/legacy/src/clique.cpp
@@ -706,4 +706,3 @@ void cvBronKerbosch( CvGraph* graph )
 }//end cvBronKerbosch
 
 #endif
-
diff --git a/modules/legacy/src/decomppoly.cpp b/modules/legacy/src/decomppoly.cpp
index c577a9c1f..0cb10551c 100644
--- a/modules/legacy/src/decomppoly.cpp
+++ b/modules/legacy/src/decomppoly.cpp
@@ -626,4 +626,3 @@ void cvDecompPoly( CvContour* cont,
 #endif
 
 // End of file decomppoly.cpp
-
diff --git a/modules/legacy/src/dpstereo.cpp b/modules/legacy/src/dpstereo.cpp
index a55e1cafb..838cf8ae0 100644
--- a/modules/legacy/src/dpstereo.cpp
+++ b/modules/legacy/src/dpstereo.cpp
@@ -76,7 +76,7 @@ typedef struct _CvRightImData
     uchar min_val, max_val;
 } _CvRightImData;
 
-#define CV_IMAX3(a,b,c) ((temp3 = (a) >= (b) ? (a) : (b)),(temp3 >= (c) ? temp3 : (c)))
+#define CV_IMAX3(a,b,c) ((temp2 = (a) >= (b) ? (a) : (b)),(temp2 >= (c) ? temp2 : (c)))
 #define CV_IMIN3(a,b,c) ((temp3 = (a) <= (b) ? (a) : (b)),(temp3 <= (c) ? temp3 : (c)))
 
 static void icvFindStereoCorrespondenceByBirchfieldDP( uchar* src1, uchar* src2,
@@ -87,7 +87,7 @@ static void icvFindStereoCorrespondenceByBirchfieldDP( uchar* src1, uchar* src2,
                                                 float  _param3, float _param4,
                                                 float  _param5 )
 {
-    int     x, y, i, j, temp3;
+    int     x, y, i, j, temp2, temp3;
     int     d, s;
     int     dispH =  maxDisparity + 3;
     uchar  *dispdata;
@@ -551,4 +551,3 @@ cvFindStereoCorrespondence(
 }
 
 /* End of file. */
-
diff --git a/modules/legacy/src/em.cpp b/modules/legacy/src/em.cpp
index c11c23598..b49eb9131 100644
--- a/modules/legacy/src/em.cpp
+++ b/modules/legacy/src/em.cpp
@@ -85,13 +85,12 @@ void CvEM::read( CvFileStorage* fs, CvFileNode* node )
 
 void CvEM::write( CvFileStorage* _fs, const char* name ) const
 {
-    FileStorage fs = _fs;
+    FileStorage fs(_fs, false);
     if(name)
         fs << name << "{";
     emObj.write(fs);
     if(name)
         fs << "}";
-    fs.fs.obj = 0;
 }
 
 double CvEM::calcLikelihood( const Mat &input_sample ) const
diff --git a/modules/legacy/src/enteringblobdetection.cpp b/modules/legacy/src/enteringblobdetection.cpp
index 32a83bf0f..d66a997a7 100644
--- a/modules/legacy/src/enteringblobdetection.cpp
+++ b/modules/legacy/src/enteringblobdetection.cpp
@@ -1060,5 +1060,3 @@ int CvBlobDetectorCC::DetectNewBlob(IplImage* /*pImg*/, IplImage* pFGMask, CvBlo
     return result;
 
 }   /* cvDetectNewBlob */
-
-
diff --git a/modules/legacy/src/enteringblobdetectionreal.cpp b/modules/legacy/src/enteringblobdetectionreal.cpp
index b0ec792a7..9458ab53e 100644
--- a/modules/legacy/src/enteringblobdetectionreal.cpp
+++ b/modules/legacy/src/enteringblobdetectionreal.cpp
@@ -161,4 +161,3 @@ public:
 
 /* Blob detector constructor: */
 //CvBlobDetector* cvCreateBlobDetectorReal(CvTestSeq* pTestSeq){return new CvBlobDetectorReal(pTestSeq);}
-
diff --git a/modules/legacy/src/epilines.cpp b/modules/legacy/src/epilines.cpp
index e4f3eda4f..8407e646b 100644
--- a/modules/legacy/src/epilines.cpp
+++ b/modules/legacy/src/epilines.cpp
@@ -3698,5 +3698,3 @@ int cvComputeEssentialMatrix(  CvMatr32f rotMatr,
 
     return CV_OK;
 }
-
-
diff --git a/modules/legacy/src/extendededges.cpp b/modules/legacy/src/extendededges.cpp
index 8ade446e1..aa2488f83 100644
--- a/modules/legacy/src/extendededges.cpp
+++ b/modules/legacy/src/extendededges.cpp
@@ -263,7 +263,3 @@ CvSeq* icvCutContourRaster( CvSeq* current, CvMemStorage* storage, IplImage* ima
 
 */
 #endif /* WIN32 */
-
-
-
-
diff --git a/modules/legacy/src/face.cpp b/modules/legacy/src/face.cpp
index 7d2a5f13e..b188a10de 100644
--- a/modules/legacy/src/face.cpp
+++ b/modules/legacy/src/face.cpp
@@ -351,4 +351,3 @@ inline void RFace::ResizeRect(CvRect Rect,CvRect * lpRect,long lDir,long lD)
     }
 
 }// inline void RFace::ResizeRect(CvRect * lpRect,long lDir,long lD)
-
diff --git a/modules/legacy/src/face.h b/modules/legacy/src/face.h
index e7bda1a0c..74a96f8de 100644
--- a/modules/legacy/src/face.h
+++ b/modules/legacy/src/face.h
@@ -133,4 +133,3 @@ private:
 
 
 #endif //__FACE_H__
-
diff --git a/modules/legacy/src/facedetection.cpp b/modules/legacy/src/facedetection.cpp
index 4487fdad1..b93d9aa6b 100644
--- a/modules/legacy/src/facedetection.cpp
+++ b/modules/legacy/src/facedetection.cpp
@@ -499,5 +499,3 @@ Face * FaceDetectionList::GetData()
     m_pCurElem = m_pCurElem->m_pNext;
     return m_pCurElem->m_pFace;
 }//Face * FaceDetectionList::GetData()
-
-
diff --git a/modules/legacy/src/facetemplate.h b/modules/legacy/src/facetemplate.h
index b0ff47c2c..31a3a8303 100644
--- a/modules/legacy/src/facetemplate.h
+++ b/modules/legacy/src/facetemplate.h
@@ -199,4 +199,3 @@ inline MouthFaceTemplate::MouthFaceTemplate(long lNumber,CvRect rect,double dEye
 
 
 #endif//__FACETEMPLATE_H__
-
diff --git a/modules/legacy/src/features2d.cpp b/modules/legacy/src/features2d.cpp
index e985ec226..f313875ac 100644
--- a/modules/legacy/src/features2d.cpp
+++ b/modules/legacy/src/features2d.cpp
@@ -73,7 +73,7 @@ cvExtractSURF( const CvArr* _img, const CvArr* _mask,
     Mat descr;
 
     Ptr<Feature2D> surf = Algorithm::create<Feature2D>("Feature2D.SURF");
-    if( surf.empty() )
+    if( !surf )
         CV_Error(CV_StsNotImplemented, "OpenCV was built without SURF support");
 
     surf->set("hessianThreshold", params.hessianThreshold);
@@ -107,10 +107,10 @@ CV_IMPL CvSeq*
 cvGetStarKeypoints( const CvArr* _img, CvMemStorage* storage,
                     CvStarDetectorParams params )
 {
-    Ptr<StarDetector> star = new StarDetector(params.maxSize, params.responseThreshold,
-                                              params.lineThresholdProjected,
-                                              params.lineThresholdBinarized,
-                                              params.suppressNonmaxSize);
+    Ptr<StarDetector> star(new StarDetector(params.maxSize, params.responseThreshold,
+                                            params.lineThresholdProjected,
+                                            params.lineThresholdBinarized,
+                                            params.suppressNonmaxSize));
     std::vector<KeyPoint> kpts;
     star->detect(cvarrToMat(_img), kpts, Mat());
 
@@ -122,5 +122,3 @@ cvGetStarKeypoints( const CvArr* _img, CvMemStorage* storage,
     }
     return seq;
 }
-
-
diff --git a/modules/legacy/src/findface.cpp b/modules/legacy/src/findface.cpp
index 819099481..a41c579d4 100644
--- a/modules/legacy/src/findface.cpp
+++ b/modules/legacy/src/findface.cpp
@@ -65,4 +65,3 @@ CvSeq * cvPostBoostingFindFace(IplImage * Image,CvMemStorage* lpStorage)
 
     return lpSeq;
 }//cvPostBoostingFindFace(IplImage * Image)
-
diff --git a/modules/legacy/src/findhandregion.cpp b/modules/legacy/src/findhandregion.cpp
index 8a568833e..23fc41935 100644
--- a/modules/legacy/src/findhandregion.cpp
+++ b/modules/legacy/src/findhandregion.cpp
@@ -631,4 +631,3 @@ cvFindHandRegionA( CvPoint3D32f * points, int count,
     IPPI_CALL( icvFindHandRegionA( points, count, indexs, line, size, jc,
                                     center, storage, numbers ));
 }
-
diff --git a/modules/legacy/src/hmm.cpp b/modules/legacy/src/hmm.cpp
index d1af33618..d1196feac 100644
--- a/modules/legacy/src/hmm.cpp
+++ b/modules/legacy/src/hmm.cpp
@@ -1695,4 +1695,3 @@ cvMixSegmL2( CvImgObsInfo ** obs_info_array, int num_img, CvEHMM * hmm )
 }
 
 /* End of file */
-
diff --git a/modules/legacy/src/hmm1d.cpp b/modules/legacy/src/hmm1d.cpp
index 056bbf8c7..5daee8693 100644
--- a/modules/legacy/src/hmm1d.cpp
+++ b/modules/legacy/src/hmm1d.cpp
@@ -1148,4 +1148,3 @@ CvStatus icvEstimate1DHMMStateParams(CvImgObsInfo** obs_info_array, int num_img,
 
 
 #endif
-
diff --git a/modules/legacy/src/image.cpp b/modules/legacy/src/image.cpp
index 3f691bc27..a3fc8b914 100644
--- a/modules/legacy/src/image.cpp
+++ b/modules/legacy/src/image.cpp
@@ -330,4 +330,3 @@ void CvMatrix::show( const char* window_name )
 
 
 /* End of file. */
-
diff --git a/modules/legacy/src/kdtree.cpp b/modules/legacy/src/kdtree.cpp
index a32677b79..418395075 100644
--- a/modules/legacy/src/kdtree.cpp
+++ b/modules/legacy/src/kdtree.cpp
@@ -172,7 +172,7 @@ public:
       CV_Error(CV_StsUnsupportedFormat, "dist must be CV_64FC1");
 
     if (CV_MAT_TYPE(type()) != CV_MAT_TYPE(desc->type)) {
-      tmp_desc = cvCreateMat(desc->rows, desc->cols, type());
+      tmp_desc.reset(cvCreateMat(desc->rows, desc->cols, type()));
       cvConvert(desc, tmp_desc);
       desc = tmp_desc;
     }
diff --git a/modules/legacy/src/lcm.cpp b/modules/legacy/src/lcm.cpp
index 05919f59f..95ab2c6d6 100644
--- a/modules/legacy/src/lcm.cpp
+++ b/modules/legacy/src/lcm.cpp
@@ -718,7 +718,3 @@ void _cvProjectionPointToSegment(CvPoint2D32f* PointO,
     PrPoint->y = PointO->y - VectorAO.y + alfa*VectorAB.y;
     return;
 }//end of _cvProjectionPointToSegment
-
-
-
-
diff --git a/modules/legacy/src/levmar.cpp b/modules/legacy/src/levmar.cpp
index aa2514e33..9b16b5a0f 100644
--- a/modules/legacy/src/levmar.cpp
+++ b/modules/legacy/src/levmar.cpp
@@ -315,6 +315,3 @@ void TestOptimX2Y2()
 }
 
 #endif
-
-
-
diff --git a/modules/legacy/src/lines.cpp b/modules/legacy/src/lines.cpp
index d0e243dfb..48eaf797d 100644
--- a/modules/legacy/src/lines.cpp
+++ b/modules/legacy/src/lines.cpp
@@ -477,4 +477,3 @@ cvPostWarpImage( int numLines,  /* number of scanlines  */
 }
 
 /* End of file */
-
diff --git a/modules/legacy/src/lmeds.cpp b/modules/legacy/src/lmeds.cpp
index 42272345c..779464076 100644
--- a/modules/legacy/src/lmeds.cpp
+++ b/modules/legacy/src/lmeds.cpp
@@ -1683,4 +1683,3 @@ icvPoint7( int *ml, int *mr, double *F, int *amount )
 
     return error;
 }                               /* icvPoint7 */
-
diff --git a/modules/legacy/src/morphing.cpp b/modules/legacy/src/morphing.cpp
index 44451414d..ab85b3b93 100644
--- a/modules/legacy/src/morphing.cpp
+++ b/modules/legacy/src/morphing.cpp
@@ -389,4 +389,3 @@ cvMorphEpilinesMulti( int lines,        /* number of lines             */
                                           second_corr   /* correspond information for the 2nd seq     */
                 ));
 }
-
diff --git a/modules/legacy/src/oneway.cpp b/modules/legacy/src/oneway.cpp
index 7fa4bee33..43ded8593 100644
--- a/modules/legacy/src/oneway.cpp
+++ b/modules/legacy/src/oneway.cpp
@@ -1736,7 +1736,7 @@ namespace cv{
     {
         std::vector<KeyPoint> features;
         Ptr<FeatureDetector> surf_extractor = FeatureDetector::create("SURF");
-        if( surf_extractor.empty() )
+        if( !surf_extractor )
             CV_Error(CV_StsNotImplemented, "OpenCV was built without SURF support");
         surf_extractor->set("hessianThreshold", 1.0);
         //printf("Extracting SURF features...");
@@ -2186,7 +2186,7 @@ namespace cv{
     {
         clear();
 
-        if( _base.empty() )
+        if( !_base )
             base = _base;
 
         params = _params;
@@ -2197,16 +2197,17 @@ namespace cv{
         GenericDescriptorMatcher::clear();
 
         prevTrainCount = 0;
-        if( !base.empty() )
+        if( base )
             base->clear();
     }
 
     void OneWayDescriptorMatcher::train()
     {
-        if( base.empty() || prevTrainCount < (int)trainPointCollection.keypointCount() )
+        if( !base || prevTrainCount < (int)trainPointCollection.keypointCount() )
         {
-            base = new OneWayDescriptorObject( params.patchSize, params.poseCount, params.pcaFilename,
-                                              params.trainPath, params.trainImagesList, params.minScale, params.maxScale, params.stepScale );
+            base.reset(
+                new OneWayDescriptorObject( params.patchSize, params.poseCount, params.pcaFilename,
+                                            params.trainPath, params.trainImagesList, params.minScale, params.maxScale, params.stepScale ));
 
             base->Allocate( (int)trainPointCollection.keypointCount() );
             prevTrainCount = (int)trainPointCollection.keypointCount();
@@ -2270,8 +2271,9 @@ namespace cv{
 
     void OneWayDescriptorMatcher::read( const FileNode &fn )
     {
-        base = new OneWayDescriptorObject( params.patchSize, params.poseCount, String (), String (), String (),
-                                          params.minScale, params.maxScale, params.stepScale );
+        base.reset(
+            new OneWayDescriptorObject( params.patchSize, params.poseCount, String (), String (), String (),
+                                        params.minScale, params.maxScale, params.stepScale ));
         base->Read (fn);
     }
 
@@ -2282,12 +2284,12 @@ namespace cv{
 
     bool OneWayDescriptorMatcher::empty() const
     {
-        return base.empty() || base->empty();
+        return !base || base->empty();
     }
 
     Ptr<GenericDescriptorMatcher> OneWayDescriptorMatcher::clone( bool emptyTrainData ) const
     {
-        OneWayDescriptorMatcher* matcher = new OneWayDescriptorMatcher( params );
+        Ptr<OneWayDescriptorMatcher> matcher = makePtr<OneWayDescriptorMatcher>( params );
 
         if( !emptyTrainData )
         {
diff --git a/modules/legacy/src/planardetect.cpp b/modules/legacy/src/planardetect.cpp
index 2bffea0ed..fa9152d47 100644
--- a/modules/legacy/src/planardetect.cpp
+++ b/modules/legacy/src/planardetect.cpp
@@ -1240,7 +1240,7 @@ FernDescriptorMatcher::FernDescriptorMatcher( const Params& _params )
     params = _params;
     if( !params.filename.empty() )
     {
-        classifier = new FernClassifier;
+        classifier = makePtr<FernClassifier>();
         FileStorage fs(params.filename, FileStorage::READ);
         if( fs.isOpened() )
             classifier->read( fs.getFirstTopLevelNode() );
@@ -1260,7 +1260,7 @@ void FernDescriptorMatcher::clear()
 
 void FernDescriptorMatcher::train()
 {
-    if( classifier.empty() || prevTrainCount < (int)trainPointCollection.keypointCount() )
+    if( !classifier || prevTrainCount < (int)trainPointCollection.keypointCount() )
     {
         assert( params.filename.empty() );
 
@@ -1268,9 +1268,10 @@ void FernDescriptorMatcher::train()
         for( size_t imgIdx = 0; imgIdx < trainPointCollection.imageCount(); imgIdx++ )
             KeyPoint::convert( trainPointCollection.getKeypoints((int)imgIdx), points[imgIdx] );
 
-        classifier = new FernClassifier( points, trainPointCollection.getImages(), std::vector<std::vector<int> >(), 0, // each points is a class
-                                        params.patchSize, params.signatureSize, params.nstructs, params.structSize,
-                                        params.nviews, params.compressionMethod, params.patchGenerator );
+        classifier.reset(
+            new FernClassifier( points, trainPointCollection.getImages(), std::vector<std::vector<int> >(), 0, // each points is a class
+                                params.patchSize, params.signatureSize, params.nstructs, params.structSize,
+                                params.nviews, params.compressionMethod, params.patchGenerator ));
     }
 }
 
@@ -1384,12 +1385,12 @@ void FernDescriptorMatcher::write( FileStorage& fs ) const
 
 bool FernDescriptorMatcher::empty() const
 {
-    return classifier.empty() || classifier->empty();
+    return !classifier || classifier->empty();
 }
 
 Ptr<GenericDescriptorMatcher> FernDescriptorMatcher::clone( bool emptyTrainData ) const
 {
-    FernDescriptorMatcher* matcher = new FernDescriptorMatcher( params );
+    Ptr<FernDescriptorMatcher> matcher = makePtr<FernDescriptorMatcher>( params );
     if( !emptyTrainData )
     {
         CV_Error( CV_StsNotImplemented, "deep clone dunctionality is not implemented, because "
diff --git a/modules/legacy/src/precomp.cpp b/modules/legacy/src/precomp.cpp
deleted file mode 100644
index a9477b85b..000000000
--- a/modules/legacy/src/precomp.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/legacy/src/testseq.cpp b/modules/legacy/src/testseq.cpp
index 5e7976a40..af4da3384 100644
--- a/modules/legacy/src/testseq.cpp
+++ b/modules/legacy/src/testseq.cpp
@@ -1367,4 +1367,3 @@ void cvAddNoise(IplImage* pImg, int noise_type, double Ampl, CvRandState* rnd_st
     cvReleaseImage(&pImgAdd);
 
 }   /* cvAddNoise */
-
diff --git a/modules/legacy/src/texture.cpp b/modules/legacy/src/texture.cpp
index 4b7cb061c..07fd08726 100644
--- a/modules/legacy/src/texture.cpp
+++ b/modules/legacy/src/texture.cpp
@@ -645,4 +645,3 @@ cvCreateGLCMImage( CvGLCM* GLCM, int step )
 
     return dest;
 }
-
diff --git a/modules/legacy/src/trifocal.cpp b/modules/legacy/src/trifocal.cpp
index b7200a510..b049ccccf 100644
--- a/modules/legacy/src/trifocal.cpp
+++ b/modules/legacy/src/trifocal.cpp
@@ -2782,4 +2782,3 @@ static void icvDecomposeQ(CvMat* /*matrQ*/,CvMat* /*matrH*/)
 }
 
 #endif
-
diff --git a/modules/legacy/src/vecfacetracking.cpp b/modules/legacy/src/vecfacetracking.cpp
index f7a9a3b72..81d1a04cb 100644
--- a/modules/legacy/src/vecfacetracking.cpp
+++ b/modules/legacy/src/vecfacetracking.cpp
@@ -971,4 +971,3 @@ inline double CalculateTransformationLMS3_0( CvPoint* pTemplPoints, CvPoint* pSr
         dbLMS = dbXtXt + dbYtYt - ((double)pow(dbXtXs + dbYtYs,2) + (double)pow(dbXtYs - dbYtXs,2)) / del;
     return dbLMS;
 }
-
diff --git a/modules/legacy/src/video.cpp b/modules/legacy/src/video.cpp
index 3c6191ac9..f0bb6bca4 100644
--- a/modules/legacy/src/video.cpp
+++ b/modules/legacy/src/video.cpp
@@ -80,5 +80,3 @@ cvDeInterlace( const CvArr* framearr, CvArr* fieldEven, CvArr* fieldOdd )
 }
 
 /* End of file. */
-
-
diff --git a/modules/legacy/test/test_bruteforcematcher.cpp b/modules/legacy/test/test_bruteforcematcher.cpp
index c6c40f845..3259563e5 100644
--- a/modules/legacy/test/test_bruteforcematcher.cpp
+++ b/modules/legacy/test/test_bruteforcematcher.cpp
@@ -113,4 +113,3 @@ protected:
 };
 
 TEST(Legacy_BruteForceMatcher, accuracy) { CV_BruteForceMatcherTest test; test.safe_run(); }
-
diff --git a/modules/legacy/test/test_optflow.cpp b/modules/legacy/test/test_optflow.cpp
index 99645410f..c5b35e963 100644
--- a/modules/legacy/test/test_optflow.cpp
+++ b/modules/legacy/test/test_optflow.cpp
@@ -353,5 +353,3 @@ void CV_OptFlowTest::run( int /* start_from */)
 
 
 TEST(Legacy_OpticalFlow, accuracy) { CV_OptFlowTest test; test.safe_run(); }
-
-
diff --git a/modules/legacy/test/test_precomp.cpp b/modules/legacy/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/legacy/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/legacy/test/test_subdivisions.cpp b/modules/legacy/test/test_subdivisions.cpp
index 3dafd17ba..0df71ab5f 100644
--- a/modules/legacy/test/test_subdivisions.cpp
+++ b/modules/legacy/test/test_subdivisions.cpp
@@ -338,4 +338,3 @@ _exit_:
 TEST(Legacy_Subdiv, correctness) { CV_SubdivTest test; test.safe_run(); }
 
 /* End of file. */
-
diff --git a/modules/ml/doc/decision_trees.rst b/modules/ml/doc/decision_trees.rst
index 4bf9c116b..de6fc99d6 100644
--- a/modules/ml/doc/decision_trees.rst
+++ b/modules/ml/doc/decision_trees.rst
@@ -318,4 +318,3 @@ decision tree.
 
 
 .. [Breiman84] Breiman, L., Friedman, J. Olshen, R. and Stone, C. (1984), *Classification and Regression Trees*, Wadsworth.
-
diff --git a/modules/ml/doc/k_nearest_neighbors.rst b/modules/ml/doc/k_nearest_neighbors.rst
index e01871771..05413c778 100644
--- a/modules/ml/doc/k_nearest_neighbors.rst
+++ b/modules/ml/doc/k_nearest_neighbors.rst
@@ -11,6 +11,12 @@ CvKNearest
 
 The class implements K-Nearest Neighbors model as described in the beginning of this section.
 
+.. note::
+
+   * (Python) An example of digit recognition using KNearest can be found at opencv_source/samples/python2/digits.py
+   * (Python) An example of grid search digit recognition using KNearest can be found at opencv_source/samples/python2/digits_adjust.py
+   * (Python) An example of video digit recognition using KNearest can be found at opencv_source/samples/python2/digits_video.py
+
 CvKNearest::CvKNearest
 ----------------------
 Default and training constructors.
@@ -188,5 +194,3 @@ The sample below (currently using the obsolete ``CvMat`` structures) demonstrate
         cvReleaseMat( &trainData );
         return 0;
     }
-
-
diff --git a/modules/ml/doc/neural_networks.rst b/modules/ml/doc/neural_networks.rst
index f955f15b1..0496e2201 100644
--- a/modules/ml/doc/neural_networks.rst
+++ b/modules/ml/doc/neural_networks.rst
@@ -278,4 +278,3 @@ Returns neurons weights of the particular layer.
 .. ocv:function:: double* CvANN_MLP::get_weights(int layer)
 
     :param layer: Index of the particular layer.
-
diff --git a/modules/ml/doc/statistical_models.rst b/modules/ml/doc/statistical_models.rst
index 76da927b1..af250b786 100644
--- a/modules/ml/doc/statistical_models.rst
+++ b/modules/ml/doc/statistical_models.rst
@@ -161,4 +161,3 @@ Predicts the response for a sample.
 The method is used to predict the response for a new sample. In case of a classification, the method returns the class label. In case of a regression, the method returns the output function value. The input sample must have as many components as the ``train_data`` passed to ``train`` contains. If the ``var_idx`` parameter is passed to ``train``, it is remembered and then is used to extract only the necessary components from the input sample in the method ``predict``.
 
 The suffix ``const`` means that prediction does not affect the internal model state, so the method can be safely called from within different threads.
-
diff --git a/modules/ml/doc/support_vector_machines.rst b/modules/ml/doc/support_vector_machines.rst
index 723dda087..14ee12bb4 100644
--- a/modules/ml/doc/support_vector_machines.rst
+++ b/modules/ml/doc/support_vector_machines.rst
@@ -158,6 +158,12 @@ CvSVM
 
 Support Vector Machines.
 
+.. note::
+
+   * (Python) An example of digit recognition using SVM can be found at opencv_source/samples/python2/digits.py
+   * (Python) An example of grid search digit recognition using SVM can be found at opencv_source/samples/python2/digits_adjust.py
+   * (Python) An example of video digit recognition using SVM can be found at opencv_source/samples/python2/digits_video.py
+
 CvSVM::CvSVM
 ------------
 Default and training constructors.
diff --git a/modules/ml/include/opencv2/ml.hpp b/modules/ml/include/opencv2/ml.hpp
index 7a334f264..2b84d67ca 100644
--- a/modules/ml/include/opencv2/ml.hpp
+++ b/modules/ml/include/opencv2/ml.hpp
@@ -942,6 +942,8 @@ protected:
     CvDTreeNode* root;
     CvMat* var_importance;
     CvDTreeTrainData* data;
+    CvMat train_data_hdr, responses_hdr;
+    cv::Mat train_data_mat, responses_mat;
 
 public:
     int pruned_tree_idx;
@@ -1053,6 +1055,8 @@ protected:
     // array of the trees of the forest
     CvForestTree** trees;
     CvDTreeTrainData* data;
+    CvMat train_data_hdr, responses_hdr;
+    cv::Mat train_data_mat, responses_mat;
     int ntrees;
     int nclasses;
     double oob_error;
@@ -1268,6 +1272,8 @@ protected:
     virtual void initialize_weights(double (&p)[2]);
 
     CvDTreeTrainData* data;
+    CvMat train_data_hdr, responses_hdr;
+    cv::Mat train_data_mat, responses_mat;
     CvBoostParams params;
     CvSeq* weak;
 
@@ -2142,7 +2148,7 @@ typedef CvANN_MLP NeuralNet_MLP;
 typedef CvGBTreesParams GradientBoostingTreeParams;
 typedef CvGBTrees GradientBoostingTrees;
 
-template<> CV_EXPORTS void Ptr<CvDTreeSplit>::delete_obj();
+template<> CV_EXPORTS void DefaultDeleter<CvDTreeSplit>::operator ()(CvDTreeSplit* obj) const;
 
 CV_EXPORTS bool initModule_ml(void);
 }
diff --git a/modules/ml/include/opencv2/ml/ml.hpp b/modules/ml/include/opencv2/ml/ml.hpp
index 676d861b6..f6f9cd8f8 100644
--- a/modules/ml/include/opencv2/ml/ml.hpp
+++ b/modules/ml/include/opencv2/ml/ml.hpp
@@ -45,4 +45,4 @@
 #error this is a compatibility header which should not be used inside the OpenCV library
 #endif
 
-#include "opencv2/ml.hpp"
\ No newline at end of file
+#include "opencv2/ml.hpp"
diff --git a/modules/ml/src/boost.cpp b/modules/ml/src/boost.cpp
index 53c194f3c..a22e13a53 100644
--- a/modules/ml/src/boost.cpp
+++ b/modules/ml/src/boost.cpp
@@ -2122,9 +2122,14 @@ CvBoost::train( const Mat& _train_data, int _tflag,
                const Mat& _missing_mask,
                CvBoostParams _params, bool _update )
 {
-    CvMat tdata = _train_data, responses = _responses, vidx = _var_idx,
-        sidx = _sample_idx, vtype = _var_type, mmask = _missing_mask;
-    return train(&tdata, _tflag, &responses, vidx.data.ptr ? &vidx : 0,
+    train_data_hdr = _train_data;
+    train_data_mat = _train_data;
+    responses_hdr = _responses;
+    responses_mat = _responses;
+
+    CvMat vidx = _var_idx, sidx = _sample_idx, vtype = _var_type, mmask = _missing_mask;
+
+    return train(&train_data_hdr, _tflag, &responses_hdr, vidx.data.ptr ? &vidx : 0,
           sidx.data.ptr ? &sidx : 0, vtype.data.ptr ? &vtype : 0,
           mmask.data.ptr ? &mmask : 0, _params, _update);
 }
diff --git a/modules/ml/src/data.cpp b/modules/ml/src/data.cpp
index f4e401cb8..9171dd713 100644
--- a/modules/ml/src/data.cpp
+++ b/modules/ml/src/data.cpp
@@ -170,8 +170,13 @@ int CvMLData::read_csv(const char* filename)
 
     // skip header lines
     for( int i = 0; i < header_lines_number; i++ )
+    {
         if( fgets( buf, M, file ) == 0 )
+        {
+            fclose(file);
             return -1;
+        }
+    }
 
     // read the first data line and determine the number of variables
     if( !fgets_chomp( buf, M, file ))
diff --git a/modules/ml/src/ertrees.cpp b/modules/ml/src/ertrees.cpp
index e379ed4b1..0201deb0c 100644
--- a/modules/ml/src/ertrees.cpp
+++ b/modules/ml/src/ertrees.cpp
@@ -1844,12 +1844,16 @@ bool CvERTrees::train( const Mat& _train_data, int _tflag,
                       const Mat& _sample_idx, const Mat& _var_type,
                       const Mat& _missing_mask, CvRTParams params )
 {
-    CvMat tdata = _train_data, responses = _responses, vidx = _var_idx,
-    sidx = _sample_idx, vtype = _var_type, mmask = _missing_mask;
-    return train(&tdata, _tflag, &responses, vidx.data.ptr ? &vidx : 0,
+    train_data_hdr = _train_data;
+    train_data_mat = _train_data;
+    responses_hdr = _responses;
+    responses_mat = _responses;
+
+    CvMat vidx = _var_idx, sidx = _sample_idx, vtype = _var_type, mmask = _missing_mask;
+
+    return train(&train_data_hdr, _tflag, &responses_hdr, vidx.data.ptr ? &vidx : 0,
                  sidx.data.ptr ? &sidx : 0, vtype.data.ptr ? &vtype : 0,
                  mmask.data.ptr ? &mmask : 0, params);
 }
 
 // End of file.
-
diff --git a/modules/ml/src/knearest.cpp b/modules/ml/src/knearest.cpp
index 6b6f5e6af..a05a30da8 100644
--- a/modules/ml/src/knearest.cpp
+++ b/modules/ml/src/knearest.cpp
@@ -116,7 +116,7 @@ bool CvKNearest::train( const CvMat* _train_data, const CvMat* _responses,
 
     if( !responses )
         CV_ERROR( CV_StsNoMem, "Could not allocate memory for responses" );
-        
+
     if( _update_base && _dims != var_count )
         CV_ERROR( CV_StsBadArg, "The newly added data have different dimensionality" );
 
@@ -480,4 +480,3 @@ float CvKNearest::find_nearest( const cv::Mat& _samples, int k, CV_OUT cv::Mat&
 }
 
 /* End of file */
-
diff --git a/modules/ml/src/ml_init.cpp b/modules/ml/src/ml_init.cpp
index 3d6669c26..3ff7d9bae 100644
--- a/modules/ml/src/ml_init.cpp
+++ b/modules/ml/src/ml_init.cpp
@@ -56,7 +56,7 @@ CV_INIT_ALGORITHM(EM, "StatModel.EM",
 
 bool initModule_ml(void)
 {
-    Ptr<Algorithm> em = createEM_hidden();
+    Ptr<Algorithm> em = createEM_ptr_hidden();
     return em->info() != 0;
 }
 
diff --git a/modules/ml/src/nbayes.cpp b/modules/ml/src/nbayes.cpp
index f1f7a24ec..b9a966cd1 100644
--- a/modules/ml/src/nbayes.cpp
+++ b/modules/ml/src/nbayes.cpp
@@ -623,4 +623,3 @@ float CvNormalBayesClassifier::predict( const Mat& _samples, Mat* _results ) con
 }
 
 /* End of file. */
-
diff --git a/modules/ml/src/precomp.hpp b/modules/ml/src/precomp.hpp
index ecae7b344..06b8f4f35 100644
--- a/modules/ml/src/precomp.hpp
+++ b/modules/ml/src/precomp.hpp
@@ -41,7 +41,6 @@
 #ifndef __OPENCV_PRECOMP_H__
 #define __OPENCV_PRECOMP_H__
 
-
 #include "opencv2/core.hpp"
 #include "opencv2/ml.hpp"
 #include "opencv2/core/core_c.h"
diff --git a/modules/ml/src/rtrees.cpp b/modules/ml/src/rtrees.cpp
index 7947b062f..c41b84214 100644
--- a/modules/ml/src/rtrees.cpp
+++ b/modules/ml/src/rtrees.cpp
@@ -126,7 +126,7 @@ void ForestTreeBestSplitFinder::operator()(const BlockedRange& range)
         }
 
         if( res && bestSplit->quality < split->quality )
-                memcpy( (CvDTreeSplit*)bestSplit, (CvDTreeSplit*)split, splitSize );
+            memcpy( bestSplit.get(), split.get(), splitSize );
     }
 }
 }
@@ -839,9 +839,14 @@ bool CvRTrees::train( const Mat& _train_data, int _tflag,
                      const Mat& _sample_idx, const Mat& _var_type,
                      const Mat& _missing_mask, CvRTParams _params )
 {
-    CvMat tdata = _train_data, responses = _responses, vidx = _var_idx,
-    sidx = _sample_idx, vtype = _var_type, mmask = _missing_mask;
-    return train(&tdata, _tflag, &responses, vidx.data.ptr ? &vidx : 0,
+    train_data_hdr = _train_data;
+    train_data_mat = _train_data;
+    responses_hdr = _responses;
+    responses_mat = _responses;
+
+    CvMat vidx = _var_idx, sidx = _sample_idx, vtype = _var_type, mmask = _missing_mask;
+
+    return train(&train_data_hdr, _tflag, &responses_hdr, vidx.data.ptr ? &vidx : 0,
                  sidx.data.ptr ? &sidx : 0, vtype.data.ptr ? &vtype : 0,
                  mmask.data.ptr ? &mmask : 0, _params);
 }
diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp
index 581abb61b..50c618e5d 100644
--- a/modules/ml/src/svm.cpp
+++ b/modules/ml/src/svm.cpp
@@ -2998,4 +2998,3 @@ cvTrainSVM_CrossValidation( const CvMat* train_data, int tflag,
 #endif
 
 /* End of file. */
-
diff --git a/modules/ml/src/tree.cpp b/modules/ml/src/tree.cpp
index e8072958f..3a67cdd37 100644
--- a/modules/ml/src/tree.cpp
+++ b/modules/ml/src/tree.cpp
@@ -1594,9 +1594,14 @@ bool CvDTree::train( const Mat& _train_data, int _tflag,
                     const Mat& _sample_idx, const Mat& _var_type,
                     const Mat& _missing_mask, CvDTreeParams _params )
 {
-    CvMat tdata = _train_data, responses = _responses, vidx=_var_idx,
-        sidx=_sample_idx, vtype=_var_type, mmask=_missing_mask;
-    return train(&tdata, _tflag, &responses, vidx.data.ptr ? &vidx : 0, sidx.data.ptr ? &sidx : 0,
+    train_data_hdr = _train_data;
+    train_data_mat = _train_data;
+    responses_hdr = _responses;
+    responses_mat = _responses;
+
+    CvMat vidx=_var_idx, sidx=_sample_idx, vtype=_var_type, mmask=_missing_mask;
+
+    return train(&train_data_hdr, _tflag, &responses_hdr, vidx.data.ptr ? &vidx : 0, sidx.data.ptr ? &sidx : 0,
                  vtype.data.ptr ? &vtype : 0, mmask.data.ptr ? &mmask : 0, _params);
 }
 
@@ -1877,7 +1882,7 @@ double CvDTree::calc_node_dir( CvDTreeNode* node )
 namespace cv
 {
 
-template<> CV_EXPORTS void Ptr<CvDTreeSplit>::delete_obj()
+template<> CV_EXPORTS void DefaultDeleter<CvDTreeSplit>::operator ()(CvDTreeSplit* obj) const
 {
     fastFree(obj);
 }
@@ -1888,12 +1893,12 @@ DTreeBestSplitFinder::DTreeBestSplitFinder( CvDTree* _tree, CvDTreeNode* _node)
     node = _node;
     splitSize = tree->get_data()->split_heap->elem_size;
 
-    bestSplit = (CvDTreeSplit*)fastMalloc(splitSize);
-    memset((CvDTreeSplit*)bestSplit, 0, splitSize);
+    bestSplit.reset((CvDTreeSplit*)fastMalloc(splitSize));
+    memset(bestSplit.get(), 0, splitSize);
     bestSplit->quality = -1;
     bestSplit->condensed_idx = INT_MIN;
-    split = (CvDTreeSplit*)fastMalloc(splitSize);
-    memset((CvDTreeSplit*)split, 0, splitSize);
+    split.reset((CvDTreeSplit*)fastMalloc(splitSize));
+    memset(split.get(), 0, splitSize);
     //haveSplit = false;
 }
 
@@ -1903,10 +1908,10 @@ DTreeBestSplitFinder::DTreeBestSplitFinder( const DTreeBestSplitFinder& finder,
     node = finder.node;
     splitSize = tree->get_data()->split_heap->elem_size;
 
-    bestSplit = (CvDTreeSplit*)fastMalloc(splitSize);
-    memcpy((CvDTreeSplit*)(bestSplit), (const CvDTreeSplit*)finder.bestSplit, splitSize);
-    split = (CvDTreeSplit*)fastMalloc(splitSize);
-    memset((CvDTreeSplit*)split, 0, splitSize);
+    bestSplit.reset((CvDTreeSplit*)fastMalloc(splitSize));
+    memcpy(bestSplit.get(), finder.bestSplit.get(), splitSize);
+    split.reset((CvDTreeSplit*)fastMalloc(splitSize));
+    memset(split.get(), 0, splitSize);
 }
 
 void DTreeBestSplitFinder::operator()(const BlockedRange& range)
@@ -1939,14 +1944,14 @@ void DTreeBestSplitFinder::operator()(const BlockedRange& range)
         }
 
         if( res && bestSplit->quality < split->quality )
-                memcpy( (CvDTreeSplit*)bestSplit, (CvDTreeSplit*)split, splitSize );
+                memcpy( bestSplit.get(), split.get(), splitSize );
     }
 }
 
 void DTreeBestSplitFinder::join( DTreeBestSplitFinder& rhs )
 {
     if( bestSplit->quality < rhs.bestSplit->quality )
-        memcpy( (CvDTreeSplit*)bestSplit, (CvDTreeSplit*)rhs.bestSplit, splitSize );
+        memcpy( bestSplit.get(), rhs.bestSplit.get(), splitSize );
 }
 }
 
diff --git a/modules/ml/test/test_emknearestkmeans.cpp b/modules/ml/test/test_emknearestkmeans.cpp
index 6841f9d72..a14b63606 100644
--- a/modules/ml/test/test_emknearestkmeans.cpp
+++ b/modules/ml/test/test_emknearestkmeans.cpp
@@ -678,4 +678,3 @@ TEST(ML_KNearest, accuracy) { CV_KNearestTest test; test.safe_run(); }
 TEST(ML_EM, accuracy) { CV_EMTest test; test.safe_run(); }
 TEST(ML_EM, save_load) { CV_EMTest_SaveLoad test; test.safe_run(); }
 TEST(ML_EM, classification) { CV_EMTest_Classification test; test.safe_run(); }
-
diff --git a/modules/ml/test/test_precomp.cpp b/modules/ml/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/ml/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/nonfree/doc/feature_detection.rst b/modules/nonfree/doc/feature_detection.rst
index 0b7956080..9f66271aa 100644
--- a/modules/nonfree/doc/feature_detection.rst
+++ b/modules/nonfree/doc/feature_detection.rst
@@ -16,6 +16,8 @@ The SIFT constructors.
 
 .. ocv:function:: SIFT::SIFT( int nfeatures=0, int nOctaveLayers=3, double contrastThreshold=0.04, double edgeThreshold=10, double sigma=1.6)
 
+.. ocv:pyfunction:: cv2.SIFT([, nfeatures[, nOctaveLayers[, contrastThreshold[, edgeThreshold[, sigma]]]]]) -> <SIFT object>
+
     :param nfeatures: The number of best features to retain. The features are ranked by their scores (measured in SIFT algorithm as the local contrast)
 
     :param nOctaveLayers: The number of layers in each octave. 3 is the value used in D. Lowe paper. The number of octaves is computed automatically from the image resolution.
@@ -33,6 +35,12 @@ Extract features and computes their descriptors using SIFT algorithm
 
 .. ocv:function:: void SIFT::operator()(InputArray img, InputArray mask, vector<KeyPoint>& keypoints, OutputArray descriptors, bool useProvidedKeypoints=false)
 
+.. ocv:pyfunction:: cv2.SIFT.detect(image[, mask]) -> keypoints
+
+.. ocv:pyfunction:: cv2.SIFT.compute(image, keypoints[, descriptors]) -> keypoints, descriptors
+
+.. ocv:pyfunction:: cv2.SIFT.detectAndCompute(image, mask[, descriptors[, useProvidedKeypoints]]) -> keypoints, descriptors
+
     :param img: Input 8-bit grayscale image
 
     :param mask: Optional input mask that marks the regions where we should detect features.
@@ -43,6 +51,7 @@ Extract features and computes their descriptors using SIFT algorithm
 
     :param useProvidedKeypoints: Boolean flag. If it is true, the keypoint detector is not run. Instead, the provided vector of keypoints is used and the algorithm just computes their descriptors.
 
+.. note:: Python API provides three functions. First one finds keypoints only. Second function computes the descriptors based on the keypoints we provide. Third function detects the keypoints and computes their descriptors. If you want both keypoints and descriptors, directly use third function as ``kp, des = cv2.SIFT.detectAndCompute(image, None)``
 
 SURF
 ----
@@ -75,6 +84,10 @@ SURF
 
 .. [Bay06] Bay, H. and Tuytelaars, T. and Van Gool, L. "SURF: Speeded Up Robust Features", 9th European Conference on Computer Vision, 2006
 
+.. note::
+
+   * An example using the SURF feature detector can be found at opencv_source_code/samples/cpp/generic_descriptor_match.cpp
+   * Another example using the SURF feature detector, extractor and matcher can be found at opencv_source_code/samples/cpp/matcher_simple.cpp
 
 SURF::SURF
 ----------
@@ -105,6 +118,8 @@ Detects keypoints and computes SURF descriptors for them.
 .. ocv:function:: void SURF::operator()(InputArray img, InputArray mask, vector<KeyPoint>& keypoints, OutputArray descriptors, bool useProvidedKeypoints=false)
 
 .. ocv:pyfunction:: cv2.SURF.detect(image[, mask]) -> keypoints
+.. ocv:pyfunction:: cv2.SURF.compute(image, keypoints[, descriptors]) -> keypoints, descriptors
+.. ocv:pyfunction:: cv2.SURF.detectAndCompute(image, mask[, descriptors[, useProvidedKeypoints]]) -> keypoints, descriptors
 
 .. ocv:cfunction:: void cvExtractSURF( const CvArr* image, const CvArr* mask, CvSeq** keypoints, CvSeq** descriptors, CvMemStorage* storage, CvSURFParams params )
 
@@ -228,6 +243,9 @@ The class ``SURF_GPU`` uses some buffers and provides access to it. All buffers
 
 .. seealso:: :ocv:class:`SURF`
 
+.. note::
+
+   * An example for using the SURF keypoint matcher on GPU can be found at opencv_source_code/samples/gpu/surf_keypoint_matcher.cpp
 
 ocl::SURF_OCL
 -------------
@@ -325,4 +343,8 @@ The ``descriptors`` matrix is :math:`\texttt{nFeatures} \times \texttt{descripto
 
 The class ``SURF_OCL`` uses some buffers and provides access to it. All buffers can be safely released between function calls.
 
-.. seealso:: :ocv:class:`SURF`
\ No newline at end of file
+.. seealso:: :ocv:class:`SURF`
+
+.. note::
+
+   * OCL : An example of the SURF detector can be found at opencv_source_code/samples/ocl/surf_matcher.cpp
diff --git a/modules/nonfree/include/opencv2/nonfree/nonfree.hpp b/modules/nonfree/include/opencv2/nonfree/nonfree.hpp
index 5601147ae..c1bb651f0 100644
--- a/modules/nonfree/include/opencv2/nonfree/nonfree.hpp
+++ b/modules/nonfree/include/opencv2/nonfree/nonfree.hpp
@@ -45,4 +45,4 @@
 #error this is a compatibility header which should not be used inside the OpenCV library
 #endif
 
-#include "opencv2/nonfree.hpp"
\ No newline at end of file
+#include "opencv2/nonfree.hpp"
diff --git a/modules/nonfree/perf/perf_main.cpp b/modules/nonfree/perf/perf_main.cpp
index de1242149..d5f4a1a51 100644
--- a/modules/nonfree/perf/perf_main.cpp
+++ b/modules/nonfree/perf/perf_main.cpp
@@ -1,4 +1,11 @@
 #include "perf_precomp.hpp"
 #include "opencv2/ts/gpu_perf.hpp"
 
-CV_PERF_TEST_MAIN(nonfree, perf::printCudaInfo())
+static const char * impls[] = {
+#ifdef HAVE_CUDA
+    "cuda",
+#endif
+    "plain"
+};
+
+CV_PERF_TEST_MAIN_WITH_IMPLS(nonfree, impls, perf::printCudaInfo())
diff --git a/modules/nonfree/perf/perf_precomp.cpp b/modules/nonfree/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/nonfree/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/nonfree/perf/perf_precomp.hpp b/modules/nonfree/perf/perf_precomp.hpp
index 5e3f3c3cb..e8fad3b7b 100644
--- a/modules/nonfree/perf/perf_precomp.hpp
+++ b/modules/nonfree/perf/perf_precomp.hpp
@@ -14,6 +14,7 @@
 #include "opencv2/highgui.hpp"
 
 #include "opencv2/opencv_modules.hpp"
+#include "cvconfig.h"
 
 #ifdef HAVE_OPENCV_OCL
 #  include "opencv2/nonfree/ocl.hpp"
diff --git a/modules/nonfree/perf/perf_surf.ocl.cpp b/modules/nonfree/perf/perf_surf.ocl.cpp
index 23b1f1ecd..fdd1931bd 100644
--- a/modules/nonfree/perf/perf_surf.ocl.cpp
+++ b/modules/nonfree/perf/perf_surf.ocl.cpp
@@ -108,4 +108,4 @@ PERF_TEST_P(OCL_SURF, DISABLED_without_data_transfer, testing::Values(SURF_IMAGE
     SANITY_CHECK(cpu_dp, 1);
 }
 
-#endif // HAVE_OPENCV_OCL
\ No newline at end of file
+#endif // HAVE_OPENCV_OCL
diff --git a/modules/nonfree/src/nonfree_init.cpp b/modules/nonfree/src/nonfree_init.cpp
index 827853cfd..ac804dd9a 100644
--- a/modules/nonfree/src/nonfree_init.cpp
+++ b/modules/nonfree/src/nonfree_init.cpp
@@ -67,7 +67,7 @@ CV_INIT_ALGORITHM(SIFT, "Feature2D.SIFT",
 
 bool initModule_nonfree(void)
 {
-    Ptr<Algorithm> sift = createSIFT_hidden(), surf = createSURF_hidden();
+    Ptr<Algorithm> sift = createSIFT_ptr_hidden(), surf = createSURF_ptr_hidden();
     return sift->info() != 0 && surf->info() != 0;
 }
 
diff --git a/modules/nonfree/src/opencl/surf.cl b/modules/nonfree/src/opencl/surf.cl
index 197571365..aace143d5 100644
--- a/modules/nonfree/src/opencl/surf.cl
+++ b/modules/nonfree/src/opencl/surf.cl
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Peng Xiao, pengxiao@multicorewareinc.com
+//    Sen Liu, swjtuls1987@126.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -43,9 +44,6 @@
 //
 //M*/
 
-#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
-#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
-
 // specialized for non-image2d_t supported platform, intel HD4000, for example
 #ifdef DISABLE_IMAGE2D
 #define IMAGE_INT32 __global uint  *
@@ -105,7 +103,7 @@ __constant sampler_t sampler    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAM
 // for simple haar paatern
 float icvCalcHaarPatternSum_2(
     IMAGE_INT32 sumTex,
-    __constant float src[2][5],
+    __constant float2 *src,
     int oldSize,
     int newSize,
     int y, int x,
@@ -116,21 +114,24 @@ float icvCalcHaarPatternSum_2(
 
     F d = 0;
 
-#pragma unroll
-    for (int k = 0; k < 2; ++k)
-    {
-        int dx1 = convert_int_rte(ratio * src[k][0]);
-        int dy1 = convert_int_rte(ratio * src[k][1]);
-        int dx2 = convert_int_rte(ratio * src[k][2]);
-        int dy2 = convert_int_rte(ratio * src[k][3]);
+    int2 dx1 = convert_int2_rte(ratio * src[0]);
+    int2 dy1 = convert_int2_rte(ratio * src[1]);
+    int2 dx2 = convert_int2_rte(ratio * src[2]);
+    int2 dy2 = convert_int2_rte(ratio * src[3]);
 
-        F t = 0;
-        t += read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy1), rows, cols, elemPerRow );
-        t -= read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy2), rows, cols, elemPerRow );
-        t -= read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy1), rows, cols, elemPerRow );
-        t += read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy2), rows, cols, elemPerRow );
-        d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1));
-    }
+    F t = 0;
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
+    d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
+
+    t = 0;
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
+    d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
 
     return (float)d;
 }
@@ -138,7 +139,7 @@ float icvCalcHaarPatternSum_2(
 // N = 3
 float icvCalcHaarPatternSum_3(
     IMAGE_INT32 sumTex,
-    __constant float src[2][5],
+    __constant float4 *src,
     int oldSize,
     int newSize,
     int y, int x,
@@ -149,21 +150,31 @@ float icvCalcHaarPatternSum_3(
 
     F d = 0;
 
-#pragma unroll
-    for (int k = 0; k < 3; ++k)
-    {
-        int dx1 = convert_int_rte(ratio * src[k][0]);
-        int dy1 = convert_int_rte(ratio * src[k][1]);
-        int dx2 = convert_int_rte(ratio * src[k][2]);
-        int dy2 = convert_int_rte(ratio * src[k][3]);
+    int4 dx1 = convert_int4_rte(ratio * src[0]);
+    int4 dy1 = convert_int4_rte(ratio * src[1]);
+    int4 dx2 = convert_int4_rte(ratio * src[2]);
+    int4 dy2 = convert_int4_rte(ratio * src[3]);
 
-        F t = 0;
-        t += read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy1), rows, cols, elemPerRow );
-        t -= read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy2), rows, cols, elemPerRow );
-        t -= read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy1), rows, cols, elemPerRow );
-        t += read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy2), rows, cols, elemPerRow );
-        d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1));
-    }
+    F t = 0;
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
+    d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
+
+    t = 0;
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
+    d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
+
+    t = 0;
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
+    d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
 
     return (float)d;
 }
@@ -171,7 +182,7 @@ float icvCalcHaarPatternSum_3(
 // N = 4
 float icvCalcHaarPatternSum_4(
     IMAGE_INT32 sumTex,
-    __constant float src[2][5],
+    __constant float4 *src,
     int oldSize,
     int newSize,
     int y, int x,
@@ -182,21 +193,38 @@ float icvCalcHaarPatternSum_4(
 
     F d = 0;
 
-#pragma unroll
-    for (int k = 0; k < 4; ++k)
-    {
-        int dx1 = convert_int_rte(ratio * src[k][0]);
-        int dy1 = convert_int_rte(ratio * src[k][1]);
-        int dx2 = convert_int_rte(ratio * src[k][2]);
-        int dy2 = convert_int_rte(ratio * src[k][3]);
+    int4 dx1 = convert_int4_rte(ratio * src[0]);
+    int4 dy1 = convert_int4_rte(ratio * src[1]);
+    int4 dx2 = convert_int4_rte(ratio * src[2]);
+    int4 dy2 = convert_int4_rte(ratio * src[3]);
 
-        F t = 0;
-        t += read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy1), rows, cols, elemPerRow );
-        t -= read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy2), rows, cols, elemPerRow );
-        t -= read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy1), rows, cols, elemPerRow );
-        t += read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy2), rows, cols, elemPerRow );
-        d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1));
-    }
+    F t = 0;
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
+    d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
+
+    t = 0;
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
+    d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
+
+    t = 0;
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
+    d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
+
+    t = 0;
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy1.w), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy2.w), rows, cols, elemPerRow );
+    t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy1.w), rows, cols, elemPerRow );
+    t += read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy2.w), rows, cols, elemPerRow );
+    d += t * src[4].w / ((dx2.w - dx1.w) * (dy2.w - dy1.w));
 
     return (float)d;
 }
@@ -204,9 +232,9 @@ float icvCalcHaarPatternSum_4(
 ////////////////////////////////////////////////////////////////////////
 // Hessian
 
-__constant float c_DX [3][5] = { {0, 2, 3, 7, 1}, {3, 2, 6, 7, -2}, {6, 2, 9, 7, 1} };
-__constant float c_DY [3][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
-__constant float c_DXY[4][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };
+__constant float4 c_DX[5] = { (float4)(0, 3, 6, 0), (float4)(2, 2, 2, 0), (float4)(3, 6, 9, 0), (float4)(7, 7, 7, 0), (float4)(1, -2, 1, 0) };
+__constant float4 c_DY[5] = { (float4)(2, 2, 2, 0), (float4)(0, 3, 6, 0), (float4)(7, 7, 7, 0), (float4)(3, 6, 9, 0), (float4)(1, -2, 1, 0) };
+__constant float4 c_DXY[5] = { (float4)(1, 5, 1, 5), (float4)(1, 1, 5, 5), (float4)(4, 8, 4, 8), (float4)(4, 4, 8, 8), (float4)(1, -1, -1, 1) };// Use integral image to calculate haar wavelets.
 
 __inline int calcSize(int octave, int layer)
 {
@@ -236,7 +264,7 @@ __kernel void icvCalcLayerDetAndTrace(
     int c_octave,
     int c_layer_rows,
     int sumTex_step
-    )
+)
 {
     det_step   /= sizeof(*det);
     trace_step /= sizeof(*trace);
@@ -300,7 +328,7 @@ bool within_check(IMAGE_INT32 maskSumTex, int sum_i, int sum_j, int size, int ro
 
 // Non-maximal suppression to further filtering the candidates from previous step
 __kernel
-    void icvFindMaximaInLayer_withmask(
+void icvFindMaximaInLayer_withmask(
     __global const float * det,
     __global const float * trace,
     __global int4 * maxPosBuffer,
@@ -318,7 +346,7 @@ __kernel
     float c_hessianThreshold,
     IMAGE_INT32 maskSumTex,
     int mask_step
-    )
+)
 {
     volatile __local  float N9[768]; // threads.x * threads.y * 3
 
@@ -347,26 +375,26 @@ __kernel
     const int localLin = get_local_id(0) + get_local_id(1) * get_local_size(0) + zoff;
     N9[localLin - zoff] =
         det[det_step *
-        (c_layer_rows * (layer - 1) + min(max(i, 0), c_img_rows - 1)) // y
-        + min(max(j, 0), c_img_cols - 1)];                            // x
+            (c_layer_rows * (layer - 1) + min(max(i, 0), c_img_rows - 1)) // y
+            + min(max(j, 0), c_img_cols - 1)];                            // x
     N9[localLin       ] =
         det[det_step *
-        (c_layer_rows * (layer    ) + min(max(i, 0), c_img_rows - 1)) // y
-        + min(max(j, 0), c_img_cols - 1)];                            // x
+            (c_layer_rows * (layer    ) + min(max(i, 0), c_img_rows - 1)) // y
+            + min(max(j, 0), c_img_cols - 1)];                            // x
     N9[localLin + zoff] =
         det[det_step *
-        (c_layer_rows * (layer + 1) + min(max(i, 0), c_img_rows - 1)) // y
-        + min(max(j, 0), c_img_cols - 1)];                            // x
+            (c_layer_rows * (layer + 1) + min(max(i, 0), c_img_rows - 1)) // y
+            + min(max(j, 0), c_img_cols - 1)];                            // x
 
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (i < c_layer_rows - margin
-        && j < c_layer_cols - margin
-        && get_local_id(0) > 0
-        && get_local_id(0) < get_local_size(0) - 1
-        && get_local_id(1) > 0
-        && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA
-        )
+            && j < c_layer_cols - margin
+            && get_local_id(0) > 0
+            && get_local_id(0) < get_local_size(0) - 1
+            && get_local_id(1) > 0
+            && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA
+       )
     {
         float val0 = N9[localLin];
 
@@ -382,34 +410,34 @@ __kernel
             {
                 // Check to see if we have a max (in its 26 neighbours)
                 const bool condmax = val0 > N9[localLin - 1 - get_local_size(0) - zoff]
-                &&                   val0 > N9[localLin     - get_local_size(0) - zoff]
-                &&                   val0 > N9[localLin + 1 - get_local_size(0) - zoff]
-                &&                   val0 > N9[localLin - 1                     - zoff]
-                &&                   val0 > N9[localLin                         - zoff]
-                &&                   val0 > N9[localLin + 1                     - zoff]
-                &&                   val0 > N9[localLin - 1 + get_local_size(0) - zoff]
-                &&                   val0 > N9[localLin     + get_local_size(0) - zoff]
-                &&                   val0 > N9[localLin + 1 + get_local_size(0) - zoff]
+                                     &&                   val0 > N9[localLin     - get_local_size(0) - zoff]
+                                     &&                   val0 > N9[localLin + 1 - get_local_size(0) - zoff]
+                                     &&                   val0 > N9[localLin - 1                     - zoff]
+                                     &&                   val0 > N9[localLin                         - zoff]
+                                     &&                   val0 > N9[localLin + 1                     - zoff]
+                                     &&                   val0 > N9[localLin - 1 + get_local_size(0) - zoff]
+                                     &&                   val0 > N9[localLin     + get_local_size(0) - zoff]
+                                     &&                   val0 > N9[localLin + 1 + get_local_size(0) - zoff]
 
-                &&                   val0 > N9[localLin - 1 - get_local_size(0)]
-                &&                   val0 > N9[localLin     - get_local_size(0)]
-                &&                   val0 > N9[localLin + 1 - get_local_size(0)]
-                &&                   val0 > N9[localLin - 1                    ]
-                &&                   val0 > N9[localLin + 1                    ]
-                &&                   val0 > N9[localLin - 1 + get_local_size(0)]
-                &&                   val0 > N9[localLin     + get_local_size(0)]
-                &&                   val0 > N9[localLin + 1 + get_local_size(0)]
+                                     &&                   val0 > N9[localLin - 1 - get_local_size(0)]
+                                     &&                   val0 > N9[localLin     - get_local_size(0)]
+                                     &&                   val0 > N9[localLin + 1 - get_local_size(0)]
+                                     &&                   val0 > N9[localLin - 1                    ]
+                                     &&                   val0 > N9[localLin + 1                    ]
+                                     &&                   val0 > N9[localLin - 1 + get_local_size(0)]
+                                     &&                   val0 > N9[localLin     + get_local_size(0)]
+                                     &&                   val0 > N9[localLin + 1 + get_local_size(0)]
 
-                &&                   val0 > N9[localLin - 1 - get_local_size(0) + zoff]
-                &&                   val0 > N9[localLin     - get_local_size(0) + zoff]
-                &&                   val0 > N9[localLin + 1 - get_local_size(0) + zoff]
-                &&                   val0 > N9[localLin - 1                     + zoff]
-                &&                   val0 > N9[localLin                         + zoff]
-                &&                   val0 > N9[localLin + 1                     + zoff]
-                &&                   val0 > N9[localLin - 1 + get_local_size(0) + zoff]
-                &&                   val0 > N9[localLin     + get_local_size(0) + zoff]
-                &&                   val0 > N9[localLin + 1 + get_local_size(0) + zoff]
-                ;
+                                     &&                   val0 > N9[localLin - 1 - get_local_size(0) + zoff]
+                                     &&                   val0 > N9[localLin     - get_local_size(0) + zoff]
+                                     &&                   val0 > N9[localLin + 1 - get_local_size(0) + zoff]
+                                     &&                   val0 > N9[localLin - 1                     + zoff]
+                                     &&                   val0 > N9[localLin                         + zoff]
+                                     &&                   val0 > N9[localLin + 1                     + zoff]
+                                     &&                   val0 > N9[localLin - 1 + get_local_size(0) + zoff]
+                                     &&                   val0 > N9[localLin     + get_local_size(0) + zoff]
+                                     &&                   val0 > N9[localLin + 1 + get_local_size(0) + zoff]
+                                     ;
 
                 if(condmax)
                 {
@@ -428,7 +456,7 @@ __kernel
 }
 
 __kernel
-    void icvFindMaximaInLayer(
+void icvFindMaximaInLayer(
     __global float * det,
     __global float * trace,
     __global int4 * maxPosBuffer,
@@ -444,7 +472,7 @@ __kernel
     int c_layer_cols,
     int c_max_candidates,
     float c_hessianThreshold
-    )
+)
 {
     volatile __local  float N9[768]; // threads.x * threads.y * 3
 
@@ -483,12 +511,12 @@ __kernel
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (i < c_layer_rows - margin
-        && j < c_layer_cols - margin
-        && get_local_id(0) > 0
-        && get_local_id(0) < get_local_size(0) - 1
-        && get_local_id(1) > 0
-        && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA
-        )
+            && j < c_layer_cols - margin
+            && get_local_id(0) > 0
+            && get_local_id(0) < get_local_size(0) - 1
+            && get_local_id(1) > 0
+            && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA
+       )
     {
         float val0 = N9[localLin];
         if (val0 > c_hessianThreshold)
@@ -499,38 +527,38 @@ __kernel
 
             // Check to see if we have a max (in its 26 neighbours)
             const bool condmax = val0 > N9[localLin - 1 - get_local_size(0) - zoff]
-            &&                   val0 > N9[localLin     - get_local_size(0) - zoff]
-            &&                   val0 > N9[localLin + 1 - get_local_size(0) - zoff]
-            &&                   val0 > N9[localLin - 1                     - zoff]
-            &&                   val0 > N9[localLin                         - zoff]
-            &&                   val0 > N9[localLin + 1                     - zoff]
-            &&                   val0 > N9[localLin - 1 + get_local_size(0) - zoff]
-            &&                   val0 > N9[localLin     + get_local_size(0) - zoff]
-            &&                   val0 > N9[localLin + 1 + get_local_size(0) - zoff]
+                                 &&                   val0 > N9[localLin     - get_local_size(0) - zoff]
+                                 &&                   val0 > N9[localLin + 1 - get_local_size(0) - zoff]
+                                 &&                   val0 > N9[localLin - 1                     - zoff]
+                                 &&                   val0 > N9[localLin                         - zoff]
+                                 &&                   val0 > N9[localLin + 1                     - zoff]
+                                 &&                   val0 > N9[localLin - 1 + get_local_size(0) - zoff]
+                                 &&                   val0 > N9[localLin     + get_local_size(0) - zoff]
+                                 &&                   val0 > N9[localLin + 1 + get_local_size(0) - zoff]
 
-            &&                   val0 > N9[localLin - 1 - get_local_size(0)]
-            &&                   val0 > N9[localLin     - get_local_size(0)]
-            &&                   val0 > N9[localLin + 1 - get_local_size(0)]
-            &&                   val0 > N9[localLin - 1                    ]
-            &&                   val0 > N9[localLin + 1                    ]
-            &&                   val0 > N9[localLin - 1 + get_local_size(0)]
-            &&                   val0 > N9[localLin     + get_local_size(0)]
-            &&                   val0 > N9[localLin + 1 + get_local_size(0)]
+                                 &&                   val0 > N9[localLin - 1 - get_local_size(0)]
+                                 &&                   val0 > N9[localLin     - get_local_size(0)]
+                                 &&                   val0 > N9[localLin + 1 - get_local_size(0)]
+                                 &&                   val0 > N9[localLin - 1                    ]
+                                 &&                   val0 > N9[localLin + 1                    ]
+                                 &&                   val0 > N9[localLin - 1 + get_local_size(0)]
+                                 &&                   val0 > N9[localLin     + get_local_size(0)]
+                                 &&                   val0 > N9[localLin + 1 + get_local_size(0)]
 
-            &&                   val0 > N9[localLin - 1 - get_local_size(0) + zoff]
-            &&                   val0 > N9[localLin     - get_local_size(0) + zoff]
-            &&                   val0 > N9[localLin + 1 - get_local_size(0) + zoff]
-            &&                   val0 > N9[localLin - 1                     + zoff]
-            &&                   val0 > N9[localLin                         + zoff]
-            &&                   val0 > N9[localLin + 1                     + zoff]
-            &&                   val0 > N9[localLin - 1 + get_local_size(0) + zoff]
-            &&                   val0 > N9[localLin     + get_local_size(0) + zoff]
-            &&                   val0 > N9[localLin + 1 + get_local_size(0) + zoff]
-            ;
+                                 &&                   val0 > N9[localLin - 1 - get_local_size(0) + zoff]
+                                 &&                   val0 > N9[localLin     - get_local_size(0) + zoff]
+                                 &&                   val0 > N9[localLin + 1 - get_local_size(0) + zoff]
+                                 &&                   val0 > N9[localLin - 1                     + zoff]
+                                 &&                   val0 > N9[localLin                         + zoff]
+                                 &&                   val0 > N9[localLin + 1                     + zoff]
+                                 &&                   val0 > N9[localLin - 1 + get_local_size(0) + zoff]
+                                 &&                   val0 > N9[localLin     + get_local_size(0) + zoff]
+                                 &&                   val0 > N9[localLin + 1 + get_local_size(0) + zoff]
+                                 ;
 
             if(condmax)
             {
-                 int ind = atomic_inc(maxCounter);
+                int ind = atomic_inc(maxCounter);
 
                 if (ind < c_max_candidates)
                 {
@@ -544,30 +572,30 @@ __kernel
 }
 
 // solve 3x3 linear system Ax=b for floating point input
-inline bool solve3x3_float(volatile __local  const float A[3][3], volatile __local  const float b[3], volatile __local  float x[3])
+inline bool solve3x3_float(volatile __local  const float4 *A, volatile __local  const float *b, volatile __local  float *x)
 {
-    float det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
-        - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
-        + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
+    float det = A[0].x * (A[1].y * A[2].z - A[1].z * A[2].y)
+                - A[0].y * (A[1].x * A[2].z - A[1].z * A[2].x)
+                + A[0].z * (A[1].x * A[2].y - A[1].y * A[2].x);
 
     if (det != 0)
     {
         F invdet = 1.0 / det;
 
         x[0] = invdet *
-            (b[0]    * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
-            A[0][1] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) +
-            A[0][2] * (b[1]    * A[2][1] - A[1][1] * b[2]   ));
+               (b[0]    * (A[1].y * A[2].z - A[1].z * A[2].y) -
+                A[0].y * (b[1]    * A[2].z - A[1].z * b[2]   ) +
+                A[0].z * (b[1]    * A[2].y - A[1].y * b[2]   ));
 
         x[1] = invdet *
-            (A[0][0] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) -
-            b[0]    * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
-            A[0][2] * (A[1][0] * b[2]    - b[1]    * A[2][0]));
+               (A[0].x * (b[1]    * A[2].z - A[1].z * b[2]   ) -
+                b[0]    * (A[1].x * A[2].z - A[1].z * A[2].x) +
+                A[0].z * (A[1].x * b[2]    - b[1]    * A[2].x));
 
         x[2] = invdet *
-            (A[0][0] * (A[1][1] * b[2]    - b[1]    * A[2][1]) -
-            A[0][1] * (A[1][0] * b[2]    - b[1]    * A[2][0]) +
-            b[0]    * (A[1][0] * A[2][1] - A[1][1] * A[2][0]));
+               (A[0].x * (A[1].y * b[2]    - b[1]    * A[2].y) -
+                A[0].y * (A[1].x * b[2]    - b[1]    * A[2].x) +
+                b[0]    * (A[1].x * A[2].y - A[1].y * A[2].x));
 
         return true;
     }
@@ -586,7 +614,7 @@ inline bool solve3x3_float(volatile __local  const float A[3][3], volatile __loc
 ////////////////////////////////////////////////////////////////////////
 // INTERPOLATION
 __kernel
-    void icvInterpolateKeypoint(
+void icvInterpolateKeypoint(
     __global const float * det,
     __global const int4 * maxPosBuffer,
     __global float * keypoints,
@@ -598,7 +626,7 @@ __kernel
     int c_octave,
     int c_layer_rows,
     int c_max_features
-    )
+)
 {
     det_step /= sizeof(*det);
     keypoints_step /= sizeof(*keypoints);
@@ -632,26 +660,26 @@ __kernel
         //ds
         dD[2] = -0.5f * (N9[2][1][1] - N9[0][1][1]);
 
-        volatile __local  float H[3][3];
+        volatile __local  float4 H[3];
 
         //dxx
-        H[0][0] = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2];
+        H[0].x = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2];
         //dxy
-        H[0][1]= 0.25f * (N9[1][2][2] - N9[1][2][0] - N9[1][0][2] + N9[1][0][0]);
+        H[0].y= 0.25f * (N9[1][2][2] - N9[1][2][0] - N9[1][0][2] + N9[1][0][0]);
         //dxs
-        H[0][2]= 0.25f * (N9[2][1][2] - N9[2][1][0] - N9[0][1][2] + N9[0][1][0]);
+        H[0].z= 0.25f * (N9[2][1][2] - N9[2][1][0] - N9[0][1][2] + N9[0][1][0]);
         //dyx = dxy
-        H[1][0] = H[0][1];
+        H[1].x = H[0].y;
         //dyy
-        H[1][1] = N9[1][0][1] - 2.0f * N9[1][1][1] + N9[1][2][1];
+        H[1].y = N9[1][0][1] - 2.0f * N9[1][1][1] + N9[1][2][1];
         //dys
-        H[1][2]= 0.25f * (N9[2][2][1] - N9[2][0][1] - N9[0][2][1] + N9[0][0][1]);
+        H[1].z= 0.25f * (N9[2][2][1] - N9[2][0][1] - N9[0][2][1] + N9[0][0][1]);
         //dsx = dxs
-        H[2][0] = H[0][2];
+        H[2].x = H[0].z;
         //dsy = dys
-        H[2][1] = H[1][2];
+        H[2].y = H[1].z;
         //dss
-        H[2][2] = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];
+        H[2].z = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];
 
         volatile __local  float x[3];
 
@@ -689,7 +717,7 @@ __kernel
                 if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
                 {
                     // Get a new feature index.
-                     int ind = atomic_inc(featureCounter);
+                    int ind = atomic_inc(featureCounter);
 
                     if (ind < c_max_features)
                     {
@@ -716,31 +744,32 @@ __kernel
 __constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6};
 __constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0};
 __constant float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448408f, 0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f,
-    0.003238451667129993f, 0.002547456417232752f, 0.001707611023448408f, 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f,
-    0.00665318313986063f, 0.00720730796456337f, 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f,
-    0.002003900473937392f, 0.001707611023448408f, 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f,
-    0.01164754293859005f, 0.01261763460934162f, 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f,
-    0.0035081731621176f, 0.001707611023448408f, 0.002547456417232752f, 0.005233579315245152f, 0.009162282571196556f,
-    0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, 0.01366852037608624f,
-    0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.003238451667129993f, 0.00665318313986063f,
-    0.01164754293859005f, 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f,
-    0.01737609319388866f, 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.001455130288377404f,
-    0.0035081731621176f, 0.00720730796456337f, 0.01261763460934162f, 0.0188232995569706f, 0.02392910048365593f,
-    0.02592208795249462f, 0.02392910048365593f, 0.0188232995569706f, 0.01261763460934162f, 0.00720730796456337f,
-    0.0035081731621176f, 0.001455130288377404f, 0.003238451667129993f, 0.00665318313986063f, 0.01164754293859005f,
-    0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, 0.01737609319388866f,
-    0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.002547456417232752f, 0.005233579315245152f,
-    0.009162282571196556f, 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f,
-    0.01366852037608624f, 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.001707611023448408f,
-    0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, 0.01164754293859005f, 0.01261763460934162f,
-    0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, 0.0035081731621176f, 0.001707611023448408f,
-    0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, 0.00665318313986063f, 0.00720730796456337f,
-    0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, 0.002003900473937392f, 0.001707611023448408f,
-    0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, 0.003238451667129993f, 0.002547456417232752f,
-    0.001707611023448408f, 0.001455130288377404f};
+                                        0.003238451667129993f, 0.002547456417232752f, 0.001707611023448408f, 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f,
+                                        0.00665318313986063f, 0.00720730796456337f, 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f,
+                                        0.002003900473937392f, 0.001707611023448408f, 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f,
+                                        0.01164754293859005f, 0.01261763460934162f, 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f,
+                                        0.0035081731621176f, 0.001707611023448408f, 0.002547456417232752f, 0.005233579315245152f, 0.009162282571196556f,
+                                        0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, 0.01366852037608624f,
+                                        0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.003238451667129993f, 0.00665318313986063f,
+                                        0.01164754293859005f, 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f,
+                                        0.01737609319388866f, 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.001455130288377404f,
+                                        0.0035081731621176f, 0.00720730796456337f, 0.01261763460934162f, 0.0188232995569706f, 0.02392910048365593f,
+                                        0.02592208795249462f, 0.02392910048365593f, 0.0188232995569706f, 0.01261763460934162f, 0.00720730796456337f,
+                                        0.0035081731621176f, 0.001455130288377404f, 0.003238451667129993f, 0.00665318313986063f, 0.01164754293859005f,
+                                        0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, 0.01737609319388866f,
+                                        0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.002547456417232752f, 0.005233579315245152f,
+                                        0.009162282571196556f, 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f,
+                                        0.01366852037608624f, 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.001707611023448408f,
+                                        0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, 0.01164754293859005f, 0.01261763460934162f,
+                                        0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, 0.0035081731621176f, 0.001707611023448408f,
+                                        0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, 0.00665318313986063f, 0.00720730796456337f,
+                                        0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, 0.002003900473937392f, 0.001707611023448408f,
+                                        0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, 0.003238451667129993f, 0.002547456417232752f,
+                                        0.001707611023448408f, 0.001455130288377404f
+                                       };
 
-__constant float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
-__constant float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
+__constant float2 c_NX[5] = { (float2)(0, 2), (float2)(0, 0), (float2)(2, 4), (float2)(4, 4), (float2)(-1, 1) };
+__constant float2 c_NY[5] = { (float2)(0, 0), (float2)(0, 2), (float2)(4, 4), (float2)(2, 4), (float2)(1, -1) };
 
 void reduce_32_sum(volatile __local  float * data, volatile float* partial_reduction, int tid)
 {
@@ -759,14 +788,14 @@ void reduce_32_sum(volatile __local  float * data, volatile float* partial_reduc
     if (tid < 8)
     {
 #endif
-        data[tid] = *partial_reduction = op(partial_reduction, data[tid + 8 ]);
+        data[tid] = *partial_reduction = op(partial_reduction, data[tid + 8]);
 #if WAVE_SIZE < 8
     }
     barrier(CLK_LOCAL_MEM_FENCE);
     if (tid < 4)
     {
 #endif
-        data[tid] = *partial_reduction = op(partial_reduction, data[tid + 4 ]);
+        data[tid] = *partial_reduction = op(partial_reduction, data[tid + 4]);
 #if WAVE_SIZE < 4
     }
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -787,14 +816,14 @@ void reduce_32_sum(volatile __local  float * data, volatile float* partial_reduc
 }
 
 __kernel
-    void icvCalcOrientation(
+void icvCalcOrientation(
     IMAGE_INT32 sumTex,
     __global float * keypoints,
     int keypoints_step,
     int c_img_rows,
     int c_img_cols,
     int sum_step
-    )
+)
 {
     keypoints_step /= sizeof(*keypoints);
     sum_step       /= sizeof(uint);
@@ -838,7 +867,7 @@ __kernel
         const int y = convert_int_rte(featureY[get_group_id(0)] + c_aptY[tid] * s - margin);
 
         if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
-            x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
+                x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
         {
             X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
             Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
@@ -934,11 +963,11 @@ __kernel
 
 
 __kernel
-    void icvSetUpright(
+void icvSetUpright(
     __global float * keypoints,
     int keypoints_step,
     int nFeatures
-    )
+)
 {
     keypoints_step /= sizeof(*keypoints);
     __global float* featureDir  = keypoints + ANGLE_ROW * keypoints_step;
@@ -988,7 +1017,7 @@ inline uchar readerGet(
     IMAGE_INT8 src,
     const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir,
     int i, int j, int rows, int cols, int elemPerRow
-    )
+)
 {
     float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
     float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
@@ -999,7 +1028,7 @@ inline float linearFilter(
     IMAGE_INT8 src,
     const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir,
     float y, float x, int rows, int cols, int elemPerRow
-    )
+)
 {
     x -= 0.5f;
     y -= 0.5f;
@@ -1028,9 +1057,9 @@ inline float linearFilter(
 
 void calc_dx_dy(
     IMAGE_INT8 imgTex,
-    volatile __local  float s_dx_bin[25],
-    volatile __local  float s_dy_bin[25],
-    volatile __local  float s_PATCH[6][6],
+    volatile __local  float *s_dx_bin,
+    volatile __local  float *s_dy_bin,
+    volatile __local  float *s_PATCH,
     __global const float* featureX,
     __global const float* featureY,
     __global const float* featureSize,
@@ -1038,7 +1067,7 @@ void calc_dx_dy(
     int rows,
     int cols,
     int elemPerRow
-    )
+)
 {
     const float centerX = featureX[get_group_id(0)];
     const float centerY = featureY[get_group_id(0)];
@@ -1048,6 +1077,7 @@ void calc_dx_dy(
     {
         descriptor_dir = 0.0f;
     }
+
     descriptor_dir *= (float)(CV_PI_F / 180.0f);
 
     /* The sampling intervals and wavelet sized for selecting an orientation
@@ -1074,7 +1104,7 @@ void calc_dx_dy(
     const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
     const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
 
-    s_PATCH[get_local_id(1)][get_local_id(0)] = linearFilter(imgTex, centerX, centerY, win_offset, cos_dir, sin_dir, icoo, jcoo, rows, cols, elemPerRow);
+    s_PATCH[get_local_id(1) * 6 + get_local_id(0)] = linearFilter(imgTex, centerX, centerY, win_offset, cos_dir, sin_dir, icoo, jcoo, rows, cols, elemPerRow);
 
     barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -1085,17 +1115,17 @@ void calc_dx_dy(
         const float dw = c_DW[yIndex * PATCH_SZ + xIndex];
 
         const float vx = (
-            s_PATCH[get_local_id(1)    ][get_local_id(0) + 1] -
-            s_PATCH[get_local_id(1)    ][get_local_id(0)    ] +
-            s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] -
-            s_PATCH[get_local_id(1) + 1][get_local_id(0)    ])
-            * dw;
+                             s_PATCH[      get_local_id(1) * 6 + get_local_id(0) + 1] -
+                             s_PATCH[      get_local_id(1) * 6 + get_local_id(0)    ] +
+                             s_PATCH[(get_local_id(1) + 1) * 6 + get_local_id(0) + 1] -
+                             s_PATCH[(get_local_id(1) + 1) * 6 + get_local_id(0)    ])
+                         * dw;
         const float vy = (
-            s_PATCH[get_local_id(1) + 1][get_local_id(0)    ] -
-            s_PATCH[get_local_id(1)    ][get_local_id(0)    ] +
-            s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] -
-            s_PATCH[get_local_id(1)    ][get_local_id(0) + 1])
-            * dw;
+                             s_PATCH[(get_local_id(1) + 1) * 6 + get_local_id(0)    ] -
+                             s_PATCH[      get_local_id(1) * 6 + get_local_id(0)    ] +
+                             s_PATCH[(get_local_id(1) + 1) * 6 + get_local_id(0) + 1] -
+                             s_PATCH[      get_local_id(1) * 6 + get_local_id(0) + 1])
+                         * dw;
         s_dx_bin[tid] = vx;
         s_dy_bin[tid] = vy;
     }
@@ -1106,7 +1136,7 @@ void reduce_sum25(
     volatile __local  float* sdata3,
     volatile __local  float* sdata4,
     int tid
-    )
+)
 {
 #ifndef WAVE_SIZE
 #define WAVE_SIZE 1
@@ -1125,11 +1155,8 @@ void reduce_sum25(
     {
 #endif
         sdata1[tid] += sdata1[tid + 8];
-
         sdata2[tid] += sdata2[tid + 8];
-
         sdata3[tid] += sdata3[tid + 8];
-
         sdata4[tid] += sdata4[tid + 8];
 #if WAVE_SIZE < 8
     }
@@ -1166,7 +1193,7 @@ void reduce_sum25(
 }
 
 __kernel
-    void compute_descriptors64(
+void compute_descriptors64(
     IMAGE_INT8 imgTex,
     __global float * descriptors,
     __global const float * keypoints,
@@ -1175,7 +1202,7 @@ __kernel
     int rows,
     int cols,
     int img_step
-    )
+)
 {
     descriptors_step /= sizeof(float);
     keypoints_step   /= sizeof(float);
@@ -1189,7 +1216,7 @@ __kernel
     volatile __local  float sdy[25];
     volatile __local  float sdxabs[25];
     volatile __local  float sdyabs[25];
-    volatile __local  float s_PATCH[6][6];
+    volatile __local  float s_PATCH[6*6];
 
     calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir, rows, cols, img_step);
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -1203,7 +1230,7 @@ __kernel
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
-        reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
+    reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
 
     barrier(CLK_LOCAL_MEM_FENCE);
     if (tid < 25)
@@ -1221,7 +1248,7 @@ __kernel
     }
 }
 __kernel
-    void compute_descriptors128(
+void compute_descriptors128(
     IMAGE_INT8 imgTex,
     __global float * descriptors,
     __global float * keypoints,
@@ -1230,7 +1257,7 @@ __kernel
     int rows,
     int cols,
     int img_step
-    )
+)
 {
     descriptors_step /= sizeof(*descriptors);
     keypoints_step   /= sizeof(*keypoints);
@@ -1249,7 +1276,7 @@ __kernel
     volatile __local  float sd2[25];
     volatile __local  float sdabs1[25];
     volatile __local  float sdabs2[25];
-    volatile __local  float s_PATCH[6][6];
+    volatile __local  float s_PATCH[6*6];
 
     calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir, rows, cols, img_step);
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -1275,7 +1302,7 @@ __kernel
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
-        reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
+    reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
     barrier(CLK_LOCAL_MEM_FENCE);
 
     __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3);
@@ -1306,8 +1333,7 @@ __kernel
         }
     }
     barrier(CLK_LOCAL_MEM_FENCE);
-
-        reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
+    reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 25)
@@ -1322,11 +1348,13 @@ __kernel
         }
     }
 }
+
 void reduce_sum128(volatile __local  float* smem, int tid)
 {
 #ifndef WAVE_SIZE
 #define WAVE_SIZE 1
 #endif
+
     if (tid < 64)
     {
         smem[tid] += smem[tid + 64];
@@ -1374,6 +1402,8 @@ void reduce_sum128(volatile __local  float* smem, int tid)
         smem[tid] += smem[tid + 1];
     }
 }
+
+
 void reduce_sum64(volatile __local  float* smem, int tid)
 {
 #ifndef WAVE_SIZE
@@ -1421,7 +1451,7 @@ void reduce_sum64(volatile __local  float* smem, int tid)
 }
 
 __kernel
-    void normalize_descriptors128(__global float * descriptors, int descriptors_step)
+void normalize_descriptors128(__global float * descriptors, int descriptors_step)
 {
     descriptors_step /= sizeof(*descriptors);
     // no need for thread ID
@@ -1436,8 +1466,6 @@ __kernel
     reduce_sum128(sqDesc, get_local_id(0));
     barrier(CLK_LOCAL_MEM_FENCE);
 
-
-
     // compute length (square root)
     volatile __local  float len;
     if (get_local_id(0) == 0)
@@ -1450,7 +1478,7 @@ __kernel
     descriptor_base[get_local_id(0)] = lookup / len;
 }
 __kernel
-    void normalize_descriptors64(__global float * descriptors, int descriptors_step)
+void normalize_descriptors64(__global float * descriptors, int descriptors_step)
 {
     descriptors_step /= sizeof(*descriptors);
     // no need for thread ID
@@ -1462,7 +1490,6 @@ __kernel
     sqDesc[get_local_id(0)] = lookup * lookup;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-
     reduce_sum64(sqDesc, get_local_id(0));
     barrier(CLK_LOCAL_MEM_FENCE);
 
diff --git a/modules/nonfree/test/test_detectors.cpp b/modules/nonfree/test/test_detectors.cpp
index 155c9c869..5854207dc 100644
--- a/modules/nonfree/test/test_detectors.cpp
+++ b/modules/nonfree/test/test_detectors.cpp
@@ -312,6 +312,3 @@ void CV_DetectorsTest::run( int /*start_from*/ )
 
 
 TEST(Features2d_Detectors, regression) { CV_DetectorsTest test; test.safe_run(); }
-
-
-
diff --git a/modules/nonfree/test/test_features2d.cpp b/modules/nonfree/test/test_features2d.cpp
index 09997abe5..7c888e261 100644
--- a/modules/nonfree/test/test_features2d.cpp
+++ b/modules/nonfree/test/test_features2d.cpp
@@ -231,7 +231,7 @@ void CV_FeatureDetectorTest::regressionTest()
 
 void CV_FeatureDetectorTest::run( int /*start_from*/ )
 {
-    if( fdetector.empty() )
+    if( !fdetector )
     {
         ts->printf( cvtest::TS::LOG, "Feature detector is empty.\n" );
         ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA );
@@ -464,7 +464,7 @@ protected:
     void run(int)
     {
         createDescriptorExtractor();
-        if( dextractor.empty() )
+        if( !dextractor )
         {
             ts->printf(cvtest::TS::LOG, "Descriptor extractor is empty.\n");
             ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA );
@@ -1101,7 +1101,7 @@ protected:
     void run(int)
     {
         Ptr<Feature2D> f = Algorithm::create<Feature2D>("Feature2D." + fname);
-        if(f.empty())
+        if(!f)
             return;
         string path = string(ts->get_data_path()) + "detectors_descriptors_evaluation/planar/";
         string imgname1 = path + "box.png";
@@ -1149,3 +1149,76 @@ protected:
 
 TEST(Features2d_SIFTHomographyTest, regression) { CV_DetectPlanarTest test("SIFT", 80); test.safe_run(); }
 TEST(Features2d_SURFHomographyTest, regression) { CV_DetectPlanarTest test("SURF", 80); test.safe_run(); }
+
+class FeatureDetectorUsingMaskTest : public cvtest::BaseTest
+{
+public:
+    FeatureDetectorUsingMaskTest(const Ptr<FeatureDetector>& featureDetector) :
+        featureDetector_(featureDetector)
+    {
+        CV_Assert(featureDetector_);
+    }
+
+protected:
+
+    void run(int)
+    {
+        const int nStepX = 2;
+        const int nStepY = 2;
+
+        const string imageFilename = string(ts->get_data_path()) + "/features2d/tsukuba.png";
+
+        Mat image = imread(imageFilename);
+        if(image.empty())
+        {
+            ts->printf(cvtest::TS::LOG, "Image %s can not be read.\n", imageFilename.c_str());
+            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
+            return;
+        }
+
+        Mat mask(image.size(), CV_8U);
+
+        const int stepX = image.size().width / nStepX;
+        const int stepY = image.size().height / nStepY;
+
+        vector<KeyPoint> keyPoints;
+        vector<Point2f> points;
+        for(int i=0; i<nStepX; ++i)
+            for(int j=0; j<nStepY; ++j)
+            {
+
+                mask.setTo(0);
+                Rect whiteArea(i * stepX, j * stepY, stepX, stepY);
+                mask(whiteArea).setTo(255);
+
+                featureDetector_->detect(image, keyPoints, mask);
+                KeyPoint::convert(keyPoints, points);
+
+                for(size_t k=0; k<points.size(); ++k)
+                {
+                    if ( !whiteArea.contains(points[k]) )
+                    {
+                        ts->printf(cvtest::TS::LOG, "The feature point is outside of the mask.");
+                        ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
+                        return;
+                    }
+                }
+            }
+
+        ts->set_failed_test_info( cvtest::TS::OK );
+    }
+
+    Ptr<FeatureDetector> featureDetector_;
+};
+
+TEST(Features2d_SIFT_using_mask, regression)
+{
+    FeatureDetectorUsingMaskTest test(Algorithm::create<FeatureDetector>("Feature2D.SIFT"));
+    test.safe_run();
+}
+
+TEST(DISABLED_Features2d_SURF_using_mask, regression)
+{
+    FeatureDetectorUsingMaskTest test(Algorithm::create<FeatureDetector>("Feature2D.SURF"));
+    test.safe_run();
+}
diff --git a/modules/nonfree/test/test_keypoints.cpp b/modules/nonfree/test/test_keypoints.cpp
index 3984f19d8..b046d7534 100644
--- a/modules/nonfree/test/test_keypoints.cpp
+++ b/modules/nonfree/test/test_keypoints.cpp
@@ -62,7 +62,7 @@ protected:
     virtual void run(int)
     {
         cv::initModule_features2d();
-        CV_Assert(!detector.empty());
+        CV_Assert(detector);
         string imgFilename = string(ts->get_data_path()) + FEATURES2D_DIR + "/" + IMAGE_FILENAME;
 
         // Read the test image.
diff --git a/modules/nonfree/test/test_precomp.cpp b/modules/nonfree/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/nonfree/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/nonfree/test/test_precomp.hpp b/modules/nonfree/test/test_precomp.hpp
index 563dad43b..965e651e5 100644
--- a/modules/nonfree/test/test_precomp.hpp
+++ b/modules/nonfree/test/test_precomp.hpp
@@ -19,6 +19,7 @@
 #include "opencv2/ts/gpu_test.hpp"
 
 #include "opencv2/opencv_modules.hpp"
+#include "cvconfig.h"
 
 #ifdef HAVE_OPENCV_OCL
 #  include "opencv2/nonfree/ocl.hpp"
diff --git a/modules/nonfree/test/test_rotation_and_scale_invariance.cpp b/modules/nonfree/test/test_rotation_and_scale_invariance.cpp
index b63b8b748..47efc60eb 100644
--- a/modules/nonfree/test/test_rotation_and_scale_invariance.cpp
+++ b/modules/nonfree/test/test_rotation_and_scale_invariance.cpp
@@ -210,7 +210,7 @@ public:
         minKeyPointMatchesRatio(_minKeyPointMatchesRatio),
         minAngleInliersRatio(_minAngleInliersRatio)
     {
-        CV_Assert(!featureDetector.empty());
+        CV_Assert(featureDetector);
     }
 
 protected:
@@ -323,8 +323,8 @@ public:
         normType(_normType),
         minDescInliersRatio(_minDescInliersRatio)
     {
-        CV_Assert(!featureDetector.empty());
-        CV_Assert(!descriptorExtractor.empty());
+        CV_Assert(featureDetector);
+        CV_Assert(descriptorExtractor);
     }
 
 protected:
@@ -410,7 +410,7 @@ public:
         minKeyPointMatchesRatio(_minKeyPointMatchesRatio),
         minScaleInliersRatio(_minScaleInliersRatio)
     {
-        CV_Assert(!featureDetector.empty());
+        CV_Assert(featureDetector);
     }
 
 protected:
@@ -530,8 +530,8 @@ public:
         normType(_normType),
         minDescInliersRatio(_minDescInliersRatio)
     {
-        CV_Assert(!featureDetector.empty());
-        CV_Assert(!descriptorExtractor.empty());
+        CV_Assert(featureDetector);
+        CV_Assert(descriptorExtractor);
     }
 
 protected:
diff --git a/modules/objdetect/CMakeLists.txt b/modules/objdetect/CMakeLists.txt
index 78aa4aa23..5739ee9e3 100644
--- a/modules/objdetect/CMakeLists.txt
+++ b/modules/objdetect/CMakeLists.txt
@@ -1,2 +1,2 @@
 set(the_description "Object Detection")
-ocv_define_module(objdetect opencv_core opencv_imgproc OPTIONAL opencv_highgui)
+ocv_define_module(objdetect opencv_core opencv_imgproc opencv_ml OPTIONAL opencv_highgui)
diff --git a/modules/objdetect/doc/cascade_classification.rst b/modules/objdetect/doc/cascade_classification.rst
index 46272d2f0..453f05285 100644
--- a/modules/objdetect/doc/cascade_classification.rst
+++ b/modules/objdetect/doc/cascade_classification.rst
@@ -216,6 +216,10 @@ Detects objects of different sizes in the input image. The detected objects are
 
 The function is parallelized with the TBB library.
 
+.. note::
+
+   * (Python) A face detection example using cascade classifiers can be found at opencv_source_code/samples/python2/facedetect.py
+
 
 CascadeClassifier::setImage
 -------------------------------
diff --git a/modules/objdetect/doc/latent_svm.rst b/modules/objdetect/doc/latent_svm.rst
index 8362dfb11..4b4ff117f 100644
--- a/modules/objdetect/doc/latent_svm.rst
+++ b/modules/objdetect/doc/latent_svm.rst
@@ -260,5 +260,3 @@ Return a count of loaded models (classes).
 
 
 .. [Felzenszwalb2010] Felzenszwalb, P. F. and Girshick, R. B. and McAllester, D. and Ramanan, D. *Object Detection with Discriminatively Trained Part Based Models*. PAMI, vol. 32, no. 9, pp. 1627-1645, September 2010
-
-
diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp
index d263b2eb7..f1b371610 100644
--- a/modules/objdetect/include/opencv2/objdetect.hpp
+++ b/modules/objdetect/include/opencv2/objdetect.hpp
@@ -93,6 +93,23 @@ private:
     std::vector<String> classNames;
 };
 
+// class for grouping object candidates, detected by Cascade Classifier, HOG etc.
+// instance of the class is to be passed to cv::partition (see cxoperations.hpp)
+class CV_EXPORTS SimilarRects
+{
+public:
+    SimilarRects(double _eps) : eps(_eps) {}
+    inline bool operator()(const Rect& r1, const Rect& r2) const
+    {
+        double delta = eps*(std::min(r1.width, r2.width) + std::min(r1.height, r2.height))*0.5;
+        return std::abs(r1.x - r2.x) <= delta &&
+            std::abs(r1.y - r2.y) <= delta &&
+            std::abs(r1.x + r1.width - r2.x - r2.width) <= delta &&
+            std::abs(r1.y + r1.height - r2.y - r2.height) <= delta;
+    }
+    double eps;
+};
+
 CV_EXPORTS   void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps = 0.2);
 CV_EXPORTS_W void groupRectangles(CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights, int groupThreshold, double eps = 0.2);
 CV_EXPORTS   void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
@@ -124,7 +141,7 @@ public:
     static Ptr<FeatureEvaluator> create(int type);
 };
 
-template<> CV_EXPORTS void Ptr<CvHaarClassifierCascade>::delete_obj();
+template<> CV_EXPORTS void DefaultDeleter<CvHaarClassifierCascade>::operator ()(CvHaarClassifierCascade* obj) const;
 
 enum { CASCADE_DO_CANNY_PRUNING    = 1,
        CASCADE_SCALE_IMAGE         = 2,
@@ -393,6 +410,7 @@ public:
 
    // read/parse Dalal's alt model file
    void readALTModel(String modelfile);
+   void groupRectangles(std::vector<cv::Rect>& rectList, std::vector<double>& weights, int groupThreshold, double eps) const;
 };
 
 
@@ -407,5 +425,6 @@ CV_EXPORTS_W void drawDataMatrixCodes(InputOutputArray image,
 }
 
 #include "opencv2/objdetect/linemod.hpp"
+#include "opencv2/objdetect/erfilter.hpp"
 
 #endif
diff --git a/modules/objdetect/include/opencv2/objdetect/erfilter.hpp b/modules/objdetect/include/opencv2/objdetect/erfilter.hpp
new file mode 100644
index 000000000..69809a8d1
--- /dev/null
+++ b/modules/objdetect/include/opencv2/objdetect/erfilter.hpp
@@ -0,0 +1,240 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_OBJDETECT_ERFILTER_HPP__
+#define __OPENCV_OBJDETECT_ERFILTER_HPP__
+
+#include "opencv2/core.hpp"
+#include <vector>
+#include <deque>
+#include <string>
+
+namespace cv
+{
+
+/*!
+    Extremal Region Stat structure
+
+    The ERStat structure represents a class-specific Extremal Region (ER).
+
+    An ER is a 4-connected set of pixels with all its grey-level values smaller than the values
+    in its outer boundary. A class-specific ER is selected (using a classifier) from all the ER's
+    in the component tree of the image.
+*/
+struct CV_EXPORTS ERStat
+{
+public:
+    //! Constructor
+    explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
+    //! Destructor
+    ~ERStat(){};
+
+    //! seed point and the threshold (max grey-level value)
+    int pixel;
+    int level;
+
+    //! incrementally computable features
+    int area;
+    int perimeter;
+    int euler;                 //!< euler number
+    Rect rect;
+    double raw_moments[2];     //!< order 1 raw moments to derive the centroid
+    double central_moments[3]; //!< order 2 central moments to construct the covariance matrix
+    std::deque<int> *crossings;//!< horizontal crossings
+    float med_crossings;       //!< median of the crossings at three different height levels
+
+    //! 2nd stage features
+    float hole_area_ratio;
+    float convex_hull_ratio;
+    float num_inflexion_points;
+
+    // TODO Other features can be added (average color, standard deviation, and such)
+
+
+    // TODO shall we include the pixel list whenever available (i.e. after 2nd stage) ?
+    std::vector<int> *pixels;
+
+    //! probability that the ER belongs to the class we are looking for
+    double probability;
+
+    //! pointers preserving the tree structure of the component tree
+    ERStat* parent;
+    ERStat* child;
+    ERStat* next;
+    ERStat* prev;
+
+    //! wenever the regions is a local maxima of the probability
+    bool local_maxima;
+    ERStat* max_probability_ancestor;
+    ERStat* min_probability_ancestor;
+};
+
+/*!
+    Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithms
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier.
+*/
+class CV_EXPORTS ERFilter : public Algorithm
+{
+public:
+
+    //! callback with the classifier is made a class. By doing it we hide SVM, Boost etc.
+    class CV_EXPORTS Callback
+    {
+    public:
+        virtual ~Callback(){};
+        //! The classifier must return probability measure for the region.
+        virtual double eval(const ERStat& stat) = 0; //const = 0; //TODO why cannot use const = 0 here?
+    };
+
+    /*!
+        the key method. Takes image on input and returns the selected regions in a vector of ERStat
+        only distinctive ERs which correspond to characters are selected by a sequential classifier
+        \param image   is the input image
+        \param regions is output for the first stage, input/output for the second one.
+    */
+    virtual void run( InputArray image, std::vector<ERStat>& regions ) = 0;
+
+
+    //! set/get methods to set the algorithm properties,
+    virtual void setCallback(const Ptr<ERFilter::Callback>& cb) = 0;
+    virtual void setThresholdDelta(int thresholdDelta) = 0;
+    virtual void setMinArea(float minArea) = 0;
+    virtual void setMaxArea(float maxArea) = 0;
+    virtual void setMinProbability(float minProbability) = 0;
+    virtual void setMinProbabilityDiff(float minProbabilityDiff) = 0;
+    virtual void setNonMaxSuppression(bool nonMaxSuppression) = 0;
+    virtual int  getNumRejected() = 0;
+};
+
+
+/*!
+    Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    The component tree of the image is extracted by a threshold increased step by step
+    from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness,
+    number of holes, and number of horizontal crossings) are computed for each ER
+    and used as features for a classifier which estimates the class-conditional
+    probability P(er|character). The value of P(er|character) is tracked using the inclusion
+    relation of ER across all thresholds and only the ERs which correspond to local maximum
+    of the probability P(er|character) are selected (if the local maximum of the
+    probability is above a global limit pmin and the difference between local maximum and
+    local minimum is greater than minProbabilityDiff).
+
+    \param  cb                Callback with the classifier.
+                              default classifier can be implicitly load with function loadClassifierNM1()
+                              from file in samples/cpp/trained_classifierNM1.xml
+    \param  thresholdDelta    Threshold step in subsequent thresholds when extracting the component tree
+    \param  minArea           The minimum area (% of image size) allowed for retreived ER's
+    \param  minArea           The maximum area (% of image size) allowed for retreived ER's
+    \param  minProbability    The minimum probability P(er|character) allowed for retreived ER's
+    \param  nonMaxSuppression Whenever non-maximum suppression is done over the branch probabilities
+    \param  minProbability    The minimum probability difference between local maxima and local minima ERs
+*/
+CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
+                                                  int thresholdDelta = 1, float minArea = 0.00025,
+                                                  float maxArea = 0.13, float minProbability = 0.4,
+                                                  bool nonMaxSuppression = true,
+                                                  float minProbabilityDiff = 0.1);
+
+/*!
+    Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    In the second stage, the ERs that passed the first stage are classified into character
+    and non-character classes using more informative but also more computationally expensive
+    features. The classifier uses all the features calculated in the first stage and the following
+    additional features: hole area ratio, convex hull ratio, and number of outer inflexion points.
+
+    \param  cb             Callback with the classifier
+                           default classifier can be implicitly load with function loadClassifierNM2()
+                           from file in samples/cpp/trained_classifierNM2.xml
+    \param  minProbability The minimum probability P(er|character) allowed for retreived ER's
+*/
+CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
+                                                  float minProbability = 0.3);
+
+
+/*!
+    Allow to implicitly load the default classifier when creating an ERFilter object.
+    The function takes as parameter the XML or YAML file with the classifier model
+    (e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback.
+*/
+
+CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM1(const std::string& filename);
+
+/*!
+    Allow to implicitly load the default classifier when creating an ERFilter object.
+    The function takes as parameter the XML or YAML file with the classifier model
+    (e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback.
+*/
+
+CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM2(const std::string& filename);
+
+
+// computeNMChannels operation modes
+enum { ERFILTER_NM_RGBLGrad = 0,
+       ERFILTER_NM_IHSGrad  = 1
+     };
+
+/*!
+    Compute the different channels to be processed independently in the N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient
+    magnitude channels (Grad) are used in order to obtain high localization recall.
+    This implementation also provides an alternative combination of red (R), green (G), blue (B),
+    lightness (L), and gradient magnitude (Grad).
+
+    \param  _src           Source image. Must be RGB CV_8UC3.
+    \param  _channels      Output vector<Mat> where computed channels are stored.
+    \param  _mode          Mode of operation. Currently the only available options are
+                           ERFILTER_NM_RGBLGrad (by default) and ERFILTER_NM_IHSGrad.
+
+*/
+CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
+
+}
+#endif // _OPENCV_ERFILTER_HPP_
diff --git a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp
index 1a3599614..3ee284f42 100644
--- a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp
+++ b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp
@@ -45,4 +45,4 @@
 #error this is a compatibility header which should not be used inside the OpenCV library
 #endif
 
-#include "opencv2/objdetect.hpp"
\ No newline at end of file
+#include "opencv2/objdetect.hpp"
diff --git a/modules/objdetect/perf/perf_cascadeclassifier.cpp b/modules/objdetect/perf/perf_cascadeclassifier.cpp
index 2823302c7..1d5bff11f 100644
--- a/modules/objdetect/perf/perf_cascadeclassifier.cpp
+++ b/modules/objdetect/perf/perf_cascadeclassifier.cpp
@@ -47,4 +47,4 @@ PERF_TEST_P(ImageName_MinSize, CascadeClassifierLBPFrontalFace,
 
     std::sort(faces.begin(), faces.end(), comparators::RectLess());
     SANITY_CHECK(faces, 3.001 * faces.size());
-}
\ No newline at end of file
+}
diff --git a/modules/objdetect/perf/perf_precomp.cpp b/modules/objdetect/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/objdetect/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/objdetect/src/_lsvm_tbbversion.h b/modules/objdetect/src/_lsvm_tbbversion.h
index fb02d8ac0..8bfca1bc1 100644
--- a/modules/objdetect/src/_lsvm_tbbversion.h
+++ b/modules/objdetect/src/_lsvm_tbbversion.h
@@ -48,4 +48,4 @@ int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const i
                                      int *kPoints,
                                      CvPoint ****partsDisplacement);
 
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp
index 13422b97e..88f463faa 100644
--- a/modules/objdetect/src/cascadedetect.cpp
+++ b/modules/objdetect/src/cascadedetect.cpp
@@ -113,24 +113,6 @@ struct Logger
 namespace cv
 {
 
-// class for grouping object candidates, detected by Cascade Classifier, HOG etc.
-// instance of the class is to be passed to cv::partition (see cxoperations.hpp)
-class CV_EXPORTS SimilarRects
-{
-public:
-    SimilarRects(double _eps) : eps(_eps) {}
-    inline bool operator()(const Rect& r1, const Rect& r2) const
-    {
-        double delta = eps*(std::min(r1.width, r2.width) + std::min(r1.height, r2.height))*0.5;
-        return std::abs(r1.x - r2.x) <= delta &&
-        std::abs(r1.y - r2.y) <= delta &&
-        std::abs(r1.x + r1.width - r2.x - r2.width) <= delta &&
-        std::abs(r1.y + r1.height - r2.y - r2.height) <= delta;
-    }
-    double eps;
-};
-
-
 void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights)
 {
     if( groupThreshold <= 0 || rectList.empty() )
@@ -196,8 +178,11 @@ void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps
     for( i = 0; i < nclasses; i++ )
     {
         Rect r1 = rrects[i];
-        int n1 = levelWeights ? rejectLevels[i] : rweights[i];
+        int n1 = rweights[i];
         double w1 = rejectWeights[i];
+        int l1 = rejectLevels[i];
+
+        // filter out rectangles which don't have enough similar rectangles
         if( n1 <= groupThreshold )
             continue;
         // filter out small face rectangles inside large rectangles
@@ -225,7 +210,7 @@ void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps
         {
             rectList.push_back(r1);
             if( weights )
-                weights->push_back(n1);
+                weights->push_back(l1);
             if( levelWeights )
                 levelWeights->push_back(w1);
         }
@@ -482,7 +467,7 @@ bool HaarEvaluator::Feature :: read( const FileNode& node )
 
 HaarEvaluator::HaarEvaluator()
 {
-    features = new std::vector<Feature>();
+    features = makePtr<std::vector<Feature> >();
 }
 HaarEvaluator::~HaarEvaluator()
 {
@@ -507,7 +492,7 @@ bool HaarEvaluator::read(const FileNode& node)
 
 Ptr<FeatureEvaluator> HaarEvaluator::clone() const
 {
-    HaarEvaluator* ret = new HaarEvaluator;
+    Ptr<HaarEvaluator> ret = makePtr<HaarEvaluator>();
     ret->origWinSize = origWinSize;
     ret->features = features;
     ret->featuresPtr = &(*ret->features)[0];
@@ -597,7 +582,7 @@ bool LBPEvaluator::Feature :: read(const FileNode& node )
 
 LBPEvaluator::LBPEvaluator()
 {
-    features = new std::vector<Feature>();
+    features = makePtr<std::vector<Feature> >();
 }
 LBPEvaluator::~LBPEvaluator()
 {
@@ -618,7 +603,7 @@ bool LBPEvaluator::read( const FileNode& node )
 
 Ptr<FeatureEvaluator> LBPEvaluator::clone() const
 {
-    LBPEvaluator* ret = new LBPEvaluator;
+    Ptr<LBPEvaluator> ret = makePtr<LBPEvaluator>();
     ret->origWinSize = origWinSize;
     ret->features = features;
     ret->featuresPtr = &(*ret->features)[0];
@@ -677,7 +662,7 @@ bool HOGEvaluator::Feature :: read( const FileNode& node )
 
 HOGEvaluator::HOGEvaluator()
 {
-    features = new std::vector<Feature>();
+    features = makePtr<std::vector<Feature> >();
 }
 
 HOGEvaluator::~HOGEvaluator()
@@ -699,7 +684,7 @@ bool HOGEvaluator::read( const FileNode& node )
 
 Ptr<FeatureEvaluator> HOGEvaluator::clone() const
 {
-    HOGEvaluator* ret = new HOGEvaluator;
+    Ptr<HOGEvaluator> ret = makePtr<HOGEvaluator>();
     ret->origWinSize = origWinSize;
     ret->features = features;
     ret->featuresPtr = &(*ret->features)[0];
@@ -864,7 +849,7 @@ CascadeClassifier::~CascadeClassifier()
 
 bool CascadeClassifier::empty() const
 {
-    return oldCascade.empty() && data.stages.empty();
+    return !oldCascade && data.stages.empty();
 }
 
 bool CascadeClassifier::load(const String& filename)
@@ -882,13 +867,13 @@ bool CascadeClassifier::load(const String& filename)
 
     fs.release();
 
-    oldCascade = Ptr<CvHaarClassifierCascade>((CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0));
+    oldCascade.reset((CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0));
     return !oldCascade.empty();
 }
 
 int CascadeClassifier::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, double& weight )
 {
-    CV_Assert( oldCascade.empty() );
+    CV_Assert( !oldCascade );
 
     assert( data.featureType == FeatureEvaluator::HAAR ||
             data.featureType == FeatureEvaluator::LBP ||
@@ -988,7 +973,7 @@ public:
                 {
                     if( result == 1 )
                         result =  -(int)classifier->data.stages.size();
-                    if( classifier->data.stages.size() + result < 4 )
+                    if( classifier->data.stages.size() + result == 0 )
                     {
                         mtx->lock();
                         rectangles->push_back(Rect(cvRound(x*scalingFactor), cvRound(y*scalingFactor), winSize.width, winSize.height));
@@ -1037,7 +1022,7 @@ bool CascadeClassifier::detectSingleScale( const Mat& image, int stripCount, Siz
 #endif
 
     Mat currentMask;
-    if (!maskGenerator.empty()) {
+    if (maskGenerator) {
         currentMask=maskGenerator->generateMask(image);
     }
 
@@ -1112,7 +1097,7 @@ void CascadeClassifier::detectMultiScaleNoGrouping( const Mat& image, std::vecto
 {
     candidates.clear();
 
-    if (!maskGenerator.empty())
+    if (maskGenerator)
         maskGenerator->initializeMask(image);
 
     if( maxObjectSize.height == 0 || maxObjectSize.width == 0 )
@@ -1365,7 +1350,7 @@ bool CascadeClassifier::read(const FileNode& root)
     return featureEvaluator->read(fn);
 }
 
-template<> void Ptr<CvHaarClassifierCascade>::delete_obj()
+template<> void DefaultDeleter<CvHaarClassifierCascade>::operator ()(CvHaarClassifierCascade* obj) const
 { cvReleaseHaarClassifierCascade(&obj); }
 
 } // namespace cv
diff --git a/modules/objdetect/src/cascadedetect.hpp b/modules/objdetect/src/cascadedetect.hpp
index c6da4b9ff..de3303e4a 100644
--- a/modules/objdetect/src/cascadedetect.hpp
+++ b/modules/objdetect/src/cascadedetect.hpp
@@ -487,4 +487,3 @@ inline int predictCategoricalStump( CascadeClassifier& cascade, Ptr<FeatureEvalu
     return 1;
 }
 }
-
diff --git a/modules/objdetect/src/erfilter.cpp b/modules/objdetect/src/erfilter.cpp
new file mode 100644
index 000000000..6e19b34ba
--- /dev/null
+++ b/modules/objdetect/src/erfilter.cpp
@@ -0,0 +1,1252 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <fstream>
+
+using namespace std;
+
+namespace cv
+{
+
+ERStat::ERStat(int init_level, int init_pixel, int init_x, int init_y) : pixel(init_pixel),
+               level(init_level), area(0), perimeter(0), euler(0), probability(1.0),
+               parent(0), child(0), next(0), prev(0), local_maxima(0),
+               max_probability_ancestor(0), min_probability_ancestor(0)
+{
+    rect = Rect(init_x,init_y,1,1);
+    raw_moments[0] = 0.0;
+    raw_moments[1] = 0.0;
+    central_moments[0] = 0.0;
+    central_moments[1] = 0.0;
+    central_moments[2] = 0.0;
+    crossings = new std::deque<int>();
+    crossings->push_back(0);
+}
+
+
+// derivative classes
+
+
+// the classe implementing the interface for the 1st and 2nd stages of Neumann and Matas algorithm
+class CV_EXPORTS ERFilterNM : public ERFilter
+{
+public:
+    //Constructor
+    ERFilterNM();
+    //Destructor
+    ~ERFilterNM() {};
+
+    float minProbability;
+    bool  nonMaxSuppression;
+    float minProbabilityDiff;
+
+    // the key method. Takes image on input, vector of ERStat is output for the first stage,
+    // input/output - for the second one.
+    void run( InputArray image, std::vector<ERStat>& regions );
+
+protected:
+    int thresholdDelta;
+    float maxArea;
+    float minArea;
+
+    Ptr<ERFilter::Callback> classifier;
+
+    // count of the rejected/accepted regions
+    int num_rejected_regions;
+    int num_accepted_regions;
+
+public:
+
+    // set/get methods to set the algorithm properties,
+    void setCallback(const Ptr<ERFilter::Callback>& cb);
+    void setThresholdDelta(int thresholdDelta);
+    void setMinArea(float minArea);
+    void setMaxArea(float maxArea);
+    void setMinProbability(float minProbability);
+    void setMinProbabilityDiff(float minProbabilityDiff);
+    void setNonMaxSuppression(bool nonMaxSuppression);
+    int  getNumRejected();
+
+private:
+    // pointer to the input/output regions vector
+    std::vector<ERStat> *regions;
+    // image mask used for feature calculations
+    Mat region_mask;
+
+    // extract the component tree and store all the ER regions
+    void er_tree_extract( InputArray image );
+    // accumulate a pixel into an ER
+    void er_add_pixel( ERStat *parent, int x, int y, int non_boundary_neighbours,
+                       int non_boundary_neighbours_horiz,
+                       int d_C1, int d_C2, int d_C3 );
+    // merge an ER with its nested parent
+    void er_merge( ERStat *parent, ERStat *child );
+    // recursively walk the tree and clean memory
+    void er_tree_clean( ERStat *er );
+    // copy extracted regions into the output vector
+    ERStat* er_save( ERStat *er, ERStat *parent, ERStat *prev );
+    // recursively walk the tree and filter (remove) regions using the callback classifier
+    ERStat* er_tree_filter( InputArray image, ERStat *stat, ERStat *parent, ERStat *prev );
+    // recursively walk the tree selecting only regions with local maxima probability
+    ERStat* er_tree_nonmax_suppression( ERStat *er, ERStat *parent, ERStat *prev );
+};
+
+
+// default 1st stage classifier
+class CV_EXPORTS ERClassifierNM1 : public ERFilter::Callback
+{
+public:
+    //Constructor
+    ERClassifierNM1(const std::string& filename);
+    // Destructor
+    ~ERClassifierNM1() {};
+
+    // The classifier must return probability measure for the region.
+    double eval(const ERStat& stat);
+
+private:
+    CvBoost boost;
+};
+
+// default 2nd stage classifier
+class CV_EXPORTS ERClassifierNM2 : public ERFilter::Callback
+{
+public:
+    //constructor
+    ERClassifierNM2(const std::string& filename);
+    // Destructor
+    ~ERClassifierNM2() {};
+
+    // The classifier must return probability measure for the region.
+    double eval(const ERStat& stat);
+
+private:
+    CvBoost boost;
+};
+
+
+
+
+
+// default constructor
+ERFilterNM::ERFilterNM()
+{
+    thresholdDelta = 1;
+    minArea = 0.;
+    maxArea = 1.;
+    minProbability = 0.;
+    nonMaxSuppression = false;
+    minProbabilityDiff = 1.;
+    num_accepted_regions = 0;
+    num_rejected_regions = 0;
+}
+
+// the key method. Takes image on input, vector of ERStat is output for the first stage,
+// input/output for the second one.
+void ERFilterNM::run( InputArray image, std::vector<ERStat>& _regions )
+{
+
+    // assert correct image type
+    CV_Assert( image.getMat().type() == CV_8UC1 );
+
+    regions = &_regions;
+    region_mask = Mat::zeros(image.getMat().rows+2, image.getMat().cols+2, CV_8UC1);
+
+    // if regions vector is empty we must extract the entire component tree
+    if ( regions->size() == 0 )
+    {
+        er_tree_extract( image );
+        if (nonMaxSuppression)
+        {
+            vector<ERStat> aux_regions;
+            regions->swap(aux_regions);
+            regions->reserve(aux_regions.size());
+            er_tree_nonmax_suppression( &aux_regions.front(), NULL, NULL );
+            aux_regions.clear();
+        }
+    }
+    else // if regions vector is already filled we'll just filter the current regions
+    {
+        // the tree root must have no parent
+        CV_Assert( regions->front().parent == NULL );
+
+        vector<ERStat> aux_regions;
+        regions->swap(aux_regions);
+        regions->reserve(aux_regions.size());
+        er_tree_filter( image, &aux_regions.front(), NULL, NULL );
+        aux_regions.clear();
+    }
+}
+
+// extract the component tree and store all the ER regions
+// uses the algorithm described in
+// Linear time maximally stable extremal regions, D Nistér, H Stewénius – ECCV 2008
+void ERFilterNM::er_tree_extract( InputArray image )
+{
+
+    Mat src = image.getMat();
+    // assert correct image type
+    CV_Assert( src.type() == CV_8UC1 );
+
+    if (thresholdDelta > 1)
+    {
+        Mat tmp;
+        src.copyTo(tmp);
+        src.release();
+        src = (image.getMat() / thresholdDelta) -1;
+    }
+
+    const unsigned char * image_data = src.data;
+    int width = src.cols, height = src.rows;
+
+    // the component stack
+    vector<ERStat*> er_stack;
+
+    //the quads for euler number calculation
+    unsigned char quads[3][4];
+    quads[0][0] = 1 << 3;
+    quads[0][1] = 1 << 2;
+    quads[0][2] = 1 << 1;
+    quads[0][3] = 1;
+    quads[1][0] = (1<<2)|(1<<1)|(1);
+    quads[1][1] = (1<<3)|(1<<1)|(1);
+    quads[1][2] = (1<<3)|(1<<2)|(1);
+    quads[1][3] = (1<<3)|(1<<2)|(1<<1);
+    quads[2][0] = (1<<2)|(1<<1);
+    quads[2][1] = (1<<3)|(1);
+    quads[2][3] = 255;
+
+
+    // masks to know if a pixel is accessible and if it has been already added to some region
+    vector<bool> accessible_pixel_mask(width * height);
+    vector<bool> accumulated_pixel_mask(width * height);
+
+    // heap of boundary pixels
+    vector<int> boundary_pixes[256];
+    vector<int> boundary_edges[256];
+
+    // add a dummy-component before start
+    er_stack.push_back(new ERStat);
+
+    // we'll look initially for all pixels with grey-level lower than a grey-level higher than any allowed in the image
+    int threshold_level = (255/thresholdDelta)+1;
+
+    // starting from the first pixel (0,0)
+    int current_pixel = 0;
+    int current_edge = 0;
+    int current_level = image_data[0];
+    accessible_pixel_mask[0] = true;
+
+    bool push_new_component = true;
+
+    for (;;) {
+
+        int x = current_pixel % width;
+        int y = current_pixel / width;
+
+        // push a component with current level in the component stack
+        if (push_new_component)
+            er_stack.push_back(new ERStat(current_level, current_pixel, x, y));
+        push_new_component = false;
+
+        // explore the (remaining) edges to the neighbors to the current pixel
+        for (current_edge = current_edge; current_edge < 4; current_edge++)
+        {
+
+            int neighbour_pixel = current_pixel;
+
+            switch (current_edge)
+            {
+                    case 0: if (x < width - 1) neighbour_pixel = current_pixel + 1;  break;
+                    case 1: if (y < height - 1) neighbour_pixel = current_pixel + width; break;
+                    case 2: if (x > 0) neighbour_pixel = current_pixel - 1; break;
+                    default: if (y > 0) neighbour_pixel = current_pixel - width; break;
+            }
+
+            // if neighbour is not accessible, mark it accessible and retreive its grey-level value
+            if ( !accessible_pixel_mask[neighbour_pixel] && (neighbour_pixel != current_pixel) )
+            {
+
+                int neighbour_level = image_data[neighbour_pixel];
+                accessible_pixel_mask[neighbour_pixel] = true;
+
+                // if neighbour level is not lower than current level add neighbour to the boundary heap
+                if (neighbour_level >= current_level)
+                {
+
+                    boundary_pixes[neighbour_level].push_back(neighbour_pixel);
+                    boundary_edges[neighbour_level].push_back(0);
+
+                    // if neighbour level is lower than our threshold_level set threshold_level to neighbour level
+                    if (neighbour_level < threshold_level)
+                        threshold_level = neighbour_level;
+
+                }
+                else // if neighbour level is lower than current add current_pixel (and next edge)
+                     // to the boundary heap for later processing
+                {
+
+                    boundary_pixes[current_level].push_back(current_pixel);
+                    boundary_edges[current_level].push_back(current_edge + 1);
+
+                    // if neighbour level is lower than threshold_level set threshold_level to neighbour level
+                    if (current_level < threshold_level)
+                        threshold_level = current_level;
+
+                    // consider the new pixel and its grey-level as current pixel
+                    current_pixel = neighbour_pixel;
+                    current_edge = 0;
+                    current_level = neighbour_level;
+
+                    // and push a new component
+                    push_new_component = true;
+                    break;
+                }
+            }
+
+        } // else neigbor was already accessible
+
+        if (push_new_component) continue;
+
+
+        // once here we can add the current pixel to the component at the top of the stack
+        // but first we find how many of its neighbours are part of the region boundary (needed for
+        // perimeter and crossings calc.) and the increment in quads counts for euler number calc.
+        int non_boundary_neighbours = 0;
+        int non_boundary_neighbours_horiz = 0;
+
+        unsigned char quad_before[4] = {0,0,0,0};
+        unsigned char quad_after[4] = {0,0,0,0};
+        quad_after[0] = 1<<1;
+        quad_after[1] = 1<<3;
+        quad_after[2] = 1<<2;
+        quad_after[3] = 1;
+
+        for (int edge = 0; edge < 8; edge++)
+        {
+            int neighbour4 = -1;
+            int neighbour8 = -1;
+            int cell = 0;
+            switch (edge)
+            {
+                    case 0: if (x < width - 1) { neighbour4 = neighbour8 = current_pixel + 1;} cell = 5; break;
+                    case 1: if ((x < width - 1)&&(y < height - 1)) { neighbour8 = current_pixel + 1 + width;} cell = 8; break;
+                    case 2: if (y < height - 1) { neighbour4 = neighbour8 = current_pixel + width;} cell = 7; break;
+                    case 3: if ((x > 0)&&(y < height - 1)) { neighbour8 = current_pixel - 1 + width;} cell = 6; break;
+                    case 4: if (x > 0) { neighbour4 = neighbour8 = current_pixel - 1;} cell = 3; break;
+                    case 5: if ((x > 0)&&(y > 0)) { neighbour8 = current_pixel - 1 - width;} cell = 0; break;
+                    case 6: if (y > 0) { neighbour4 = neighbour8 = current_pixel - width;} cell = 1; break;
+                    default: if ((x < width - 1)&&(y > 0)) { neighbour8 = current_pixel + 1 - width;} cell = 2; break;
+            }
+            if ((neighbour4 != -1)&&(accumulated_pixel_mask[neighbour4])&&(image_data[neighbour4]<=image_data[current_pixel]))
+            {
+                non_boundary_neighbours++;
+                if ((edge == 0) || (edge == 4))
+                    non_boundary_neighbours_horiz++;
+            }
+
+            int pix_value = image_data[current_pixel] + 1;
+            if (neighbour8 != -1)
+            {
+                if (accumulated_pixel_mask[neighbour8])
+                    pix_value = image_data[neighbour8];
+            }
+
+            if (pix_value<=image_data[current_pixel])
+            {
+                switch(cell)
+                {
+                    case 0:
+                        quad_before[3] = quad_before[3] | (1<<3);
+                        quad_after[3]  = quad_after[3]  | (1<<3);
+                        break;
+                    case 1:
+                        quad_before[3] = quad_before[3] | (1<<2);
+                        quad_after[3]  = quad_after[3]  | (1<<2);
+                        quad_before[0] = quad_before[0] | (1<<3);
+                        quad_after[0]  = quad_after[0]  | (1<<3);
+                        break;
+                    case 2:
+                        quad_before[0] = quad_before[0] | (1<<2);
+                        quad_after[0]  = quad_after[0]  | (1<<2);
+                        break;
+                    case 3:
+                        quad_before[3] = quad_before[3] | (1<<1);
+                        quad_after[3]  = quad_after[3]  | (1<<1);
+                        quad_before[2] = quad_before[2] | (1<<3);
+                        quad_after[2]  = quad_after[2]  | (1<<3);
+                        break;
+                    case 5:
+                        quad_before[0] = quad_before[0] | (1);
+                        quad_after[0]  = quad_after[0]  | (1);
+                        quad_before[1] = quad_before[1] | (1<<2);
+                        quad_after[1]  = quad_after[1]  | (1<<2);
+                        break;
+                    case 6:
+                        quad_before[2] = quad_before[2] | (1<<1);
+                        quad_after[2]  = quad_after[2]  | (1<<1);
+                        break;
+                    case 7:
+                        quad_before[2] = quad_before[2] | (1);
+                        quad_after[2]  = quad_after[2]  | (1);
+                        quad_before[1] = quad_before[1] | (1<<1);
+                        quad_after[1]  = quad_after[1]  | (1<<1);
+                        break;
+                    default:
+                        quad_before[1] = quad_before[1] | (1);
+                        quad_after[1]  = quad_after[1]  | (1);
+                        break;
+                }
+            }
+
+        }
+
+        int C_before[3] = {0, 0, 0};
+        int C_after[3] = {0, 0, 0};
+
+        for (int p=0; p<3; p++)
+        {
+            for (int q=0; q<4; q++)
+            {
+                if ( (quad_before[0] == quads[p][q]) && ((p<2)||(q<2)) )
+                    C_before[p]++;
+                if ( (quad_before[1] == quads[p][q]) && ((p<2)||(q<2)) )
+                    C_before[p]++;
+                if ( (quad_before[2] == quads[p][q]) && ((p<2)||(q<2)) )
+                    C_before[p]++;
+                if ( (quad_before[3] == quads[p][q]) && ((p<2)||(q<2)) )
+                    C_before[p]++;
+
+                if ( (quad_after[0] == quads[p][q]) && ((p<2)||(q<2)) )
+                    C_after[p]++;
+                if ( (quad_after[1] == quads[p][q]) && ((p<2)||(q<2)) )
+                    C_after[p]++;
+                if ( (quad_after[2] == quads[p][q]) && ((p<2)||(q<2)) )
+                    C_after[p]++;
+                if ( (quad_after[3] == quads[p][q]) && ((p<2)||(q<2)) )
+                    C_after[p]++;
+            }
+        }
+
+        int d_C1 = C_after[0]-C_before[0];
+        int d_C2 = C_after[1]-C_before[1];
+        int d_C3 = C_after[2]-C_before[2];
+
+        er_add_pixel(er_stack.back(), x, y, non_boundary_neighbours, non_boundary_neighbours_horiz, d_C1, d_C2, d_C3);
+        accumulated_pixel_mask[current_pixel] = true;
+
+        // if we have processed all the possible threshold levels (the hea is empty) we are done!
+        if (threshold_level == (255/thresholdDelta)+1)
+        {
+
+            // save the extracted regions into the output vector
+            regions->reserve(num_accepted_regions+1);
+            er_save(er_stack.back(), NULL, NULL);
+
+            // clean memory
+            er_tree_clean(er_stack.back());
+            er_stack.clear();
+
+            return;
+        }
+
+
+        // pop the heap of boundary pixels
+        current_pixel = boundary_pixes[threshold_level].back();
+        boundary_pixes[threshold_level].erase(boundary_pixes[threshold_level].end()-1);
+        current_edge  = boundary_edges[threshold_level].back();
+        boundary_edges[threshold_level].erase(boundary_edges[threshold_level].end()-1);
+
+        while (boundary_pixes[threshold_level].empty() && (threshold_level < (255/thresholdDelta)+1))
+            threshold_level++;
+
+
+        int new_level = image_data[current_pixel];
+
+        // if the new pixel has higher grey value than the current one
+        if (new_level != current_level) {
+
+            current_level = new_level;
+
+            // process components on the top of the stack until we reach the higher grey-level
+            while (er_stack.back()->level < new_level)
+            {
+                ERStat* er = er_stack.back();
+                er_stack.erase(er_stack.end()-1);
+
+                if (new_level < er_stack.back()->level)
+                {
+                    er_stack.push_back(new ERStat(new_level, current_pixel, current_pixel%width, current_pixel/width));
+                    er_merge(er_stack.back(), er);
+                    break;
+                }
+
+                er_merge(er_stack.back(), er);
+            }
+
+        }
+
+    }
+}
+
+// accumulate a pixel into an ER
+void ERFilterNM::er_add_pixel(ERStat *parent, int x, int y, int non_border_neighbours,
+                                                            int non_border_neighbours_horiz,
+                                                            int d_C1, int d_C2, int d_C3)
+{
+    parent->area++;
+    parent->perimeter += 4 - 2*non_border_neighbours;
+
+    if (parent->crossings->size()>0)
+    {
+        if (y<parent->rect.y) parent->crossings->push_front(2);
+        else if (y>parent->rect.br().y-1) parent->crossings->push_back(2);
+        else {
+            parent->crossings->at(y - parent->rect.y) += 2-2*non_border_neighbours_horiz;
+        }
+    } else {
+        parent->crossings->push_back(2);
+    }
+
+    parent->euler += (d_C1 - d_C2 + 2*d_C3) / 4;
+
+    int new_x1 = min(parent->rect.x,x);
+    int new_y1 = min(parent->rect.y,y);
+    int new_x2 = max(parent->rect.br().x-1,x);
+    int new_y2 = max(parent->rect.br().y-1,y);
+    parent->rect.x = new_x1;
+    parent->rect.y = new_y1;
+    parent->rect.width  = new_x2-new_x1+1;
+    parent->rect.height = new_y2-new_y1+1;
+
+    parent->raw_moments[0] += x;
+    parent->raw_moments[1] += y;
+
+    parent->central_moments[0] += x * x;
+    parent->central_moments[1] += x * y;
+    parent->central_moments[2] += y * y;
+}
+
+// merge an ER with its nested parent
+void ERFilterNM::er_merge(ERStat *parent, ERStat *child)
+{
+
+    parent->area += child->area;
+
+    parent->perimeter += child->perimeter;
+
+
+    for (int i=parent->rect.y; i<=min(parent->rect.br().y-1,child->rect.br().y-1); i++)
+        if (i-child->rect.y >= 0)
+            parent->crossings->at(i-parent->rect.y) += child->crossings->at(i-child->rect.y);
+
+    for (int i=parent->rect.y-1; i>=child->rect.y; i--)
+        if (i-child->rect.y < (int)child->crossings->size())
+            parent->crossings->push_front(child->crossings->at(i-child->rect.y));
+        else
+            parent->crossings->push_front(0);
+
+    for (int i=parent->rect.br().y; i<child->rect.y; i++)
+        parent->crossings->push_back(0);
+
+    for (int i=max(parent->rect.br().y,child->rect.y); i<=child->rect.br().y-1; i++)
+        parent->crossings->push_back(child->crossings->at(i-child->rect.y));
+
+    parent->euler += child->euler;
+
+    int new_x1 = min(parent->rect.x,child->rect.x);
+    int new_y1 = min(parent->rect.y,child->rect.y);
+    int new_x2 = max(parent->rect.br().x-1,child->rect.br().x-1);
+    int new_y2 = max(parent->rect.br().y-1,child->rect.br().y-1);
+    parent->rect.x = new_x1;
+    parent->rect.y = new_y1;
+    parent->rect.width  = new_x2-new_x1+1;
+    parent->rect.height = new_y2-new_y1+1;
+
+    parent->raw_moments[0] += child->raw_moments[0];
+    parent->raw_moments[1] += child->raw_moments[1];
+
+    parent->central_moments[0] += child->central_moments[0];
+    parent->central_moments[1] += child->central_moments[1];
+    parent->central_moments[2] += child->central_moments[2];
+
+    vector<int> m_crossings;
+    m_crossings.push_back(child->crossings->at((int)(child->rect.height)/6));
+    m_crossings.push_back(child->crossings->at((int)3*(child->rect.height)/6));
+    m_crossings.push_back(child->crossings->at((int)5*(child->rect.height)/6));
+    std::sort(m_crossings.begin(), m_crossings.end());
+    child->med_crossings = (float)m_crossings.at(1);
+
+    // free unnecessary mem
+    child->crossings->clear();
+    delete(child->crossings);
+    child->crossings = NULL;
+
+    // recover the original grey-level
+    child->level = child->level*thresholdDelta;
+
+    // before saving calculate P(child|character) and filter if possible
+    if (classifier != NULL)
+    {
+        child->probability = classifier->eval(*child);
+    }
+
+    if ( (((classifier!=NULL)?(child->probability >= minProbability):true)||(nonMaxSuppression)) &&
+         ((child->area >= (minArea*region_mask.rows*region_mask.cols)) &&
+          (child->area <= (maxArea*region_mask.rows*region_mask.cols)) &&
+          (child->rect.width > 2) && (child->rect.height > 2)) )
+    {
+
+        num_accepted_regions++;
+
+        child->next = parent->child;
+        if (parent->child)
+            parent->child->prev = child;
+        parent->child = child;
+        child->parent = parent;
+
+    } else {
+
+        num_rejected_regions++;
+
+        if (child->prev !=NULL)
+            child->prev->next = child->next;
+
+        ERStat *new_child = child->child;
+        if (new_child != NULL)
+        {
+            while (new_child->next != NULL)
+                new_child = new_child->next;
+            new_child->next = parent->child;
+            if (parent->child)
+                parent->child->prev = new_child;
+            parent->child   = child->child;
+            child->child->parent = parent;
+        }
+
+        // free mem
+        if(child->crossings)
+        {
+            child->crossings->clear();
+            delete(child->crossings);
+            child->crossings = NULL;
+        }
+        delete(child);
+    }
+
+}
+
+// recursively walk the tree and clean memory
+void ERFilterNM::er_tree_clean( ERStat *stat )
+{
+        for (ERStat * child = stat->child; child; child = child->next)
+        {
+            er_tree_clean(child);
+        }
+        if (stat->crossings)
+        {
+            stat->crossings->clear();
+            delete(stat->crossings);
+            stat->crossings = NULL;
+        }
+        delete stat;
+}
+
+// copy extracted regions into the output vector
+ERStat* ERFilterNM::er_save( ERStat *er, ERStat *parent, ERStat *prev )
+{
+
+    regions->push_back(*er);
+
+    regions->back().parent = parent;
+    if (prev != NULL)
+    {
+      prev->next = &(regions->back());
+    }
+    else if (parent != NULL)
+      parent->child = &(regions->back());
+
+    ERStat *old_prev = NULL;
+    ERStat *this_er  = &regions->back();
+
+    if (this_er->parent == NULL)
+    {
+       this_er->probability = 0;
+    }
+
+    if (nonMaxSuppression)
+    {
+        if (this_er->parent == NULL)
+        {
+            this_er->max_probability_ancestor = this_er;
+            this_er->min_probability_ancestor = this_er;
+        }
+        else
+        {
+            this_er->max_probability_ancestor = (this_er->probability > parent->max_probability_ancestor->probability)? this_er :  parent->max_probability_ancestor;
+
+            this_er->min_probability_ancestor = (this_er->probability < parent->min_probability_ancestor->probability)? this_er :  parent->min_probability_ancestor;
+
+            if ( (this_er->max_probability_ancestor->probability > minProbability) && (this_er->max_probability_ancestor->probability - this_er->min_probability_ancestor->probability > minProbabilityDiff))
+            {
+              this_er->max_probability_ancestor->local_maxima = true;
+              if ((this_er->max_probability_ancestor == this_er) && (this_er->parent->local_maxima))
+              {
+                this_er->parent->local_maxima = false;
+              }
+            }
+            else if (this_er->probability < this_er->parent->probability)
+            {
+              this_er->min_probability_ancestor = this_er;
+            }
+            else if (this_er->probability > this_er->parent->probability)
+            {
+              this_er->max_probability_ancestor = this_er;
+            }
+
+
+        }
+    }
+
+    for (ERStat * child = er->child; child; child = child->next)
+    {
+        old_prev = er_save(child, this_er, old_prev);
+    }
+
+    return this_er;
+}
+
+// recursively walk the tree and filter (remove) regions using the callback classifier
+ERStat* ERFilterNM::er_tree_filter ( InputArray image, ERStat * stat, ERStat *parent, ERStat *prev )
+{
+    Mat src = image.getMat();
+    // assert correct image type
+    CV_Assert( src.type() == CV_8UC1 );
+
+    //Fill the region and calculate 2nd stage features
+    Mat region = region_mask(Rect(Point(stat->rect.x,stat->rect.y),Point(stat->rect.br().x+2,stat->rect.br().y+2)));
+    region = Scalar(0);
+    int newMaskVal = 255;
+    int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
+    Rect rect;
+
+    floodFill( src(Rect(Point(stat->rect.x,stat->rect.y),Point(stat->rect.br().x,stat->rect.br().y))),
+               region, Point(stat->pixel%src.cols - stat->rect.x, stat->pixel/src.cols - stat->rect.y),
+               Scalar(255), &rect, Scalar(stat->level), Scalar(0), flags );
+    rect.width += 2;
+    rect.height += 2;
+    region = region(rect);
+
+    vector<vector<Point> > contours;
+    vector<Point> contour_poly;
+    vector<Vec4i> hierarchy;
+    findContours( region, contours, hierarchy, RETR_TREE, CHAIN_APPROX_NONE, Point(0, 0) );
+    //TODO check epsilon parameter of approxPolyDP (set empirically) : we want more precission
+    //     if the region is very small because otherwise we'll loose all the convexities
+    approxPolyDP( Mat(contours[0]), contour_poly, (float)min(rect.width,rect.height)/17, true );
+
+    bool was_convex = false;
+    int  num_inflexion_points = 0;
+
+    for (int p = 0 ; p<(int)contour_poly.size(); p++)
+    {
+        int p_prev = p-1;
+        int p_next = p+1;
+        if (p_prev == -1)
+            p_prev = contour_poly.size()-1;
+        if (p_next == (int)contour_poly.size())
+            p_next = 0;
+
+        double angle_next = atan2((double)(contour_poly[p_next].y-contour_poly[p].y),
+                                  (double)(contour_poly[p_next].x-contour_poly[p].x));
+        double angle_prev = atan2((double)(contour_poly[p_prev].y-contour_poly[p].y),
+                                  (double)(contour_poly[p_prev].x-contour_poly[p].x));
+        if ( angle_next < 0 )
+            angle_next = 2.*CV_PI + angle_next;
+
+        double angle = (angle_next - angle_prev);
+        if (angle > 2.*CV_PI)
+            angle = angle - 2.*CV_PI;
+        else if (angle < 0)
+            angle = 2.*CV_PI + std::abs(angle);
+
+        if (p>0)
+        {
+            if ( ((angle > CV_PI)&&(!was_convex)) || ((angle < CV_PI)&&(was_convex)) )
+                num_inflexion_points++;
+        }
+        was_convex = (angle > CV_PI);
+
+    }
+
+    floodFill(region, Point(0,0), Scalar(255), 0);
+    int holes_area = region.cols*region.rows-countNonZero(region);
+
+    int hull_area = 0;
+
+    {
+
+        vector<Point> hull;
+        convexHull(contours[0], hull, false);
+        hull_area = (int)contourArea(hull);
+    }
+
+
+    stat->hole_area_ratio = (float)holes_area / stat->area;
+    stat->convex_hull_ratio = (float)hull_area / (float)contourArea(contours[0]);
+    stat->num_inflexion_points = (float)num_inflexion_points;
+
+
+    // calculate P(child|character) and filter if possible
+    if ( (classifier != NULL) && (stat->parent != NULL) )
+    {
+        stat->probability = classifier->eval(*stat);
+    }
+
+    if ( ( ((classifier != NULL)?(stat->probability >= minProbability):true) &&
+          ((stat->area >= minArea*region_mask.rows*region_mask.cols) &&
+           (stat->area <= maxArea*region_mask.rows*region_mask.cols)) ) ||
+        (stat->parent == NULL) )
+    {
+
+        num_accepted_regions++;
+        regions->push_back(*stat);
+
+        regions->back().parent = parent;
+        regions->back().next   = NULL;
+        regions->back().child  = NULL;
+
+        if (prev != NULL)
+            prev->next = &(regions->back());
+        else if (parent != NULL)
+            parent->child = &(regions->back());
+
+        ERStat *old_prev = NULL;
+        ERStat *this_er  = &regions->back();
+
+        for (ERStat * child = stat->child; child; child = child->next)
+        {
+            old_prev = er_tree_filter(image, child, this_er, old_prev);
+        }
+
+        return this_er;
+
+    } else {
+
+        num_rejected_regions++;
+
+        ERStat *old_prev = prev;
+
+        for (ERStat * child = stat->child; child; child = child->next)
+        {
+            old_prev = er_tree_filter(image, child, parent, old_prev);
+        }
+
+        return old_prev;
+    }
+
+}
+
+// recursively walk the tree selecting only regions with local maxima probability
+ERStat* ERFilterNM::er_tree_nonmax_suppression ( ERStat * stat, ERStat *parent, ERStat *prev )
+{
+
+    if ( ( stat->local_maxima ) || ( stat->parent == NULL ) )
+    {
+
+        regions->push_back(*stat);
+
+        regions->back().parent = parent;
+        regions->back().next   = NULL;
+        regions->back().child  = NULL;
+
+        if (prev != NULL)
+            prev->next = &(regions->back());
+        else if (parent != NULL)
+            parent->child = &(regions->back());
+
+        ERStat *old_prev = NULL;
+        ERStat *this_er  = &regions->back();
+
+        for (ERStat * child = stat->child; child; child = child->next)
+        {
+            old_prev = er_tree_nonmax_suppression( child, this_er, old_prev );
+        }
+
+        return this_er;
+
+    } else {
+
+        num_rejected_regions++;
+        num_accepted_regions--;
+
+        ERStat *old_prev = prev;
+
+        for (ERStat * child = stat->child; child; child = child->next)
+        {
+            old_prev = er_tree_nonmax_suppression( child, parent, old_prev );
+        }
+
+        return old_prev;
+    }
+
+}
+
+void ERFilterNM::setCallback(const Ptr<ERFilter::Callback>& cb)
+{
+    classifier = cb;
+};
+
+void ERFilterNM::setMinArea(float _minArea)
+{
+    CV_Assert( (_minArea >= 0) && (_minArea < maxArea) );
+    minArea = _minArea;
+    return;
+};
+
+void ERFilterNM::setMaxArea(float _maxArea)
+{
+    CV_Assert(_maxArea <= 1);
+    CV_Assert(minArea < _maxArea);
+    maxArea = _maxArea;
+    return;
+};
+
+void ERFilterNM::setThresholdDelta(int _thresholdDelta)
+{
+    CV_Assert( (_thresholdDelta > 0) && (_thresholdDelta <= 128) );
+    thresholdDelta = _thresholdDelta;
+    return;
+};
+
+void ERFilterNM::setMinProbability(float _minProbability)
+{
+    CV_Assert( (_minProbability >= 0.0) && (_minProbability <= 1.0) );
+    minProbability = _minProbability;
+    return;
+};
+
+void ERFilterNM::setMinProbabilityDiff(float _minProbabilityDiff)
+{
+    CV_Assert( (_minProbabilityDiff >= 0.0) && (_minProbabilityDiff <= 1.0) );
+    minProbabilityDiff = _minProbabilityDiff;
+    return;
+};
+
+void ERFilterNM::setNonMaxSuppression(bool _nonMaxSuppression)
+{
+    nonMaxSuppression = _nonMaxSuppression;
+    return;
+};
+
+int ERFilterNM::getNumRejected()
+{
+    return num_rejected_regions;
+};
+
+
+
+
+// load default 1st stage classifier if found
+ERClassifierNM1::ERClassifierNM1(const std::string& filename)
+{
+
+    if (ifstream(filename.c_str()))
+        boost.load( filename.c_str(), "boost" );
+    else
+        CV_Error(CV_StsBadArg, "Default classifier file not found!");
+};
+
+double ERClassifierNM1::eval(const ERStat& stat)
+{
+    //Classify
+    float arr[] = {0,(float)(stat.rect.width)/(stat.rect.height), // aspect ratio
+                     sqrt((float)(stat.area))/stat.perimeter, // compactness
+                     (float)(1-stat.euler), //number of holes
+                     stat.med_crossings};
+
+    vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
+
+    float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true );
+
+    // Logistic Correction returns a probability value (in the range(0,1))
+    return (double)1-(double)1/(1+exp(-2*votes));
+};
+
+
+// load default 2nd stage classifier if found
+ERClassifierNM2::ERClassifierNM2(const std::string& filename)
+{
+    if (ifstream(filename.c_str()))
+        boost.load( filename.c_str(), "boost" );
+    else
+        CV_Error(CV_StsBadArg, "Default classifier file not found!");
+};
+
+double ERClassifierNM2::eval(const ERStat& stat)
+{
+    //Classify
+    float arr[] = {0,(float)(stat.rect.width)/(stat.rect.height), // aspect ratio
+                     sqrt((float)(stat.area))/stat.perimeter, // compactness
+                     (float)(1-stat.euler), //number of holes
+                     stat.med_crossings, stat.hole_area_ratio,
+                     stat.convex_hull_ratio, stat.num_inflexion_points};
+
+    vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
+
+    float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true );
+
+    // Logistic Correction returns a probability value (in the range(0,1))
+    return (double)1-(double)1/(1+exp(-2*votes));
+};
+
+
+/*!
+    Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    The component tree of the image is extracted by a threshold increased step by step
+    from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness,
+    number of holes, and number of horizontal crossings) are computed for each ER
+    and used as features for a classifier which estimates the class-conditional
+    probability P(er|character). The value of P(er|character) is tracked using the inclusion
+    relation of ER across all thresholds and only the ERs which correspond to local maximum
+    of the probability P(er|character) are selected (if the local maximum of the
+    probability is above a global limit pmin and the difference between local maximum and
+    local minimum is greater than minProbabilityDiff).
+
+    \param  cb                Callback with the classifier.
+                              default classifier can be implicitly load with function loadClassifierNM1()
+                              from file in samples/cpp/trained_classifierNM1.xml
+    \param  thresholdDelta    Threshold step in subsequent thresholds when extracting the component tree
+    \param  minArea           The minimum area (% of image size) allowed for retreived ER's
+    \param  minArea           The maximum area (% of image size) allowed for retreived ER's
+    \param  minProbability    The minimum probability P(er|character) allowed for retreived ER's
+    \param  nonMaxSuppression Whenever non-maximum suppression is done over the branch probabilities
+    \param  minProbability    The minimum probability difference between local maxima and local minima ERs
+*/
+Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb, int thresholdDelta,
+                                float minArea, float maxArea, float minProbability,
+                                bool nonMaxSuppression, float minProbabilityDiff)
+{
+
+    CV_Assert( (minProbability >= 0.) && (minProbability <= 1.) );
+    CV_Assert( (minArea < maxArea) && (minArea >=0.) && (maxArea <= 1.) );
+    CV_Assert( (thresholdDelta >= 0) && (thresholdDelta <= 128) );
+    CV_Assert( (minProbabilityDiff >= 0.) && (minProbabilityDiff <= 1.) );
+
+    Ptr<ERFilterNM> filter = makePtr<ERFilterNM>();
+
+    filter->setCallback(cb);
+
+    filter->setThresholdDelta(thresholdDelta);
+    filter->setMinArea(minArea);
+    filter->setMaxArea(maxArea);
+    filter->setMinProbability(minProbability);
+    filter->setNonMaxSuppression(nonMaxSuppression);
+    filter->setMinProbabilityDiff(minProbabilityDiff);
+    return (Ptr<ERFilter>)filter;
+}
+
+/*!
+    Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    In the second stage, the ERs that passed the first stage are classified into character
+    and non-character classes using more informative but also more computationally expensive
+    features. The classifier uses all the features calculated in the first stage and the following
+    additional features: hole area ratio, convex hull ratio, and number of outer inflexion points.
+
+    \param  cb             Callback with the classifier
+                           default classifier can be implicitly load with function loadClassifierNM1()
+                           from file in samples/cpp/trained_classifierNM2.xml
+    \param  minProbability The minimum probability P(er|character) allowed for retreived ER's
+*/
+Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb, float minProbability)
+{
+
+    CV_Assert( (minProbability >= 0.) && (minProbability <= 1.) );
+
+    Ptr<ERFilterNM> filter = makePtr<ERFilterNM>();
+
+    filter->setCallback(cb);
+
+    filter->setMinProbability(minProbability);
+    return (Ptr<ERFilter>)filter;
+}
+
+/*!
+    Allow to implicitly load the default classifier when creating an ERFilter object.
+    The function takes as parameter the XML or YAML file with the classifier model
+    (e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback.
+*/
+Ptr<ERFilter::Callback> loadClassifierNM1(const std::string& filename)
+
+{
+    return makePtr<ERClassifierNM1>(filename);
+}
+
+/*!
+    Allow to implicitly load the default classifier when creating an ERFilter object.
+    The function takes as parameter the XML or YAML file with the classifier model
+    (e.g. trained_classifierNM2.xml) returns a pointer to ERFilter::Callback.
+*/
+Ptr<ERFilter::Callback> loadClassifierNM2(const std::string& filename)
+{
+    return makePtr<ERClassifierNM2>(filename);
+}
+
+
+/* ------------------------------------------------------------------------------------*/
+/* -------------------------------- Compute Channels NM -------------------------------*/
+/* ------------------------------------------------------------------------------------*/
+
+
+void  get_gradient_magnitude(Mat& _grey_img, Mat& _gradient_magnitude);
+
+void get_gradient_magnitude(Mat& _grey_img, Mat& _gradient_magnitude)
+{
+    Mat C = Mat_<float>(_grey_img);
+
+    Mat kernel = (Mat_<float>(1,3) << -1,0,1);
+    Mat grad_x;
+    filter2D(C, grad_x, -1, kernel, Point(-1,-1), 0, BORDER_DEFAULT);
+
+    Mat kernel2 = (Mat_<float>(3,1) << -1,0,1);
+    Mat grad_y;
+    filter2D(C, grad_y, -1, kernel2, Point(-1,-1), 0, BORDER_DEFAULT);
+
+    magnitude( grad_x, grad_y, _gradient_magnitude);
+}
+
+
+/*!
+    Compute the diferent channels to be processed independently in the N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient
+    magnitude channels (Grad) are used in order to obatin high localization recall.
+    This implementation also the alternative combination of red (R), grren (G), blue (B),
+    lightness (L), and gradient magnitude (Grad).
+
+    \param  _src           Source image. Must be RGB CV_8UC3.
+    \param  _channels      Output vector<Mat> where computed channels are stored.
+    \param  _mode          Mode of operation. Currently the only available options are
+                           ERFILTER_NM_RGBLGrad and ERFILTER_NM_IHSGrad.
+
+*/
+void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode)
+{
+
+    CV_Assert( ( _mode == ERFILTER_NM_RGBLGrad ) || ( _mode == ERFILTER_NM_IHSGrad ) );
+
+    Mat src = _src.getMat();
+    if( src.empty() )
+    {
+        _channels.release();
+        return;
+    }
+
+    // assert RGB image
+    CV_Assert(src.type() == CV_8UC3);
+
+    if (_mode == ERFILTER_NM_IHSGrad)
+    {
+        _channels.create( 4, 1, src.depth());
+
+        Mat hsv;
+        cvtColor(src, hsv, COLOR_RGB2HSV);
+        vector<Mat> channelsHSV;
+        split(hsv, channelsHSV);
+
+        for (int i = 0; i < src.channels(); i++)
+        {
+            _channels.create(src.rows, src.cols, CV_8UC1, i);
+            Mat channel = _channels.getMat(i);
+            channelsHSV.at(i).copyTo(channel);
+        }
+
+        Mat grey;
+        cvtColor(src, grey, COLOR_RGB2GRAY);
+        Mat gradient_magnitude = Mat_<float>(grey.size());
+        get_gradient_magnitude( grey, gradient_magnitude);
+        gradient_magnitude.convertTo(gradient_magnitude, CV_8UC1);
+
+        _channels.create(src.rows, src.cols, CV_8UC1, 3);
+        Mat channelGrad = _channels.getMat(3);
+        gradient_magnitude.copyTo(channelGrad);
+
+    } else if (_mode == ERFILTER_NM_RGBLGrad) {
+
+        _channels.create( 5, 1, src.depth());
+
+        vector<Mat> channelsRGB;
+        split(src, channelsRGB);
+        for (int i = 0; i < src.channels(); i++)
+        {
+            _channels.create(src.rows, src.cols, CV_8UC1, i);
+            Mat channel = _channels.getMat(i);
+            channelsRGB.at(i).copyTo(channel);
+        }
+
+        Mat hls;
+        cvtColor(src, hls, COLOR_RGB2HLS);
+        vector<Mat> channelsHLS;
+        split(hls, channelsHLS);
+
+        _channels.create(src.rows, src.cols, CV_8UC1, 3);
+        Mat channelL = _channels.getMat(3);
+        channelsHLS.at(1).copyTo(channelL);
+
+        Mat grey;
+        cvtColor(src, grey, COLOR_RGB2GRAY);
+        Mat gradient_magnitude = Mat_<float>(grey.size());
+        get_gradient_magnitude( grey, gradient_magnitude);
+        gradient_magnitude.convertTo(gradient_magnitude, CV_8UC1);
+
+        _channels.create(src.rows, src.cols, CV_8UC1, 4);
+        Mat channelGrad = _channels.getMat(4);
+        gradient_magnitude.copyTo(channelGrad);
+    }
+}
+}
diff --git a/modules/objdetect/src/featurepyramid.cpp b/modules/objdetect/src/featurepyramid.cpp
index a193eef43..35cda9539 100644
--- a/modules/objdetect/src/featurepyramid.cpp
+++ b/modules/objdetect/src/featurepyramid.cpp
@@ -512,4 +512,4 @@ int getFeaturePyramid(IplImage * image, CvLSVMFeaturePyramid **maps)
     }
 
     return LATENT_SVM_OK;
-}
\ No newline at end of file
+}
diff --git a/modules/objdetect/src/fft.cpp b/modules/objdetect/src/fft.cpp
index 6db38a214..5c5902f00 100644
--- a/modules/objdetect/src/fft.cpp
+++ b/modules/objdetect/src/fft.cpp
@@ -244,4 +244,3 @@ int fftInverse2d(float *x_in, float *x_out, int numRows, int numColls)
     free(x_outTmp);
     return FFT_OK;
 }
-
diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp
index 088298881..cbb60b091 100644
--- a/modules/objdetect/src/haar.cpp
+++ b/modules/objdetect/src/haar.cpp
@@ -1536,15 +1536,15 @@ cvHaarDetectObjectsForROC( const CvArr* _img,
         maxSize.width = img->cols;
     }
 
-    temp = cvCreateMat( img->rows, img->cols, CV_8UC1 );
-    sum = cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 );
-    sqsum = cvCreateMat( img->rows + 1, img->cols + 1, CV_64FC1 );
+    temp.reset(cvCreateMat( img->rows, img->cols, CV_8UC1 ));
+    sum.reset(cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 ));
+    sqsum.reset(cvCreateMat( img->rows + 1, img->cols + 1, CV_64FC1 ));
 
     if( !cascade->hid_cascade )
         icvCreateHidHaarClassifierCascade(cascade);
 
     if( cascade->hid_cascade->has_tilted_features )
-        tilted = cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 );
+        tilted.reset(cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 ));
 
     result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), storage );
 
@@ -1566,7 +1566,7 @@ cvHaarDetectObjectsForROC( const CvArr* _img,
         if( use_ipp )
             normImg = cvCreateMat( img->rows, img->cols, CV_32FC1 );
 #endif
-        imgSmall = cvCreateMat( img->rows + 1, img->cols + 1, CV_8UC1 );
+        imgSmall.reset(cvCreateMat( img->rows + 1, img->cols + 1, CV_8UC1 ));
 
         for( factor = 1; ; factor *= scaleFactor )
         {
@@ -1635,7 +1635,7 @@ cvHaarDetectObjectsForROC( const CvArr* _img,
 
         if( doCannyPruning )
         {
-            sumcanny = cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 );
+            sumcanny.reset(cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 ));
             cvCanny( img, temp, 0, 50, 3 );
             cvIntegral( temp, sumcanny );
         }
@@ -1965,10 +1965,10 @@ cvLoadHaarClassifierCascade( const char* directory, CvSize orig_window_size )
 
     size += (n+1)*sizeof(char*);
     const char** input_cascade = (const char**)cvAlloc( size );
-    
+
     if( !input_cascade )
       CV_Error( CV_StsNoMem, "Could not allocate memory for input_cascade" );
-      
+
     char* ptr = (char*)(input_cascade + n + 1);
 
     for( int i = 0; i < n; i++ )
@@ -1989,7 +1989,7 @@ cvLoadHaarClassifierCascade( const char* directory, CvSize orig_window_size )
     }
 
     input_cascade[n] = 0;
-    
+
     CvHaarClassifierCascade* cascade = icvLoadCascadeCART( input_cascade, n, orig_window_size );
 
     if( input_cascade )
diff --git a/modules/objdetect/src/hog.cpp b/modules/objdetect/src/hog.cpp
index ad0bb0217..5cc7f6a61 100644
--- a/modules/objdetect/src/hog.cpp
+++ b/modules/objdetect/src/hog.cpp
@@ -820,7 +820,7 @@ const float* HOGCache::getBlock(Point pt, float* buf)
         int h0 = h[0], h1 = h[1];
 
         __m128 _a0 = _mm_set1_ps(a[0]), _a1 = _mm_set1_ps(a[1]);
-		__m128 _w = _mm_mul_ps(_mm_set1_ps(pk.gradWeight), _mm_loadu_ps(pk.histWeights));
+        __m128 _w = _mm_mul_ps(_mm_set1_ps(pk.gradWeight), _mm_loadu_ps(pk.histWeights));
         __m128 _t0 = _mm_mul_ps(_a0, _w), _t1 = _mm_mul_ps(_a1, _w);
 
         _mm_storeu_ps(hist0, _t0);
@@ -1303,7 +1303,7 @@ void HOGDescriptor::detectMultiScale(
     if ( useMeanshiftGrouping )
         groupRectangles_meanshift(foundLocations, foundWeights, foundScales, finalThreshold, winSize);
     else
-        groupRectangles(foundLocations, (int)finalThreshold, 0.2);
+        groupRectangles(foundLocations, foundWeights, (int)finalThreshold, 0.2);
 }
 
 void HOGDescriptor::detectMultiScale(const Mat& img, std::vector<Rect>& foundLocations,
@@ -1353,8 +1353,7 @@ public:
     {
         if(ptr && _fs)
         {
-            FileStorage fs(_fs);
-            fs.fs.addref();
+            FileStorage fs(_fs, false);
             ((const _ClsName*)ptr)->write(fs, String(name));
         }
     }
@@ -2944,5 +2943,82 @@ void HOGDescriptor::readALTModel(String modelfile)
     fclose(modelfl);
 }
 
-}
+void HOGDescriptor::groupRectangles(std::vector<cv::Rect>& rectList, std::vector<double>& weights, int groupThreshold, double eps) const
+{
+    if( groupThreshold <= 0 || rectList.empty() )
+    {
+        return;
+    }
 
+    CV_Assert(rectList.size() == weights.size());
+
+    std::vector<int> labels;
+    int nclasses = partition(rectList, labels, SimilarRects(eps));
+
+    std::vector<cv::Rect_<double> > rrects(nclasses);
+    std::vector<int> numInClass(nclasses, 0);
+    std::vector<double> foundWeights(nclasses, DBL_MIN);
+    std::vector<double> totalFactorsPerClass(nclasses, 1);
+    int i, j, nlabels = (int)labels.size();
+
+    for( i = 0; i < nlabels; i++ )
+    {
+        int cls = labels[i];
+        rrects[cls].x += rectList[i].x;
+        rrects[cls].y += rectList[i].y;
+        rrects[cls].width += rectList[i].width;
+        rrects[cls].height += rectList[i].height;
+        foundWeights[cls] = max(foundWeights[cls], weights[i]);
+        numInClass[cls]++;
+    }
+
+    for( i = 0; i < nclasses; i++ )
+    {
+        // find the average of all ROI in the cluster
+        cv::Rect_<double> r = rrects[i];
+        double s = 1.0/numInClass[i];
+        rrects[i] = cv::Rect_<double>(cv::saturate_cast<double>(r.x*s),
+            cv::saturate_cast<double>(r.y*s),
+            cv::saturate_cast<double>(r.width*s),
+            cv::saturate_cast<double>(r.height*s));
+    }
+
+    rectList.clear();
+    weights.clear();
+
+    for( i = 0; i < nclasses; i++ )
+    {
+        cv::Rect r1 = rrects[i];
+        int n1 = numInClass[i];
+        double w1 = foundWeights[i];
+        if( n1 <= groupThreshold )
+            continue;
+        // filter out small rectangles inside large rectangles
+        for( j = 0; j < nclasses; j++ )
+        {
+            int n2 = numInClass[j];
+
+            if( j == i || n2 <= groupThreshold )
+                continue;
+
+            cv::Rect r2 = rrects[j];
+
+            int dx = cv::saturate_cast<int>( r2.width * eps );
+            int dy = cv::saturate_cast<int>( r2.height * eps );
+
+            if( r1.x >= r2.x - dx &&
+                r1.y >= r2.y - dy &&
+                r1.x + r1.width <= r2.x + r2.width + dx &&
+                r1.y + r1.height <= r2.y + r2.height + dy &&
+                (n2 > std::max(3, n1) || n1 < 3) )
+                break;
+        }
+
+        if( j == nclasses )
+        {
+            rectList.push_back(r1);
+            weights.push_back(w1);
+        }
+    }
+}
+}
diff --git a/modules/objdetect/src/linemod.cpp b/modules/objdetect/src/linemod.cpp
index 0fd3433a5..e8fc8e4aa 100644
--- a/modules/objdetect/src/linemod.cpp
+++ b/modules/objdetect/src/linemod.cpp
@@ -204,11 +204,11 @@ void QuantizedPyramid::selectScatteredFeatures(const std::vector<Candidate>& can
 Ptr<Modality> Modality::create(const String& modality_type)
 {
   if (modality_type == "ColorGradient")
-    return new ColorGradient();
+    return makePtr<ColorGradient>();
   else if (modality_type == "DepthNormal")
-    return new DepthNormal();
+    return makePtr<DepthNormal>();
   else
-    return NULL;
+    return Ptr<Modality>();
 }
 
 Ptr<Modality> Modality::create(const FileNode& fn)
@@ -574,7 +574,7 @@ String ColorGradient::name() const
 Ptr<QuantizedPyramid> ColorGradient::processImpl(const Mat& src,
                                                      const Mat& mask) const
 {
-  return new ColorGradientPyramid(src, mask, weak_threshold, num_features, strong_threshold);
+  return makePtr<ColorGradientPyramid>(src, mask, weak_threshold, num_features, strong_threshold);
 }
 
 void ColorGradient::read(const FileNode& fn)
@@ -889,8 +889,8 @@ String DepthNormal::name() const
 Ptr<QuantizedPyramid> DepthNormal::processImpl(const Mat& src,
                                                    const Mat& mask) const
 {
-  return new DepthNormalPyramid(src, mask, distance_threshold, difference_threshold,
-                                num_features, extract_threshold);
+  return makePtr<DepthNormalPyramid>(src, mask, distance_threshold, difference_threshold,
+                                     num_features, extract_threshold);
 }
 
 void DepthNormal::read(const FileNode& fn)
@@ -1828,16 +1828,16 @@ static const int T_DEFAULTS[] = {5, 8};
 Ptr<Detector> getDefaultLINE()
 {
   std::vector< Ptr<Modality> > modalities;
-  modalities.push_back(new ColorGradient);
-  return new Detector(modalities, std::vector<int>(T_DEFAULTS, T_DEFAULTS + 2));
+  modalities.push_back(makePtr<ColorGradient>());
+  return makePtr<Detector>(modalities, std::vector<int>(T_DEFAULTS, T_DEFAULTS + 2));
 }
 
 Ptr<Detector> getDefaultLINEMOD()
 {
   std::vector< Ptr<Modality> > modalities;
-  modalities.push_back(new ColorGradient);
-  modalities.push_back(new DepthNormal);
-  return new Detector(modalities, std::vector<int>(T_DEFAULTS, T_DEFAULTS + 2));
+  modalities.push_back(makePtr<ColorGradient>());
+  modalities.push_back(makePtr<DepthNormal>());
+  return makePtr<Detector>(modalities, std::vector<int>(T_DEFAULTS, T_DEFAULTS + 2));
 }
 
 } // namespace linemod
diff --git a/modules/objdetect/src/lsvmtbbversion.cpp b/modules/objdetect/src/lsvmtbbversion.cpp
index 908c685b8..26e85bd1b 100644
--- a/modules/objdetect/src/lsvmtbbversion.cpp
+++ b/modules/objdetect/src/lsvmtbbversion.cpp
@@ -120,4 +120,3 @@ int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const i
     return LATENT_SVM_OK;
 };
 #endif
-
diff --git a/modules/objdetect/src/precomp.cpp b/modules/objdetect/src/precomp.cpp
deleted file mode 100644
index 3e0ec42de..000000000
--- a/modules/objdetect/src/precomp.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-/* End of file. */
diff --git a/modules/objdetect/src/precomp.hpp b/modules/objdetect/src/precomp.hpp
index de636de02..e5157d022 100644
--- a/modules/objdetect/src/precomp.hpp
+++ b/modules/objdetect/src/precomp.hpp
@@ -46,6 +46,8 @@
 #include "opencv2/objdetect.hpp"
 #include "opencv2/imgproc.hpp"
 
+#include "opencv2/ml.hpp"
+
 #include "opencv2/core/utility.hpp"
 
 #include "opencv2/opencv_modules.hpp"
diff --git a/modules/objdetect/test/test_cascadeandhog.cpp b/modules/objdetect/test/test_cascadeandhog.cpp
index b4fd541a1..a30109905 100644
--- a/modules/objdetect/test/test_cascadeandhog.cpp
+++ b/modules/objdetect/test/test_cascadeandhog.cpp
@@ -426,10 +426,10 @@ int CV_CascadeDetectorTest::detectMultiScale_C( const string& filename,
                                                 int di, const Mat& img,
                                                 vector<Rect>& objects )
 {
-    Ptr<CvHaarClassifierCascade> c_cascade = cvLoadHaarClassifierCascade(filename.c_str(), cvSize(0,0));
-    Ptr<CvMemStorage> storage = cvCreateMemStorage();
+    Ptr<CvHaarClassifierCascade> c_cascade(cvLoadHaarClassifierCascade(filename.c_str(), cvSize(0,0)));
+    Ptr<CvMemStorage> storage(cvCreateMemStorage());
 
-    if( c_cascade.empty() )
+    if( !c_cascade )
     {
         ts->printf( cvtest::TS::LOG, "cascade %s can not be opened");
         return cvtest::TS::FAIL_INVALID_TEST_DATA;
diff --git a/modules/objdetect/test/test_precomp.cpp b/modules/objdetect/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/objdetect/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/ocl/doc/data_structures.rst b/modules/ocl/doc/data_structures.rst
index 8519c40a3..556efa961 100644
--- a/modules/ocl/doc/data_structures.rst
+++ b/modules/ocl/doc/data_structures.rst
@@ -186,4 +186,4 @@ Only basic flags are supported in oclMat(i.e. depth number of channels)
 
 All the 3-channel matrix(i.e. RGB image) are represented by 4-channel matrix in oclMat. It means 3-channel image have 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and oclMat.
 
-For example: If a oclMat has 3 channels, channels() returns 3 and oclchannels() returns 4
\ No newline at end of file
+For example: If a oclMat has 3 channels, channels() returns 3 and oclchannels() returns 4
diff --git a/modules/ocl/doc/feature_detection_and_description.rst b/modules/ocl/doc/feature_detection_and_description.rst
index d4fd6e65f..11fb27242 100644
--- a/modules/ocl/doc/feature_detection_and_description.rst
+++ b/modules/ocl/doc/feature_detection_and_description.rst
@@ -363,7 +363,9 @@ The class implements Histogram of Oriented Gradients ([Dalal2005]_) object detec
 
 Interfaces of all methods are kept similar to the ``CPU HOG`` descriptor and detector analogues as much as possible.
 
+.. note::
 
+   (Ocl) An example using the HOG descriptor can be found at opencv_source_code/samples/ocl/hog.cpp
 
 ocl::HOGDescriptor::HOGDescriptor
 -------------------------------------
@@ -497,4 +499,4 @@ Returns block descriptors computed for the whole image.
 
         * **DESCR_FORMAT_COL_BY_COL** - Column-major order.
 
-The function is mainly used to learn the classifier.
\ No newline at end of file
+The function is mainly used to learn the classifier.
diff --git a/modules/ocl/doc/image_filtering.rst b/modules/ocl/doc/image_filtering.rst
index 3da5d3ded..4798bddaa 100644
--- a/modules/ocl/doc/image_filtering.rst
+++ b/modules/ocl/doc/image_filtering.rst
@@ -151,9 +151,9 @@ Returns void
     :param temp1: Convolution kernel, a single-channel floating point matrix. The size is not greater than the  ``image`` size. The type is the same as  ``image``.
 
     :param result: The destination image
-    
+
     :param ccorr: Flags to evaluate cross-correlation instead of convolution.
-    
+
     :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`ocl::ConvolveBuf`.
 
 Convolves an image with the kernel. Supports only CV_32FC1 data types and do not support ROI.
@@ -162,7 +162,7 @@ ocl::bilateralFilter
 --------------------
 Returns void
 
-.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT)
+.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT)
 
     :param src: The source image
 
@@ -315,4 +315,4 @@ Performs linear blending of two images.
 
     :param weights2: Weights for second image. Must have tha same size as ``img2`` . Supports only ``CV_32F`` type.
 
-    :param result: Destination image.
\ No newline at end of file
+    :param result: Destination image.
diff --git a/modules/ocl/doc/image_processing.rst b/modules/ocl/doc/image_processing.rst
index 3f9730174..98f334762 100644
--- a/modules/ocl/doc/image_processing.rst
+++ b/modules/ocl/doc/image_processing.rst
@@ -257,7 +257,10 @@ The class can calculate an optical flow for a sparse feature set or dense optica
 
 .. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
 
+.. note::
 
+   (Ocl) An example the Lucas Kanade optical flow pyramid method can be found at opencv_source_code/samples/ocl/pyrlk_optical_flow.cpp
+   (Ocl) An example for square detection can be found at opencv_source_code/samples/ocl/squares.cpp
 
 ocl::PyrLKOpticalFlow::sparse
 -----------------------------
@@ -363,4 +366,3 @@ Finds circles in a grayscale image using the Hough transform.
 
 .. note:: Currently only non-ROI oclMat is supported for src.
 .. seealso:: :ocv:func:`HoughCircles`
-
diff --git a/modules/ocl/doc/matrix_reductions.rst b/modules/ocl/doc/matrix_reductions.rst
index 69b27eb65..aed9fa564 100644
--- a/modules/ocl/doc/matrix_reductions.rst
+++ b/modules/ocl/doc/matrix_reductions.rst
@@ -67,4 +67,4 @@ Returns the squared sum of matrix elements for each channel
 
     :param m: The Source image of all depth
 
-Counts the squared sum of matrix elements for each channel.
\ No newline at end of file
+Counts the squared sum of matrix elements for each channel.
diff --git a/modules/ocl/doc/object_detection.rst b/modules/ocl/doc/object_detection.rst
index 17eb62d0e..024ce5268 100644
--- a/modules/ocl/doc/object_detection.rst
+++ b/modules/ocl/doc/object_detection.rst
@@ -12,23 +12,24 @@ Cascade classifier class used for object detection. Supports HAAR cascade classi
     class CV_EXPORTS OclCascadeClassifier : public CascadeClassifier
     {
     public:
-          OclCascadeClassifier() {};
-          ~OclCascadeClassifier() {};
-           CvSeq *oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage,
-                                      double scaleFactor,int minNeighbors,
-                                      int flags, CvSize minSize = cvSize(0, 0),
-                                      CvSize maxSize = cvSize(0, 0));
+            void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
+                                              double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
+                                              Size minSize = Size(), Size maxSize = Size());
     };
 
+.. note::
+
+   (Ocl) A face detection example using cascade classifiers can be found at opencv_source_code/samples/ocl/facedetect.cpp
+
 ocl::OclCascadeClassifier::oclHaarDetectObjects
 ------------------------------------------------------
-Returns the detected objects by a list of rectangles
+Detects objects of different sizes in the input image.
 
-.. ocv:function:: CvSeq* ocl::OclCascadeClassifier::oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0))
+.. ocv:function:: void ocl::OclCascadeClassifier::detectMultiScale(oclMat &image, std::vector<cv::Rect>& faces, double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0, Size minSize = Size(), Size maxSize = Size())
 
     :param image:  Matrix of type CV_8U containing an image where objects should be detected.
 
-    :param imageobjectsBuff: Buffer to store detected objects (rectangles). If it is empty, it is allocated with the defaultsize. If not empty, the function searches not more than N  objects, where N = sizeof(objectsBufers data)/sizeof(cv::Rect).
+    :param faces: Vector of rectangles where each rectangle contains the detected object.
 
     :param scaleFactor: Parameter specifying how much the image size is reduced at each image scale.
 
@@ -36,7 +37,9 @@ Returns the detected objects by a list of rectangles
 
     :param minSize: Minimum possible object size. Objects smaller than that are ignored.
 
-Detects objects of different sizes in the input image,only tested for face detection now. The function returns the number of detected objects.
+    :param maxSize: Maximum possible object size. Objects larger than that are ignored.
+
+The function provides a very similar interface with that in CascadeClassifier class, except using oclMat as input image.
 
 ocl::MatchTemplateBuf
 ---------------------
diff --git a/modules/ocl/doc/operations_on_matrices.rst b/modules/ocl/doc/operations_on_matrices.rst
index 145659b99..e47e72092 100644
--- a/modules/ocl/doc/operations_on_matrices.rst
+++ b/modules/ocl/doc/operations_on_matrices.rst
@@ -481,4 +481,40 @@ Performs generalized matrix multiplication.
             * **GEMM_1_T** transpose  ``src1``
             * **GEMM_2_T** transpose  ``src2``
 
-.. seealso:: :ocv:func:`gemm`
\ No newline at end of file
+.. seealso:: :ocv:func:`gemm`
+
+ocl::sortByKey
+------------------
+Returns void
+
+.. ocv:function:: void ocl::sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false)
+
+    :param keys:   The keys to be used as sorting indices.
+
+    :param values: The array of values.
+
+    :param isGreaterThan: Determine sorting order.
+
+    :param method: supported sorting methods:
+            * **SORT_BITONIC**   bitonic sort, only support power-of-2 buffer size
+            * **SORT_SELECTION** selection sort, currently cannot sort duplicate keys
+            * **SORT_MERGE**     merge sort
+            * **SORT_RADIX**     radix sort, only support signed int/float keys(``CV_32S``/``CV_32F``)
+
+Returns the sorted result of all the elements in values based on equivalent keys.
+
+The element unit in the values to be sorted is determined from the data type,
+i.e., a ``CV_32FC2`` input ``{a1a2, b1b2}`` will be considered as two elements, regardless its matrix dimension.
+
+Both keys and values will be sorted inplace.
+
+Keys needs to be a **single** channel `oclMat`.
+
+Example::
+    input -
+    keys   = {2,    3,   1}   (CV_8UC1)
+    values = {10,5, 4,3, 6,2} (CV_8UC2)
+    sortByKey(keys, values, SORT_SELECTION, false);
+    output -
+    keys   = {1,    2,   3}   (CV_8UC1)
+    values = {6,2, 10,5, 4,3} (CV_8UC2)
diff --git a/modules/ocl/doc/structures_and_utility_functions.rst b/modules/ocl/doc/structures_and_utility_functions.rst
index 32aa8b9ea..3810d7e2d 100644
--- a/modules/ocl/doc/structures_and_utility_functions.rst
+++ b/modules/ocl/doc/structures_and_utility_functions.rst
@@ -55,4 +55,4 @@ Returns the pointer to the opencl command queue
 
 .. ocv:function:: void* ocl::getoclCommandQueue()
 
-Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program.
\ No newline at end of file
+Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program.
diff --git a/modules/ocl/include/opencv2/ocl.hpp b/modules/ocl/include/opencv2/ocl.hpp
index 9b5e761ab..fc7c114d9 100644
--- a/modules/ocl/include/opencv2/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl.hpp
@@ -180,8 +180,8 @@ namespace cv
 
         //! Enable or disable OpenCL program binary caching onto local disk
         // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
-        // compiled OpenCL program to be cached to the path automatically as "path/*.clb" 
-        // binary file, which will be reused when the OpenCV executable is started again. 
+        // compiled OpenCL program to be cached to the path automatically as "path/*.clb"
+        // binary file, which will be reused when the OpenCV executable is started again.
         //
         // Caching mode is controlled by the following enums
         // Notes
@@ -198,7 +198,7 @@ namespace cv
         };
         CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
 
-        //! set where binary cache to be saved to 
+        //! set where binary cache to be saved to
         CV_EXPORTS void setBinpath(const char *path);
 
         class CV_EXPORTS oclMatExpr;
@@ -245,6 +245,11 @@ namespace cv
             operator Mat() const;
             void download(cv::Mat &m) const;
 
+            //! convert to _InputArray
+            operator _InputArray();
+
+            //! convert to _OutputArray
+            operator _OutputArray();
 
             //! returns a new oclMatrix header for the specified row
             oclMat row(int y) const;
@@ -386,6 +391,9 @@ namespace cv
             int wholecols;
         };
 
+        // convert InputArray/OutputArray to oclMat references
+        CV_EXPORTS oclMat& getOclMatRef(InputArray src);
+        CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
 
         ///////////////////// mat split and merge /////////////////////////////////
         //! Compose a multi-channel array from several single-channel arrays
@@ -505,28 +513,21 @@ namespace cv
         CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
         //! only 8UC1 and 256 bins is supported now
         CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
-        
+
         //! only 8UC1 is supported now
-        class CV_EXPORTS CLAHE
-        {
-        public:
-            virtual void apply(const oclMat &src, oclMat &dst) = 0;
+        CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 
-            virtual void setClipLimit(double clipLimit) = 0;
-            virtual double getClipLimit() const = 0;
-
-            virtual void setTilesGridSize(Size tileGridSize) = 0;
-            virtual Size getTilesGridSize() const = 0;
-
-            virtual void collectGarbage() = 0;
-
-            virtual ~CLAHE() {}
-        };
-        CV_EXPORTS Ptr<cv::ocl::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
-        
         //! bilateralFilter
         // supports 8UC1 8UC4
-        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
+        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
+
+        //! Applies an adaptive bilateral filter to the input image
+        //  This is not truly a bilateral filter. Instead of using user provided fixed parameters,
+        //  the function calculates a constant at each window based on local standard deviation,
+        //  and use this constant to do filtering.
+        //  supports 8UC1 8UC3
+        CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
+
         //! computes exponent of each matrix element (b = e**a)
         // supports only CV_32FC1 type
         CV_EXPORTS void exp(const oclMat &a, oclMat &b);
@@ -717,7 +718,7 @@ namespace cv
         //! returns 2D filter with the specified kernel
         // supports CV_8UC1 and CV_8UC4 types
         CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
-                Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 
         //! returns the non-separable linear filter engine
         CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
@@ -860,62 +861,29 @@ namespace cv
         CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
             int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 
+
+        /////////////////////////////////// ML ///////////////////////////////////////////
+
+        //! Compute closest centers for each lines in source and lable it after center's index
+        // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
+        CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers);
+
+        //!Does k-means procedure on GPU
+        // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
+        CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
+                                     TermCriteria criteria, int attemps, int flags, oclMat &centers);
+
+
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
         ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#if 0
         class CV_EXPORTS OclCascadeClassifier : public  cv::CascadeClassifier
         {
         public:
-            OclCascadeClassifier() {};
-            ~OclCascadeClassifier() {};
-
-            CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
-                                        int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
-        };
-#endif
-
-#if 0
-        class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
-        {
-        public:
-            OclCascadeClassifierBuf() :
-                m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
-
-            ~OclCascadeClassifierBuf() { release(); }
-
             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
                                   double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
                                   Size minSize = Size(), Size maxSize = Size());
-            void release();
-
-        private:
-            void Init(const int rows, const int cols, double scaleFactor, int flags,
-                      const int outputsz, const size_t localThreads[],
-                      Size minSize, Size maxSize);
-            void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
-            void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
-                                         const double scaleFactor, const size_t localThreads[],
-                                         Size minSize, Size maxSize);
-            void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
-
-            int m_rows;
-            int m_cols;
-            int m_flags;
-            int m_loopcount;
-            int m_nodenum;
-            bool findBiggestObject;
-            bool initialized;
-            double m_scaleFactor;
-            Size m_minSize;
-            Size m_maxSize;
-            std::vector<Size> sizev;
-            std::vector<float> scalev;
-            oclMat gimg1, gsum, gsqsum;
-            void * buffers;
         };
-#endif
 
         /////////////////////////////// Pyramid /////////////////////////////////////
         CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
@@ -1546,6 +1514,45 @@ namespace cv
 
             bool isDeviceArch11_;
         };
+
+        class CV_EXPORTS FarnebackOpticalFlow
+        {
+        public:
+            FarnebackOpticalFlow();
+
+            int numLevels;
+            double pyrScale;
+            bool fastPyramids;
+            int winSize;
+            int numIters;
+            int polyN;
+            double polySigma;
+            int flags;
+
+            void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
+
+            void releaseMemory();
+
+        private:
+            void prepareGaussian(
+                int n, double sigma, float *g, float *xg, float *xxg,
+                double &ig11, double &ig03, double &ig33, double &ig55);
+
+            void setPolynomialExpansionConsts(int n, double sigma);
+
+            void updateFlow_boxFilter(
+                const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
+                oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
+
+            void updateFlow_gaussianBlur(
+                const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
+                oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
+
+            oclMat frames_[2];
+            oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
+            std::vector<oclMat> pyramid0_, pyramid1_;
+        };
+
         //////////////// build warping maps ////////////////////
         //! builds plane warping maps
         CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
@@ -1773,6 +1780,180 @@ namespace cv
             oclMat diff_buf;
             oclMat norm_buf;
         };
+        // current supported sorting methods
+        enum
+        {
+            SORT_BITONIC,   // only support power-of-2 buffer size
+            SORT_SELECTION, // cannot sort duplicate keys
+            SORT_MERGE,
+            SORT_RADIX      // only support signed int/float keys(CV_32S/CV_32F)
+        };
+        //! Returns the sorted result of all the elements in input based on equivalent keys.
+        //
+        //  The element unit in the values to be sorted is determined from the data type,
+        //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless its
+        //  matrix dimension.
+        //  both keys and values will be sorted inplace
+        //  Key needs to be single channel oclMat.
+        //
+        //  Example:
+        //  input -
+        //    keys   = {2,    3,   1}   (CV_8UC1)
+        //    values = {10,5, 4,3, 6,2} (CV_8UC2)
+        //  sortByKey(keys, values, SORT_SELECTION, false);
+        //  output -
+        //    keys   = {1,    2,   3}   (CV_8UC1)
+        //    values = {6,2, 10,5, 4,3} (CV_8UC2)
+        void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
+        /*!Base class for MOG and MOG2!*/
+        class CV_EXPORTS BackgroundSubtractor
+        {
+        public:
+            //! the virtual destructor
+            virtual ~BackgroundSubtractor();
+            //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
+            virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
+
+            //! computes a background image
+            virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
+        };
+                /*!
+        Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm
+
+        The class implements the following algorithm:
+        "An improved adaptive background mixture model for real-time tracking with shadow detection"
+        P. KadewTraKuPong and R. Bowden,
+        Proc. 2nd European Workshp on Advanced Video-Based Surveillance Systems, 2001."
+        http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+        */
+        class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor
+            MOG(int nmixtures = -1);
+
+            //! re-initiaization method
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
+
+            //! computes a background image which are the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers
+            void release();
+
+            int history;
+            float varThreshold;
+            float backgroundRatio;
+            float noiseSigma;
+
+        private:
+            int nmixtures_;
+
+            Size frameSize_;
+            int frameType_;
+            int nframes_;
+
+            oclMat weight_;
+            oclMat sortKey_;
+            oclMat mean_;
+            oclMat var_;
+        };
+
+        /*!
+        The class implements the following algorithm:
+        "Improved adaptive Gausian mixture model for background subtraction"
+        Z.Zivkovic
+        International Conference Pattern Recognition, UK, August, 2004.
+        http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+        */
+        class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor
+            MOG2(int nmixtures = -1);
+
+            //! re-initiaization method
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
+
+            //! computes a background image which are the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers
+            void release();
+
+            // parameters
+            // you should call initialize after parameters changes
+
+            int history;
+
+            //! here it is the maximum allowed number of mixture components.
+            //! Actual number is determined dynamically per pixel
+            float varThreshold;
+            // threshold on the squared Mahalanobis distance to decide if it is well described
+            // by the background model or not. Related to Cthr from the paper.
+            // This does not influence the update of the background. A typical value could be 4 sigma
+            // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+
+            /////////////////////////
+            // less important parameters - things you might change but be carefull
+            ////////////////////////
+
+            float backgroundRatio;
+            // corresponds to fTB=1-cf from the paper
+            // TB - threshold when the component becomes significant enough to be included into
+            // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
+            // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+            // it is considered foreground
+            // float noiseSigma;
+            float varThresholdGen;
+
+            //correspondts to Tg - threshold on the squared Mahalan. dist. to decide
+            //when a sample is close to the existing components. If it is not close
+            //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
+            //Smaller Tg leads to more generated components and higher Tg might make
+            //lead to small number of components but they can grow too large
+            float fVarInit;
+            float fVarMin;
+            float fVarMax;
+
+            //initial variance  for the newly generated components.
+            //It will will influence the speed of adaptation. A good guess should be made.
+            //A simple way is to estimate the typical standard deviation from the images.
+            //I used here 10 as a reasonable value
+            // min and max can be used to further control the variance
+            float fCT; //CT - complexity reduction prior
+            //this is related to the number of samples needed to accept that a component
+            //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+            //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+            //shadow detection parameters
+            bool bShadowDetection; //default 1 - do shadow detection
+            unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
+            float fTau;
+            // Tau - shadow threshold. The shadow is detected if the pixel is darker
+            //version of the background. Tau is a threshold on how much darker the shadow can be.
+            //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+            //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+        private:
+            int nmixtures_;
+
+            Size frameSize_;
+            int frameType_;
+            int nframes_;
+
+            oclMat weight_;
+            oclMat variance_;
+            oclMat mean_;
+
+            oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
+        };
     }
 }
 #if defined _MSC_VER && _MSC_VER >= 1200
diff --git a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp b/modules/ocl/include/opencv2/ocl/matrix_operations.hpp
index bac325939..1361367fc 100644
--- a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp
+++ b/modules/ocl/include/opencv2/ocl/matrix_operations.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
-#define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
+#ifndef __OPENCV_OCL_MATRIX_OPERATIONS_HPP__
+#define __OPENCV_OCL_MATRIX_OPERATIONS_HPP__
 
 namespace cv
 {
@@ -211,8 +211,8 @@ namespace cv
         {
             flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
             offset += roi.y * step + roi.x * elemSize();
-            CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
-                       0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
+            CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.wholecols &&
+                       0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.wholerows );
             if( refcount )
                 CV_XADD(refcount, 1);
             if( rows <= 0 || cols <= 0 )
@@ -514,4 +514,4 @@ namespace cv
 
 } /* end of namespace cv */
 
-#endif /* __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ */
+#endif /* __OPENCV_OCL_MATRIX_OPERATIONS_HPP__ */
diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp
index 4c9980a4b..f8172e0c2 100644
--- a/modules/ocl/include/opencv2/ocl/private/util.hpp
+++ b/modules/ocl/include/opencv2/ocl/private/util.hpp
@@ -113,7 +113,7 @@ namespace cv
                                   size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
         void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
                                   size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
-                                  int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
+                                  int depth, const char *build_options, FLUSH_MODE finish_mode = DISABLE);
         // bind oclMat to OpenCL image textures
         // note:
         //   1. there is no memory management. User need to explicitly release the resource
@@ -131,7 +131,7 @@ namespace cv
             {
                 openCLFree(tex_);
             }
-            operator cl_mem() 
+            operator cl_mem()
             {
                 return tex_;
             }
diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp
index 9455cbbf1..d1f24f165 100644
--- a/modules/ocl/perf/main.cpp
+++ b/modules/ocl/perf/main.cpp
@@ -40,118 +40,56 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
 
-int main(int argc, const char *argv[])
+const char * impls[] =
 {
-    vector<ocl::Info> oclinfo;
-    int num_devices = getDevice(oclinfo);
+    IMPL_OCL,
+    IMPL_PLAIN,
+#ifdef HAVE_OPENCV_GPU
+    IMPL_GPU
+#endif
+};
 
-    if (num_devices < 1)
-    {
-        cerr << "no device found\n";
-        return -1;
-    }
-
-    int devidx = 0;
-
-    for (size_t i = 0; i < oclinfo.size(); i++)
-    {
-        for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++)
-        {
-            printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str());
-        }
-    }
-
-    const char *keys =
-        "{ h help    | false | print help message }"
-        "{ f filter  |       | filter for test }"
-        "{ w workdir |       | set working directory }"
-        "{ l list    | false | show all tests }"
-        "{ d device  | 0     | device id }"
-        "{ i iters   | 10    | iteration count }"
-        "{ m warmup  | 1     | gpu warm up iteration count}"
-        "{ t xtop    | 1.1	  | xfactor top boundary}"
-        "{ b xbottom | 0.9	  | xfactor bottom boundary}"
-        "{ v verify  | false | only run gpu once to verify if problems occur}";
+int main(int argc, char ** argv)
+{
+    const char * keys =
+        "{ h help     | false              | print help message }"
+        "{ t type     | gpu                | set device type:cpu or gpu}"
+        "{ p platform | 0                  | set platform id }"
+        "{ d device   | 0                  | set device id }";
 
     CommandLineParser cmd(argc, argv, keys);
-
     if (cmd.has("help"))
     {
-        cout << "Avaible options:" << endl;
+        cout << "Available options besides google test option:" << endl;
         cmd.printMessage();
         return 0;
     }
 
+    string type = cmd.get<string>("type");
+    unsigned int pid = cmd.get<unsigned int>("platform");
     int device = cmd.get<int>("device");
 
-    if (device < 0 || device >= num_devices)
+    int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU :
+                               cv::ocl::CVCL_DEVICE_TYPE_GPU;
+
+    std::vector<cv::ocl::Info> oclinfo;
+    int devnums = cv::ocl::getDevice(oclinfo, flag);
+    if (devnums <= device || device < 0)
     {
-        cerr << "Invalid device ID" << endl;
+        std::cout << "device invalid\n";
         return -1;
     }
 
-    if (cmd.get<bool>("verify"))
+    if (pid >= oclinfo.size())
     {
-        TestSystem::instance().setNumIters(1);
-        TestSystem::instance().setGPUWarmupIters(0);
-        TestSystem::instance().setCPUIters(0);
+        std::cout << "platform invalid\n";
+        return -1;
     }
 
-    devidx = 0;
+    cv::ocl::setDevice(oclinfo[pid], device);
+    cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE);
 
-    for (size_t i = 0; i < oclinfo.size(); i++)
-    {
-        for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++, devidx++)
-        {
-            if (device == devidx)
-            {
-                ocl::setDevice(oclinfo[i], (int)j);
-                TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]);
-                printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str());
-                goto END_DEV;
-            }
-        }
-    }
-
-END_DEV:
-
-    string filter = cmd.get<string>("filter");
-    string workdir = cmd.get<string>("workdir");
-    bool list = cmd.has("list");
-    int iters = cmd.get<int>("iters");
-    int wu_iters = cmd.get<int>("warmup");
-    double x_top = cmd.get<double>("xtop");
-    double x_bottom = cmd.get<double>("xbottom");
-
-    TestSystem::instance().setTopThreshold(x_top);
-    TestSystem::instance().setBottomThreshold(x_bottom);
-
-    if (!filter.empty())
-    {
-        TestSystem::instance().setTestFilter(filter);
-    }
-
-    if (!workdir.empty())
-    {
-        if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
-        {
-            workdir += '/';
-        }
-
-        TestSystem::instance().setWorkingDir(workdir);
-    }
-
-    if (list)
-    {
-        TestSystem::instance().setListMode(true);
-    }
-
-    TestSystem::instance().setNumIters(iters);
-    TestSystem::instance().setGPUWarmupIters(wu_iters);
-
-    TestSystem::instance().run();
-
-    return 0;
-}
\ No newline at end of file
+    CV_PERF_TEST_MAIN_INTERNALS(ocl, impls)
+}
diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp
index 3ef0634e7..814b272f0 100644
--- a/modules/ocl/perf/perf_arithm.cpp
+++ b/modules/ocl/perf/perf_arithm.cpp
@@ -44,1154 +44,884 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::get;
+using std::tr1::tuple;
+
 ///////////// Lut ////////////////////////
-PERFTEST(lut)
+
+typedef Size_MatType LUTFixture;
+
+PERF_TEST_P(LUTFixture, LUT,
+          ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                             OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
 {
-    Mat src, lut, dst, ocl_dst;
-    ocl::oclMat d_src, d_lut, d_dst;
+    // getting params
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_8UC3};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC3"};
+    // creating src data
+    Mat src(srcSize, type), lut(1, 256, CV_8UC1);
+    int dstType = CV_MAKETYPE(lut.depth(), src.channels());
+    Mat dst(srcSize, dstType);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    randu(lut, 0, 2);
+    declare.in(src, WARMUP_RNG).in(lut).out(dst);
+
+    // select implementation
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
+        ocl::oclMat oclSrc(src), oclLut(lut), oclDst(srcSize, dstType);
 
-            gen(src, size, size, all_type[j], 0, 256);
-            gen(lut, 1, 256, CV_8UC1, 0, 1);
-
-            LUT(src, lut, dst);
-
-            CPU_ON;
-            LUT(src, lut, dst);
-            CPU_OFF;
-
-            d_src.upload(src);
-            d_lut.upload(lut);
-
-            WARMUP_ON;
-            ocl::LUT(d_src, d_lut, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::LUT(d_src, d_lut, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            d_lut.upload(lut);
-            ocl::LUT(d_src, d_lut, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0);
-        }
+        OCL_TEST_CYCLE() cv::ocl::LUT(oclSrc, oclLut, oclDst);
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::LUT(src, lut, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Exp ////////////////////////
-PERFTEST(Exp)
+
+typedef TestBaseWithParam<Size> ExpFixture;
+
+PERF_TEST_P(ExpFixture, Exp, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    // getting params
+    const Size srcSize = GetParam();
+    const double eps = 3e-1;
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    // creating src data
+    Mat src(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
+    declare.in(src).out(dst);
+    randu(src, 5, 16);
+
+    // select implementation
+    if (RUN_OCL_IMPL)
     {
-        SUBTEST << size << 'x' << size << "; CV_32FC1";
+        ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
 
-        gen(src, size, size, CV_32FC1, 5, 16);
+        OCL_TEST_CYCLE() cv::ocl::exp(oclSrc, oclDst);
 
-        exp(src, dst);
+        oclDst.download(dst);
 
-        CPU_ON;
-        exp(src, dst);
-        CPU_OFF;
-        d_src.upload(src);
-
-        WARMUP_ON;
-        ocl::exp(d_src, d_dst);
-        WARMUP_OFF;
-
-        GPU_ON;
-        ocl::exp(d_src, d_dst);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src.upload(src);
-        ocl::exp(d_src, d_dst);
-        d_dst.download(ocl_dst);
-        GPU_FULL_OFF;
-
-        TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 2);
+        SANITY_CHECK(dst, eps);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::exp(src, dst);
+
+        SANITY_CHECK(dst, eps);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// LOG ////////////////////////
-PERFTEST(Log)
+
+typedef TestBaseWithParam<Size> LogFixture;
+
+PERF_TEST_P(LogFixture, Log, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    // getting params
+    const Size srcSize = GetParam();
+    const double eps = 1e-5;
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    // creating src data
+    Mat src(srcSize, CV_32F), dst(srcSize, src.type());
+    randu(src, 1, 10);
+    declare.in(src).out(dst);
+
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(3.6);
+
+    // select implementation
+    if (RUN_OCL_IMPL)
     {
-        SUBTEST << size << 'x' << size << "; 32F";
+        ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
 
-        gen(src, size, size, CV_32F, 1, 10);
+        OCL_TEST_CYCLE() cv::ocl::log(oclSrc, oclDst);
 
-        log(src, dst);
+        oclDst.download(dst);
 
-        CPU_ON;
-        log(src, dst);
-        CPU_OFF;
-        d_src.upload(src);
-
-        WARMUP_ON;
-        ocl::log(d_src, d_dst);
-        WARMUP_OFF;
-
-        GPU_ON;
-        ocl::log(d_src, d_dst);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src.upload(src);
-        ocl::log(d_src, d_dst);
-        d_dst.download(ocl_dst);
-        GPU_FULL_OFF;
-
-        TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
+        SANITY_CHECK(dst, eps);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::log(src, dst);
+
+        SANITY_CHECK(dst, eps);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Add ////////////////////////
-PERFTEST(Add)
+
+typedef Size_MatType AddFixture;
+
+PERF_TEST_P(AddFixture, Add,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
 {
-    Mat src1, src2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
+    // getting params
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+    // creating src data
+    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
+    randu(src1, 0, 1);
+    randu(src2, 0, 1);
+    declare.in(src1, src2).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    // select implementation
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
 
-            gen(src1, size, size, all_type[j], 0, 1);
-            gen(src2, size, size, all_type[j], 0, 1);
+        OCL_TEST_CYCLE() cv::ocl::add(oclSrc1, oclSrc2, oclDst);
 
-            add(src1, src2, dst);
-
-            CPU_ON;
-            add(src1, src2, dst);
-            CPU_OFF;
-
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::add(d_src1, d_src2, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::add(d_src1, d_src2, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::add(d_src1, d_src2, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::add(src1, src2, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Mul ////////////////////////
-PERFTEST(Mul)
+
+typedef Size_MatType MulFixture;
+
+PERF_TEST_P(MulFixture, Mul, ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                                                OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src1, src2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
+    // getting params
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    // creating src data
+    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
+    randu(src1, 0, 256);
+    randu(src2, 0, 256);
+    declare.in(src1, src2).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    // select implementation
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::multiply(oclSrc1, oclSrc2, oclDst);
 
-            multiply(src1, src2, dst);
-
-            CPU_ON;
-            multiply(src1, src2, dst);
-            CPU_OFF;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::multiply(d_src1, d_src2, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::multiply(d_src1, d_src2, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::multiply(d_src1, d_src2, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::multiply(src1, src2, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Div ////////////////////////
-PERFTEST(Div)
+
+typedef Size_MatType DivFixture;
+
+PERF_TEST_P(DivFixture, Div,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src1, src2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    // getting params
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    // creating src data
+    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
+    declare.in(src1, src2).out(dst);
+    randu(src1, 0, 256);
+    randu(src2, 0, 256);
+
+    if ((srcSize == OCL_SIZE_4000 && type == CV_8UC1) ||
+            (srcSize == OCL_SIZE_2000 && type == CV_8UC4))
+        declare.time(4.2);
+    else if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
+        declare.time(16.6);
+
+    // select implementation
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::divide(oclSrc1, oclSrc2, oclDst);
 
-            divide(src1, src2, dst);
-
-            CPU_ON;
-            divide(src1, src2, dst);
-            CPU_OFF;
-
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::divide(d_src1, d_src2, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::divide(d_src1, d_src2, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::divide(d_src1, d_src2, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::divide(src1, src2, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Absdiff ////////////////////////
-PERFTEST(Absdiff)
+
+typedef Size_MatType AbsDiffFixture;
+
+PERF_TEST_P(AbsDiffFixture, Absdiff,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src1, src2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
+    declare.in(src1, src2).in(dst);
+    randu(src1, 0, 256);
+    randu(src2, 0, 256);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::absdiff(oclSrc1, oclSrc2, oclDst);
 
-            absdiff(src1, src2, dst);
-
-            CPU_ON;
-            absdiff(src1, src2, dst);
-            CPU_OFF;
-
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::absdiff(d_src1, d_src2, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::absdiff(d_src1, d_src2, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::absdiff(d_src1, d_src2, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::absdiff(src1, src2, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// CartToPolar ////////////////////////
-PERFTEST(CartToPolar)
+
+typedef TestBaseWithParam<Size> CartToPolarFixture;
+
+PERF_TEST_P(CartToPolarFixture, CartToPolar, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
-    ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
+    const Size srcSize = GetParam();
+    const double eps = 8e-3;
 
-    int all_type[] = {CV_32FC1};
-    std::string type_name[] = {"CV_32FC1"};
+    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
+            dst1(srcSize, CV_32FC1), dst2(srcSize, CV_32FC1);
+    declare.in(src1, src2).out(dst1, dst2);
+    randu(src1, 0, 256);
+    randu(src2, 0, 256);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(3.6);
+
+   if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
+                oclDst1(srcSize, src1.type()), oclDst2(srcSize, src1.type());
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
-            gen(dst1, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::cartToPolar(oclSrc1, oclSrc2, oclDst1, oclDst2);
 
+        oclDst1.download(dst1);
+        oclDst2.download(dst2);
 
-            cartToPolar(src1, src2, dst, dst1, 1);
-
-            CPU_ON;
-            cartToPolar(src1, src2, dst, dst1, 1);
-            CPU_OFF;
-
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
-            d_dst.download(ocl_dst);
-            d_dst1.download(ocl_dst1);
-            GPU_FULL_OFF;
-
-            double diff1 = checkNorm(ocl_dst1, dst1);
-            double diff2 = checkNorm(ocl_dst, dst);
-            double max_diff = max(diff1, diff2);
-            TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
-
-        }
-
+        SANITY_CHECK(dst1, eps);
+        SANITY_CHECK(dst2, eps);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::cartToPolar(src1, src2, dst1, dst2);
+
+        SANITY_CHECK(dst1, eps);
+        SANITY_CHECK(dst2, eps);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// PolarToCart ////////////////////////
-PERFTEST(PolarToCart)
+
+typedef TestBaseWithParam<Size> PolarToCartFixture;
+
+PERF_TEST_P(PolarToCartFixture, PolarToCart, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
-    ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
+    const Size srcSize = GetParam();
 
-    int all_type[] = {CV_32FC1};
-    std::string type_name[] = {"CV_32FC1"};
+   Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
+            dst1(srcSize, CV_32FC1), dst2(srcSize, CV_32FC1);
+    declare.in(src1, src2).out(dst1, dst2);
+    randu(src1, 0, 256);
+    randu(src2, 0, 256);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(5.4);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
+                oclDst1(srcSize, src1.type()), oclDst2(srcSize, src1.type());
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
-            gen(dst1, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::polarToCart(oclSrc1, oclSrc2, oclDst1, oclDst2);
 
+        oclDst1.download(dst1);
+        oclDst2.download(dst2);
 
-            polarToCart(src1, src2, dst, dst1, 1);
-
-            CPU_ON;
-            polarToCart(src1, src2, dst, dst1, 1);
-            CPU_OFF;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
-            d_dst.download(ocl_dst);
-            d_dst1.download(ocl_dst1);
-            GPU_FULL_OFF;
-
-            double diff1 = checkNorm(ocl_dst1, dst1);
-            double diff2 = checkNorm(ocl_dst, dst);
-            double max_diff = max(diff1, diff2);
-            TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
-
-        }
-
+        SANITY_CHECK(dst1, 5e-5);
+        SANITY_CHECK(dst2, 5e-5);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::polarToCart(src1, src2, dst1, dst2);
+
+        SANITY_CHECK(dst1, 5e-5);
+        SANITY_CHECK(dst2, 5e-5);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Magnitude ////////////////////////
-PERFTEST(magnitude)
+
+typedef TestBaseWithParam<Size> MagnitudeFixture;
+
+PERF_TEST_P(MagnitudeFixture, Magnitude, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat x, y, mag, ocl_mag;
-    ocl::oclMat d_x, d_y, d_mag;
+    const Size srcSize = GetParam();
 
-    int all_type[] = {CV_32FC1};
-    std::string type_name[] = {"CV_32FC1"};
+    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
+            dst(srcSize, CV_32FC1);
+    randu(src1, 0, 1);
+    randu(src2, 0, 1);
+    declare.in(src1, src2).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+   if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
+                oclDst(srcSize, src1.type());
 
-            gen(x, size, size, all_type[j], 0, 1);
-            gen(y, size, size, all_type[j], 0, 1);
+        OCL_TEST_CYCLE() cv::ocl::magnitude(oclSrc1, oclSrc2, oclDst);
 
-            magnitude(x, y, mag);
-
-            CPU_ON;
-            magnitude(x, y, mag);
-            CPU_OFF;
-            d_x.upload(x);
-            d_y.upload(y);
-
-            WARMUP_ON;
-            ocl::magnitude(d_x, d_y, d_mag);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::magnitude(d_x, d_y, d_mag);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_x.upload(x);
-            d_y.upload(y);
-            ocl::magnitude(d_x, d_y, d_mag);
-            d_mag.download(ocl_mag);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_mag, mag, 1e-5);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, 1e-6);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::magnitude(src1, src2, dst);
+
+        SANITY_CHECK(dst, 1e-6);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Transpose ////////////////////////
-PERFTEST(Transpose)
+
+typedef Size_MatType TransposeFixture;
+
+PERF_TEST_P(TransposeFixture, Transpose,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+   if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::transpose(oclSrc, oclDst);
 
-            transpose(src, dst);
-
-            CPU_ON;
-            transpose(src, dst);
-            CPU_OFF;
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::transpose(d_src, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::transpose(d_src, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::transpose(d_src, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::transpose(src, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Flip ////////////////////////
-PERFTEST(Flip)
+
+typedef Size_MatType FlipFixture;
+
+PERF_TEST_P(FlipFixture, Flip,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; FLIP_BOTH";
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::flip(oclSrc, oclDst, 0);
 
-            flip(src, dst, 0);
-
-            CPU_ON;
-            flip(src, dst, 0);
-            CPU_OFF;
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::flip(d_src, d_dst, 0);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::flip(d_src, d_dst, 0);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::flip(d_src, d_dst, 0);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::flip(src, dst, 0);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// minMax ////////////////////////
-PERFTEST(minMax)
+
+typedef Size_MatType minMaxFixture;
+
+PERF_TEST_P(minMaxFixture, minMax,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
 {
-    Mat src;
-    ocl::oclMat d_src;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    double min_val = 0.0, max_val = 0.0;
-    double min_val_ = 0.0, max_val_ = 0.0;
-    Point min_loc, max_loc;
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+    Mat src(srcSize, type);
+    declare.in(src, WARMUP_RNG);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    double min_val = std::numeric_limits<double>::max(),
+            max_val = std::numeric_limits<double>::min();
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
+        ocl::oclMat oclSrc(src);
 
-            gen(src, size, size, all_type[j], 0, 256);
-
-            CPU_ON;
-            minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
-            CPU_OFF;
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::minMax(d_src, &min_val_, &max_val_);
-            WARMUP_OFF;
-
-            if(EeceptDoubleEQ<double>(max_val_, max_val) && EeceptDoubleEQ<double>(min_val_, min_val))
-                TestSystem::instance().setAccurate(1, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
-            else
-                TestSystem::instance().setAccurate(0, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
-
-            GPU_ON;
-            ocl::minMax(d_src, &min_val, &max_val);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::minMax(d_src, &min_val, &max_val);
-            GPU_FULL_OFF;
-
-        }
+        OCL_TEST_CYCLE() cv::ocl::minMax(oclSrc, &min_val, &max_val);
 
+        ASSERT_GE(max_val, min_val);
+        SANITY_CHECK(min_val);
+        SANITY_CHECK(max_val);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        Point min_loc, max_loc;
+
+        TEST_CYCLE() cv::minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
+
+        ASSERT_GE(max_val, min_val);
+        SANITY_CHECK(min_val);
+        SANITY_CHECK(max_val);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// minMaxLoc ////////////////////////
-PERFTEST(minMaxLoc)
+
+typedef Size_MatType minMaxLocFixture;
+
+PERF_TEST_P(minMaxLocFixture, minMaxLoc,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
 {
-    Mat src;
-    ocl::oclMat d_src;
+   const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+
+    Mat src(srcSize, type);
+    randu(src, 0, 1);
+    declare.in(src);
 
     double min_val = 0.0, max_val = 0.0;
-    double min_val_ = 0.0, max_val_ = 0.0;
     Point min_loc, max_loc;
-    Point min_loc_, max_loc_;
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src);
 
-            gen(src, size, size, all_type[j], 0, 1);
-
-            CPU_ON;
-            minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
-            CPU_OFF;
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::minMaxLoc(d_src, &min_val_, &max_val_, &min_loc_, &max_loc_);
-            WARMUP_OFF;
-
-            double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.;
-            if(src.depth() == 0)
-            {
-                minlocVal = src.at<unsigned char>(min_loc);
-                minlocVal_ = src.at<unsigned char>(min_loc_);
-                maxlocVal = src.at<unsigned char>(max_loc);
-                maxlocVal_ = src.at<unsigned char>(max_loc_);
-            }
-            if(src.depth() == 1)
-            {
-                minlocVal = src.at<signed char>(min_loc);
-                minlocVal_ = src.at<signed char>(min_loc_);
-                maxlocVal = src.at<signed char>(max_loc);
-                maxlocVal_ = src.at<signed char>(max_loc_);
-            }
-            if(src.depth() == 2)
-            {
-                minlocVal = src.at<unsigned short>(min_loc);
-                minlocVal_ = src.at<unsigned short>(min_loc_);
-                maxlocVal = src.at<unsigned short>(max_loc);
-                maxlocVal_ = src.at<unsigned short>(max_loc_);
-            }
-            if(src.depth() == 3)
-            {
-                minlocVal = src.at<signed short>(min_loc);
-                minlocVal_ = src.at<signed short>(min_loc_);
-                maxlocVal = src.at<signed short>(max_loc);
-                maxlocVal_ = src.at<signed short>(max_loc_);
-            }
-            if(src.depth() == 4)
-            {
-                minlocVal = src.at<int>(min_loc);
-                minlocVal_ = src.at<int>(min_loc_);
-                maxlocVal = src.at<int>(max_loc);
-                maxlocVal_ = src.at<int>(max_loc_);
-            }
-            if(src.depth() == 5)
-            {
-                minlocVal = src.at<float>(min_loc);
-                minlocVal_ = src.at<float>(min_loc_);
-                maxlocVal = src.at<float>(max_loc);
-                maxlocVal_ = src.at<float>(max_loc_);
-            }
-            if(src.depth() == 6)
-            {
-                minlocVal = src.at<double>(min_loc);
-                minlocVal_ = src.at<double>(min_loc_);
-                maxlocVal = src.at<double>(max_loc);
-                maxlocVal_ = src.at<double>(max_loc_);
-            }
-            error0 = ::abs(minlocVal_ - minlocVal);
-            error1 = ::abs(maxlocVal_ - maxlocVal);
-            if( EeceptDoubleEQ<double>(maxlocVal_, maxlocVal)
-                &&EeceptDoubleEQ<double>(minlocVal_, minlocVal)
-                &&EeceptDoubleEQ<double>(max_val_, max_val)
-                &&EeceptDoubleEQ<double>(min_val_, min_val))
-                TestSystem::instance().setAccurate(1, 0.);
-            else
-                TestSystem::instance().setAccurate(0, max(error0, error1));
-
-            GPU_ON;
-            ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
-            GPU_FULL_OFF;
-        }
+        OCL_TEST_CYCLE() cv::ocl::minMaxLoc(oclSrc, &min_val, &max_val, &min_loc, &max_loc);
 
+        ASSERT_GE(max_val, min_val);
+        SANITY_CHECK(min_val);
+        SANITY_CHECK(max_val);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
+
+        ASSERT_GE(max_val, min_val);
+        SANITY_CHECK(min_val);
+        SANITY_CHECK(max_val);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Sum ////////////////////////
-PERFTEST(Sum)
+
+typedef Size_MatType SumFixture;
+
+PERF_TEST_P(SumFixture, Sum,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32SC1)))
 {
-    Mat src;
-    Scalar cpures, gpures;
-    ocl::oclMat d_src;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_32SC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
+   Mat src(srcSize, type);
+    Scalar result;
+    randu(src, 0, 60);
+    declare.in(src);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src);
 
-            gen(src, size, size, all_type[j], 0, 60);
-
-            cpures = sum(src);
-
-            CPU_ON;
-            cpures = sum(src);
-            CPU_OFF;
-            d_src.upload(src);
-
-            WARMUP_ON;
-            gpures = ocl::sum(d_src);
-            WARMUP_OFF;
-
-            vector<double> diffs(4);
-            diffs[3] = fabs(cpures[3] - gpures[3]);
-            diffs[2] = fabs(cpures[2] - gpures[2]);
-            diffs[1] = fabs(cpures[1] - gpures[1]);
-            diffs[0] = fabs(cpures[0] - gpures[0]);
-            double max_diff = *max_element(diffs.begin(), diffs.end());
-            TestSystem::instance().setAccurate(max_diff<0.1?1:0, max_diff);
-
-            GPU_ON;
-            gpures = ocl::sum(d_src);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            gpures = ocl::sum(d_src);
-            GPU_FULL_OFF;
-        }
+        OCL_TEST_CYCLE() result = cv::ocl::sum(oclSrc);
 
+        SANITY_CHECK(result);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() result = cv::sum(src);
+
+        SANITY_CHECK(result);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// countNonZero ////////////////////////
-PERFTEST(countNonZero)
+
+typedef Size_MatType countNonZeroFixture;
+
+PERF_TEST_P(countNonZeroFixture, countNonZero,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
 {
-    Mat src;
-    ocl::oclMat d_src;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+    Mat src(srcSize, type);
+    int result = 0;
+    randu(src, 0, 256);
+    declare.in(src);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src);
 
-            gen(src, size, size, all_type[j], 0, 256);
-
-            countNonZero(src);
-
-            int cpures = 0, gpures = 0;
-            CPU_ON;
-            cpures = countNonZero(src);
-            CPU_OFF;
-            d_src.upload(src);
-
-            WARMUP_ON;
-            gpures = ocl::countNonZero(d_src);
-            WARMUP_OFF;
-
-            int diff = abs(cpures - gpures);
-            if(diff == 0)
-                TestSystem::instance().setAccurate(1, 0);
-            else
-                TestSystem::instance().setAccurate(0, diff);
-
-            GPU_ON;
-            ocl::countNonZero(d_src);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::countNonZero(d_src);
-            GPU_FULL_OFF;
-        }
+        OCL_TEST_CYCLE() result = cv::ocl::countNonZero(oclSrc);
 
+        SANITY_CHECK(result);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() result = cv::countNonZero(src);
+
+        SANITY_CHECK(result);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Phase ////////////////////////
-PERFTEST(Phase)
+
+typedef TestBaseWithParam<Size> PhaseFixture;
+
+PERF_TEST_P(PhaseFixture, Phase, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src1, src2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
+    const Size srcSize = GetParam();
 
-    int all_type[] = {CV_32FC1};
-    std::string type_name[] = {"CV_32FC1"};
+    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
+            dst(srcSize, CV_32FC1);
+    declare.in(src1, src2).out(dst);
+    randu(src1, 0, 256);
+    randu(src2, 0, 256);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
+                oclDst(srcSize, src1.type());
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::phase(oclSrc1, oclSrc2, oclDst, 1);
 
-            phase(src1, src2, dst, 1);
-
-            CPU_ON;
-            phase(src1, src2, dst, 1);
-            CPU_OFF;
-
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::phase(d_src1, d_src2, d_dst, 1);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::phase(d_src1, d_src2, d_dst, 1);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::phase(d_src1, d_src2, d_dst, 1);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-2);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, 1e-2);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::phase(src1, src2, dst, 1);
+
+        SANITY_CHECK(dst, 1e-2);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// bitwise_and////////////////////////
-PERFTEST(bitwise_and)
+
+typedef Size_MatType BitwiseAndFixture;
+
+PERF_TEST_P(BitwiseAndFixture, bitwise_and,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32SC1)))
 {
-    Mat src1, src2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
+   const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_32SC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
+   Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
+    declare.in(src1, src2).out(dst);
+    randu(src1, 0, 256);
+    randu(src2, 0, 256);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+   if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, src1.type());
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::bitwise_and(oclSrc1, oclSrc2, oclDst);
 
-            bitwise_and(src1, src2, dst);
-
-            CPU_ON;
-            bitwise_and(src1, src2, dst);
-            CPU_OFF;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::bitwise_and(d_src1, d_src2, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::bitwise_and(d_src1, d_src2, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::bitwise_and(d_src1, d_src2, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::bitwise_and(src1, src2, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// bitwise_not////////////////////////
-PERFTEST(bitwise_not)
+
+typedef Size_MatType BitwiseNotFixture;
+
+PERF_TEST_P(BitwiseAndFixture, bitwise_not,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32SC1)))
 {
-    Mat src1, dst, ocl_dst;
-    ocl::oclMat d_src1, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_32SC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::bitwise_not(oclSrc, oclDst);
 
-            bitwise_not(src1, dst);
-
-            CPU_ON;
-            bitwise_not(src1, dst);
-            CPU_OFF;
-            d_src1.upload(src1);
-
-            WARMUP_ON;
-            ocl::bitwise_not(d_src1, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::bitwise_not(d_src1, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            ocl::bitwise_not(d_src1, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::bitwise_not(src, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// compare////////////////////////
-PERFTEST(compare)
+
+typedef Size_MatType CompareFixture;
+
+PERF_TEST_P(CompareFixture, compare,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
 {
-    Mat src1, src2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int CMP_EQ = 0;
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, CV_8UC1);
+    declare.in(src1, src2, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, CV_8UC1);
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::compare(oclSrc1, oclSrc2, oclDst, CMP_EQ);
 
-            compare(src1, src2, dst, CMP_EQ);
-
-            CPU_ON;
-            compare(src1, src2, dst, CMP_EQ);
-            CPU_OFF;
-
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::compare(src1, src2, dst, CMP_EQ);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// pow ////////////////////////
-PERFTEST(pow)
+
+typedef TestBaseWithParam<Size> PowFixture;
+
+PERF_TEST_P(PowFixture, pow, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size srcSize = GetParam();
 
-    int all_type[] = {CV_32FC1};
-    std::string type_name[] = {"CV_32FC1"};
+   Mat src(srcSize, CV_32F), dst(srcSize, CV_32F);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
 
-            gen(src, size, size, all_type[j], 5, 16);
+        OCL_TEST_CYCLE() cv::ocl::pow(oclSrc, -2.0, oclDst);
 
-            pow(src, -2.0, dst);
-
-            CPU_ON;
-            pow(src, -2.0, dst);
-            CPU_OFF;
-            d_src.upload(src);
-            d_dst.upload(dst);
-
-            WARMUP_ON;
-            ocl::pow(d_src, -2.0, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::pow(d_src, -2.0, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::pow(d_src, -2.0, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, 5e-2);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::pow(src, -2.0, dst);
+
+        SANITY_CHECK(dst, 5e-2);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// MagnitudeSqr////////////////////////
-PERFTEST(MagnitudeSqr)
+
+typedef TestBaseWithParam<Size> MagnitudeSqrFixture;
+
+PERF_TEST_P(MagnitudeSqrFixture, MagnitudeSqr, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src1, src2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
+    const Size srcSize = GetParam();
 
-    int all_type[] = {CV_32FC1};
-    std::string type_name[] = {"CV_32FC1"};
+    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
+            dst(srcSize, CV_32FC1);
+    declare.in(src1, src2, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++)
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, src1.type());
+
+        OCL_TEST_CYCLE() cv::ocl::magnitudeSqr(oclSrc1, oclSrc2, oclDst);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        ASSERT_EQ(1, src1.channels());
+
+        TEST_CYCLE()
         {
-            SUBTEST << size << 'x' << size << "; " << type_name[t];
-
-            gen(src1, size, size, all_type[t], 0, 256);
-            gen(src2, size, size, all_type[t], 0, 256);
-            gen(dst, size, size, all_type[t], 0, 256);
-
-            CPU_ON;
-            for (int i = 0; i < src1.rows; ++i)
-                for (int j = 0; j < src1.cols; ++j)
+            for (int y = 0; y < srcSize.height; ++y)
+            {
+                const float * const src1Data = reinterpret_cast<float *>(src1.data + src1.step * y);
+                const float * const src2Data = reinterpret_cast<float *>(src2.data + src2.step * y);
+                float * const dstData = reinterpret_cast<float *>(dst.data + dst.step * y);
+                for (int x = 0; x < srcSize.width; ++x)
                 {
-                    float val1 = src1.at<float>(i, j);
-                    float val2 = src2.at<float>(i, j);
-                    ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
-
+                    float t0 = src1Data[x] * src1Data[x];
+                    float t1 = src2Data[x] * src2Data[x];
+                    dstData[x] = t0 + t1;
                 }
-            CPU_OFF;
-
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::magnitudeSqr(d_src1, d_src2, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::magnitudeSqr(d_src1, d_src2, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::magnitudeSqr(d_src1, d_src2, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
+            }
         }
 
+        SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// AddWeighted////////////////////////
-PERFTEST(AddWeighted)
+
+typedef Size_MatType AddWeightedFixture;
+
+PERF_TEST_P(AddWeightedFixture, AddWeighted,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
 {
-    Mat src1, src2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
+    Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
+    declare.in(src1, src2, WARMUP_RNG).out(dst);
     double alpha = 2.0, beta = 1.0, gama = 3.0;
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type);
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::addWeighted(oclSrc1, alpha, oclSrc2, beta, gama, oclDst);
 
+        oclDst.download(dst);
 
-            addWeighted(src1, alpha, src2, beta, gama, dst);
-
-            CPU_ON;
-            addWeighted(src1, alpha, src2, beta, gama, dst);
-            CPU_OFF;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
-        }
-
+        SANITY_CHECK(dst);
     }
-}
\ No newline at end of file
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::addWeighted(src1, alpha, src2, beta, gama, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/perf/perf_bgfg.cpp b/modules/ocl/perf/perf_bgfg.cpp
new file mode 100644
index 000000000..9ccd1657e
--- /dev/null
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -0,0 +1,282 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "perf_precomp.hpp"
+using namespace perf;
+using namespace std;
+using namespace cv::ocl;
+using namespace cv;
+using std::tr1::tuple;
+using std::tr1::get;
+#if defined(HAVE_XINE)         || \
+    defined(HAVE_GSTREAMER)    || \
+    defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_AVFOUNDATION) || \
+    defined(HAVE_FFMPEG)       || \
+    defined(WIN32)
+
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
+#else
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
+#endif
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output)
+{
+    for(int i = 0; i< (int)(input.size()); i++)
+    {
+        cvtColor(input[i], output[i], COLOR_RGB2GRAY);
+    }
+}
+//prepare data for CPU
+static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer)
+{
+    cv::Mat frame;
+    std::vector<Mat> frame_buffer_init;
+    int nFrame = (int)frame_buffer.size();
+    for(int i = 0; i < nFrame; i++)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+        frame_buffer_init.push_back(frame);
+    }
+
+    if(cn == 1)
+        cvtFrameFmt(frame_buffer_init, frame_buffer);
+    else
+        frame_buffer = frame_buffer_init;
+}
+//copy CPU data to GPU
+static void prepareData(vector<Mat>& frame_buffer, vector<oclMat>& frame_buffer_ocl)
+{
+    for(int i = 0; i < (int)frame_buffer.size(); i++)
+        frame_buffer_ocl.push_back(cv::ocl::oclMat(frame_buffer[i]));
+}
+#endif
+///////////// MOG ////////////////////////
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef tuple<string, int, double> VideoMOGParamType;
+typedef TestBaseWithParam<VideoMOGParamType> VideoMOGFixture;
+
+PERF_TEST_P(VideoMOGFixture, MOG,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+            ::testing::Values(1, 3),
+            ::testing::Values(0.0, 0.01)))
+{
+    VideoMOGParamType params = GetParam();
+
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    const float learningRate = static_cast<float>(get<2>(params));
+
+    const int nFrame = 5;
+
+    Mat foreground_cpu;
+    std::vector<Mat> frame_buffer(nFrame);
+    std::vector<oclMat> frame_buffer_ocl;
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    prepareData(cap, cn, frame_buffer);
+
+    cv::Mat foreground;
+    cv::ocl::oclMat foreground_d;
+    if(RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE()
+        {
+            cv::Ptr<cv::BackgroundSubtractorMOG> mog = createBackgroundSubtractorMOG();
+            foreground.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog->apply(frame_buffer[i], foreground, learningRate);
+            }
+        }
+        SANITY_CHECK(foreground);
+    }else if(RUN_OCL_IMPL)
+    {
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
+        OCL_TEST_CYCLE()
+        {
+            cv::ocl::MOG d_mog;
+            foreground_d.release();
+            for (int i = 0; i < nFrame; ++i)
+            {
+                d_mog(frame_buffer_ocl[i], foreground_d, learningRate);
+            }
+        }
+        foreground_d.download(foreground);
+        SANITY_CHECK(foreground);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
+
+///////////// MOG2 ////////////////////////
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef tuple<string, int> VideoMOG2ParamType;
+typedef TestBaseWithParam<VideoMOG2ParamType> VideoMOG2Fixture;
+
+PERF_TEST_P(VideoMOG2Fixture, MOG2,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+            ::testing::Values(1, 3)))
+{
+    VideoMOG2ParamType params = GetParam();
+
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    int nFrame = 5;
+
+    std::vector<cv::Mat> frame_buffer(nFrame);
+    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+    prepareData(cap, cn, frame_buffer);
+    cv::Mat foreground;
+    cv::ocl::oclMat foreground_d;
+
+    if(RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE()
+        {
+            cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
+            mog2->set("detectShadows", false);
+            foreground.release();
+
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog2->apply(frame_buffer[i], foreground);
+            }
+        }
+        SANITY_CHECK(foreground);
+    }else if(RUN_OCL_IMPL)
+    {
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
+        OCL_TEST_CYCLE()
+        {
+            cv::ocl::MOG2 d_mog2;
+            foreground_d.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                d_mog2(frame_buffer_ocl[i], foreground_d);
+            }
+        }
+        foreground_d.download(foreground);
+        SANITY_CHECK(foreground);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
+
+///////////// MOG2_GetBackgroundImage //////////////////
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef TestBaseWithParam<VideoMOG2ParamType> Video_MOG2GetBackgroundImage;
+
+PERF_TEST_P(Video_MOG2GetBackgroundImage, MOG2,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+            ::testing::Values(3)))
+{
+    VideoMOG2ParamType params = GetParam();
+
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    int nFrame = 5;
+
+    std::vector<cv::Mat> frame_buffer(nFrame);
+    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    prepareData(cap, cn, frame_buffer);
+
+    cv::Mat foreground;
+    cv::Mat background;
+    cv::ocl::oclMat foreground_d;
+    cv::ocl::oclMat background_d;
+
+    if(RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE()
+        {
+            cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
+            mog2->set("detectShadows", false);
+            foreground.release();
+            background.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog2->apply(frame_buffer[i], foreground);
+            }
+            mog2->getBackgroundImage(background);
+        }
+        SANITY_CHECK(background);
+    }else if(RUN_OCL_IMPL)
+    {
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
+        OCL_TEST_CYCLE()
+        {
+            cv::ocl::MOG2 d_mog2;
+            foreground_d.release();
+            background_d.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                d_mog2(frame_buffer_ocl[i], foreground_d);
+            }
+            d_mog2.getBackgroundImage(background_d);
+        }
+        background_d.download(background);
+        SANITY_CHECK(background);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
diff --git a/modules/ocl/perf/perf_blend.cpp b/modules/ocl/perf/perf_blend.cpp
index 8ebb6482b..018ec6315 100644
--- a/modules/ocl/perf/perf_blend.cpp
+++ b/modules/ocl/perf/perf_blend.cpp
@@ -44,10 +44,16 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+
 ///////////// blend ////////////////////////
+
 template <typename T>
-void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold)
+static void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2,
+                            const cv::Mat &weights1, const cv::Mat &weights2,
+                            cv::Mat &result_gold)
 {
     result_gold.create(img1.size(), img1.type());
 
@@ -63,60 +69,46 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we
 
         for (int x = 0; x < img1.cols * cn; ++x)
         {
-            float w1 = weights1_row[x / cn];
-            float w2 = weights2_row[x / cn];
-            result_gold_row[x] = static_cast<T>((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f));
+            int x1 = x * cn;
+            float w1 = weights1_row[x];
+            float w2 = weights2_row[x];
+            result_gold_row[x] = static_cast<T>((img1_row[x1] * w1
+                                                 + img2_row[x1] * w2) / (w1 + w2 + 1e-5f));
         }
     }
 }
-PERFTEST(blend)
+
+typedef TestBaseWithParam<Size> blendLinearFixture;
+
+PERF_TEST_P(blendLinearFixture, blendLinear, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src1, src2, weights1, weights2, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst;
+    const Size srcSize = GetParam();
+    const int type = CV_8UC1;
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src1(srcSize, type), src2(srcSize, CV_8UC1), dst;
+    Mat weights1(srcSize, CV_32FC1), weights2(srcSize, CV_32FC1);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    declare.in(src1, src2, WARMUP_RNG);
+    randu(weights1, 0.0f, 1.0f);
+    randu(weights2, 0.0f, 1.0f);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] << " and CV_32FC1";
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst;
+        ocl::oclMat oclWeights1(weights1), oclWeights2(weights2);
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(weights1, size, size, CV_32FC1, 0, 1);
-            gen(weights2, size, size, CV_32FC1, 0, 1);
+        OCL_TEST_CYCLE() cv::ocl::blendLinear(oclSrc1, oclSrc2, oclWeights1, oclWeights2, oclDst);
 
-            blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
+        oclDst.download(dst);
 
-            CPU_ON;
-            blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
-            CPU_OFF;
-
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            d_weights1.upload(weights1);
-            d_weights2.upload(weights2);
-
-            WARMUP_ON;
-            ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            d_weights1.upload(weights1);
-            d_weights2.upload(weights2);
-            ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f);
-        }
+        SANITY_CHECK(dst);
     }
-}
\ No newline at end of file
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp
index 406b46a32..33c42c72d 100644
--- a/modules/ocl/perf/perf_brute_force_matcher.cpp
+++ b/modules/ocl/perf/perf_brute_force_matcher.cpp
@@ -43,125 +43,135 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+
+#define OCL_BFMATCHER_TYPICAL_MAT_SIZES ::testing::Values(cv::Size(128, 500), cv::Size(128, 1000), cv::Size(128, 2000))
 
 //////////////////// BruteForceMatch /////////////////
-PERFTEST(BruteForceMatcher)
+
+typedef TestBaseWithParam<Size> BruteForceMatcherFixture;
+
+PERF_TEST_P(BruteForceMatcherFixture, DISABLED_match,
+            OCL_BFMATCHER_TYPICAL_MAT_SIZES) // TODO too big difference between implementations
 {
-    Mat trainIdx_cpu;
-    Mat distance_cpu;
-    Mat allDist_cpu;
-    Mat nMatches_cpu;
+    const Size srcSize = GetParam();
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    vector<DMatch> matches;
+    Mat query(srcSize, CV_32F), train(srcSize, CV_32F);
+    declare.in(query, train).time(srcSize.height == 2000 ? 9 : 4 );
+    randu(query, 0.0f, 1.0f);
+    randu(train, 0.0f, 1.0f);
+
+    if (RUN_PLAIN_IMPL)
     {
-        // Init CPU matcher
-        int desc_len = 64;
-
         BFMatcher matcher(NORM_L2);
+        TEST_CYCLE() matcher.match(query, train, matches);
 
-        Mat query;
-        gen(query, size, desc_len, CV_32F, 0, 1);
-
-        Mat train;
-        gen(train, size, desc_len, CV_32F, 0, 1);
-        // Output
-        vector< vector<DMatch> > matches(2);
-        vector< vector<DMatch> > d_matches(2);
-        // Init GPU matcher
-        ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
-
-        ocl::oclMat d_query(query);
-        ocl::oclMat d_train(train);
-
-        ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches;
-
-        SUBTEST << size << "; match";
-
-        matcher.match(query, train, matches[0]);
-
-        CPU_ON;
-        matcher.match(query, train, matches[0]);
-        CPU_OFF;
-
-        WARMUP_ON;
-        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
-        WARMUP_OFF;
-
-        GPU_ON;
-        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_query.upload(query);
-        d_train.upload(train);
-        d_matcher.match(d_query, d_train, d_matches[0]);
-        GPU_FULL_OFF;
-
-        int diff = abs((int)d_matches[0].size() - (int)matches[0].size());
-        if(diff == 0)
-            TestSystem::instance().setAccurate(1, 0);
-        else
-            TestSystem::instance().setAccurate(0, diff);
-
-        SUBTEST << size << "; knnMatch";
-
-        matcher.knnMatch(query, train, matches, 2);
-
-        CPU_ON;
-        matcher.knnMatch(query, train, matches, 2);
-        CPU_OFF;
-
-        WARMUP_ON;
-        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
-        WARMUP_OFF;
-
-        GPU_ON;
-        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_query.upload(query);
-        d_train.upload(train);
-        d_matcher.knnMatch(d_query, d_train, d_matches, 2);
-        GPU_FULL_OFF;
-
-        diff = abs((int)d_matches[0].size() - (int)matches[0].size());
-        if(diff == 0)
-            TestSystem::instance().setAccurate(1, 0);
-        else
-            TestSystem::instance().setAccurate(0, diff);
-
-        SUBTEST << size << "; radiusMatch";
-
-        float max_distance = 2.0f;
-
-        matcher.radiusMatch(query, train, matches, max_distance);
-
-        CPU_ON;
-        matcher.radiusMatch(query, train, matches, max_distance);
-        CPU_OFF;
-
-        d_trainIdx.release();
-
-        WARMUP_ON;
-        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
-        WARMUP_OFF;
-
-        GPU_ON;
-        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_query.upload(query);
-        d_train.upload(train);
-        d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance);
-        GPU_FULL_OFF;
-
-        diff = abs((int)d_matches[0].size() - (int)matches[0].size());
-        if(diff == 0)
-            TestSystem::instance().setAccurate(1, 0);
-        else
-            TestSystem::instance().setAccurate(0, diff);
+        SANITY_CHECK_MATCHES(matches);
     }
-}
\ No newline at end of file
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
+        ocl::oclMat oclQuery(query), oclTrain(train);
+        ocl::oclMat oclTrainIdx, oclDistance;
+
+        OCL_TEST_CYCLE()
+            oclMatcher.matchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance);
+
+        oclMatcher.matchDownload(oclTrainIdx, oclDistance, matches);
+
+        SANITY_CHECK_MATCHES(matches);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+PERF_TEST_P(BruteForceMatcherFixture, DISABLED_knnMatch,
+            OCL_BFMATCHER_TYPICAL_MAT_SIZES) // TODO too big difference between implementations
+{
+    const Size srcSize = GetParam();
+
+    vector<vector<DMatch> > matches(2);
+    Mat query(srcSize, CV_32F), train(srcSize, CV_32F);
+    randu(query, 0.0f, 1.0f);
+    randu(train, 0.0f, 1.0f);
+
+    declare.in(query, train);
+    if (srcSize.height == 2000)
+        declare.time(9);
+
+    if (RUN_PLAIN_IMPL)
+    {
+        BFMatcher matcher(NORM_L2);
+        TEST_CYCLE() matcher.knnMatch(query, train, matches, 2);
+
+        std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
+        SANITY_CHECK_MATCHES(matches0);
+        SANITY_CHECK_MATCHES(matches1);
+    }
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
+        ocl::oclMat oclQuery(query), oclTrain(train);
+        ocl::oclMat oclTrainIdx, oclDistance, oclAllDist;
+
+        OCL_TEST_CYCLE()
+                oclMatcher.knnMatchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance, oclAllDist, 2);
+
+        oclMatcher.knnMatchDownload(oclTrainIdx, oclDistance, matches);
+
+        std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
+        SANITY_CHECK_MATCHES(matches0);
+        SANITY_CHECK_MATCHES(matches1);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+PERF_TEST_P(BruteForceMatcherFixture, radiusMatch,
+            OCL_BFMATCHER_TYPICAL_MAT_SIZES)
+{
+    const Size srcSize = GetParam();
+
+    const float max_distance = 2.0f;
+    vector<vector<DMatch> > matches(2);
+    Mat query(srcSize, CV_32F), train(srcSize, CV_32F);
+    declare.in(query, train);
+
+    randu(query, 0.0f, 1.0f);
+    randu(train, 0.0f, 1.0f);
+
+    if (srcSize.height == 2000)
+        declare.time(9.15);
+
+    if (RUN_PLAIN_IMPL)
+    {
+        cv::BFMatcher matcher(NORM_L2);
+        TEST_CYCLE() matcher.radiusMatch(query, train, matches, max_distance);
+
+        std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
+        SANITY_CHECK_MATCHES(matches0);
+        SANITY_CHECK_MATCHES(matches1);
+    }
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclQuery(query), oclTrain(train);
+        ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
+        ocl::oclMat oclTrainIdx, oclDistance, oclNMatches;
+
+        OCL_TEST_CYCLE()
+                oclMatcher.radiusMatchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance, oclNMatches, max_distance);
+
+        oclMatcher.radiusMatchDownload(oclTrainIdx, oclDistance, oclNMatches, matches);
+
+        std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
+        SANITY_CHECK_MATCHES(matches0);
+        SANITY_CHECK_MATCHES(matches1);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+#undef OCL_BFMATCHER_TYPICAL_MAT_SIZES
diff --git a/modules/ocl/test/test_pyrup.cpp b/modules/ocl/perf/perf_calib3d.cpp
similarity index 63%
rename from modules/ocl/test/test_pyrup.cpp
rename to modules/ocl/perf/perf_calib3d.cpp
index 3c3c6ef47..997e84856 100644
--- a/modules/ocl/test/test_pyrup.cpp
+++ b/modules/ocl/perf/perf_calib3d.cpp
@@ -15,8 +15,8 @@
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
-//    Zhang Chunpeng chunpeng@multicorewareinc.com
-//    Yao Wang yao@multicorewareinc.com
+//    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -31,7 +31,7 @@
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
-// This software is provided by the copyright holders and contributors "as is" and
+// This software is provided by the copyright holders and contributors as is and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -44,47 +44,42 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
 
-#ifdef HAVE_OPENCL
+///////////// StereoMatchBM ////////////////////////
 
-using namespace cv;
-using namespace cvtest;
-using namespace testing;
-using namespace std;
-
-PARAM_TEST_CASE(PyrUp, MatType, int)
+PERF_TEST(StereoMatchBMFixture, StereoMatchBM)
 {
-    int type;
-    int channels;
+    Mat left_image = imread(getDataPath("gpu/stereobm/aloe-L.png"), cv::IMREAD_GRAYSCALE);
+    Mat right_image = imread(getDataPath("gpu/stereobm/aloe-R.png"), cv::IMREAD_GRAYSCALE);
 
-    virtual void SetUp()
+    ASSERT_TRUE(!left_image.empty()) << "no input image";
+    ASSERT_TRUE(!right_image.empty()) << "no input image";
+    ASSERT_TRUE(right_image.size() == left_image.size());
+    ASSERT_TRUE(right_image.size() == left_image.size());
+
+    const int n_disp = 128, winSize = 19;
+    Mat disp(left_image.size(), CV_16SC1);
+
+    declare.in(left_image, right_image).out(disp);
+
+    if (RUN_OCL_IMPL)
     {
-        type = GET_PARAM(0);
-        channels = GET_PARAM(1);
-    }
-};
+        ocl::oclMat oclLeft(left_image), oclRight(right_image),
+                oclDisp(left_image.size(), CV_16SC1);
+        ocl::StereoBM_OCL oclBM(0, n_disp, winSize);
 
-TEST_P(PyrUp, Accuracy)
-{
-    for(int j = 0; j < LOOP_TIMES; j++)
+        OCL_TEST_CYCLE() oclBM(oclLeft, oclRight, oclDisp);
+    }
+    else if (RUN_PLAIN_IMPL)
     {
-        Size size(MWIDTH, MHEIGHT);
-        Mat src = randomMat(size, CV_MAKETYPE(type, channels));
-        Mat dst_gold;
-        pyrUp(src, dst_gold);
-        ocl::oclMat dst;
-        ocl::oclMat srcMat(src);
-        ocl::pyrUp(srcMat, dst);
+        Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
 
-        EXPECT_MAT_NEAR(dst_gold, Mat(dst), (type == CV_32F ? 1e-4f : 1.0));
+        TEST_CYCLE() bm->compute(left_image, right_image, disp);
     }
+    else
+        OCL_PERF_ELSE
 
+    int value = 0;
+    SANITY_CHECK(value);
 }
-
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine(
-                            Values(CV_8U, CV_32F), Values(1, 3, 4)));
-
-
-#endif // HAVE_OPENCL
\ No newline at end of file
diff --git a/modules/ocl/perf/perf_canny.cpp b/modules/ocl/perf/perf_canny.cpp
index 8fc0d13cc..259684092 100644
--- a/modules/ocl/perf/perf_canny.cpp
+++ b/modules/ocl/perf/perf_canny.cpp
@@ -43,43 +43,34 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
 
 ///////////// Canny ////////////////////////
-PERFTEST(Canny)
+
+PERF_TEST(CannyFixture, Canny)
 {
-    Mat img = imread(abspath("aloeL.jpg"), IMREAD_GRAYSCALE);
+    Mat img = imread(getDataPath("gpu/stereobm/aloe-L.png"), cv::IMREAD_GRAYSCALE),
+            edges(img.size(), CV_8UC1);
+    ASSERT_TRUE(!img.empty()) << "can't open aloe-L.png";
 
-    if (img.empty())
+    declare.in(img).out(edges);
+
+    if (RUN_OCL_IMPL)
     {
-        throw runtime_error("can't open aloeL.jpg");
+        ocl::oclMat oclImg(img), oclEdges(img.size(), CV_8UC1);
+
+        OCL_TEST_CYCLE() ocl::Canny(oclImg, oclEdges, 50.0, 100.0);
+        oclEdges.download(edges);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() Canny(img, edges, 50.0, 100.0);
+    }
+    else
+        OCL_PERF_ELSE
 
-    SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1";
-
-    Mat edges(img.size(), CV_8UC1), ocl_edges;
-
-    CPU_ON;
-    Canny(img, edges, 50.0, 100.0);
-    CPU_OFF;
-
-    ocl::oclMat d_img(img);
-    ocl::oclMat d_edges;
-    ocl::CannyBuf d_buf;
-
-    WARMUP_ON;
-    ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
-    WARMUP_OFF;
-
-    GPU_ON;
-    ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
-    GPU_OFF;
-
-    GPU_FULL_ON;
-    d_img.upload(img);
-    ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
-    d_edges.download(ocl_edges);
-    GPU_FULL_OFF;
-
-    TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2);
-}
\ No newline at end of file
+    int value = 0;
+    SANITY_CHECK(value);
+}
diff --git a/modules/ocl/perf/perf_color.cpp b/modules/ocl/perf/perf_color.cpp
index 44dc8f855..b66fc2b0a 100644
--- a/modules/ocl/perf/perf_color.cpp
+++ b/modules/ocl/perf/perf_color.cpp
@@ -43,51 +43,36 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
 
 ///////////// cvtColor////////////////////////
-PERFTEST(cvtColor)
+
+typedef TestBaseWithParam<Size> cvtColorFixture;
+
+PERF_TEST_P(cvtColorFixture, cvtColor, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size srcSize = GetParam();
 
-    int all_type[] = {CV_8UC4};
-    std::string type_name[] = {"CV_8UC4"};
+    Mat src(srcSize, CV_8UC4), dst(srcSize, CV_8UC4);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            gen(src, size, size, all_type[j], 0, 256);
-            SUBTEST << size << "x" << size << "; " << type_name[j] << " ; CV_RGBA2GRAY";
-
-            cvtColor(src, dst, COLOR_RGBA2GRAY, 4);
-
-            CPU_ON;
-            cvtColor(src, dst, COLOR_RGBA2GRAY, 4);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::cvtColor(d_src, d_dst, COLOR_RGBA2GRAY, 4);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::cvtColor(d_src, d_dst, COLOR_RGBA2GRAY, 4);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::cvtColor(d_src, d_dst, COLOR_RGBA2GRAY, 4);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExceptedMatSimilar(dst, ocl_dst, 1e-5);
-        }
+        ocl::oclMat oclSrc(src), oclDst(src.size(), CV_8UC4);
 
+        OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, COLOR_RGBA2GRAY, 4);
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::cvtColor(src, dst, COLOR_RGBA2GRAY, 4);
 
-
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp
index 6e0be3f19..4cba47e96 100644
--- a/modules/ocl/perf/perf_fft.cpp
+++ b/modules/ocl/perf/perf_fft.cpp
@@ -43,49 +43,46 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
 
 ///////////// dft ////////////////////////
-PERFTEST(dft)
+
+typedef TestBaseWithParam<Size> dftFixture;
+
+#ifdef HAVE_CLAMDFFT
+
+PERF_TEST_P(dftFixture, dft, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size srcSize = GetParam();
 
-    int all_type[] = {CV_32FC2};
-    std::string type_name[] = {"CV_32FC2"};
+    Mat src(srcSize, CV_32FC2), dst;
+    randu(src, 0.0f, 1.0f);
+    declare.in(src);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(7.4);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; complex-to-complex";
+        ocl::oclMat oclSrc(src), oclDst;
 
-            gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(1));
+        OCL_TEST_CYCLE() cv::ocl::dft(oclSrc, oclDst);
 
-            dft(src, dst);
-
-            CPU_ON;
-            dft(src, dst);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::dft(d_src, d_dst, Size(size, size));
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::dft(d_src, d_dst, Size(size, size));
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::dft(d_src, d_dst, Size(size, size));
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, src.size().area() * 1e-4);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, 1.5);
     }
-}
\ No newline at end of file
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::dft(src, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+#endif
diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp
index a05301b34..aa562412b 100644
--- a/modules/ocl/perf/perf_filters.cpp
+++ b/modules/ocl/perf/perf_filters.cpp
@@ -43,335 +43,360 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::get;
+using std::tr1::tuple;
 
 ///////////// Blur////////////////////////
-PERFTEST(Blur)
+
+typedef Size_MatType BlurFixture;
+
+PERF_TEST_P(BlurFixture, Blur,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src1, dst, ocl_dst;
-    ocl::oclMat d_src1, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params), ksize(3, 3);
+    const int type = get<1>(params), bordertype = BORDER_CONSTANT;
 
-    Size ksize = Size(3, 3);
-    int bordertype = BORDER_CONSTANT;
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
+        declare.time(5);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::blur(oclSrc, oclDst, ksize, Point(-1, -1), bordertype);
 
-            blur(src1, dst, ksize, Point(-1, -1), bordertype);
-
-            CPU_ON;
-            blur(src1, dst, ksize, Point(-1, -1), bordertype);
-            CPU_OFF;
-
-            d_src1.upload(src1);
-
-            WARMUP_ON;
-            ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::blur(src, dst, ksize, Point(-1, -1), bordertype);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// Laplacian////////////////////////
-PERFTEST(Laplacian)
+
+typedef Size_MatType LaplacianFixture;
+
+PERF_TEST_P(LaplacianFixture, Laplacian,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src1, dst, ocl_dst;
-    ocl::oclMat d_src1, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), ksize = 3;
 
-    int ksize = 3;
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
+        declare.time(6);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::Laplacian(oclSrc, oclDst, -1, ksize, 1);
 
-            Laplacian(src1, dst, -1, ksize, 1);
-
-            CPU_ON;
-            Laplacian(src1, dst, -1, ksize, 1);
-            CPU_OFF;
-
-            d_src1.upload(src1);
-
-            WARMUP_ON;
-            ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::Laplacian(src, dst, -1, ksize, 1);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Erode ////////////////////
-PERFTEST(Erode)
+
+typedef Size_MatType ErodeFixture;
+
+PERF_TEST_P(ErodeFixture, Erode,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)))
 {
-    Mat src, dst, ker, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), ksize = 3;
+    const Mat ker = getStructuringElement(MORPH_RECT, Size(ksize, ksize));
 
-    int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst).in(ker);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
+        declare.time(5);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type), oclKer(ker);
 
-            gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(256));
-            ker = getStructuringElement(MORPH_RECT, Size(3, 3));
+        OCL_TEST_CYCLE() cv::ocl::erode(oclSrc, oclDst, oclKer);
 
-            erode(src, dst, ker);
-
-            CPU_ON;
-            erode(src, dst, ker);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::erode(d_src, d_dst, ker);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::erode(d_src, d_dst, ker);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::erode(d_src, d_dst, ker);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::erode(src, dst, ker);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Sobel ////////////////////////
-PERFTEST(Sobel)
+
+typedef Size_MatType SobelFixture;
+
+PERF_TEST_P(SobelFixture, Sobel,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), dx = 1, dy = 1;
 
-    int dx = 1;
-    int dy = 1;
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if ((srcSize == OCL_SIZE_2000 && type == CV_8UC4) ||
+            (srcSize == OCL_SIZE_4000 && type == CV_8UC1))
+        declare.time(5.5);
+    else if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
+        declare.time(20);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::Sobel(oclSrc, oclDst, -1, dx, dy);
 
-            Sobel(src, dst, -1, dx, dy);
-
-            CPU_ON;
-            Sobel(src, dst, -1, dx, dy);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::Sobel(d_src, d_dst, -1, dx, dy);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::Sobel(d_src, d_dst, -1, dx, dy);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::Sobel(d_src, d_dst, -1, dx, dy);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::Sobel(src, dst, -1, dx, dy);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// Scharr ////////////////////////
-PERFTEST(Scharr)
+
+typedef Size_MatType ScharrFixture;
+
+PERF_TEST_P(ScharrFixture, Scharr,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), dx = 1, dy = 0;
 
-    int dx = 1;
-    int dy = 0;
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if ((srcSize == OCL_SIZE_2000 && type == CV_8UC4) ||
+            (srcSize == OCL_SIZE_4000 && type == CV_8UC1))
+        declare.time(5.5);
+    else if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
+        declare.time(21);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::Scharr(oclSrc, oclDst, -1, dx, dy);
 
-            Scharr(src, dst, -1, dx, dy);
-
-            CPU_ON;
-            Scharr(src, dst, -1, dx, dy);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::Scharr(d_src, d_dst, -1, dx, dy);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::Scharr(d_src, d_dst, -1, dx, dy);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::Scharr(d_src, d_dst, -1, dx, dy);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::Scharr(src, dst, -1, dx, dy);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// GaussianBlur ////////////////////////
-PERFTEST(GaussianBlur)
+
+typedef Size_MatType GaussianBlurFixture;
+
+PERF_TEST_P(GaussianBlurFixture, GaussianBlur,
+            ::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)))
 {
-    Mat src, dst, ocl_dst;
-    int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), ksize = 7;
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    const double eps = src.depth() == CV_8U ? 1 + DBL_EPSILON : 3e-4;
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src, size, size, all_type[j], 5, 16);
+        OCL_TEST_CYCLE() cv::ocl::GaussianBlur(oclSrc, oclDst, Size(ksize, ksize), 0);
 
-            GaussianBlur(src, dst, Size(9, 9), 0);
-
-            CPU_ON;
-            GaussianBlur(src, dst, Size(9, 9), 0);
-            CPU_OFF;
-
-            ocl::oclMat d_src(src);
-            ocl::oclMat d_dst(src.size(), src.type());
-            ocl::oclMat d_buf;
-
-            WARMUP_ON;
-            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, eps);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::GaussianBlur(src, dst, Size(ksize, ksize), 0);
+
+        SANITY_CHECK(dst, eps);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// filter2D////////////////////////
-PERFTEST(filter2D)
+
+typedef Size_MatType filter2DFixture;
+
+PERF_TEST_P(filter2DFixture, filter2D,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), ksize = 3;
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, type), dst(srcSize, type), kernel(ksize, ksize, CV_32SC1);
+    declare.in(src, WARMUP_RNG).in(kernel).out(dst);
+    randu(kernel, -3.0, 3.0);
+
+    if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
+        declare.time(8);
+
+    if (RUN_OCL_IMPL)
     {
-        int all_type[] = {CV_8UC1, CV_8UC4};
-        std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type), oclKernel(kernel);
 
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            gen(src, size, size, all_type[j], 0, 256);
-
-            const int ksize = 3;
-
-            SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ;
-
-            Mat kernel;
-            gen(kernel, ksize, ksize, CV_32SC1, -3.0, 3.0);
-
-            Mat dst, ocl_dst;
-
-            cv::filter2D(src, dst, -1, kernel);
-
-            CPU_ON;
-            cv::filter2D(src, dst, -1, kernel);
-            CPU_OFF;
-
-            ocl::oclMat d_src(src), d_dst;
-
-            WARMUP_ON;
-            ocl::filter2D(d_src, d_dst, -1, kernel);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::filter2D(d_src, d_dst, -1, kernel);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::filter2D(d_src, d_dst, -1, kernel);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
-
-        }
+        OCL_TEST_CYCLE() cv::ocl::filter2D(oclSrc, oclDst, -1, oclKernel);
 
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
-}
\ No newline at end of file
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// Bilateral////////////////////////
+
+typedef Size_MatType BilateralFixture;
+
+PERF_TEST_P(BilateralFixture, Bilateral,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), d = 7;
+    double sigmacolor = 50.0, sigmaspace = 50.0;
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (srcSize == OCL_SIZE_4000 && type == CV_8UC3)
+        declare.time(8);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::bilateralFilter(oclSrc, oclDst, d, sigmacolor, sigmaspace);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::bilateralFilter(src, dst, d, sigmacolor, sigmaspace);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// adaptiveBilateral////////////////////////
+
+typedef Size_MatType adaptiveBilateralFixture;
+
+PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+    double sigmaspace = 10.0;
+    Size ksize(9,9);
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(15);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::adaptiveBilateralFilter(oclSrc, oclDst, ksize, sigmaspace);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1.);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::adaptiveBilateralFilter(src, dst, ksize, sigmaspace);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/perf/perf_gemm.cpp b/modules/ocl/perf/perf_gemm.cpp
index f197c5f5a..803e1f91b 100644
--- a/modules/ocl/perf/perf_gemm.cpp
+++ b/modules/ocl/perf/perf_gemm.cpp
@@ -43,48 +43,46 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
 
 ///////////// gemm ////////////////////////
-PERFTEST(gemm)
+
+typedef TestBaseWithParam<Size> gemmFixture;
+
+#ifdef HAVE_CLAMDBLAS
+
+PERF_TEST_P(gemmFixture, gemm, ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000))
 {
-    Mat src1, src2, src3, dst, ocl_dst;
-    ocl::oclMat d_src1, d_src2, d_src3, d_dst;
+    const Size srcSize = GetParam();
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
+            src3(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
+    declare.in(src1, src2, src3).out(dst).time(srcSize == OCL_SIZE_2000 ? 65 : 8);
+    randu(src1, -10.0f, 10.0f);
+    randu(src2, -10.0f, 10.0f);
+    randu(src3, -10.0f, 10.0f);
+
+    if (RUN_OCL_IMPL)
     {
-        SUBTEST << size << 'x' << size;
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2),
+                oclSrc3(src3), oclDst(srcSize, CV_32FC1);
 
-        gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
-        gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
-        gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
+        OCL_TEST_CYCLE() cv::ocl::gemm(oclSrc1, oclSrc2, 1.0, oclSrc3, 1.0, oclDst);
 
-        gemm(src1, src2, 1.0, src3, 1.0, dst);
+        oclDst.download(dst);
 
-        CPU_ON;
-        gemm(src1, src2, 1.0, src3, 1.0, dst);
-        CPU_OFF;
-
-        d_src1.upload(src1);
-        d_src2.upload(src2);
-        d_src3.upload(src3);
-
-        WARMUP_ON;
-        ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
-        WARMUP_OFF;
-
-        GPU_ON;
-        ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src1.upload(src1);
-        d_src2.upload(src2);
-        d_src3.upload(src3);
-        ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
-        d_dst.download(ocl_dst);
-        GPU_FULL_OFF;
-
-        TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4);
+        SANITY_CHECK(dst, 0.01);
     }
-}
\ No newline at end of file
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst);
+
+        SANITY_CHECK(dst, 0.01);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+#endif
diff --git a/modules/ocl/src/precomp.cpp b/modules/ocl/perf/perf_gftt.cpp
similarity index 55%
rename from modules/ocl/src/precomp.cpp
rename to modules/ocl/perf/perf_gftt.cpp
index 766138c57..8a29adc0c 100644
--- a/modules/ocl/src/precomp.cpp
+++ b/modules/ocl/perf/perf_gftt.cpp
@@ -10,11 +10,12 @@
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
+//    Peng Xiao, pengxiao@outlook.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -29,7 +30,7 @@
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
-// This software is provided by the copyright holders and contributors "as is" and
+// This software is provided by the copyright holders and contributors as is and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -42,6 +43,53 @@
 //
 //M*/
 
-#include "precomp.hpp"
-//CriticalSection cs;
-/* End of file. */
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
+
+///////////// GoodFeaturesToTrack ////////////////////////
+
+typedef tuple<string, double> GoodFeaturesToTrackParams;
+typedef TestBaseWithParam<GoodFeaturesToTrackParams> GoodFeaturesToTrackFixture;
+
+PERF_TEST_P(GoodFeaturesToTrackFixture, GoodFeaturesToTrack,
+            ::testing::Combine(::testing::Values(string("gpu/opticalflow/rubberwhale1.png"),
+                                                 string("gpu/stereobm/aloe-L.png")),
+                               ::testing::Range(0.0, 4.0, 3.0)))
+{
+
+    const GoodFeaturesToTrackParams param = GetParam();
+    const string fileName = getDataPath(get<0>(param));
+    const int maxCorners = 2000;
+    const double qualityLevel = 0.01, minDistance = get<1>(param);
+
+    Mat frame = imread(fileName, IMREAD_GRAYSCALE);
+    ASSERT_TRUE(!frame.empty()) << "no input image";
+
+    vector<Point2f> pts_gold;
+    declare.in(frame);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclFrame(frame), pts_oclmat;
+        ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
+
+        OCL_TEST_CYCLE() detector(oclFrame, pts_oclmat);
+
+        detector.downloadPoints(pts_oclmat, pts_gold);
+
+        SANITY_CHECK(pts_gold);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::goodFeaturesToTrack(frame, pts_gold,
+                                             maxCorners, qualityLevel, minDistance);
+
+        SANITY_CHECK(pts_gold);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp
index 372949521..9c258fe25 100644
--- a/modules/ocl/perf/perf_haar.cpp
+++ b/modules/ocl/perf/perf_haar.cpp
@@ -43,105 +43,46 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
 
-#if 0
+#include "opencv2/objdetect/objdetect_c.h"
+
+using namespace perf;
 
 ///////////// Haar ////////////////////////
-namespace cv
+
+PERF_TEST(HaarFixture, Haar)
 {
-namespace ocl
-{
-
-struct getRect
-{
-    Rect operator()(const CvAvgComp &e) const
-    {
-        return e.rect;
-    }
-};
-
-class CascadeClassifier_GPU : public OclCascadeClassifier
-{
-public:
-    void detectMultiScale(oclMat &image,
-                          CV_OUT std::vector<cv::Rect>& faces,
-                          double scaleFactor = 1.1,
-                          int minNeighbors = 3, int flags = 0,
-                          Size minSize = Size(),
-                          Size maxSize = Size())
-    {
-        (void)maxSize;
-        MemStorage storage(cvCreateMemStorage(0));
-        //CvMat img=image;
-        CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize);
-        vector<CvAvgComp> vecAvgComp;
-        Seq<CvAvgComp>(objs).copyTo(vecAvgComp);
-        faces.resize(vecAvgComp.size());
-        std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
-    }
-
-};
-
-}
-}
-PERFTEST(Haar)
-{
-    Mat img = imread(abspath("basketball1.png"), IMREAD_GRAYSCALE);
-
-    if (img.empty())
-    {
-        throw runtime_error("can't open basketball1.png");
-    }
-
-    CascadeClassifier faceCascadeCPU;
-
-    if (!faceCascadeCPU.load(abspath("haarcascade_frontalface_alt.xml")))
-    {
-        throw runtime_error("can't load haarcascade_frontalface_alt.xml");
-    }
-
     vector<Rect> faces;
 
-    SUBTEST << img.cols << "x" << img.rows << "; scale image";
-    CPU_ON;
-    faceCascadeCPU.detectMultiScale(img, faces,
-                                    1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
-    CPU_OFF;
+    Mat img = imread(getDataPath("gpu/haarcascade/basketball1.png"), IMREAD_GRAYSCALE);
+    ASSERT_TRUE(!img.empty()) << "can't open basketball1.png";
+    declare.in(img);
 
-
-    vector<Rect> oclfaces;
-    ocl::CascadeClassifier_GPU faceCascade;
-
-    if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml")))
+    if (RUN_PLAIN_IMPL)
     {
-        throw runtime_error("can't load haarcascade_frontalface_alt.xml");
+        CascadeClassifier faceCascade;
+        ASSERT_TRUE(faceCascade.load(getDataPath("gpu/haarcascade/haarcascade_frontalface_alt.xml")))
+                << "can't load haarcascade_frontalface_alt.xml";
+
+        TEST_CYCLE() faceCascade.detectMultiScale(img, faces,
+                                                     1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
+
+        SANITY_CHECK(faces, 4 + 1e-4);
     }
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::OclCascadeClassifier faceCascade;
+        ocl::oclMat oclImg(img);
 
-    ocl::oclMat d_img(img);
+        ASSERT_TRUE(faceCascade.load(getDataPath("gpu/haarcascade/haarcascade_frontalface_alt.xml")))
+                << "can't load haarcascade_frontalface_alt.xml";
 
-    WARMUP_ON;
-    faceCascade.detectMultiScale(d_img, oclfaces,
-                                 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
-    WARMUP_OFF;
+        OCL_TEST_CYCLE() faceCascade.detectMultiScale(oclImg, faces,
+                                     1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
 
-    if(faces.size() == oclfaces.size())
-        TestSystem::instance().setAccurate(1, 0);
+        SANITY_CHECK(faces, 4 + 1e-4);
+    }
     else
-        TestSystem::instance().setAccurate(0, abs((int)faces.size() - (int)oclfaces.size()));
-
-    faces.clear();
-
-    GPU_ON;
-    faceCascade.detectMultiScale(d_img, oclfaces,
-                                 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
-    GPU_OFF;
-
-    GPU_FULL_ON;
-    d_img.upload(img);
-    faceCascade.detectMultiScale(d_img, oclfaces,
-                                 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
-    GPU_FULL_OFF;
+        OCL_PERF_ELSE
 }
-
-#endif
diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp
index 05093811f..15846d831 100644
--- a/modules/ocl/perf/perf_hog.cpp
+++ b/modules/ocl/perf/perf_hog.cpp
@@ -43,118 +43,39 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
 
 ///////////// HOG////////////////////////
-bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
+
+PERF_TEST(HOGFixture, HOG)
 {
-    return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) &&
-        (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold));
+    Mat src = imread(getDataPath("gpu/hog/road.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_TRUE(!src.empty()) << "can't open input image road.png";
+
+    vector<cv::Rect> found_locations;
+    declare.in(src).time(5);
+
+    if (RUN_PLAIN_IMPL)
+    {
+        HOGDescriptor hog;
+        hog.setSVMDetector(hog.getDefaultPeopleDetector());
+
+        TEST_CYCLE() hog.detectMultiScale(src, found_locations);
+
+        SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
+    }
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::HOGDescriptor ocl_hog;
+        ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
+        ocl::oclMat oclSrc(src);
+
+        OCL_TEST_CYCLE() ocl_hog.detectMultiScale(oclSrc, found_locations);
+
+        SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
+    }
+    else
+        OCL_PERF_ELSE
 }
-
-PERFTEST(HOG)
-{
-    Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE);
-
-    if (src.empty())
-    {
-        throw runtime_error("can't open road.png");
-    }
-
-
-    cv::HOGDescriptor hog;
-    hog.setSVMDetector(hog.getDefaultPeopleDetector());
-    std::vector<cv::Rect> found_locations;
-    std::vector<cv::Rect> d_found_locations;
-
-    SUBTEST << 768 << 'x' << 576 << "; road.png";
-
-    hog.detectMultiScale(src, found_locations);
-
-    CPU_ON;
-    hog.detectMultiScale(src, found_locations);
-    CPU_OFF;
-
-    cv::ocl::HOGDescriptor ocl_hog;
-    ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
-    ocl::oclMat d_src;
-    d_src.upload(src);
-
-    WARMUP_ON;
-    ocl_hog.detectMultiScale(d_src, d_found_locations);
-    WARMUP_OFF;
-    
-    // Ground-truth rectangular people window
-    cv::Rect win1_64x128(231, 190, 72, 144);
-    cv::Rect win2_64x128(621, 156, 97, 194);
-    cv::Rect win1_48x96(238, 198, 63, 126);
-    cv::Rect win2_48x96(619, 161, 92, 185);
-    cv::Rect win3_48x96(488, 136, 56, 112);
-
-    // Compare whether ground-truth windows are detected and compare the number of windows detected.
-    std::vector<int> d_comp(4);
-    std::vector<int> comp(4);
-    for(int i = 0; i < (int)d_comp.size(); i++)
-    {
-        d_comp[i] = 0;
-        comp[i] = 0;
-    }
-
-    int threshold = 10;
-    int val = 32;
-    d_comp[0] = (int)d_found_locations.size();
-    comp[0] = (int)found_locations.size();
-
-    cv::Size winSize = hog.winSize;
-
-    if (winSize == cv::Size(48, 96))
-    {
-        for(int i = 0; i < (int)d_found_locations.size(); i++)
-        {
-            if (match_rect(d_found_locations[i], win1_48x96, threshold))
-                d_comp[1] = val;
-            if (match_rect(d_found_locations[i], win2_48x96, threshold))
-                d_comp[2] = val;
-            if (match_rect(d_found_locations[i], win3_48x96, threshold))
-                d_comp[3] = val;
-        }
-        for(int i = 0; i < (int)found_locations.size(); i++)
-        {
-            if (match_rect(found_locations[i], win1_48x96, threshold))
-                comp[1] = val;
-            if (match_rect(found_locations[i], win2_48x96, threshold))
-                comp[2] = val;
-            if (match_rect(found_locations[i], win3_48x96, threshold))
-                comp[3] = val;
-        }
-    }
-    else if (winSize == cv::Size(64, 128))
-    {
-        for(int i = 0; i < (int)d_found_locations.size(); i++)
-        {
-            if (match_rect(d_found_locations[i], win1_64x128, threshold))
-                d_comp[1] = val;
-            if (match_rect(d_found_locations[i], win2_64x128, threshold))
-                d_comp[2] = val;
-        }
-        for(int i = 0; i < (int)found_locations.size(); i++)
-        {
-            if (match_rect(found_locations[i], win1_64x128, threshold))
-                comp[1] = val;
-            if (match_rect(found_locations[i], win2_64x128, threshold))
-                comp[2] = val;
-        }
-    }
-
-    cv::Mat gpu_rst(d_comp), cpu_rst(comp);
-    TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3);
-
-    GPU_ON;
-    ocl_hog.detectMultiScale(d_src, found_locations);
-    GPU_OFF;
-
-    GPU_FULL_ON;
-    d_src.upload(src);
-    ocl_hog.detectMultiScale(d_src, found_locations);
-    GPU_FULL_OFF;
-}
\ No newline at end of file
diff --git a/modules/ocl/perf/perf_hough.cpp b/modules/ocl/perf/perf_hough.cpp
index 53c7b3b05..61e8e48d2 100644
--- a/modules/ocl/perf/perf_hough.cpp
+++ b/modules/ocl/perf/perf_hough.cpp
@@ -40,7 +40,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
 
 #ifdef HAVE_OPENCL
 
diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp
index 5ab2f7d24..cb1f8efa4 100644
--- a/modules/ocl/perf/perf_imgproc.cpp
+++ b/modules/ocl/perf/perf_imgproc.cpp
@@ -43,297 +43,239 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
 
 ///////////// equalizeHist ////////////////////////
-PERFTEST(equalizeHist)
+
+typedef TestBaseWithParam<Size> equalizeHistFixture;
+
+PERF_TEST_P(equalizeHistFixture, equalizeHist, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src, dst, ocl_dst;
-    int all_type[] = {CV_8UC1};
-    std::string type_name[] = {"CV_8UC1"};
+    const Size srcSize = GetParam();
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, CV_8UC1), dst(srcSize, CV_8UC1);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
 
-            gen(src, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::equalizeHist(oclSrc, oclDst);
 
-            equalizeHist(src, dst);
-
-            CPU_ON;
-            equalizeHist(src, dst);
-            CPU_OFF;
-
-            ocl::oclMat d_src(src);
-            ocl::oclMat d_dst;
-            ocl::oclMat d_hist;
-            ocl::oclMat d_buf;
-
-            WARMUP_ON;
-            ocl::equalizeHist(d_src, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::equalizeHist(d_src, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::equalizeHist(d_src, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.1);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::equalizeHist(src, dst);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else
+        OCL_PERF_ELSE
 }
+
 /////////// CopyMakeBorder //////////////////////
-PERFTEST(CopyMakeBorder)
+
+typedef Size_MatType CopyMakeBorderFixture;
+
+PERF_TEST_P(CopyMakeBorderFixture, CopyMakeBorder,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), borderType = BORDER_CONSTANT;
 
-    int bordertype = BORDER_CONSTANT;
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst;
+    const Size dstSize = srcSize + Size(12, 12);
+    dst.create(dstSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
 
+        OCL_TEST_CYCLE() cv::ocl::copyMakeBorder(oclSrc, oclDst, 7, 5, 5, 7, borderType, cv::Scalar(1.0));
 
-            gen(src, size, size, all_type[j], 0, 256);
-
-            copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
-
-            CPU_ON;
-            copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
-            CPU_OFF;
-
-            ocl::oclMat d_src(src);
-
-            WARMUP_ON;
-            ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::copyMakeBorder(src, dst, 7, 5, 5, 7, borderType, cv::Scalar(1.0));
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// cornerMinEigenVal ////////////////////////
-PERFTEST(cornerMinEigenVal)
+
+typedef Size_MatType cornerMinEigenValFixture;
+
+PERF_TEST_P(cornerMinEigenValFixture, cornerMinEigenVal,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), borderType = BORDER_REFLECT;
+    const int blockSize = 7, apertureSize = 1 + 2 * 3;
 
-    int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4);
-    int borderType = BORDER_REFLECT;
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+    Mat src(srcSize, type), dst(srcSize, CV_32FC1);
+    declare.in(src, WARMUP_RNG).out(dst)
+            .time(srcSize == OCL_SIZE_4000 ? 20 : srcSize == OCL_SIZE_2000 ? 5 : 3);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    const int depth = CV_MAT_DEPTH(type);
+    const ERROR_TYPE errorType = depth == CV_8U ? ERROR_ABSOLUTE : ERROR_RELATIVE;
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1);
 
-            gen(src, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::cornerMinEigenVal(oclSrc, oclDst, blockSize, apertureSize, borderType);
 
-            cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
-
-            CPU_ON;
-            cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
-            CPU_OFF;
-
-            ocl::oclMat d_src(src);
-
-            WARMUP_ON;
-            ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, 1e-6, errorType);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
+
+        SANITY_CHECK(dst, 1e-6, errorType);
+    }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// cornerHarris ////////////////////////
-PERFTEST(cornerHarris)
+
+typedef Size_MatType cornerHarrisFixture;
+
+PERF_TEST_P(cornerHarrisFixture, cornerHarris,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), borderType = BORDER_REFLECT;
 
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+    Mat src(srcSize, type), dst(srcSize, CV_32FC1);
+    randu(src, 0, 1);
+    declare.in(src).out(dst)
+            .time(srcSize == OCL_SIZE_4000 ? 20 : srcSize == OCL_SIZE_2000 ? 5 : 3);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; BORDER_REFLECT";
+        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1);
 
-            gen(src, size, size, all_type[j], 0, 1);
-
-            cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT);
-
-            CPU_ON;
-            cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
-        }
+        OCL_TEST_CYCLE() cv::ocl::cornerHarris(oclSrc, oclDst, 5, 7, 0.1, borderType);
 
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, 3e-5);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::cornerHarris(src, dst, 5, 7, 0.1, borderType);
+
+        SANITY_CHECK(dst, 3e-5);
+    }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// integral ////////////////////////
-PERFTEST(integral)
+
+typedef TestBaseWithParam<Size> integralFixture;
+
+PERF_TEST_P(integralFixture, DISABLED_integral, OCL_TYPICAL_MAT_SIZES) // TODO does not work properly
 {
-    Mat src, sum, ocl_sum;
-    ocl::oclMat d_src, d_sum, d_buf;
+    const Size srcSize = GetParam();
 
-    int all_type[] = {CV_8UC1};
-    std::string type_name[] = {"CV_8UC1"};
+    Mat src(srcSize, CV_8UC1), dst;
+    declare.in(src, WARMUP_RNG);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j]  ;
+        ocl::oclMat oclSrc(src), oclDst;
 
-            gen(src, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::integral(oclSrc, oclDst);
 
-            integral(src, sum);
-
-            CPU_ON;
-            integral(src, sum);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::integral(d_src, d_sum);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::integral(d_src, d_sum);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::integral(d_src, d_sum);
-            d_sum.download(ocl_sum);
-            GPU_FULL_OFF;
-
-            if(sum.type() == ocl_sum.type()) //we won't test accuracy when cpu function overlow
-                TestSystem::instance().ExpectedMatNear(sum, ocl_sum, 0.0);
-
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
-}
-///////////// WarpAffine ////////////////////////
-PERFTEST(WarpAffine)
-{
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::integral(src, dst);
 
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// WarpAffine ////////////////////////
+
+typedef Size_MatType WarpAffineFixture;
+
+PERF_TEST_P(WarpAffineFixture, WarpAffine,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
+{
     static const double coeffs[2][3] =
     {
-        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
-        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0}
+        { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
+        { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
     };
     Mat M(2, 3, CV_64F, (void *)coeffs);
-    int interpolation = INTER_NEAREST;
+    const int interpolation = INTER_NEAREST;
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
-            Size size1 = Size(size, size);
+        OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation);
 
-            warpAffine(src, dst, M, size1, interpolation);
-
-            CPU_ON;
-            warpAffine(src, dst, M, size1, interpolation);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
-}
-///////////// WarpPerspective ////////////////////////
-PERFTEST(WarpPerspective)
-{
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::warpAffine(src, dst, M, srcSize, interpolation);
 
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// WarpPerspective ////////////////////////
+
+typedef Size_MatType WarpPerspectiveFixture;
+
+PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
+{
     static const double coeffs[3][3] =
     {
         {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
@@ -341,199 +283,129 @@ PERFTEST(WarpPerspective)
         {0.0, 0.0, 1.0}
     };
     Mat M(3, 3, CV_64F, (void *)coeffs);
-    int interpolation = INTER_LINEAR;
+    const int interpolation = INTER_LINEAR;
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst)
+            .time(srcSize == OCL_SIZE_4000 ? 18 : srcSize == OCL_SIZE_2000 ? 5 : 2);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
-            Size size1 = Size(size, size);
+        OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation);
 
-            warpPerspective(src, dst, M, size1, interpolation);
-
-            CPU_ON;
-            warpPerspective(src, dst, M, size1, interpolation);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::warpPerspective(src, dst, M, srcSize, interpolation);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// resize ////////////////////////
-PERFTEST(resize)
+
+CV_ENUM(resizeInterType, INTER_NEAREST, INTER_LINEAR)
+
+typedef tuple<Size, MatType, resizeInterType, double> resizeParams;
+typedef TestBaseWithParam<resizeParams> resizeFixture;
+
+PERF_TEST_P(resizeFixture, resize,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
+                               resizeInterType::all(),
+                               ::testing::Values(0.5, 2.0)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const resizeParams params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), interType = get<2>(params);
+    double scale = get<3>(params);
 
+    Mat src(srcSize, type), dst;
+    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
+    dst.create(dstSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+    if (interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize)
+        declare.time(11);
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
-
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; up";
+        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType);
 
-            resize(src, dst, Size(), 2.0, 2.0);
-
-            CPU_ON;
-            resize(src, dst, Size(), 2.0, 2.0);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
     }
-
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    else if (RUN_PLAIN_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; down";
-
-            gen(src, size, size, all_type[j], 0, 256);
-
-            resize(src, dst, Size(), 0.5, 0.5);
-
-            CPU_ON;
-            resize(src, dst, Size(), 0.5, 0.5);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
-        }
+        TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, interType);
 
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
     }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// threshold////////////////////////
-PERFTEST(threshold)
+
+CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TRUNC)
+
+typedef tuple<Size, ThreshType> ThreshParams;
+typedef TestBaseWithParam<ThreshParams> ThreshFixture;
+
+PERF_TEST_P(ThreshFixture, threshold,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               ThreshType::all()))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const ThreshParams params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int threshType = get<1>(params);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, CV_8U), dst(srcSize, CV_8U);
+    randu(src, 0, 100);
+    declare.in(src).out(dst);
+
+    if (RUN_OCL_IMPL)
     {
-        SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY";
+        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_8U);
 
-        gen(src, size, size, CV_8U, 0, 100);
+        OCL_TEST_CYCLE() cv::ocl::threshold(oclSrc, oclDst, 50.0, 0.0, threshType);
 
-        threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
+        oclDst.download(dst);
 
-        CPU_ON;
-        threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
-        CPU_OFF;
-
-        d_src.upload(src);
-
-        WARMUP_ON;
-        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
-        WARMUP_OFF;
-
-        GPU_ON;
-        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src.upload(src);
-        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
-        d_dst.download(ocl_dst);
-        GPU_FULL_OFF;
-
-        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
+        SANITY_CHECK(dst);
     }
-
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    else if (RUN_PLAIN_IMPL)
     {
-        SUBTEST << size << 'x' << size << "; 32FC1; THRESH_TRUNC [NPP]";
+        TEST_CYCLE() cv::threshold(src, dst, 50.0, 0.0, threshType);
 
-        gen(src, size, size, CV_32FC1, 0, 100);
-
-        threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);
-
-        CPU_ON;
-        threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);
-        CPU_OFF;
-
-        d_src.upload(src);
-
-        WARMUP_ON;
-        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
-        WARMUP_OFF;
-
-        GPU_ON;
-        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src.upload(src);
-        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
-        d_dst.download(ocl_dst);
-        GPU_FULL_OFF;
-
-        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
+        SANITY_CHECK(dst);
     }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// meanShiftFiltering////////////////////////
-COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
+
+typedef struct _COOR
+{
+    short x;
+    short y;
+} COOR;
+
+static COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
 {
 
     int isr2 = sr * sr;
@@ -715,48 +587,41 @@ static void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr
     }
 }
 
-PERFTEST(meanShiftFiltering)
+typedef TestBaseWithParam<Size> meanShiftFilteringFixture;
+
+PERF_TEST_P(meanShiftFilteringFixture, meanShiftFiltering,
+            OCL_TYPICAL_MAT_SIZES)
 {
-    int sp = 5, sr = 6;
-    Mat src, dst, ocl_dst;
+    const Size srcSize = GetParam();
+    const int sp = 5, sr = 6;
+    cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1);
 
-    ocl::oclMat d_src, d_dst;
+    Mat src(srcSize, CV_8UC4), dst(srcSize, CV_8UC4);
+    declare.in(src, WARMUP_RNG).out(dst)
+            .time(srcSize == OCL_SIZE_4000 ?
+                      56 : srcSize == OCL_SIZE_2000 ? 15 : 3.8);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_PLAIN_IMPL)
     {
-        SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4";
+        TEST_CYCLE() meanShiftFiltering_(src, dst, sp, sr, crit);
 
-        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
-
-        cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1);
-
-        meanShiftFiltering_(src, dst, sp, sr, crit);
-
-        CPU_ON;
-        meanShiftFiltering_(src, dst, sp, sr, crit);
-        CPU_OFF;
-
-        d_src.upload(src);
-
-        WARMUP_ON;
-        ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
-        WARMUP_OFF;
-
-        GPU_ON;
-        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src.upload(src);
-        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
-        d_dst.download(ocl_dst);
-        GPU_FULL_OFF;
-
-        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
+        SANITY_CHECK(dst);
     }
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_8UC4);
+
+        OCL_TEST_CYCLE() ocl::meanShiftFiltering(oclSrc, oclDst, sp, sr, crit);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
-void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
+static void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
 {
     if (src_roi.empty())
     {
@@ -814,158 +679,184 @@ void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp,
     }
 
 }
-PERFTEST(meanShiftProc)
-{
-    Mat src;
-    vector<Mat> dst(2), ocl_dst(2);
-    ocl::oclMat d_src, d_dst, d_dstCoor;
 
+typedef TestBaseWithParam<Size> meanShiftProcFixture;
+
+PERF_TEST_P(meanShiftProcFixture, meanShiftProc,
+            OCL_TYPICAL_MAT_SIZES)
+{
+    const Size srcSize = GetParam();
     TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, CV_8UC4), dst1(srcSize, CV_8UC4),
+            dst2(srcSize, CV_16SC2);
+    declare.in(src, WARMUP_RNG).out(dst1, dst2)
+            .time(srcSize == OCL_SIZE_4000 ?
+                      56 : srcSize == OCL_SIZE_2000 ? 15 : 3.8);;
+
+    if (RUN_PLAIN_IMPL)
     {
-        SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 ";
+        TEST_CYCLE() meanShiftProc_(src, dst1, dst2, 5, 6, crit);
 
-        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
-
-        meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
-
-        CPU_ON;
-        meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
-        CPU_OFF;
-
-        d_src.upload(src);
-
-        WARMUP_ON;
-        ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
-        WARMUP_OFF;
-
-        GPU_ON;
-        ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src.upload(src);
-        ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
-        d_dst.download(ocl_dst[0]);
-        d_dstCoor.download(ocl_dst[1]);
-        GPU_FULL_OFF;
-
-        vector<double> eps(2, 0.);
-        TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);
+        SANITY_CHECK(dst1);
+        SANITY_CHECK(dst2);
     }
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst1(srcSize, CV_8UC4),
+                oclDst2(srcSize, CV_16SC2);
+
+        OCL_TEST_CYCLE() ocl::meanShiftProc(oclSrc, oclDst1, oclDst2, 5, 6, crit);
+
+        oclDst1.download(dst1);
+        oclDst2.download(dst2);
+
+        SANITY_CHECK(dst1);
+        SANITY_CHECK(dst2);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// remap////////////////////////
-PERFTEST(remap)
+
+CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
+
+typedef tuple<Size, MatType, RemapInterType> remapParams;
+typedef TestBaseWithParam<remapParams> remapFixture;
+
+PERF_TEST_P(remapFixture, remap,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
+                               RemapInterType::all()))
 {
-    Mat src, dst, xmap, ymap, ocl_dst;
-    ocl::oclMat d_src, d_dst, d_xmap, d_ymap;
+    const remapParams params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), interpolation = get<2>(params);
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    int interpolation = INTER_LINEAR;
-    int borderMode = BORDER_CONSTANT;
+    if (srcSize == OCL_SIZE_4000 && interpolation == INTER_LINEAR)
+        declare.time(9);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat xmap, ymap;
+    xmap.create(srcSize, CV_32FC1);
+    ymap.create(srcSize, CV_32FC1);
+
+    for (int i = 0; i < srcSize.height; ++i)
     {
-        for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++)
+        float * const xmap_row = xmap.ptr<float>(i);
+        float * const ymap_row = ymap.ptr<float>(i);
+
+        for (int j = 0; j < srcSize.width; ++j)
         {
-            SUBTEST << size << 'x' << size << "; src " << type_name[t] << "; map CV_32FC1";
-
-            gen(src, size, size, all_type[t], 0, 256);
-
-            xmap.create(size, size, CV_32FC1);
-            dst.create(size, size, CV_32FC1);
-            ymap.create(size, size, CV_32FC1);
-
-            for (int i = 0; i < size; ++i)
-            {
-                float *xmap_row = xmap.ptr<float>(i);
-                float *ymap_row = ymap.ptr<float>(i);
-
-                for (int j = 0; j < size; ++j)
-                {
-                    xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
-                    ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
-                }
-            }
-
-            remap(src, dst, xmap, ymap, interpolation, borderMode);
-
-            CPU_ON;
-            remap(src, dst, xmap, ymap, interpolation, borderMode);
-            CPU_OFF;
-
-            d_src.upload(src);
-            d_dst.upload(dst);
-            d_xmap.upload(xmap);
-            d_ymap.upload(ymap);
-
-            WARMUP_ON;
-            ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 2.0);
+            xmap_row[j] = (j - srcSize.width * 0.5f) * 0.75f + srcSize.width * 0.5f;
+            ymap_row[j] = (i - srcSize.height * 0.5f) * 0.75f + srcSize.height * 0.5f;
         }
-
     }
+
+    const int borderMode = BORDER_CONSTANT;
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+        ocl::oclMat oclXMap(xmap), oclYMap(ymap);
+
+        OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// CLAHE ////////////////////////
-PERFTEST(CLAHE)
+
+typedef TestBaseWithParam<Size> CLAHEFixture;
+
+PERF_TEST_P(CLAHEFixture, CLAHE, OCL_TYPICAL_MAT_SIZES)
 {
-    Mat src, dst, ocl_dst;
-    cv::ocl::oclMat d_src, d_dst;
-    int all_type[] = {CV_8UC1};
-    std::string type_name[] = {"CV_8UC1"};
+    const Size srcSize = GetParam();
+    const string impl = getSelectedImpl();
 
-    double clipLimit = 40.0;
+    Mat src(srcSize, CV_8UC1), dst;
+    const double clipLimit = 40.0;
+    declare.in(src, WARMUP_RNG);
 
-    cv::Ptr<cv::CLAHE>      clahe   = cv::createCLAHE(clipLimit);
-    cv::Ptr<cv::ocl::CLAHE> d_clahe = cv::ocl::createCLAHE(clipLimit);
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(11);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst;
+        cv::Ptr<cv::CLAHE> oclClahe = cv::ocl::createCLAHE(clipLimit);
 
-            gen(src, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() oclClahe->apply(oclSrc, oclDst);
 
-            CPU_ON;
-            clahe->apply(src, dst);
-            CPU_OFF;
+        oclDst.download(dst);
 
-            d_src.upload(src);
-
-            WARMUP_ON;
-            d_clahe->apply(d_src, d_dst);
-            WARMUP_OFF;
-
-            ocl_dst = d_dst;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
-
-            GPU_ON;
-            d_clahe->apply(d_src, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            d_clahe->apply(d_src, d_dst);
-            d_dst.download(dst);
-            GPU_FULL_OFF;
-        }
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(clipLimit);
+        TEST_CYCLE() clahe->apply(src, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// columnSum////////////////////////
+
+typedef TestBaseWithParam<Size> columnSumFixture;
+
+static void columnSumPerfTest(const Mat & src, Mat & dst)
+{
+    for (int j = 0; j < src.cols; j++)
+        dst.at<float>(0, j) = src.at<float>(0, j);
+
+    for (int i = 1; i < src.rows; ++i)
+        for (int j = 0; j < src.cols; ++j)
+            dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
+}
+
+PERF_TEST_P(columnSumFixture, columnSum, OCL_TYPICAL_MAT_SIZES)
+{
+    const Size srcSize = GetParam();
+
+    Mat src(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(5);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1);
+
+        OCL_TEST_CYCLE() cv::ocl::columnSum(oclSrc, oclDst);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() columnSumPerfTest(src, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp
index 1330e268f..869e01e60 100644
--- a/modules/ocl/perf/perf_match_template.cpp
+++ b/modules/ocl/perf/perf_match_template.cpp
@@ -43,103 +43,79 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
 
 /////////// matchTemplate ////////////////////////
-//void InitMatchTemplate()
-//{
-//	Mat src; gen(src, 500, 500, CV_32F, 0, 1);
-//	Mat templ; gen(templ, 500, 500, CV_32F, 0, 1);
-//	ocl::oclMat d_src(src), d_templ(templ), d_dst;
-//	ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
-//}
-PERFTEST(matchTemplate)
+
+typedef Size_MatType CV_TM_CCORRFixture;
+
+PERF_TEST_P(CV_TM_CCORRFixture, matchTemplate,
+            ::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
+                               OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
 {
-    //InitMatchTemplate();
-    Mat src, templ, dst, ocl_dst;
-    int templ_size = 5;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params), templSize(5, 5);
+    const int type = get<1>(params);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, type), templ(templSize, type);
+    const Size dstSize(src.cols - templ.cols + 1, src.rows - templ.rows + 1);
+    Mat dst(dstSize, CV_32F);
+    randu(src, 0.0f, 1.0f);
+    randu(templ, 0.0f, 1.0f);
+    declare.time(srcSize == OCL_SIZE_2000 ? 20 : 6).in(src, templ).out(dst);
+
+    if (RUN_OCL_IMPL)
     {
-        int all_type[] = {CV_32FC1, CV_32FC4};
-        std::string type_name[] = {"CV_32FC1", "CV_32FC4"};
+        ocl::oclMat oclSrc(src), oclTempl(templ), oclDst(dstSize, CV_32F);
 
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            for(templ_size = 5; templ_size <= 5; templ_size *= 5)
-            {
-                gen(src, size, size, all_type[j], 0, 1);
+        OCL_TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR);
 
-                SUBTEST << src.cols << 'x' << src.rows << "; " << type_name[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR";
+        oclDst.download(dst);
 
-                gen(templ, templ_size, templ_size, all_type[j], 0, 1);
-
-                matchTemplate(src, templ, dst, TM_CCORR);
-
-                CPU_ON;
-                matchTemplate(src, templ, dst, TM_CCORR);
-                CPU_OFF;
-
-                ocl::oclMat d_src(src), d_templ(templ), d_dst;
-
-                WARMUP_ON;
-                ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR);
-                WARMUP_OFF;
-
-                GPU_ON;
-                ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR);
-                GPU_OFF;
-
-                GPU_FULL_ON;
-                d_src.upload(src);
-                d_templ.upload(templ);
-                ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR);
-                d_dst.download(ocl_dst);
-                GPU_FULL_OFF;
-
-                TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
-            }
-        }
-
-        int all_type_8U[] = {CV_8UC1};
-        std::string type_name_8U[] = {"CV_8UC1"};
-
-        for (size_t j = 0; j < sizeof(all_type_8U) / sizeof(int); j++)
-        {
-            for(templ_size = 5; templ_size <= 5; templ_size *= 5)
-            {
-                SUBTEST << src.cols << 'x' << src.rows << "; " << type_name_8U[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR_NORMED";
-
-                gen(src, size, size, all_type_8U[j], 0, 255);
-
-                gen(templ, templ_size, templ_size, all_type_8U[j], 0, 255);
-
-                matchTemplate(src, templ, dst, TM_CCORR_NORMED);
-
-                CPU_ON;
-                matchTemplate(src, templ, dst, TM_CCORR_NORMED);
-                CPU_OFF;
-
-                ocl::oclMat d_src(src);
-                ocl::oclMat d_templ(templ), d_dst;
-
-                WARMUP_ON;
-                ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR_NORMED);
-                WARMUP_OFF;
-
-                GPU_ON;
-                ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR_NORMED);
-                GPU_OFF;
-
-                GPU_FULL_ON;
-                d_src.upload(src);
-                d_templ.upload(templ);
-                ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR_NORMED);
-                d_dst.download(ocl_dst);
-                GPU_FULL_OFF;
-
-                TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
-            }
-        }
+        SANITY_CHECK(dst, 1e-4);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::matchTemplate(src, templ, dst, TM_CCORR);
+
+        SANITY_CHECK(dst, 1e-4);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+typedef TestBaseWithParam<Size> CV_TM_CCORR_NORMEDFixture;
+
+PERF_TEST_P(CV_TM_CCORR_NORMEDFixture, matchTemplate, OCL_TYPICAL_MAT_SIZES)
+{
+    const Size srcSize = GetParam(), templSize(5, 5);
+
+    Mat src(srcSize, CV_8UC1), templ(templSize, CV_8UC1), dst;
+    const Size dstSize(src.cols - templ.cols + 1, src.rows - templ.rows + 1);
+    dst.create(dstSize, CV_8UC1);
+    declare.in(src, templ, WARMUP_RNG).out(dst)
+            .time(srcSize == OCL_SIZE_2000 ? 10 : srcSize == OCL_SIZE_4000 ? 23 : 2);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclTempl(templ), oclDst(dstSize, CV_8UC1);
+
+        OCL_TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR_NORMED);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 2e-2);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::matchTemplate(src, templ, dst, TM_CCORR_NORMED);
+
+        SANITY_CHECK(dst, 2e-2);
+    }
+    else
+        OCL_PERF_ELSE
 }
diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp
index b724cdbe6..ad1327503 100644
--- a/modules/ocl/perf/perf_matrix_operation.cpp
+++ b/modules/ocl/perf/perf_matrix_operation.cpp
@@ -43,144 +43,115 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
 
 ///////////// ConvertTo////////////////////////
-PERFTEST(ConvertTo)
+
+typedef Size_MatType ConvertToFixture;
+
+PERF_TEST_P(ConvertToFixture, ConvertTo,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst;
+    const int dstType = CV_MAKE_TYPE(CV_32F, src.channels());
+    dst.create(srcSize, dstType);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] << " to 32FC1";
+        ocl::oclMat oclSrc(src), oclDst(srcSize, dstType);
 
-            gen(src, size, size, all_type[j], 0, 256);
-            //gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() oclSrc.convertTo(oclDst, dstType);
 
-            //d_dst.upload(dst);
-
-            src.convertTo(dst, CV_32FC1);
-
-            CPU_ON;
-            src.convertTo(dst, CV_32FC1);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            d_src.convertTo(d_dst, CV_32FC1);
-            WARMUP_OFF;
-
-            GPU_ON;
-            d_src.convertTo(d_dst, CV_32FC1);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            d_src.convertTo(d_dst, CV_32FC1);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() src.convertTo(dst, dstType);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// copyTo////////////////////////
-PERFTEST(copyTo)
+
+typedef Size_MatType copyToFixture;
+
+PERF_TEST_P(copyToFixture, copyTo,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
-            //gen(dst, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() oclSrc.copyTo(oclDst);
 
-            //d_dst.upload(dst);
-
-            src.copyTo(dst);
-
-            CPU_ON;
-            src.copyTo(dst);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            d_src.copyTo(d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            d_src.copyTo(d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            d_src.copyTo(d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
-        }
+        oclDst.download(dst);
 
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() src.copyTo(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
+
 ///////////// setTo////////////////////////
-PERFTEST(setTo)
+
+typedef Size_MatType setToFixture;
+
+PERF_TEST_P(setToFixture, setTo,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, ocl_src;
-    Scalar val(1, 2, 3, 4);
-    ocl::oclMat d_src;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+    const Scalar val(1, 2, 3, 4);
 
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    Mat src(srcSize, type);
+    declare.in(src);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(srcSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
-
-            src.setTo(val);
-
-            CPU_ON;
-            src.setTo(val);
-            CPU_OFF;
-
-            d_src.upload(src);
-
-            WARMUP_ON;
-            d_src.setTo(val);
-            WARMUP_OFF;
-
-            d_src.download(ocl_src);
-            TestSystem::instance().ExpectedMatNear(src, ocl_src, 1.0);
-
-            GPU_ON;;
-            d_src.setTo(val);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            d_src.setTo(val);
-            GPU_FULL_OFF;
-        }
+        OCL_TEST_CYCLE() oclSrc.setTo(val);
+        oclSrc.download(src);
 
+        SANITY_CHECK(src);
     }
-}
\ No newline at end of file
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() src.setTo(val);
+
+        SANITY_CHECK(src);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/perf/perf_columnsum.cpp b/modules/ocl/perf/perf_moments.cpp
similarity index 68%
rename from modules/ocl/perf/perf_columnsum.cpp
rename to modules/ocl/perf/perf_moments.cpp
index ff7ebcd1d..6ecc76651 100644
--- a/modules/ocl/perf/perf_columnsum.cpp
+++ b/modules/ocl/perf/perf_moments.cpp
@@ -43,46 +43,47 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
 
-///////////// columnSum////////////////////////
-PERFTEST(columnSum)
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
+
+///////////// Moments ////////////////////////
+
+typedef Size_MatType MomentsFixture;
+
+PERF_TEST_P(MomentsFixture, DISABLED_Moments,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_16SC1, CV_32FC1, CV_64FC1)))  // TODO does not work properly (see below)
 {
-    Mat src, dst, ocl_dst;
-    ocl::oclMat d_src, d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, type), dst(7, 1, CV_64F);
+    const bool binaryImage = false;
+    cv::Moments mom;
+
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (RUN_OCL_IMPL)
     {
-        SUBTEST << size << 'x' << size << "; CV_32FC1";
+        ocl::oclMat oclSrc(src);
 
-        gen(src, size, size, CV_32FC1, 0, 256);
+        OCL_TEST_CYCLE() mom = cv::ocl::ocl_moments(oclSrc, binaryImage); // TODO Use oclSrc
+        cv::HuMoments(mom, dst);
 
-        CPU_ON;
-        dst.create(src.size(), src.type());
-        for (int j = 0; j < src.cols; j++)
-            dst.at<float>(0, j) = src.at<float>(0, j);
-
-        for (int i = 1; i < src.rows; ++i)
-            for (int j = 0; j < src.cols; ++j)
-                dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
-        CPU_OFF;
-
-        d_src.upload(src);
-
-        WARMUP_ON;
-        ocl::columnSum(d_src, d_dst);
-        WARMUP_OFF;
-
-        GPU_ON;
-        ocl::columnSum(d_src, d_dst);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src.upload(src);
-        ocl::columnSum(d_src, d_dst);
-        d_dst.download(ocl_dst);
-        GPU_FULL_OFF;
-
-        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1);
+        SANITY_CHECK(dst);
     }
-}
\ No newline at end of file
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() mom = cv::moments(src, binaryImage);
+        cv::HuMoments(mom, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/perf/perf_norm.cpp b/modules/ocl/perf/perf_norm.cpp
index 1d986c8e4..363bcd2b3 100644
--- a/modules/ocl/perf/perf_norm.cpp
+++ b/modules/ocl/perf/perf_norm.cpp
@@ -43,45 +43,41 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
 
 ///////////// norm////////////////////////
-PERFTEST(norm)
+
+typedef TestBaseWithParam<Size> normFixture;
+
+PERF_TEST_P(normFixture, DISABLED_norm, OCL_TYPICAL_MAT_SIZES) // TODO doesn't work properly
 {
-    Mat src1, src2, ocl_src1;
-    ocl::oclMat d_src1, d_src2;
+    const Size srcSize = GetParam();
+    const std::string impl = getSelectedImpl();
+    double value = 0.0;
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1);
+    declare.in(src1, src2);
+    randu(src1, 0, 1);
+    randu(src2, 0, 1);
+
+    if (RUN_OCL_IMPL)
     {
-        SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF";
+        ocl::oclMat oclSrc1(src1), oclSrc2(src2);
 
-        gen(src1, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
-        gen(src2, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
+        OCL_TEST_CYCLE() value = cv::ocl::norm(oclSrc1, oclSrc2, NORM_INF);
 
-        norm(src1, src2, NORM_INF);
-
-        CPU_ON;
-        norm(src1, src2, NORM_INF);
-        CPU_OFF;
-
-        d_src1.upload(src1);
-        d_src2.upload(src2);
-
-        WARMUP_ON;
-        ocl::norm(d_src1, d_src2, NORM_INF);
-        WARMUP_OFF;
-
-        d_src1.download(ocl_src1);
-        TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);                        
-
-        GPU_ON;
-        ocl::norm(d_src1, d_src2, NORM_INF);
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src1.upload(src1);
-        d_src2.upload(src2);
-        ocl::norm(d_src1, d_src2, NORM_INF);
-        GPU_FULL_OFF;
+        SANITY_CHECK(value);
     }
-}
\ No newline at end of file
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() value = cv::norm(src1, src2, NORM_INF);
+
+        SANITY_CHECK(value);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/perf/perf_opticalflow.cpp b/modules/ocl/perf/perf_opticalflow.cpp
index 97283b206..861307526 100644
--- a/modules/ocl/perf/perf_opticalflow.cpp
+++ b/modules/ocl/perf/perf_opticalflow.cpp
@@ -43,186 +43,213 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
 
 ///////////// PyrLKOpticalFlow ////////////////////////
-PERFTEST(PyrLKOpticalFlow)
+
+using namespace perf;
+using std::tr1::get;
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+
+CV_ENUM(LoadMode, IMREAD_GRAYSCALE, IMREAD_COLOR)
+
+typedef tuple<int, tuple<string, string, LoadMode> > PyrLKOpticalFlowParamType;
+typedef TestBaseWithParam<PyrLKOpticalFlowParamType> PyrLKOpticalFlowFixture;
+
+PERF_TEST_P(PyrLKOpticalFlowFixture,
+            PyrLKOpticalFlow,
+            ::testing::Combine(
+                ::testing::Values(1000, 2000, 4000),
+                ::testing::Values(
+                    make_tuple<string, string, LoadMode>
+                    (
+                        string("gpu/opticalflow/rubberwhale1.png"),
+                        string("gpu/opticalflow/rubberwhale2.png"),
+                        LoadMode(IMREAD_COLOR)
+                        ),
+                    make_tuple<string, string, LoadMode>
+                    (
+                        string("gpu/stereobm/aloe-L.png"),
+                        string("gpu/stereobm/aloe-R.png"),
+                        LoadMode(IMREAD_GRAYSCALE)
+                        )
+                    )
+                )
+            )
 {
-    std::string images1[] = {"rubberwhale1.png", "basketball1.png"};
-    std::string images2[] = {"rubberwhale2.png", "basketball2.png"};
+    PyrLKOpticalFlowParamType params = GetParam();
+    tuple<string, string, LoadMode> fileParam = get<1>(params);
+    const int pointsCount = get<0>(params);
+    const int openMode = static_cast<int>(get<2>(fileParam));
+    const string fileName0 = get<0>(fileParam), fileName1 = get<1>(fileParam);
+    Mat frame0 = imread(getDataPath(fileName0), openMode);
+    Mat frame1 = imread(getDataPath(fileName1), openMode);
 
-    for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++)
+    declare.in(frame0, frame1);
+
+    ASSERT_FALSE(frame0.empty()) << "can't load " << fileName0;
+    ASSERT_FALSE(frame1.empty()) << "can't load " << fileName1;
+
+    Mat grayFrame;
+    if (openMode == IMREAD_COLOR)
+        cvtColor(frame0, grayFrame, COLOR_BGR2GRAY);
+    else
+        grayFrame = frame0;
+
+    vector<Point2f> pts, nextPts;
+    vector<unsigned char> status;
+    vector<float> err;
+    goodFeaturesToTrack(grayFrame, pts, pointsCount, 0.01, 0.0);
+    Mat ptsMat(1, static_cast<int>(pts.size()), CV_32FC2, (void *)&pts[0]);
+
+    if (RUN_PLAIN_IMPL)
     {
-        Mat frame0 = imread(abspath(images1[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE);
-
-        if (frame0.empty())
-        {
-            std::string errstr = "can't open " + images1[i];
-            throw runtime_error(errstr);
-        }
-
-        Mat frame1 = imread(abspath(images2[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE);
-
-        if (frame1.empty())
-        {
-            std::string errstr = "can't open " + images2[i];
-            throw runtime_error(errstr);
-        }
-
-        Mat gray_frame;
-
-        if (i == 0)
-        {
-            cvtColor(frame0, gray_frame, COLOR_BGR2GRAY);
-        }
-
-        for (int points = Min_Size; points <= Max_Size; points *= Multiple)
-        {
-            if (i == 0)
-                SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points";
-            else
-                SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points";
-            Mat ocl_nextPts;
-            Mat ocl_status;
-
-            vector<Point2f> pts;
-            goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0);
-
-            vector<Point2f> nextPts;
-            vector<unsigned char> status;
-
-            vector<float> err;
-
-            calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
-
-            CPU_ON;
-            calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
-            CPU_OFF;
-
-            ocl::PyrLKOpticalFlow d_pyrLK;
-
-            ocl::oclMat d_frame0(frame0);
-            ocl::oclMat d_frame1(frame1);
-
-            ocl::oclMat d_pts;
-            Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]);
-            d_pts.upload(pts_mat);
-
-            ocl::oclMat d_nextPts;
-            ocl::oclMat d_status;
-            ocl::oclMat d_err;
-
-            WARMUP_ON;
-            d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
-            WARMUP_OFF;
-
-            GPU_ON;
-            d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_frame0.upload(frame0);
-            d_frame1.upload(frame1);
-            d_pts.upload(pts_mat);
-            d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
-
-            if (!d_nextPts.empty())
-                d_nextPts.download(ocl_nextPts);
-
-            if (!d_status.empty())
-                d_status.download(ocl_status);
-            GPU_FULL_OFF;
-
-            size_t mismatch = 0;
-            for (int i = 0; i < (int)nextPts.size(); ++i)
-            {
-                if(status[i] != ocl_status.at<unsigned char>(0, i)){
-                    mismatch++;
-                    continue;
-                }
-                if(status[i]){
-                    Point2f gpu_rst = ocl_nextPts.at<Point2f>(0, i);
-                    Point2f cpu_rst = nextPts[i];
-                    if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.)
-                        mismatch++;
-                }
-            }
-            double ratio = (double)mismatch / (double)nextPts.size();
-            if(ratio < .02)
-                TestSystem::instance().setAccurate(1, ratio);
-            else
-                TestSystem::instance().setAccurate(0, ratio);
-        }
-
+        TEST_CYCLE()
+                cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
     }
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::PyrLKOpticalFlow oclPyrLK;
+        ocl::oclMat oclFrame0(frame0), oclFrame1(frame1);
+        ocl::oclMat oclPts(ptsMat);
+        ocl::oclMat oclNextPts, oclStatus, oclErr;
+
+        OCL_TEST_CYCLE()
+                oclPyrLK.sparse(oclFrame0, oclFrame1, oclPts, oclNextPts, oclStatus, &oclErr);
+    }
+    else
+        OCL_PERF_ELSE
+
+    int value = 0;
+    SANITY_CHECK(value);
 }
 
-
-PERFTEST(tvl1flow)
+PERF_TEST(tvl1flowFixture, tvl1flow)
 {
-    cv::Mat frame0 = imread("rubberwhale1.png", cv::IMREAD_GRAYSCALE);
-    assert(!frame0.empty());
+    Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png";
 
-    cv::Mat frame1 = imread("rubberwhale2.png", cv::IMREAD_GRAYSCALE);
-    assert(!frame1.empty());
+    Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png";
 
-    cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
-    cv::ocl::oclMat d_flowx(frame0.size(), CV_32FC1);
-    cv::ocl::oclMat d_flowy(frame1.size(), CV_32FC1);
+    const Size srcSize = frame0.size();
+    const double eps = 1.2;
+    Mat flow(srcSize, CV_32FC2), flow1(srcSize, CV_32FC1), flow2(srcSize, CV_32FC1);
+    declare.in(frame0, frame1).out(flow1, flow2).time(159);
 
-    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
-    cv::Mat flow;
+    if (RUN_PLAIN_IMPL)
+    {
+        Ptr<DenseOpticalFlow> alg = createOptFlow_DualTVL1();
 
+        TEST_CYCLE() alg->calc(frame0, frame1, flow);
 
-    SUBTEST << frame0.cols << 'x' << frame0.rows << "; rubberwhale1.png; "<<frame1.cols<<'x'<<frame1.rows<<"; rubberwhale2.png";
+        alg->collectGarbage();
+        Mat flows[2] = { flow1, flow2 };
+        split(flow, flows);
 
-    alg->calc(frame0, frame1, flow);
+        SANITY_CHECK(flow1, eps);
+        SANITY_CHECK(flow2, eps);
+    }
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::OpticalFlowDual_TVL1_OCL oclAlg;
+        ocl::oclMat oclFrame0(frame0), oclFrame1(frame1), oclFlow1(srcSize, CV_32FC1),
+                oclFlow2(srcSize, CV_32FC1);
 
-    CPU_ON;
-    alg->calc(frame0, frame1, flow);
-    CPU_OFF;
+        OCL_TEST_CYCLE() oclAlg(oclFrame0, oclFrame1, oclFlow1, oclFlow2);
 
-    cv::Mat gold[2];
-    cv::split(flow, gold);
+        oclAlg.collectGarbage();
 
-    cv::ocl::oclMat d0(frame0.size(), CV_32FC1);
-    d0.upload(frame0);
-    cv::ocl::oclMat d1(frame1.size(), CV_32FC1);
-    d1.upload(frame1);
+        oclFlow1.download(flow1);
+        oclFlow2.download(flow2);
 
-    WARMUP_ON;
-    d_alg(d0, d1, d_flowx, d_flowy);
-    WARMUP_OFF;
-/*
-    double diff1 = 0.0, diff2 = 0.0;
-    if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) == 1
-        &&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1)
-        TestSystem::instance().setAccurate(1);
+        SANITY_CHECK(flow1, eps);
+        SANITY_CHECK(flow2, eps);
+    }
     else
-        TestSystem::instance().setAccurate(0);
+        OCL_PERF_ELSE
+}
 
-    TestSystem::instance().setDiff(diff1);
-    TestSystem::instance().setDiff(diff2);
-*/
+///////////// FarnebackOpticalFlow ////////////////////////
 
+CV_ENUM(farneFlagType, 0, OPTFLOW_FARNEBACK_GAUSSIAN)
 
-    GPU_ON;
-    d_alg(d0, d1, d_flowx, d_flowy);
-    d_alg.collectGarbage();
-    GPU_OFF;
-    
+typedef tuple<tuple<int, double>, farneFlagType, bool> FarnebackOpticalFlowParams;
+typedef TestBaseWithParam<FarnebackOpticalFlowParams> FarnebackOpticalFlowFixture;
 
-    cv::Mat flowx, flowy;
+PERF_TEST_P(FarnebackOpticalFlowFixture, FarnebackOpticalFlow,
+            ::testing::Combine(
+                ::testing::Values(make_tuple<int, double>(5, 1.1),
+                                  make_tuple<int, double>(7, 1.5)),
+                farneFlagType::all(),
+                ::testing::Bool()))
+{
+    Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png";
 
-    GPU_FULL_ON;
-    d0.upload(frame0);
-    d1.upload(frame1);
-    d_alg(d0, d1, d_flowx, d_flowy);
-    d_alg.collectGarbage();
-    d_flowx.download(flowx);
-    d_flowy.download(flowy);
-    GPU_FULL_OFF;
+    Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png";
 
-    TestSystem::instance().ExceptedMatSimilar(gold[0], flowx, 3e-3);
-    TestSystem::instance().ExceptedMatSimilar(gold[1], flowy, 3e-3);
-}
\ No newline at end of file
+    const Size srcSize = frame0.size();
+
+    const FarnebackOpticalFlowParams params = GetParam();
+    const tuple<int, double> polyParams = get<0>(params);
+    const int polyN = get<0>(polyParams), flags = get<1>(params);
+    const double polySigma = get<1>(polyParams), pyrScale = 0.5;
+    const bool useInitFlow = get<2>(params);
+    const double eps = 1.5;
+
+    Mat flowx(srcSize, CV_32FC1), flowy(srcSize, CV_32FC1), flow(srcSize, CV_32FC2);
+    declare.in(frame0, frame1).out(flowx, flowy);
+
+    ocl::FarnebackOpticalFlow farn;
+    farn.pyrScale = pyrScale;
+    farn.polyN = polyN;
+    farn.polySigma = polySigma;
+    farn.flags = flags;
+
+    if (RUN_PLAIN_IMPL)
+    {
+        if (useInitFlow)
+        {
+            calcOpticalFlowFarneback(
+                        frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
+                        farn.numIters, farn.polyN, farn.polySigma, farn.flags);
+            farn.flags |= OPTFLOW_USE_INITIAL_FLOW;
+        }
+
+        TEST_CYCLE()
+                calcOpticalFlowFarneback(
+                    frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
+                    farn.numIters, farn.polyN, farn.polySigma, farn.flags);
+
+        Mat flowxy[2] = { flowx, flowy };
+        split(flow, flowxy);
+
+        SANITY_CHECK(flowx, eps);
+        SANITY_CHECK(flowy, eps);
+    }
+    else if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclFrame0(frame0), oclFrame1(frame1),
+                oclFlowx(srcSize, CV_32FC1), oclFlowy(srcSize, CV_32FC1);
+
+        if (useInitFlow)
+        {
+            farn(oclFrame0, oclFrame1, oclFlowx, oclFlowy);
+            farn.flags |= OPTFLOW_USE_INITIAL_FLOW;
+        }
+
+        OCL_TEST_CYCLE()
+                farn(oclFrame0, oclFrame1, oclFlowx, oclFlowy);
+
+        oclFlowx.download(flowx);
+        oclFlowy.download(flowy);
+
+        SANITY_CHECK(flowx, eps);
+        SANITY_CHECK(flowy, eps);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/perf/perf_precomp.hpp b/modules/ocl/perf/perf_precomp.hpp
new file mode 100644
index 000000000..0235b3d04
--- /dev/null
+++ b/modules/ocl/perf/perf_precomp.hpp
@@ -0,0 +1,119 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#include <iomanip>
+#include <stdexcept>
+#include <string>
+#include <iostream>
+#include <cstdio>
+#include <vector>
+#include <numeric>
+
+#include "cvconfig.h"
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/calib3d.hpp"
+#include "opencv2/video.hpp"
+#include "opencv2/objdetect.hpp"
+#include "opencv2/features2d.hpp"
+#include "opencv2/ocl.hpp"
+#include "opencv2/ts.hpp"
+
+using namespace std;
+using namespace cv;
+
+#define OCL_SIZE_1000 Size(1000, 1000)
+#define OCL_SIZE_2000 Size(2000, 2000)
+#define OCL_SIZE_4000 Size(4000, 4000)
+
+#define OCL_TYPICAL_MAT_SIZES ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000, OCL_SIZE_4000)
+
+#define OCL_PERF_ENUM(type, ...) ::testing::Values(type, ## __VA_ARGS__ )
+
+#define IMPL_OCL "ocl"
+#define IMPL_GPU "gpu"
+#define IMPL_PLAIN "plain"
+
+#define RUN_OCL_IMPL (IMPL_OCL == getSelectedImpl())
+#define RUN_PLAIN_IMPL (IMPL_PLAIN == getSelectedImpl())
+
+#ifdef HAVE_OPENCV_GPU
+# define RUN_GPU_IMPL (IMPL_GPU == getSelectedImpl())
+#endif
+
+#ifdef HAVE_OPENCV_GPU
+#define OCL_PERF_ELSE               \
+        if (RUN_GPU_IMPL)           \
+            CV_TEST_FAIL_NO_IMPL(); \
+        else                        \
+            CV_TEST_FAIL_NO_IMPL();
+#else
+#define OCL_PERF_ELSE               \
+            CV_TEST_FAIL_NO_IMPL();
+#endif
+
+#define OCL_TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); ocl::finish(), stopTimer())
+#define OCL_TEST_CYCLE() for(; startTimer(), next(); ocl::finish(), stopTimer())
+#define OCL_TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ocl::finish(), ++r)
+
+#endif
diff --git a/modules/ocl/perf/perf_pyramid.cpp b/modules/ocl/perf/perf_pyramid.cpp
index 3b96251e5..19c728bb7 100644
--- a/modules/ocl/perf/perf_pyramid.cpp
+++ b/modules/ocl/perf/perf_pyramid.cpp
@@ -43,90 +43,82 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
 
 ///////////// pyrDown //////////////////////
-PERFTEST(pyrDown)
+
+typedef Size_MatType pyrDownFixture;
+
+PERF_TEST_P(pyrDownFixture, pyrDown,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, dst, ocl_dst;
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, type), dst;
+    Size dstSize((srcSize.height + 1) >> 1, (srcSize.width + 1) >> 1);
+    dst.create(dstSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() ocl::pyrDown(oclSrc, oclDst);
 
-            pyrDown(src, dst);
+        oclDst.download(dst);
 
-            CPU_ON;
-            pyrDown(src, dst);
-            CPU_OFF;
-
-            ocl::oclMat d_src(src);
-            ocl::oclMat d_dst;
-
-            WARMUP_ON;
-            ocl::pyrDown(d_src, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::pyrDown(d_src, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::pyrDown(d_src, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, dst.depth() == CV_32F ? 1e-4f : 1.0f);
-        }
+        SANITY_CHECK(dst);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() pyrDown(src, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// pyrUp ////////////////////////
-PERFTEST(pyrUp)
+
+typedef Size_MatType pyrUpFixture;
+
+PERF_TEST_P(pyrUpFixture, pyrUp,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
 {
-    Mat src, dst, ocl_dst;
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
 
-    for (int size = 500; size <= 2000; size *= 2)
+    Mat src(srcSize, type), dst;
+    Size dstSize(srcSize.height << 1, srcSize.width << 1);
+    dst.create(dstSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
 
-            gen(src, size, size, all_type[j], 0, 256);
+        OCL_TEST_CYCLE() ocl::pyrDown(oclSrc, oclDst);
 
-            pyrUp(src, dst);
+        oclDst.download(dst);
 
-            CPU_ON;
-            pyrUp(src, dst);
-            CPU_OFF;
-
-            ocl::oclMat d_src(src);
-            ocl::oclMat d_dst;
-
-            WARMUP_ON;
-            ocl::pyrUp(d_src, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::pyrUp(d_src, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::pyrUp(d_src, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, (src.depth() == CV_32F ? 1e-4f : 1.0));
-        }
+        SANITY_CHECK(dst);
     }
-}
\ No newline at end of file
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() pyrDown(src, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp
index 0fafd14ab..3821a8e16 100644
--- a/modules/ocl/perf/perf_split_merge.cpp
+++ b/modules/ocl/perf/perf_split_merge.cpp
@@ -43,112 +43,99 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
 
 ///////////// Merge////////////////////////
-PERFTEST(Merge)
+
+typedef Size_MatType MergeFixture;
+
+PERF_TEST_P(MergeFixture, Merge,
+            ::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
+                               OCL_PERF_ENUM(CV_8U, CV_32F)))
 {
-    Mat dst, ocl_dst;
-    ocl::oclMat d_dst;
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int depth = get<1>(params), channels = 3;
 
-    int channels = 4;
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
-
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    const int dstType = CV_MAKE_TYPE(depth, channels);
+    Mat dst(srcSize, dstType);
+    vector<Mat> src(channels);
+    for (vector<Mat>::iterator i = src.begin(), end = src.end(); i != end; ++i)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
-            Size size1 = Size(size, size);
-            std::vector<Mat> src(channels);
-
-            for (int i = 0; i < channels; ++i)
-            {
-                src[i] = Mat(size1, all_type[j], cv::Scalar::all(i));
-            }
-
-            merge(src, dst);
-
-            CPU_ON;
-            merge(src, dst);
-            CPU_OFF;
-
-            std::vector<ocl::oclMat> d_src(channels);
-
-            for (int i = 0; i < channels; ++i)
-            {
-                d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
-            }
-
-            WARMUP_ON;
-            ocl::merge(d_src, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::merge(d_src, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            for (int i = 0; i < channels; ++i)
-            {
-                d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
-            }
-            ocl::merge(d_src, d_dst);
-            d_dst.download(ocl_dst);
-            GPU_FULL_OFF;
-
-            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
-        }
-
+        i->create(srcSize, CV_MAKE_TYPE(depth, 1));
+        declare.in(*i, WARMUP_RNG);
     }
+    declare.out(dst);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclDst(srcSize, dstType);
+        vector<ocl::oclMat> oclSrc(src.size());
+        for (vector<ocl::oclMat>::size_type i = 0, end = src.size(); i < end; ++i)
+            oclSrc[i] = src[i];
+
+        OCL_TEST_CYCLE() cv::ocl::merge(oclSrc, oclDst);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::merge(src, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
 }
 
 ///////////// Split////////////////////////
-PERFTEST(Split)
+
+typedef Size_MatType SplitFixture;
+
+PERF_TEST_P(SplitFixture, Split,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8U, CV_32F)))
 {
-    //int channels = 4;
-    int all_type[] = {CV_8UC1, CV_32FC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int depth = get<1>(params), channels = 3;
 
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    Mat src(srcSize, CV_MAKE_TYPE(depth, channels));
+    declare.in(src, WARMUP_RNG);
+
+    if (RUN_OCL_IMPL)
     {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
-            Size size1 = Size(size, size);
+        ocl::oclMat oclSrc(src);
+        vector<ocl::oclMat> oclDst(channels, ocl::oclMat(srcSize, CV_MAKE_TYPE(depth, 1)));
 
-            Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4));
-
-            std::vector<cv::Mat> dst, ocl_dst(4);
-
-            split(src, dst);
-
-            CPU_ON;
-            split(src, dst);
-            CPU_OFF;
-
-            ocl::oclMat d_src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4));
-            std::vector<cv::ocl::oclMat> d_dst;
-
-            WARMUP_ON;
-            ocl::split(d_src, d_dst);
-            WARMUP_OFF;         
-
-            GPU_ON;
-            ocl::split(d_src, d_dst);
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::split(d_src, d_dst);
-            for(size_t i = 0; i < dst.size(); i++)
-                d_dst[i].download(ocl_dst[i]);
-            GPU_FULL_OFF;
-
-            vector<double> eps(4, 0.);
-            TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);
-        }
+        OCL_TEST_CYCLE() cv::ocl::split(oclSrc, oclDst);
 
+        ASSERT_EQ(3, channels);
+        Mat dst0, dst1, dst2;
+        oclDst[0].download(dst0);
+        oclDst[1].download(dst1);
+        oclDst[2].download(dst2);
+        SANITY_CHECK(dst0);
+        SANITY_CHECK(dst1);
+        SANITY_CHECK(dst2);
     }
+    else if (RUN_PLAIN_IMPL)
+    {
+        vector<Mat> dst(channels, Mat(srcSize, CV_MAKE_TYPE(depth, 1)));
+        TEST_CYCLE() cv::split(src, dst);
+
+        ASSERT_EQ(3, channels);
+        Mat & dst0 = dst[0], & dst1 = dst[1], & dst2 = dst[2];
+        SANITY_CHECK(dst0);
+        SANITY_CHECK(dst1);
+        SANITY_CHECK(dst2);
+    }
+    else
+        OCL_PERF_ELSE
 }
diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp
deleted file mode 100644
index 65e2d5181..000000000
--- a/modules/ocl/perf/precomp.cpp
+++ /dev/null
@@ -1,495 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#if GTEST_OS_WINDOWS
-#define NOMINMAX
-# include <windows.h>
-#endif
-
-// This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files
-// All images needed in this test are in samples/gpu folder.
-// For haar template, haarcascade_frontalface_alt.xml shouold be in working directory
-void TestSystem::run()
-{
-    if (is_list_mode_)
-    {
-        for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
-        {
-            cout << (*it)->name() << endl;
-        }
-
-        return;
-    }
-
-    // Run test initializers
-    for (vector<Runnable *>::iterator it = inits_.begin(); it != inits_.end(); ++it)
-    {
-        if ((*it)->name().find(test_filter_, 0) != string::npos)
-        {
-            (*it)->run();
-        }
-    }
-
-    printHeading();
-    writeHeading();
-
-    // Run tests
-    for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
-    {
-        try
-        {
-            if ((*it)->name().find(test_filter_, 0) != string::npos)
-            {
-                cout << endl << (*it)->name() << ":\n";
-
-                setCurrentTest((*it)->name());
-                //fprintf(record_,"%s\n",(*it)->name().c_str());
-
-                (*it)->run();
-                finishCurrentSubtest();
-            }
-        }
-        catch (const Exception &)
-        {
-            // Message is printed via callback
-            resetCurrentSubtest();
-        }
-        catch (const runtime_error &e)
-        {
-            printError(e.what());
-            resetCurrentSubtest();
-        }
-    }
-
-    printSummary();
-    writeSummary();
-}
-
-
-void TestSystem::finishCurrentSubtest()
-{
-    if (cur_subtest_is_empty_)
-        // There is no need to print subtest statistics
-    {
-        return;
-    }
-
-    double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
-    double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
-    double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0;
-
-    double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
-    speedup_total_ += speedup;
-
-    double fullspeedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_);
-    speedup_full_total_ += fullspeedup;
-
-    if (speedup > top_)
-    {
-        speedup_faster_count_++;
-    }
-    else if (speedup < bottom_)
-    {
-        speedup_slower_count_++;
-    }
-    else
-    {
-        speedup_equal_count_++;
-    }
-
-    if (fullspeedup > top_)
-    {
-        speedup_full_faster_count_++;
-    }
-    else if (fullspeedup < bottom_)
-    {
-        speedup_full_slower_count_++;
-    }
-    else
-    {
-        speedup_full_equal_count_++;
-    }
-
-    // compute min, max and
-    std::sort(gpu_times_.begin(), gpu_times_.end());
-    double gpu_min = gpu_times_.front() / getTickFrequency() * 1000.0;
-    double gpu_max = gpu_times_.back() / getTickFrequency() * 1000.0;
-    double deviation = 0;
-
-    if (gpu_times_.size() > 1)
-    {
-        double sum = 0;
-
-        for (size_t i = 0; i < gpu_times_.size(); i++)
-        {
-            int64 diff = gpu_times_[i] - static_cast<int64>(gpu_elapsed_);
-            double diff_time = diff * 1000 / getTickFrequency();
-            sum += diff_time * diff_time;
-        }
-
-        deviation = std::sqrt(sum / gpu_times_.size());
-    }
-
-    printMetrics(is_accurate_, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
-    writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);
-
-    num_subtests_called_++;
-    resetCurrentSubtest();
-}
-
-
-double TestSystem::meanTime(const vector<int64> &samples)
-{
-    double sum = accumulate(samples.begin(), samples.end(), 0.);
-    return sum / samples.size();
-}
-
-
-void TestSystem::printHeading()
-{
-    cout << endl;
-    cout<< setiosflags(ios_base::left);
-
-#if 0
-    cout<<TAB<<setw(7)<< "Accu." << setw(10) << "CPU (ms)" << setw(10) << "GPU, ms"
-        << setw(8) << "Speedup"<< setw(10)<<"GPUTotal" << setw(10) << "Total"
-        << "Description\n";
-    cout<<TAB<<setw(7)<<""<<setw(10)<<""<<setw(10)<<""<<setw(8)<<""<<setw(10)<<"(ms)"<<setw(10)<<"Speedup\n";
-#endif
-
-    cout<<TAB<< setw(10) << "CPU (ms)" << setw(10) << "GPU, ms"
-        << setw(8) << "Speedup"<< setw(10)<<"GPUTotal" << setw(10) << "Total"
-        << "Description\n";
-    cout<<TAB<<setw(10)<<""<<setw(10)<<""<<setw(8)<<""<<setw(10)<<"(ms)"<<setw(10)<<"Speedup\n";
-
-    cout << resetiosflags(ios_base::left);
-}
-
-void TestSystem::writeHeading()
-{
-    if (!record_)
-    {
-        recordname_ += "_OCL.csv";
-        record_ = fopen(recordname_.c_str(), "w");
-        if(record_ == NULL)
-        {
-            cout<<".csv file open failed.\n";
-            exit(0);
-        }
-    }
-
-    fprintf(record_, "NAME,DESCRIPTION,ACCURACY,DIFFERENCE,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
-
-    fflush(record_);
-}
-
-void TestSystem::printSummary()
-{
-    cout << setiosflags(ios_base::fixed);
-    cout << "\naverage GPU speedup: x"
-        << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
-        << endl;
-    cout << "\nGPU exceeded: "
-        << setprecision(3) << speedup_faster_count_
-        << "\nGPU passed: "
-        << setprecision(3) << speedup_equal_count_
-        << "\nGPU failed: "
-        << setprecision(3) << speedup_slower_count_
-        << endl;
-    cout << "\nGPU exceeded rate: "
-        << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100
-        << "%"
-        << "\nGPU passed rate: "
-        << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100
-        << "%"
-        << "\nGPU failed rate: "
-        << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100
-        << "%"
-        << endl;
-    cout << "\naverage GPUTOTAL speedup: x"
-        << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_)
-        << endl;
-    cout << "\nGPUTOTAL exceeded: "
-        << setprecision(3) << speedup_full_faster_count_
-        << "\nGPUTOTAL passed: "
-        << setprecision(3) << speedup_full_equal_count_
-        << "\nGPUTOTAL failed: "
-        << setprecision(3) << speedup_full_slower_count_
-        << endl;
-    cout << "\nGPUTOTAL exceeded rate: "
-        << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100
-        << "%"
-        << "\nGPUTOTAL passed rate: "
-        << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100
-        << "%"
-        << "\nGPUTOTAL failed rate: "
-        << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
-        << "%"
-        << endl;
-    cout << resetiosflags(ios_base::fixed);
-}
-
-
-enum GTestColor {
-    COLOR_DEFAULT,
-    COLOR_RED,
-    COLOR_GREEN,
-    COLOR_YELLOW
-};
-#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE
-// Returns the character attribute for the given color.
-WORD GetColorAttribute(GTestColor color) {
-    switch (color) {
-    case COLOR_RED:    return FOREGROUND_RED;
-    case COLOR_GREEN:  return FOREGROUND_GREEN;
-    case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
-    default:           return 0;
-    }
-}
-#else
-static const char* GetAnsiColorCode(GTestColor color) {
-    switch (color) {
-    case COLOR_RED:     return "1";
-    case COLOR_GREEN:   return "2";
-    case COLOR_YELLOW:  return "3";
-    default:            return NULL;
-    };
-}
-#endif
-
-static void printMetricsUti(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, std::stringstream& stream, std::stringstream& cur_subtest_description)
-{
-    //cout <<TAB<< setw(7) << stream.str();
-    cout <<TAB;
-
-    stream.str("");
-    stream << cpu_time;
-    cout << setw(10) << stream.str();
-
-    stream.str("");
-    stream << gpu_time;
-    cout << setw(10) << stream.str();
-
-    stream.str("");
-    stream << "x" << setprecision(3) << speedup;
-    cout << setw(8) << stream.str();
-
-    stream.str("");
-    stream << gpu_full_time;
-    cout << setw(10) << stream.str();
-
-    stream.str("");
-    stream << "x" << setprecision(3) << fullspeedup;
-    cout << setw(10) << stream.str();
-
-    cout << cur_subtest_description.str();
-    cout << resetiosflags(ios_base::left) << endl;
-}
-
-void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
-{
-    cout << setiosflags(ios_base::left);
-    stringstream stream;
-
-#if 0
-    if(is_accurate == 1)
-            stream << "Pass";
-    else if(is_accurate_ == 0)
-            stream << "Fail";
-    else if(is_accurate == -1)
-        stream << " ";
-    else
-    {
-        std::cout<<"is_accurate errer: "<<is_accurate<<"\n";
-        exit(-1);
-    }
-#endif
-
-    std::stringstream &cur_subtest_description = getCurSubtestDescription();
-
-#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE
-
-    WORD color;
-    const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
-    // Gets the current text color.
-    CONSOLE_SCREEN_BUFFER_INFO buffer_info;
-    GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
-    const WORD old_color_attrs = buffer_info.wAttributes;
-    // We need to flush the stream buffers into the console before each
-    // SetConsoleTextAttribute call lest it affect the text that is already
-    // printed but has not yet reached the console.
-    fflush(stdout);
-
-    if(is_accurate == 1||is_accurate == -1)
-    {
-        color = old_color_attrs;
-        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
-
-    }else
-    {
-        color = GetColorAttribute(COLOR_RED);
-        SetConsoleTextAttribute(stdout_handle,
-            color| FOREGROUND_INTENSITY);
-
-        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
-        fflush(stdout);
-        // Restores the text color.
-        SetConsoleTextAttribute(stdout_handle, old_color_attrs);
-    }
-#else
-    GTestColor color = COLOR_RED;
-    if(is_accurate == 1|| is_accurate == -1)
-    {
-        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
-
-    }else
-    {
-        printf("\033[0;3%sm", GetAnsiColorCode(color));
-        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
-        printf("\033[m");  // Resets the terminal to default.
-    }
-#endif
-}
-
-void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
-{
-    if (!record_)
-    {
-        recordname_ += ".csv";
-        record_ = fopen(recordname_.c_str(), "w");
-    }
-
-    string _is_accurate_;
-
-    if(is_accurate_ == 1)
-        _is_accurate_ = "Pass";
-    else if(is_accurate_ == 0)
-        _is_accurate_ = "Fail";
-    else if(is_accurate_ == -1)
-        _is_accurate_ = " ";
-    else
-    {
-        std::cout<<"is_accurate errer: "<<is_accurate_<<"\n";
-        exit(-1);
-    }
-
-    fprintf(record_, "%s,%s,%s,%.2f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n",
-        itname_changed_ ? itname_.c_str() : "",
-        cur_subtest_description_.str().c_str(),
-        _is_accurate_.c_str(),
-        accurate_diff_,
-        cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
-        gpu_min, gpu_max, std_dev);
-
-    if (itname_changed_)
-    {
-        itname_changed_ = false;
-    }
-
-    fflush(record_);
-}
-
-void TestSystem::writeSummary()
-{
-    if (!record_)
-    {
-        recordname_ += ".csv";
-        record_ = fopen(recordname_.c_str(), "w");
-    }
-
-    fprintf(record_, "\nAverage GPU speedup: %.3f\n"
-        "exceeded: %d (%.3f%%)\n"
-        "passed: %d (%.3f%%)\n"
-        "failed: %d (%.3f%%)\n"
-        "\nAverage GPUTOTAL speedup: %.3f\n"
-        "exceeded: %d (%.3f%%)\n"
-        "passed: %d (%.3f%%)\n"
-        "failed: %d (%.3f%%)\n",
-        speedup_total_ / std::max(1, num_subtests_called_),
-        speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
-        speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
-        speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
-        speedup_full_total_ / std::max(1, num_subtests_called_),
-        speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
-        speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
-        speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
-        );
-    fflush(record_);
-}
-
-void TestSystem::printError(const std::string &msg)
-{
-    if(msg != "CL_INVALID_BUFFER_SIZE")
-    {
-        cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
-    }
-}
-
-void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
-{
-    mat.create(rows, cols, type);
-    RNG rng(0);
-    rng.fill(mat, RNG::UNIFORM, low, high);
-}
-
-string abspath(const string &relpath)
-{
-    return TestSystem::instance().workingDir() + relpath;
-}
-
-double checkNorm(const Mat &m)
-{
-    return norm(m, NORM_INF);
-}
-
-double checkNorm(const Mat &m1, const Mat &m2)
-{
-    return norm(m1, m2, NORM_INF);
-}
-
-double checkSimilarity(const Mat &m1, const Mat &m2)
-{
-    Mat diff;
-    matchTemplate(m1, m2, diff, TM_CCORR_NORMED);
-    return std::abs(diff.at<float>(0, 0) - 1.f);
-}
diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp
deleted file mode 100644
index 385320bee..000000000
--- a/modules/ocl/perf/precomp.hpp
+++ /dev/null
@@ -1,517 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-declarations"
-#  if defined __clang__ || defined __APPLE__
-#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
-#    pragma GCC diagnostic ignored "-Wextra"
-#  endif
-#endif
-
-#include <iomanip>
-#include <stdexcept>
-#include <string>
-#include <iostream>
-#include <cstdio>
-#include <vector>
-#include <numeric>
-#include "opencv2/core.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/calib3d.hpp"
-#include "opencv2/video.hpp"
-#include "opencv2/objdetect.hpp"
-#include "opencv2/features2d.hpp"
-#include "opencv2/ocl.hpp"
-#include "opencv2/ts.hpp"
-#include "opencv2/ts/ts_perf.hpp"
-#include "opencv2/ts/ts_gtest.h"
-
-#include "opencv2/core/utility.hpp"
-
-#define Min_Size 1000
-#define Max_Size 4000
-#define Multiple 2
-#define TAB "    "
-
-using namespace std;
-using namespace cv;
-
-void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high);
-void gen(Mat &mat, int rows, int cols, int type, int low, int high, int n);
-
-string abspath(const string &relpath);
-
-typedef struct
-{
-    short x;
-    short y;
-} COOR;
-COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep,
-                  cv::Size size, int sp, int sr, int maxIter, float eps, int *tab);
-void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi,
-                    int sp, int sr, cv::TermCriteria crit);
-
-
-template<class T1, class T2>
-int ExpectedEQ(T1 expected, T2 actual)
-{
-    if(expected == actual)
-        return 1;
-
-    return 0;
-}
-
-template<class T1>
-int EeceptDoubleEQ(T1 expected, T1 actual)
-{
-    testing::internal::Double lhs(expected);
-    testing::internal::Double rhs(actual);
-
-    if (lhs.AlmostEquals(rhs))
-    {
-        return 1;
-    }
-
-    return 0;
-}
-
-template<class T>
-int AssertEQ(T expected, T actual)
-{
-    if(expected == actual)
-    {
-        return 1;
-    }
-    return 0;
-}
-
-int ExceptDoubleNear(double val1, double val2, double abs_error);
-bool match_rect(cv::Rect r1, cv::Rect r2, int threshold);
-
-double checkNorm(const cv::Mat &m);
-double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
-double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
-
-int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps);
-int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps);
-
-class Runnable
-{
-public:
-    explicit Runnable(const std::string &runname): name_(runname) {}
-    virtual ~Runnable() {}
-
-    const std::string &name() const
-    {
-        return name_;
-    }
-
-    virtual void run() = 0;
-
-private:
-    std::string name_;
-};
-
-class TestSystem
-{
-public:
-    static TestSystem &instance()
-    {
-        static TestSystem me;
-        return me;
-    }
-
-    void setWorkingDir(const std::string &val)
-    {
-        working_dir_ = val;
-    }
-    const std::string &workingDir() const
-    {
-        return working_dir_;
-    }
-
-    void setTestFilter(const std::string &val)
-    {
-        test_filter_ = val;
-    }
-    const std::string &testFilter() const
-    {
-        return test_filter_;
-    }
-
-    void setNumIters(int num_iters)
-    {
-        num_iters_ = num_iters;
-    }
-    void setGPUWarmupIters(int num_iters)
-    {
-        gpu_warmup_iters_ = num_iters;
-    }
-    void setCPUIters(int num_iters)
-    {
-        cpu_num_iters_ = num_iters;
-    }
-
-    void setTopThreshold(double top)
-    {
-        top_ = top;
-    }
-    void setBottomThreshold(double bottom)
-    {
-        bottom_ = bottom;
-    }
-
-    void addInit(Runnable *init)
-    {
-        inits_.push_back(init);
-    }
-    void addTest(Runnable *test)
-    {
-        tests_.push_back(test);
-    }
-    void run();
-
-    // It's public because OpenCV callback uses it
-    void printError(const std::string &msg);
-
-    std::stringstream &startNewSubtest()
-    {
-        finishCurrentSubtest();
-        return cur_subtest_description_;
-    }
-
-    bool stop() const
-    {
-        return cur_iter_idx_ >= num_iters_;
-    }
-
-    bool cpu_stop() const
-    {
-        return cur_iter_idx_ >= cpu_num_iters_;
-    }
-
-    int get_cur_iter_idx()
-    {
-        return cur_iter_idx_;
-    }
-
-    int get_cpu_num_iters()
-    {
-        return cpu_num_iters_;
-    }
-
-    bool warmupStop()
-    {
-        return cur_warmup_idx_++ >= gpu_warmup_iters_;
-    }
-
-    void warmupComplete()
-    {
-        cur_warmup_idx_ = 0;
-    }
-
-    void cpuOn()
-    {
-        cpu_started_ = cv::getTickCount();
-    }
-    void cpuOff()
-    {
-        int64 delta = cv::getTickCount() - cpu_started_;
-        cpu_times_.push_back(delta);
-        ++cur_iter_idx_;
-    }
-    void cpuComplete()
-    {
-        cpu_elapsed_ += meanTime(cpu_times_);
-        cur_subtest_is_empty_ = false;
-        cur_iter_idx_ = 0;
-    }
-
-    void gpuOn()
-    {
-        gpu_started_ = cv::getTickCount();
-    }
-    void gpuOff()
-    {
-        int64 delta = cv::getTickCount() - gpu_started_;
-        gpu_times_.push_back(delta);
-        ++cur_iter_idx_;
-    }
-    void gpuComplete()
-    {
-        gpu_elapsed_ += meanTime(gpu_times_);
-        cur_subtest_is_empty_ = false;
-        cur_iter_idx_ = 0;
-    }
-
-    void gpufullOn()
-    {
-        gpu_full_started_ = cv::getTickCount();
-    }
-    void gpufullOff()
-    {
-        int64 delta = cv::getTickCount() - gpu_full_started_;
-        gpu_full_times_.push_back(delta);
-        ++cur_iter_idx_;
-    }
-    void gpufullComplete()
-    {
-        gpu_full_elapsed_ += meanTime(gpu_full_times_);
-        cur_subtest_is_empty_ = false;
-        cur_iter_idx_ = 0;
-    }
-
-    bool isListMode() const
-    {
-        return is_list_mode_;
-    }
-    void setListMode(bool value)
-    {
-        is_list_mode_ = value;
-    }
-
-    void setRecordName(const std::string &name)
-    {
-        recordname_ = name;
-    }
-
-    void setCurrentTest(const std::string &name)
-    {
-        itname_ = name;
-        itname_changed_ = true;
-    }
-
-    void setAccurate(int accurate, double diff)
-    {
-        is_accurate_ = accurate;
-        accurate_diff_ = diff;
-    }
-
-    void ExpectMatsNear(vector<Mat>& dst, vector<Mat>& cpu_dst, vector<double>& eps)
-    {
-        assert(dst.size() == cpu_dst.size());
-        assert(cpu_dst.size() == eps.size());
-        is_accurate_ = 1;
-        for(size_t i=0; i<dst.size(); i++)
-        {
-            double cur_diff = checkNorm(dst[i], cpu_dst[i]);
-            accurate_diff_ = max(accurate_diff_, cur_diff);
-            if(cur_diff > eps[i])
-                is_accurate_ = 0;
-        }
-    }
-
-    void ExpectedMatNear(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
-    {
-        assert(dst.type() == cpu_dst.type());
-        assert(dst.size() == cpu_dst.size());
-        accurate_diff_ = checkNorm(dst, cpu_dst);
-        if(accurate_diff_ <= eps)
-            is_accurate_ = 1;
-        else
-            is_accurate_ = 0;
-    }
-
-    void ExceptedMatSimilar(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
-    {
-        assert(dst.type() == cpu_dst.type());
-        assert(dst.size() == cpu_dst.size());
-        accurate_diff_ = checkSimilarity(cpu_dst, dst);
-        if(accurate_diff_ <= eps)
-            is_accurate_ = 1;
-        else
-            is_accurate_ = 0;
-    }
-
-    std::stringstream &getCurSubtestDescription()
-    {
-        return cur_subtest_description_;
-    }
-
-private:
-    TestSystem():
-        cur_subtest_is_empty_(true), cpu_elapsed_(0),
-        gpu_elapsed_(0), gpu_full_elapsed_(0), speedup_total_(0.0),
-        num_subtests_called_(0),
-        speedup_faster_count_(0), speedup_slower_count_(0), speedup_equal_count_(0),
-        speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false),
-        num_iters_(10), cpu_num_iters_(2),
-        gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
-        record_(0), recordname_("performance"), itname_changed_(true),
-        is_accurate_(-1), accurate_diff_(0.)
-    {
-        cpu_times_.reserve(num_iters_);
-        gpu_times_.reserve(num_iters_);
-        gpu_full_times_.reserve(num_iters_);
-    }
-
-    void finishCurrentSubtest();
-    void resetCurrentSubtest()
-    {
-        cpu_elapsed_ = 0;
-        gpu_elapsed_ = 0;
-        gpu_full_elapsed_ = 0;
-        cur_subtest_description_.str("");
-        cur_subtest_is_empty_ = true;
-        cur_iter_idx_ = 0;
-        cur_warmup_idx_ = 0;
-        cpu_times_.clear();
-        gpu_times_.clear();
-        gpu_full_times_.clear();
-        is_accurate_ = -1;
-        accurate_diff_ = 0.;
-    }
-
-    double meanTime(const std::vector<int64> &samples);
-
-    void printHeading();
-    void printSummary();
-    void printMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
-
-    void writeHeading();
-    void writeSummary();
-    void writeMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
-                      double speedup = 0.0f, double fullspeedup = 0.0f,
-                      double gpu_min = 0.0f, double gpu_max = 0.0f, double std_dev = 0.0f);
-
-    std::string working_dir_;
-    std::string test_filter_;
-
-    std::vector<Runnable *> inits_;
-    std::vector<Runnable *> tests_;
-
-    std::stringstream cur_subtest_description_;
-    bool cur_subtest_is_empty_;
-
-    int64 cpu_started_;
-    int64 gpu_started_;
-    int64 gpu_full_started_;
-    double cpu_elapsed_;
-    double gpu_elapsed_;
-    double gpu_full_elapsed_;
-
-    double speedup_total_;
-    double speedup_full_total_;
-    int num_subtests_called_;
-
-    int speedup_faster_count_;
-    int speedup_slower_count_;
-    int speedup_equal_count_;
-
-    int speedup_full_faster_count_;
-    int speedup_full_slower_count_;
-    int speedup_full_equal_count_;
-
-    bool is_list_mode_;
-
-    double top_;
-    double bottom_;
-
-    int num_iters_;
-    int cpu_num_iters_;     //there's no need to set cpu running same times with gpu
-    int gpu_warmup_iters_;  //gpu warm up times, default is 1
-    int cur_iter_idx_;
-    int cur_warmup_idx_;    //current gpu warm up times
-    std::vector<int64> cpu_times_;
-    std::vector<int64> gpu_times_;
-    std::vector<int64> gpu_full_times_;
-
-    FILE *record_;
-    std::string recordname_;
-    std::string itname_;
-    bool itname_changed_;
-
-    int is_accurate_;
-    double accurate_diff_;
-};
-
-
-#define GLOBAL_INIT(name) \
-struct name##_init: Runnable { \
-    name##_init(): Runnable(#name) { \
-    TestSystem::instance().addInit(this); \
-} \
-    void run(); \
-} name##_init_instance; \
-    void name##_init::run()
-
-
-#define PERFTEST(name) \
-struct name##_test: Runnable { \
-    name##_test(): Runnable(#name) { \
-    TestSystem::instance().addTest(this); \
-} \
-    void run(); \
-} name##_test_instance; \
-    void name##_test::run()
-
-#define SUBTEST TestSystem::instance().startNewSubtest()
-
-#define CPU_ON \
-    while (!TestSystem::instance().cpu_stop()) { \
-    TestSystem::instance().cpuOn()
-#define CPU_OFF \
-    TestSystem::instance().cpuOff(); \
-    } TestSystem::instance().cpuComplete()
-
-#define GPU_ON \
-    while (!TestSystem::instance().stop()) { \
-    TestSystem::instance().gpuOn()
-#define GPU_OFF \
-    ocl::finish(); \
-    TestSystem::instance().gpuOff(); \
-    } TestSystem::instance().gpuComplete()
-
-#define GPU_FULL_ON \
-    while (!TestSystem::instance().stop()) { \
-    TestSystem::instance().gpufullOn()
-#define GPU_FULL_OFF \
-    TestSystem::instance().gpufullOff(); \
-    } TestSystem::instance().gpufullComplete()
-
-#define WARMUP_ON \
-    while (!TestSystem::instance().warmupStop()) {
-#define WARMUP_OFF \
-        ocl::finish(); \
-    } TestSystem::instance().warmupComplete()
diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp
index 1778c9839..c9ee6f9f6 100644
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -2337,7 +2337,7 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
         return;
     }
 
-    CV_Assert((x.type() == y.type() && x.size() == y.size() && x.depth() == CV_32F) || x.depth() == CV_64F);
+    CV_Assert(x.depth() == CV_32F || x.depth() == CV_64F);
     y.create(x.size(), x.type());
     String kernelName = "arithm_pow";
 
diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp
new file mode 100644
index 000000000..a92365f0d
--- /dev/null
+++ b/modules/ocl/src/bgfg_mog.cpp
@@ -0,0 +1,638 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma, jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+using namespace cv;
+using namespace cv::ocl;
+namespace cv
+{
+    namespace ocl
+    {
+        extern const char* bgfg_mog;
+
+        typedef struct _contant_struct
+        {
+            cl_float c_Tb;
+            cl_float c_TB;
+            cl_float c_Tg;
+            cl_float c_varInit;
+            cl_float c_varMin;
+            cl_float c_varMax;
+            cl_float c_tau;
+            cl_uchar c_shadowVal;
+        }contant_struct;
+
+        cl_mem cl_constants = NULL;
+        float c_TB;
+    }
+}
+
+#if defined _MSC_VER
+#define snprintf sprintf_s
+#endif
+
+namespace cv { namespace ocl { namespace device
+{
+    namespace mog
+    {
+        void mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+            int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma);
+
+        void getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio);
+
+        void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau,
+                            unsigned char shadowVal);
+
+        void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean,
+                      float alphaT, float prune, bool detectShadows, int nmixtures);
+
+        void getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures);
+    }
+}}}
+
+namespace mog
+{
+    const int defaultNMixtures = 5;
+    const int defaultHistory = 200;
+    const float defaultBackgroundRatio = 0.7f;
+    const float defaultVarThreshold = 2.5f * 2.5f;
+    const float defaultNoiseSigma = 30.0f * 0.5f;
+    const float defaultInitialWeight = 0.05f;
+}
+void cv::ocl::BackgroundSubtractor::operator()(const oclMat&, oclMat&, float)
+{
+
+}
+cv::ocl::BackgroundSubtractor::~BackgroundSubtractor()
+{
+
+}
+
+cv::ocl::MOG::MOG(int nmixtures) :
+frameSize_(0, 0), frameType_(0), nframes_(0)
+{
+    nmixtures_ = std::min(nmixtures > 0 ? nmixtures : mog::defaultNMixtures, 8);
+    history = mog::defaultHistory;
+    varThreshold = mog::defaultVarThreshold;
+    backgroundRatio = mog::defaultBackgroundRatio;
+    noiseSigma = mog::defaultNoiseSigma;
+}
+
+void cv::ocl::MOG::initialize(cv::Size frameSize, int frameType)
+{
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    frameSize_ = frameSize;
+    frameType_ = frameType;
+
+    int ch = CV_MAT_CN(frameType);
+    int work_ch = ch;
+
+    // for each gaussian mixture of each pixel bg model we store
+    // the mixture sort key (w/sum_of_variances), the mixture weight (w),
+    // the mean (nchannels values) and
+    // the diagonal covariance matrix (another nchannels values)
+
+    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    sortKey_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+    var_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+
+    weight_.setTo(cv::Scalar::all(0));
+    sortKey_.setTo(cv::Scalar::all(0));
+    mean_.setTo(cv::Scalar::all(0));
+    var_.setTo(cv::Scalar::all(0));
+
+    nframes_ = 0;
+}
+
+void cv::ocl::MOG::operator()(const cv::ocl::oclMat& frame, cv::ocl::oclMat& fgmask, float learningRate)
+{
+    using namespace cv::ocl::device::mog;
+
+    CV_Assert(frame.depth() == CV_8U);
+
+    int ch = frame.oclchannels();
+    int work_ch = ch;
+
+    if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.oclchannels())
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+
+    ++nframes_;
+    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+
+    mog_ocl(frame, ch, fgmask, weight_, sortKey_, mean_, var_, nmixtures_,
+        varThreshold, learningRate, backgroundRatio, noiseSigma);
+}
+
+void cv::ocl::MOG::getBackgroundImage(oclMat& backgroundImage) const
+{
+    using namespace cv::ocl::device::mog;
+
+    backgroundImage.create(frameSize_, frameType_);
+
+    cv::ocl::device::mog::getBackgroundImage_ocl(backgroundImage.oclchannels(), weight_, mean_, backgroundImage, nmixtures_, backgroundRatio);
+}
+
+void cv::ocl::MOG::release()
+{
+    frameSize_ = Size(0, 0);
+    frameType_ = 0;
+    nframes_ = 0;
+
+    weight_.release();
+    sortKey_.release();
+    mean_.release();
+    var_.release();
+    clReleaseMemObject(cl_constants);
+}
+
+static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float backgroundRatio)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(var.step/var.elemSize());
+
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset/frame.step);
+    int frame_offset_x = (int)(frame.offset%frame.step);
+    frame_offset_x = frame_offset_x/(int)frame.elemSize();
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    String kernel_name = "mog_withoutLearning_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
+
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+
+static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask_raw, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    oclMat fgmask(fgmask_raw.size(), CV_32SC1);
+
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int sortKey_step = (int)(sortKey.step/sortKey.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(var.step/var.elemSize());
+
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset/frame.step);
+    int frame_offset_x = (int)(frame.offset%frame.step);
+    frame_offset_x = frame_offset_x/(int)frame.elemSize();
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    String kernel_name = "mog_withLearning_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&sortKey.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&sortKey_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
+
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&learningRate));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&minVar));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+    fgmask.convertTo(fgmask, CV_8U);
+    fgmask.copyTo(fgmask_raw);
+}
+
+void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma)
+{
+    const float minVar = noiseSigma * noiseSigma;
+
+    if(learningRate > 0.0f)
+        mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures,
+                         varThreshold, backgroundRatio, learningRate, minVar);
+    else
+        mog_withoutLearning(frame, cn, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio);
+}
+
+void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {dst.cols, dst.rows, 1};
+
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int dst_step = (int)(dst.step/dst.elemSize());
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    String kernel_name = "getBackgroundImage_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step));
+
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
+{
+    varMin = cv::min(varMin, varMax);
+    varMax = cv::max(varMin, varMax);
+
+    c_TB = TB;
+
+    _contant_struct *constants = new _contant_struct;
+    constants->c_Tb = Tb;
+    constants->c_TB = TB;
+    constants->c_Tg = Tg;
+    constants->c_varInit = varInit;
+    constants->c_varMin = varMin;
+    constants->c_varMax = varMax;
+    constants->c_tau = tau;
+    constants->c_shadowVal = shadowVal;
+
+    cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()),
+        (void *)constants, sizeof(_contant_struct));
+}
+
+void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance,
+                                oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
+{
+    oclMat fgmask(fgmaskRaw.size(), CV_32SC1);
+
+    Context* clCxt = Context::getContext();
+
+    const float alpha1 = 1.0f - alphaT;
+
+    cl_int detectShadows_flag = 0;
+    if(detectShadows)
+        detectShadows_flag = 1;
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(variance.step/variance.elemSize());
+
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset/frame.step);
+    int frame_offset_x = (int)(frame.offset%frame.step);
+    frame_offset_x = frame_offset_x/(int)frame.elemSize();
+
+    String kernel_name = "mog2_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&variance.data));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
+
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&alphaT));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&alpha1));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&prune));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&detectShadows_flag));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&cl_constants));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+
+    fgmask.convertTo(fgmask, CV_8U);
+    fgmask.copyTo(fgmaskRaw);
+}
+
+void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1};
+
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int dst_step = (int)(dst.step/dst.elemSize());
+
+    int dst_y = (int)(dst.offset/dst.step);
+    int dst_x = (int)(dst.offset%dst.step);
+    dst_x = dst_x/(int)dst.elemSize();
+
+    String kernel_name = "getBackgroundImage2_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&c_TB));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+/////////////////////////////////////////////////////////////////
+// MOG2
+
+namespace mog2
+{
+    // default parameters of gaussian background detection algorithm
+    const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2
+    const float defaultVarThreshold = 4.0f * 4.0f;
+    const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
+    const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
+    const float defaultVarThresholdGen = 3.0f * 3.0f;
+    const float defaultVarInit = 15.0f; // initial variance for new components
+    const float defaultVarMax = 5.0f * defaultVarInit;
+    const float defaultVarMin = 4.0f;
+
+    // additional parameters
+    const float defaultfCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
+    const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+    const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
+}
+
+cv::ocl::MOG2::MOG2(int nmixtures) : frameSize_(0, 0), frameType_(0), nframes_(0)
+{
+    nmixtures_ = nmixtures > 0 ? nmixtures : mog2::defaultNMixtures;
+
+    history = mog2::defaultHistory;
+    varThreshold = mog2::defaultVarThreshold;
+    bShadowDetection = true;
+
+    backgroundRatio = mog2::defaultBackgroundRatio;
+    fVarInit = mog2::defaultVarInit;
+    fVarMax  = mog2::defaultVarMax;
+    fVarMin = mog2::defaultVarMin;
+
+    varThresholdGen = mog2::defaultVarThresholdGen;
+    fCT = mog2::defaultfCT;
+    nShadowDetection =  mog2::defaultnShadowDetection;
+    fTau = mog2::defaultfTau;
+}
+
+void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType)
+{
+    using namespace cv::ocl::device::mog;
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    frameSize_ = frameSize;
+    frameType_ = frameType;
+    nframes_ = 0;
+
+    int ch = CV_MAT_CN(frameType);
+    int work_ch = ch;
+
+    // for each gaussian mixture of each pixel bg model we store ...
+    // the mixture weight (w),
+    // the mean (nchannels values) and
+    // the covariance
+    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    weight_.setTo(Scalar::all(0));
+
+    variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    variance_.setTo(Scalar::all(0));
+
+    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); //4 channels
+    mean_.setTo(Scalar::all(0));
+
+    //make the array for keeping track of the used modes per pixel - all zeros at start
+    bgmodelUsedModes_.create(frameSize_, CV_32FC1);
+    bgmodelUsedModes_.setTo(cv::Scalar::all(0));
+
+    loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);
+}
+
+void cv::ocl::MOG2::operator()(const oclMat& frame, oclMat& fgmask, float learningRate)
+{
+    using namespace cv::ocl::device::mog;
+
+    int ch = frame.oclchannels();
+    int work_ch = ch;
+
+    if (nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.oclchannels())
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+    fgmask.setTo(cv::Scalar::all(0));
+
+    ++nframes_;
+    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(2 * nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+
+    mog2_ocl(frame, frame.oclchannels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, nmixtures_);
+}
+
+void cv::ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const
+{
+    using namespace cv::ocl::device::mog;
+
+    backgroundImage.create(frameSize_, frameType_);
+
+    cv::ocl::device::mog::getBackgroundImage2_ocl(backgroundImage.oclchannels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, nmixtures_);
+}
+
+void cv::ocl::MOG2::release()
+{
+    frameSize_ = Size(0, 0);
+    frameType_ = 0;
+    nframes_ = 0;
+
+    weight_.release();
+    variance_.release();
+    mean_.release();
+
+    bgmodelUsedModes_.release();
+}
diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp
index 6bef48d18..265998e75 100644
--- a/modules/ocl/src/blend.cpp
+++ b/modules/ocl/src/blend.cpp
@@ -89,4 +89,4 @@ void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &
 
         openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
     }
-}
\ No newline at end of file
+}
diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index d449547bf..13136e0d3 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -213,7 +213,7 @@ void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
     case COLOR_RGB2YUV:
     {
         CV_Assert(scn == 3 || scn == 4);
-        bidx = code == COLOR_BGR2YUV ? 0 : 2;
+        bidx = code == COLOR_RGB2YUV ? 0 : 2;
         dst.create(sz, CV_MAKETYPE(depth, 3));
         RGB2YUV_caller(src, dst, bidx);
         break;
@@ -222,7 +222,7 @@ void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
     case COLOR_YUV2RGB:
     {
         CV_Assert(scn == 3 || scn == 4);
-        bidx = code == COLOR_YUV2BGR ? 0 : 2;
+        bidx = code == COLOR_YUV2RGB ? 0 : 2;
         dst.create(sz, CV_MAKETYPE(depth, 3));
         YUV2RGB_caller(src, dst, bidx);
         break;
diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp
index 9d574a1c0..a336ec754 100644
--- a/modules/ocl/src/columnsum.cpp
+++ b/modules/ocl/src/columnsum.cpp
@@ -81,4 +81,4 @@ void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
 
     openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
 
-}
\ No newline at end of file
+}
diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp
index 79113706a..bbbdf8bc1 100644
--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@ -63,6 +63,7 @@ extern const char *filter_sep_row;
 extern const char *filter_sep_col;
 extern const char *filtering_laplacian;
 extern const char *filtering_morph;
+extern const char *filtering_adaptive_bilateral;
 }
 }
 
@@ -163,7 +164,7 @@ public:
 
 Ptr<FilterEngine_GPU> cv::ocl::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D)
 {
-    return Ptr<FilterEngine_GPU>(new Filter2DEngine_GPU(filter2D));
+    return makePtr<Filter2DEngine_GPU>(filter2D);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -452,7 +453,7 @@ Ptr<FilterEngine_GPU> cv::ocl::createMorphologyFilter_GPU(int op, int type, cons
 
     Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor);
 
-    return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, iterations));
+    return makePtr<MorphologyFilterEngine_GPU>(filter2D, iterations);
 }
 
 namespace
@@ -572,7 +573,7 @@ void cv::ocl::morphologyEx(const oclMat &src, oclMat &dst, int op, const Mat &ke
 
 namespace
 {
-typedef void (*GPUFilter2D_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, const int);
+typedef void (*GPUFilter2D_t)(const oclMat & , oclMat & , const oclMat & , const Size &, const Point&, const int);
 
 class LinearFilter_GPU : public BaseFilter_GPU
 {
@@ -591,21 +592,22 @@ public:
 };
 }
 
-static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
-                 Size &ksize, const Point anchor, const int borderType)
+static void GPUFilter2D(const oclMat &src, oclMat &dst, const oclMat &mat_kernel,
+    const Size &ksize, const Point& anchor, const int borderType)
 {
     CV_Assert(src.clCxt == dst.clCxt);
     CV_Assert((src.cols == dst.cols) &&
               (src.rows == dst.rows));
     CV_Assert((src.oclchannels() == dst.oclchannels()));
-    CV_Assert((borderType != 0));
     CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
     CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
+    CV_Assert(ksize.width == ksize.height);
     Context *clCxt = src.clCxt;
-    int cn =  src.oclchannels();
-    int depth = src.depth();
 
-    String kernelName = "filter2D";
+    int filterWidth = ksize.width;
+    bool ksize_3x3 = filterWidth == 3 && src.type() != CV_32FC4; // CV_32FC4 is not tuned up with filter2d_3x3 kernel
+
+    String kernelName = ksize_3x3 ? "filter2D_3x3" : "filter2D";
 
     size_t src_offset_x = (src.offset % src.step) / src.elemSize();
     size_t src_offset_y = src.offset / src.step;
@@ -613,58 +615,84 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
     size_t dst_offset_x = (dst.offset % dst.step) / dst.elemSize();
     size_t dst_offset_y = dst.offset / dst.step;
 
+    int paddingPixels = filterWidth & (-2);
+
+    size_t localThreads[3]  = {ksize_3x3 ? 256 : 16, ksize_3x3 ? 1 : 16, 1};
+    size_t globalThreads[3] = {src.wholecols, src.wholerows, 1};
+
+    int cn =  src.oclchannels();
+    int src_step = (int)(src.step/src.elemSize());
+    int dst_step = (int)(dst.step/src.elemSize());
+
+    int localWidth = localThreads[0] + paddingPixels;
+    int localHeight = localThreads[1] + paddingPixels;
+
+    // 260 = divup((localThreads[0] + filterWidth * 2), 4) * 4
+    // 6   = (ROWS_PER_GROUP_WHICH_IS_4 + filterWidth * 2)
+    size_t localMemSize = ksize_3x3 ? 260 * 6 * src.elemSize() : (localWidth * localHeight) * src.elemSize();
+
     int vector_lengths[4][7] = {{4, 4, 4, 4, 4, 4, 4},
-        {4, 4, 1, 1, 1, 1, 1},
-        {1, 1, 1, 1, 1, 1, 1},
-        {4, 4, 4, 4, 1, 1, 4}
+    {4, 4, 1, 1, 1, 1, 1},
+    {1, 1, 1, 1, 1, 1, 1},
+    {4, 4, 4, 4, 1, 1, 4}
     };
-
-    int vector_length = vector_lengths[cn - 1][depth];
-    int offset_cols = (dst_offset_x) & (vector_length - 1);
-    int cols = dst.cols + offset_cols;
-    int rows = divUp(dst.rows, vector_length);
-
-    size_t localThreads[3] = {256, 1, 1};
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(rows, localThreads[1]) *localThreads[1], 1
-                              };
+    int cols = dst.cols + ((dst_offset_x) & (vector_lengths[cn - 1][src.depth()] - 1));
 
     std::vector< std::pair<size_t, const void *> > args;
     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst_step));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
+    args.push_back(std::make_pair(localMemSize,   (void *)NULL));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_offset_x));
     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_offset_y));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
     args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst_offset_x));
     args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst_offset_y));
-    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
     args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
-
-    const int buffer_size = 100;
-    char opt_buffer [buffer_size] = "";
-    sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y);
-
-    openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer);
+    char btype[30];
+    switch (borderType)
+    {
+    case 0:
+        sprintf(btype, "BORDER_CONSTANT");
+        break;
+    case 1:
+        sprintf(btype, "BORDER_REPLICATE");
+        break;
+    case 2:
+        sprintf(btype, "BORDER_REFLECT");
+        break;
+    case 3:
+        CV_Error(CV_StsUnsupportedFormat, "BORDER_WRAP is not supported!");
+        return;
+    case 4:
+        sprintf(btype, "BORDER_REFLECT_101");
+        break;
+    }
+    int type = src.depth();
+    char build_options[150];
+    sprintf(build_options, "-D %s -D IMG_C_%d_%d -D CN=%d -D FILTER_SIZE=%d", btype, cn, type, cn, ksize.width);
+    openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
 }
+
 Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
-        Point anchor, int borderType)
+        const Point &anchor, int borderType)
 {
     static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, GPUFilter2D, GPUFilter2D};
 
     CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
 
     oclMat gpu_krnl;
-    int nDivisor;
-    normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false);
-    normalizeAnchor(anchor, ksize);
+    Point norm_archor = anchor;
+    normalizeKernel(kernel, gpu_krnl, CV_32FC1);
+    normalizeAnchor(norm_archor, ksize);
 
-    return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
-                               borderType));
+    return makePtr<LinearFilter_GPU>(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
+        borderType);
 }
 
 Ptr<FilterEngine_GPU> cv::ocl::createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Point &anchor,
@@ -746,7 +774,7 @@ public:
 Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
         const Ptr<BaseColumnFilter_GPU> &columnFilter)
 {
-    return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));
+    return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter);
 }
 
 /*
@@ -1017,8 +1045,8 @@ Ptr<BaseFilter_GPU> cv::ocl::getBoxFilter_GPU(int srcType, int dstType,
 
     normalizeAnchor(anchor, ksize);
 
-    return Ptr<BaseFilter_GPU>(new GPUBoxFilter(ksize, anchor,
-                               borderType, FilterBox_callers[(CV_MAT_DEPTH(srcType) == CV_32F)][CV_MAT_CN(srcType)]));
+    return makePtr<GPUBoxFilter>(ksize, anchor,
+        borderType, FilterBox_callers[(CV_MAT_DEPTH(srcType) == CV_32F)][CV_MAT_CN(srcType)]);
 }
 
 Ptr<FilterEngine_GPU> cv::ocl::createBoxFilter_GPU(int srcType, int dstType,
@@ -1201,8 +1229,8 @@ Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufTyp
 
     normalizeAnchor(anchor, ksize);
 
-    return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, mat_kernel,
-                                  gpuFilter1D_callers[CV_MAT_DEPTH(srcType)], bordertype));
+    return makePtr<GpuLinearRowFilter>(ksize, anchor, mat_kernel,
+        gpuFilter1D_callers[CV_MAT_DEPTH(srcType)], bordertype);
 }
 
 namespace
@@ -1370,8 +1398,8 @@ Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int /*bufType*/, in
 
     normalizeAnchor(anchor, ksize);
 
-    return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, mat_kernel,
-                                     gpuFilter1D_callers[CV_MAT_DEPTH(dstType)], bordertype));
+    return makePtr<GpuLinearColumnFilter>(ksize, anchor, mat_kernel,
+        gpuFilter1D_callers[CV_MAT_DEPTH(dstType)], bordertype);
 }
 
 Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int dstType,
@@ -1589,3 +1617,100 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
     Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
     f->apply(src, dst);
 }
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Adaptive Bilateral Filter
+
+void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
+{
+    CV_Assert((ksize.width & 1) && (ksize.height & 1));  // ksize must be odd
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);  // source must be 8bit RGB image
+    if( sigmaSpace <= 0 )
+        sigmaSpace = 1;
+    Mat lut(Size(ksize.width, ksize.height), CV_32FC1);
+    double sigma2 = sigmaSpace * sigmaSpace;
+    int idx = 0;
+    int w = ksize.width / 2;
+    int h = ksize.height / 2;
+    for(int y=-h; y<=h; y++)
+        for(int x=-w; x<=w; x++)
+    {
+        lut.at<float>(idx++) = sigma2 / (sigma2 + x * x + y * y);
+    }
+    oclMat dlut(lut);
+    int depth = src.depth();
+    int cn = src.oclchannels();
+
+    normalizeAnchor(anchor, ksize);
+    const static String kernelName = "edgeEnhancingFilter";
+
+    dst.create(src.size(), src.type());
+
+    char btype[30];
+    switch(borderType)
+    {
+    case BORDER_CONSTANT:
+        sprintf(btype, "BORDER_CONSTANT");
+        break;
+    case BORDER_REPLICATE:
+        sprintf(btype, "BORDER_REPLICATE");
+        break;
+    case BORDER_REFLECT:
+        sprintf(btype, "BORDER_REFLECT");
+        break;
+    case BORDER_WRAP:
+        sprintf(btype, "BORDER_WRAP");
+        break;
+    case BORDER_REFLECT101:
+        sprintf(btype, "BORDER_REFLECT_101");
+        break;
+    default:
+        CV_Error(CV_StsBadArg, "This border type is not supported");
+        break;
+    }
+
+    //the following constants may be adjusted for performance concerns
+    const static size_t blockSizeX = 64, blockSizeY = 1, EXTRA = ksize.height - 1;
+
+    //Normalize the result by default
+    const float alpha = ksize.height * ksize.width;
+
+    const size_t gSize = blockSizeX - ksize.width / 2 * 2;
+    const size_t globalSizeX = (src.cols) % gSize == 0 ?
+        src.cols / gSize * blockSizeX :
+        (src.cols / gSize + 1) * blockSizeX;
+    const size_t rows_per_thread = 1 + EXTRA;
+    const size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
+        ((src.rows + rows_per_thread - 1) / rows_per_thread) :
+        (((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
+
+    size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
+    size_t localThreads[3]  = { blockSizeX, blockSizeY, 1};
+
+    char build_options[250];
+
+    //LDATATYPESIZE is sizeof local data store. This is to exemplify effect of LDS on kernel performance
+    sprintf(build_options,
+        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
+        " -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
+        static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
+
+    std::vector<std::pair<size_t , const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
+    args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dlut.data));
+    int lut_step = dlut.step1();
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&lut_step));
+
+    openCLExecuteKernel(Context::getContext(), &filtering_adaptive_bilateral, kernelName,
+        globalThreads, localThreads, args, cn, depth, build_options);
+}
diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp
index 0867023a7..a74a24be1 100644
--- a/modules/ocl/src/gemm.cpp
+++ b/modules/ocl/src/gemm.cpp
@@ -46,16 +46,62 @@
 #include <iomanip>
 #include "precomp.hpp"
 
+namespace cv { namespace ocl {
+
+// used for clAmdBlas library to avoid redundant setup/teardown
+void clBlasSetup();
+void clBlasTeardown();
+
+}} /* namespace cv { namespace ocl */
+
+
 #if !defined HAVE_CLAMDBLAS
 void cv::ocl::gemm(const oclMat&, const oclMat&, double,
                    const oclMat&, double, oclMat&, int)
 {
     CV_Error(Error::StsNotImplemented, "OpenCL BLAS is not implemented");
 }
+
+void cv::ocl::clBlasSetup()
+{
+    CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
+}
+
+void cv::ocl::clBlasTeardown()
+{
+    //intentionally do nothing
+}
+
 #else
 #include "clAmdBlas.h"
 using namespace cv;
 
+static bool clBlasInitialized = false;
+static Mutex cs;
+
+void cv::ocl::clBlasSetup()
+{
+    if(!clBlasInitialized)
+    {
+        AutoLock al(cs);
+        if(!clBlasInitialized)
+        {
+            openCLSafeCall(clAmdBlasSetup());
+            clBlasInitialized = true;
+        }
+    }
+}
+
+void cv::ocl::clBlasTeardown()
+{
+    AutoLock al(cs);
+    if(clBlasInitialized)
+    {
+        clAmdBlasTeardown();
+        clBlasInitialized = false;
+    }
+}
+
 void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
                    const oclMat &src3, double beta, oclMat &dst, int flags)
 {
@@ -71,7 +117,8 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
         dst.create(src1.rows, src2.cols, src1.type());
         dst.setTo(Scalar::all(0));
     }
-    openCLSafeCall( clAmdBlasSetup() );
+
+    clBlasSetup();
 
     const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
     const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
@@ -156,6 +203,5 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
     }
     break;
     }
-    clAmdBlasTeardown();
 }
 #endif
diff --git a/modules/ocl/src/gfft.cpp b/modules/ocl/src/gftt.cpp
similarity index 97%
rename from modules/ocl/src/gfft.cpp
rename to modules/ocl/src/gftt.cpp
index 79fd48936..0af3039b3 100644
--- a/modules/ocl/src/gfft.cpp
+++ b/modules/ocl/src/gftt.cpp
@@ -55,7 +55,7 @@ namespace cv
     namespace ocl
     {
         ///////////////////////////OpenCL kernel strings///////////////////////////
-        extern const char *imgproc_gfft;
+        extern const char *imgproc_gftt;
     }
 }
 
@@ -133,7 +133,7 @@ struct Sorter<BITONIC>
             for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
             {
                 args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
-                openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+                openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
             }
         }
     }
@@ -160,12 +160,12 @@ struct Sorter<SELECTION>
         args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
         args.push_back( std::make_pair( lds_size,       (void*)NULL) );
 
-        openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+        openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
 
         //final
         kernelname = "sortCorners_selectionSortFinal";
         args.pop_back();
-        openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+        openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
     }
 };
 
@@ -201,7 +201,7 @@ int findCorners_caller(
     size_t localThreads[3]  = {16, 16, 1};
 
     const char * opt = mask.empty() ? "" : "-D WITH_MASK";
-    openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt);
+    openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt);
     return std::min(Mat(g_counter).at<int>(0), max_count);
 }
 }//unnamed namespace
diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp
index 8fb69567a..ac6a6e0a4 100644
--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@ -20,7 +20,6 @@
 //    Jia Haipeng, jiahaipeng95@gmail.com
 //    Wu Xinglong, wxl370@126.com
 //    Wang Yao, bitwangyaoyao@gmail.com
-//    Sen Liu, swjtuls1987@126.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -54,8 +53,6 @@
 using namespace cv;
 using namespace cv::ocl;
 
-#if 0
-
 namespace cv
 {
 namespace ocl
@@ -68,9 +65,9 @@ extern const char *haarobjectdetect_scaled2;
 }
 
 /* these settings affect the quality of detection: change with care */
-#define CV_ADJUST_FEATURES 1
-#define CV_ADJUST_WEIGHTS  0
-
+#define CV_ADJUST_FEATURES  1
+#define CV_ADJUST_WEIGHTS   0
+#define CV_HAAR_FEATURE_MAX 3
 typedef int sumtype;
 typedef double sqsumtype;
 
@@ -141,7 +138,7 @@ typedef struct
     int imgoff;
     float factor;
 } detect_piramid_info;
-#ifdef WIN32
+#ifdef _MSC_VER
 #define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
 
 typedef _ALIGNED_ON(128) struct  GpuHidHaarTreeNode
@@ -261,7 +258,7 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
     int datasize;
     int total_classifiers = 0;
     int total_nodes = 0;
-    char errorstr[100];
+    char errorstr[256];
 
     GpuHidHaarStageClassifier *stage_classifier_ptr;
     GpuHidHaarClassifier *haar_classifier_ptr;
@@ -679,14 +676,15 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade)
         } /* j */
     }
 }
-
-CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemStorage *storage, double scaleFactor,
-        int minNeighbors, int flags, CvSize minSize, CvSize maxSize)
+void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv::Rect>& faces,
+                                            double scaleFactor, int minNeighbors, int flags,
+                                            Size minSize, Size maxSize)
+//CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemStorage *storage, double scaleFactor,
+//        int minNeighbors, int flags, CvSize minSize, CvSize maxSize)
 {
     CvHaarClassifierCascade *cascade = oldCascade;
 
     const double GROUP_EPS = 0.2;
-    CvSeq *result_seq = 0;
 
     cv::ConcurrentRectVector allCandidates;
     std::vector<cv::Rect> rectList;
@@ -714,8 +712,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
     if( !CV_IS_HAAR_CLASSIFIER(cascade) )
         CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" );
 
-    if( !storage )
-        CV_Error( CV_StsNullPtr, "Null storage pointer" );
+    //if( !storage )
+    //    CV_Error( CV_StsNullPtr, "Null storage pointer" );
 
     if( CV_MAT_DEPTH(gimg.type()) != CV_8U )
         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );
@@ -729,7 +727,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
     if( !cascade->hid_cascade )
         gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
 
-    result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), storage );
+    //result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), storage );
 
     if( CV_MAT_CN(gimg.type()) > 1 )
     {
@@ -1028,7 +1026,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
         args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
         args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&nodenum ));
-
+        const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
         openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
 
         candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status);
@@ -1062,623 +1060,22 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
     else
         rweights.resize(rectList.size(), 0);
 
+    faces.clear();
     if( findBiggestObject && rectList.size() )
     {
-        CvAvgComp result_comp = {{0, 0, 0, 0}, 0};
-
+        Rect result_comp(0, 0, 0, 0);
         for( size_t i = 0; i < rectList.size(); i++ )
         {
             cv::Rect r = rectList[i];
-            if( r.area() > cv::Rect(result_comp.rect).area() )
+            if( r.area() > result_comp.area() )
             {
-                result_comp.rect = r;
-                result_comp.neighbors = rweights[i];
+                result_comp = r;
             }
         }
-        cvSeqPush( result_seq, &result_comp );
+        faces.push_back(result_comp);
     }
     else
     {
-        for( size_t i = 0; i < rectList.size(); i++ )
-        {
-            CvAvgComp c;
-            c.rect = rectList[i];
-            c.neighbors = rweights[i];
-            cvSeqPush( result_seq, &c );
-        }
-    }
-
-    return result_seq;
-}
-
-struct OclBuffers
-{
-    cl_mem stagebuffer;
-    cl_mem nodebuffer;
-    cl_mem candidatebuffer;
-    cl_mem scaleinfobuffer;
-    cl_mem pbuffer;
-    cl_mem correctionbuffer;
-    cl_mem newnodebuffer;
-};
-
-struct getRect
-{
-    Rect operator()(const CvAvgComp &e) const
-    {
-        return e.rect;
-    }
-};
-
-void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv::Rect>& faces,
-                                                        double scaleFactor, int minNeighbors, int flags,
-                                                        Size minSize, Size maxSize)
-{
-    int blocksize = 8;
-    int grp_per_CU = 12;
-    size_t localThreads[3] = { blocksize, blocksize, 1 };
-    size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0],
-        localThreads[1],
-        1 };
-    int outputsz = 256 * globalThreads[0] / localThreads[0];
-
-    Init(gimg.rows, gimg.cols, scaleFactor, flags, outputsz, localThreads, minSize, maxSize);
-
-    const double GROUP_EPS = 0.2;
-
-    cv::ConcurrentRectVector allCandidates;
-    std::vector<cv::Rect> rectList;
-    std::vector<int> rweights;
-
-    CvHaarClassifierCascade      *cascade = oldCascade;
-    GpuHidHaarClassifierCascade  *gcascade;
-    GpuHidHaarStageClassifier    *stage;
-
-    if( CV_MAT_DEPTH(gimg.type()) != CV_8U )
-        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );
-
-    if( CV_MAT_CN(gimg.type()) > 1 )
-    {
-        oclMat gtemp;
-        cvtColor( gimg, gtemp, CV_BGR2GRAY );
-        gimg = gtemp;
-    }
-
-    int *candidate;
-    cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
-    if( (flags & CV_HAAR_SCALE_IMAGE) )
-    {
-        int indexy = 0;
-        CvSize sz;
-
-        cv::Rect roi, roi2;
-        cv::Mat imgroi, imgroisq;
-        cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
-
-        for( int i = 0; i < m_loopcount; i++ )
-        {
-            sz = sizev[i];
-            roi = Rect(0, indexy, sz.width, sz.height);
-            roi2 = Rect(0, 0, sz.width - 1, sz.height - 1);
-            resizeroi = gimg1(roi2);
-            gimgroi = gsum(roi);
-            gimgroisq = gsqsum(roi);
-
-            cv::ocl::resize(gimg, resizeroi, Size(sz.width - 1, sz.height - 1), 0, 0, INTER_LINEAR);
-            cv::ocl::integral(resizeroi, gimgroi, gimgroisq);
-            indexy += sz.height;
-        }
-
-        gcascade   = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
-        stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
-
-        int startstage = 0;
-        int endstage = gcascade->count;
-        int startnode = 0;
-        int pixelstep = gsum.step / 4;
-        int splitstage = 3;
-        int splitnode = stage[0].count + stage[1].count + stage[2].count;
-        cl_int4 p, pq;
-        p.s[0] = gcascade->p0;
-        p.s[1] = gcascade->p1;
-        p.s[2] = gcascade->p2;
-        p.s[3] = gcascade->p3;
-        pq.s[0] = gcascade->pq0;
-        pq.s[1] = gcascade->pq1;
-        pq.s[2] = gcascade->pq2;
-        pq.s[3] = gcascade->pq3;
-        float correction = gcascade->inv_window_area;
-
-        vector<pair<size_t, const void *> > args;
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->scaleinfobuffer ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->candidatebuffer ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&pixelstep ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_loopcount ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitnode ));
-        args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
-        args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
-        args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
-
-        const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
-
-        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
-
-        candidate = (int *)malloc(4 * sizeof(int) * outputsz);
-        memset(candidate, 0, 4 * sizeof(int) * outputsz);
-
-        openCLReadBuffer( gsum.clCxt, ((OclBuffers *)buffers)->candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
-
-        for(int i = 0; i < outputsz; i++)
-        {
-            if(candidate[4 * i + 2] != 0)
-            {
-                allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
-                candidate[4 * i + 2], candidate[4 * i + 3]));
-            }
-        }
-        free((void *)candidate);
-        candidate = NULL;
-    }
-    else
-    {
-        cv::ocl::integral(gimg, gsum, gsqsum);
-
-        gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
-
-        int step = gsum.step / 4;
-        int startnode = 0;
-        int splitstage = 3;
-
-        int startstage = 0;
-        int endstage = gcascade->count;
-
-        vector<pair<size_t, const void *> > args;
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->scaleinfobuffer ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->candidatebuffer ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&step ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_loopcount ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->pbuffer ));
-        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer ));
-        args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum ));
-
-        const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
-        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
-
-        candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL);
-
-        for(int i = 0; i < outputsz; i++)
-        {
-            if(candidate[4 * i + 2] != 0)
-                allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
-                candidate[4 * i + 2], candidate[4 * i + 3]));
-        }
-        clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0);
-    }
-    rectList.resize(allCandidates.size());
-    if(!allCandidates.empty())
-        std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
-
-    if( minNeighbors != 0 || findBiggestObject )
-        groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS);
-    else
-        rweights.resize(rectList.size(), 0);
-
-    GenResult(faces, rectList, rweights);
-}
-
-void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
-    double scaleFactor, int flags,
-    const int outputsz, const size_t localThreads[],
-    CvSize minSize, CvSize maxSize)
-{
-    if(initialized)
-    {
-        return; // we only allow one time initialization
-    }
-    CvHaarClassifierCascade      *cascade = oldCascade;
-
-    if( !CV_IS_HAAR_CLASSIFIER(cascade) )
-        CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" );
-
-    if( scaleFactor <= 1 )
-        CV_Error( CV_StsOutOfRange, "scale factor must be > 1" );
-
-    if( cols < minSize.width || rows < minSize.height )
-        CV_Error(CV_StsError, "Image too small");
-
-    int datasize=0;
-    int totalclassifier=0;
-
-    if( !cascade->hid_cascade )
-    {
-        gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
-    }
-
-    if( maxSize.height == 0 || maxSize.width == 0 )
-    {
-        maxSize.height = rows;
-        maxSize.width = cols;
-    }
-
-    findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
-    if( findBiggestObject )
-        flags &= ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING);
-
-    CreateBaseBufs(datasize, totalclassifier, flags, outputsz);
-    CreateFactorRelatedBufs(rows, cols, flags, scaleFactor, localThreads, minSize, maxSize);
-
-    m_scaleFactor = scaleFactor;
-    m_rows = rows;
-    m_cols = cols;
-    m_flags = flags;
-    m_minSize = minSize;
-    m_maxSize = maxSize;
-
-    // initialize nodes
-    GpuHidHaarClassifierCascade  *gcascade;
-    GpuHidHaarStageClassifier    *stage;
-    GpuHidHaarClassifier         *classifier;
-    GpuHidHaarTreeNode           *node;
-    cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
-    if( (flags & CV_HAAR_SCALE_IMAGE) )
-    {
-        gcascade   = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
-        stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
-        classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
-        node       = (GpuHidHaarTreeNode *)(classifier->node);
-
-        gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
-
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0,
-            sizeof(GpuHidHaarStageClassifier) * gcascade->count,
-            stage, 0, NULL, NULL));
-
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
-                                            m_nodenum * sizeof(GpuHidHaarTreeNode),
-                                            node, 0, NULL, NULL));
-    }
-    else
-    {
-        gpuSetHaarClassifierCascade(cascade);
-
-        gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
-        stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
-        classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
-        node       = (GpuHidHaarTreeNode *)(classifier->node);
-
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
-            m_nodenum * sizeof(GpuHidHaarTreeNode),
-            node, 0, NULL, NULL));
-
-        cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount);
-        float *correction = (float *)malloc(sizeof(float) * m_loopcount);
-        double factor;
-        for(int i = 0; i < m_loopcount; i++)
-        {
-            factor = scalev[i];
-            int equRect_x = (int)(factor * gcascade->p0 + 0.5);
-            int equRect_y = (int)(factor * gcascade->p1 + 0.5);
-            int equRect_w = (int)(factor * gcascade->p3 + 0.5);
-            int equRect_h = (int)(factor * gcascade->p2 + 0.5);
-            p[i].s[0] = equRect_x;
-            p[i].s[1] = equRect_y;
-            p[i].s[2] = equRect_x + equRect_w;
-            p[i].s[3] = equRect_y + equRect_h;
-            correction[i] = 1. / (equRect_w * equRect_h);
-            int startnodenum = m_nodenum * i;
-            float factor2 = (float)factor;
-
-            vector<pair<size_t, const void *> > args1;
-            args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
-            args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
-            args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
-            args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
-            args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
-
-            size_t globalThreads2[3] = {m_nodenum, 1, 1};
-
-            openCLExecuteKernel(Context::getContext(), &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
-        }
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL));
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL));
-
-        free(p);
-        free(correction);
-    }
-    initialized = true;
-}
-
-void cv::ocl::OclCascadeClassifierBuf::CreateBaseBufs(const int datasize, const int totalclassifier,
-                                                      const int flags, const int outputsz)
-{
-    if (!initialized)
-    {
-        buffers = malloc(sizeof(OclBuffers));
-
-        size_t tempSize =
-            sizeof(GpuHidHaarStageClassifier) * ((GpuHidHaarClassifierCascade *)oldCascade->hid_cascade)->count;
-        m_nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) - tempSize - sizeof(GpuHidHaarClassifier) * totalclassifier)
-            / sizeof(GpuHidHaarTreeNode);
-
-        ((OclBuffers *)buffers)->stagebuffer     = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,  tempSize);
-        ((OclBuffers *)buffers)->nodebuffer      = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,  m_nodenum * sizeof(GpuHidHaarTreeNode));
-    }
-
-    if (initialized
-        && ((m_flags & CV_HAAR_SCALE_IMAGE) ^ (flags & CV_HAAR_SCALE_IMAGE)))
-    {
-        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
-    }
-
-    if (flags & CV_HAAR_SCALE_IMAGE)
-    {
-        ((OclBuffers *)buffers)->candidatebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(),
-                                                        CL_MEM_WRITE_ONLY,
-                                                        4 * sizeof(int) * outputsz);
-    }
-    else
-    {
-        ((OclBuffers *)buffers)->candidatebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(),
-                                                        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
-                                                        4 * sizeof(int) * outputsz);
+        faces = rectList;
     }
 }
-
-void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
-    const int rows, const int cols, const int flags,
-    const double scaleFactor, const size_t localThreads[],
-    CvSize minSize, CvSize maxSize)
-{
-    if (initialized)
-    {
-        if ((m_flags & CV_HAAR_SCALE_IMAGE) && !(flags & CV_HAAR_SCALE_IMAGE))
-        {
-            gimg1.release();
-            gsum.release();
-            gsqsum.release();
-        }
-        else if (!(m_flags & CV_HAAR_SCALE_IMAGE) && (flags & CV_HAAR_SCALE_IMAGE))
-        {
-            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
-            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
-            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
-        }
-        else if ((m_flags & CV_HAAR_SCALE_IMAGE) && (flags & CV_HAAR_SCALE_IMAGE))
-        {
-            if (fabs(m_scaleFactor - scaleFactor) < 1e-6
-                && (rows == m_rows && cols == m_cols)
-                && (minSize.width == m_minSize.width)
-                && (minSize.height == m_minSize.height)
-                && (maxSize.width == m_maxSize.width)
-                && (maxSize.height == m_maxSize.height))
-            {
-                return;
-            }
-        }
-        else
-        {
-            if (fabs(m_scaleFactor - scaleFactor) < 1e-6
-                && (rows == m_rows && cols == m_cols)
-                && (minSize.width == m_minSize.width)
-                && (minSize.height == m_minSize.height)
-                && (maxSize.width == m_maxSize.width)
-                && (maxSize.height == m_maxSize.height))
-            {
-                return;
-            }
-            else
-            {
-                openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
-                openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
-                openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
-            }
-        }
-    }
-
-    int loopcount;
-    int indexy = 0;
-    int totalheight = 0;
-    double factor;
-    Rect roi;
-    CvSize sz;
-    CvSize winSize0 = oldCascade->orig_window_size;
-    detect_piramid_info *scaleinfo;
-    cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
-    if (flags & CV_HAAR_SCALE_IMAGE)
-    {
-        for(factor = 1.f;; factor *= scaleFactor)
-        {
-            CvSize winSize = { cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) };
-            sz.width     = cvRound( cols / factor ) + 1;
-            sz.height    = cvRound( rows / factor ) + 1;
-            CvSize sz1     = { sz.width - winSize0.width - 1,      sz.height - winSize0.height - 1 };
-
-            if( sz1.width <= 0 || sz1.height <= 0 )
-                break;
-            if( winSize.width > maxSize.width || winSize.height > maxSize.height )
-                break;
-            if( winSize.width < minSize.width || winSize.height < minSize.height )
-                continue;
-
-            totalheight += sz.height;
-            sizev.push_back(sz);
-            scalev.push_back(static_cast<float>(factor));
-        }
-
-        loopcount = sizev.size();
-        gimg1.create(rows, cols, CV_8UC1);
-        gsum.create(totalheight + 4, cols + 1, CV_32SC1);
-        gsqsum.create(totalheight + 4, cols + 1, CV_32FC1);
-
-        scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
-        for( int i = 0; i < loopcount; i++ )
-        {
-            sz = sizev[i];
-            roi = Rect(0, indexy, sz.width, sz.height);
-            int width = sz.width - 1 - oldCascade->orig_window_size.width;
-            int height = sz.height - 1 - oldCascade->orig_window_size.height;
-            int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
-            int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
-
-            ((detect_piramid_info *)scaleinfo)[i].width_height = (width << 16) | height;
-            ((detect_piramid_info *)scaleinfo)[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
-            ((detect_piramid_info *)scaleinfo)[i].imgoff = gsum(roi).offset >> 2;
-            ((detect_piramid_info *)scaleinfo)[i].factor = scalev[i];
-
-            indexy += sz.height;
-        }
-    }
-    else
-    {
-        for(factor = 1;
-            cvRound(factor * winSize0.width) < cols - 10 && cvRound(factor * winSize0.height) < rows - 10;
-            factor *= scaleFactor)
-        {
-            CvSize winSize = { cvRound( winSize0.width * factor ), cvRound( winSize0.height * factor ) };
-            if( winSize.width < minSize.width || winSize.height < minSize.height )
-            {
-                continue;
-            }
-            sizev.push_back(winSize);
-            scalev.push_back(factor);
-        }
-
-        loopcount = scalev.size();
-        if(loopcount == 0)
-        {
-            loopcount = 1;
-            sizev.push_back(minSize);
-            scalev.push_back( min(cvRound(minSize.width / winSize0.width), cvRound(minSize.height / winSize0.height)) );
-        }
-
-        ((OclBuffers *)buffers)->pbuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,
-            sizeof(cl_int4) * loopcount);
-        ((OclBuffers *)buffers)->correctionbuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,
-            sizeof(cl_float) * loopcount);
-        ((OclBuffers *)buffers)->newnodebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_WRITE,
-            loopcount * m_nodenum * sizeof(GpuHidHaarTreeNode));
-
-        scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
-        for( int i = 0; i < loopcount; i++ )
-        {
-            sz = sizev[i];
-            factor = scalev[i];
-            int ystep = cvRound(std::max(2., factor));
-            int width = (cols - 1 - sz.width  + ystep - 1) / ystep;
-            int height = (rows - 1 - sz.height + ystep - 1) / ystep;
-            int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
-            int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
-
-            ((detect_piramid_info *)scaleinfo)[i].width_height = (width << 16) | height;
-            ((detect_piramid_info *)scaleinfo)[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
-            ((detect_piramid_info *)scaleinfo)[i].imgoff = 0;
-            ((detect_piramid_info *)scaleinfo)[i].factor = factor;
-        }
-    }
-
-    if (loopcount != m_loopcount)
-    {
-        if (initialized)
-        {
-            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
-        }
-        ((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
-    }
-
-    openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
-        sizeof(detect_piramid_info)*loopcount,
-        scaleinfo, 0, NULL, NULL));
-    free(scaleinfo);
-
-    m_loopcount = loopcount;
-}
-
-void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& faces,
-                                                 const std::vector<cv::Rect> &rectList,
-                                                 const std::vector<int> &rweights)
-{
-    MemStorage tempStorage(cvCreateMemStorage(0));
-    CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), tempStorage );
-
-    if( findBiggestObject && rectList.size() )
-    {
-        CvAvgComp result_comp = {CvRect(), 0};
-
-        for( size_t i = 0; i < rectList.size(); i++ )
-        {
-            cv::Rect r = rectList[i];
-            if( r.area() > cv::Rect(result_comp.rect).area() )
-            {
-                result_comp.rect = r;
-                result_comp.neighbors = rweights[i];
-            }
-        }
-        cvSeqPush( result_seq, &result_comp );
-    }
-    else
-    {
-        for( size_t i = 0; i < rectList.size(); i++ )
-        {
-            CvAvgComp c;
-            c.rect = rectList[i];
-            c.neighbors = rweights[i];
-            cvSeqPush( result_seq, &c );
-        }
-    }
-
-    vector<CvAvgComp> vecAvgComp;
-    Seq<CvAvgComp>(result_seq).copyTo(vecAvgComp);
-    faces.resize(vecAvgComp.size());
-    std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
-}
-
-void cv::ocl::OclCascadeClassifierBuf::release()
-{
-    if(initialized)
-    {
-        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer));
-        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
-        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
-        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
-
-        if( (m_flags & CV_HAAR_SCALE_IMAGE) )
-        {
-            cvFree(&oldCascade->hid_cascade);
-        }
-        else
-        {
-            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
-            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
-            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
-        }
-
-        free(buffers);
-        buffers = NULL;
-        initialized = false;
-    }
-}
-
-#ifndef _MAX_PATH
-#define _MAX_PATH 1024
-#endif
-
-#endif
diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp
index b2ebb23ce..ac113d27b 100644
--- a/modules/ocl/src/hog.cpp
+++ b/modules/ocl/src/hog.cpp
@@ -15,7 +15,7 @@
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
-//		Wenju He, wenju@multicorewareinc.com
+//     Wenju He, wenju@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -48,13 +48,15 @@
 using namespace cv;
 using namespace cv::ocl;
 
-
 #define CELL_WIDTH 8
 #define CELL_HEIGHT 8
 #define CELLS_PER_BLOCK_X 2
 #define CELLS_PER_BLOCK_Y 2
 #define NTHREADS 256
 
+static oclMat gauss_w_lut;
+static bool hog_device_cpu;
+
 namespace cv
 {
     namespace ocl
@@ -78,38 +80,43 @@ namespace cv
                 int cnblocks_win_x;
                 int cnblocks_win_y;
                 int cblock_hist_size;
-                int cblock_hist_size_2up;
                 int cdescr_size;
                 int cdescr_width;
+                int cdescr_height;
 
                 void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
                                       int nblocks_win_x, int nblocks_win_y);
 
                 void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
                                    int height, int width, const cv::ocl::oclMat &grad,
-                                   const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists);
+                                   const cv::ocl::oclMat &qangle,
+                                   const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists);
 
                 void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
-                                     int height, int width, cv::ocl::oclMat &block_hists, float threshold);
+                                     int height, int width, cv::ocl::oclMat &block_hists,
+                                     float threshold);
 
                 void classify_hists(int win_height, int win_width, int block_stride_y,
-                                    int block_stride_x, int win_stride_y, int win_stride_x, int height,
-                                    int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef,
+                                    int block_stride_x, int win_stride_y, int win_stride_x,
+                                    int height, int width, const cv::ocl::oclMat &block_hists,
+                                    const cv::ocl::oclMat &coefs, float free_coef,
                                     float threshold, cv::ocl::oclMat &labels);
 
-                void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
-                                            int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists,
+                void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y,
+                                            int block_stride_x, int win_stride_y, int win_stride_x,
+                                            int height, int width, const cv::ocl::oclMat &block_hists,
                                             cv::ocl::oclMat &descriptors);
-                void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
-                                            int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists,
+                void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y,
+                                            int block_stride_x, int win_stride_y, int win_stride_x,
+                                            int height, int width, const cv::ocl::oclMat &block_hists,
                                             cv::ocl::oclMat &descriptors);
 
                 void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
-                                            float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma);
+                                            float angle_scale, cv::ocl::oclMat &grad,
+                                            cv::ocl::oclMat &qangle, bool correct_gamma);
                 void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
-                                            float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma);
-
-                void resize( const oclMat &src, oclMat &dst, const Size sz);
+                                            float angle_scale, cv::ocl::oclMat &grad,
+                                            cv::ocl::oclMat &qangle, bool correct_gamma);
             }
         }
     }
@@ -117,8 +124,14 @@ namespace cv
 
 using namespace ::cv::ocl::device;
 
-cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
-                                      int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_)
+static inline int divUp(int total, int grain)
+{
+    return (total + grain - 1) / grain;
+}
+
+cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_,
+                                      Size cell_size_, int nbins_, double win_sigma_,
+                                      double threshold_L2hys_, bool gamma_correction_, int nlevels_)
     : win_size(win_size_),
       block_size(block_size_),
       block_stride(block_stride_),
@@ -132,19 +145,27 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
     CV_Assert((win_size.width  - block_size.width ) % block_stride.width  == 0 &&
               (win_size.height - block_size.height) % block_stride.height == 0);
 
-    CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0);
+    CV_Assert(block_size.width % cell_size.width == 0 &&
+        block_size.height % cell_size.height == 0);
 
     CV_Assert(block_stride == cell_size);
 
     CV_Assert(cell_size == Size(8, 8));
 
-    Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
+    Size cells_per_block(block_size.width / cell_size.width,
+        block_size.height / cell_size.height);
     CV_Assert(cells_per_block == Size(2, 2));
 
     cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
-    hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
+    hog::set_up_constants(nbins, block_stride.width, block_stride.height,
+        blocks_per_win.width, blocks_per_win.height);
 
     effect_size = Size(0, 0);
+
+    if (queryDeviceInfo<IS_CPU_DEVICE, bool>())
+        hog_device_cpu = true;
+    else
+        hog_device_cpu = false;
 }
 
 size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
@@ -154,7 +175,8 @@ size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
 
 size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const
 {
-    Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
+    Size cells_per_block = Size(block_size.width / cell_size.width,
+        block_size.height / cell_size.height);
     return (size_t)(nbins * cells_per_block.area());
 }
 
@@ -167,7 +189,8 @@ bool cv::ocl::HOGDescriptor::checkDetectorSize() const
 {
     size_t detector_size = detector.rows * detector.cols;
     size_t descriptor_size = getDescriptorSize();
-    return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1;
+    return detector_size == 0 || detector_size == descriptor_size ||
+        detector_size == descriptor_size + 1;
 }
 
 void cv::ocl::HOGDescriptor::setSVMDetector(const std::vector<float> &_detector)
@@ -207,10 +230,24 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
 
     const size_t block_hist_size = getBlockHistogramSize();
     const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
-    block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F);
+    block_hists.create(1,
+        static_cast<int>(block_hist_size * blocks_per_img.area()) + 256, CV_32F);
 
     Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
     labels.create(1, wins_per_img.area(), CV_8U);
+
+    float sigma = getWinSigma();
+    float scale = 1.f / (2.f * sigma * sigma);
+    Mat gaussian_lut(1, 512, CV_32FC1);
+    int idx = 0;
+    for(int i=-8; i<8; i++)
+        for(int j=-8; j<8; j++)
+            gaussian_lut.at<float>(idx++) = std::exp(-(j * j + i * i) * scale);
+    for(int i=-8; i<8; i++)
+        for(int j=-8; j<8; j++)
+            gaussian_lut.at<float>(idx++) = (8.f - fabs(j + 0.5f)) * (8.f - fabs(i + 0.5f)) / 64.f;
+
+    gauss_w_lut.upload(gaussian_lut);
 }
 
 void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle)
@@ -221,10 +258,12 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc
     switch (img.type())
     {
     case CV_8UC1:
-        hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
+        hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img,
+            angleScale, grad, qangle, gamma_correction);
         break;
     case CV_8UC4:
-        hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
+        hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img,
+            angleScale, grad, qangle, gamma_correction);
         break;
     }
 }
@@ -232,19 +271,21 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc
 
 void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
 {
-    computeGradient(img, grad, qangle);
+    computeGradient(img, this->grad, this->qangle);
 
-    hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
-                       grad, qangle, (float)getWinSigma(), block_hists);
+    hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
+        effect_size.width, grad, qangle, gauss_w_lut, block_hists);
 
-    hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
-                         block_hists, (float)threshold_L2hys);
+    hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
+        effect_size.width, block_hists, (float)threshold_L2hys);
 }
 
 
-void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, oclMat &descriptors, int descr_format)
+void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
+                                            oclMat &descriptors, int descr_format)
 {
-    CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
+    CV_Assert(win_stride.width % block_stride.width == 0 &&
+        win_stride.height % block_stride.height == 0);
 
     init_buffer(img, win_stride);
 
@@ -254,17 +295,20 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
     Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
     Size wins_per_img   = numPartsWithin(effect_size, win_size, win_stride);
 
-    descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
+    descriptors.create(wins_per_img.area(),
+        static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
 
     switch (descr_format)
     {
     case DESCR_FORMAT_ROW_BY_ROW:
-        hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
-                                    win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
+        hog::extract_descrs_by_rows(win_size.height, win_size.width,
+            block_stride.height, block_stride.width, win_stride.height, win_stride.width,
+            effect_size.height, effect_size.width, block_hists, descriptors);
         break;
     case DESCR_FORMAT_COL_BY_COL:
-        hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
-                                    win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
+        hog::extract_descrs_by_cols(win_size.height, win_size.width,
+            block_stride.height, block_stride.width, win_stride.height, win_stride.width,
+            effect_size.height, effect_size.width, block_hists, descriptors);
         break;
     default:
         CV_Error(Error::StsBadArg, "Unknown descriptor format");
@@ -272,7 +316,8 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
 }
 
 
-void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits, double hit_threshold, Size win_stride, Size padding)
+void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
+                                    double hit_threshold, Size win_stride, Size padding)
 {
     CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
     CV_Assert(padding == Size(0, 0));
@@ -284,14 +329,16 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
     if (win_stride == Size())
         win_stride = block_stride;
     else
-        CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
+        CV_Assert(win_stride.width % block_stride.width == 0 &&
+            win_stride.height % block_stride.height == 0);
     init_buffer(img, win_stride);
 
     computeBlockHistograms(img);
 
-    hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
-                        win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists,
-                        detector, (float)free_coef, (float)hit_threshold, labels);
+    hog::classify_hists(win_size.height, win_size.width, block_stride.height,
+        block_stride.width, win_stride.height, win_stride.width,
+        effect_size.height, effect_size.width, block_hists, detector,
+        (float)free_coef, (float)hit_threshold, labels);
 
     labels.download(labels_host);
     unsigned char *vec = labels_host.ptr();
@@ -307,8 +354,9 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
 
 
 
-void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations, double hit_threshold,
-        Size win_stride, Size padding, double scale0, int group_threshold)
+void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
+                                              double hit_threshold, Size win_stride, Size padding,
+                                              double scale0, int group_threshold)
 {
     CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
     CV_Assert(scale0 > 1);
@@ -334,7 +382,8 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rec
     if (win_stride == Size())
         win_stride = block_stride;
     else
-        CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
+        CV_Assert(win_stride.width % block_stride.width == 0 &&
+            win_stride.height % block_stride.height == 0);
     init_buffer(img, win_stride);
     image_scale.create(img.size(), img.type());
 
@@ -348,16 +397,17 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rec
         }
         else
         {
-            hog::resize( img, image_scale, effect_size);
+            resize(img, image_scale, effect_size);
             detect(image_scale, locations, hit_threshold, win_stride, padding);
         }
-        Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
+        Size scaled_win_size(cvRound(win_size.width * scale),
+            cvRound(win_size.height * scale));
         for (size_t j = 0; j < locations.size(); j++)
             all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size));
     }
 
     found_locations.assign(all_candidates.begin(), all_candidates.end());
-    groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
+    groupRectangles(found_locations, group_threshold, 0.2);
 }
 
 int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
@@ -365,9 +415,11 @@ int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
     return (size - part_size + stride) / stride;
 }
 
-cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride)
+cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size,
+                                                cv::Size stride)
 {
-    return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height));
+    return Size(numPartsWithin(size.width, part_size.width, stride.width),
+        numPartsWithin(size.height, part_size.height, stride.height));
 }
 
 std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector()
@@ -1548,8 +1600,9 @@ static int power_2up(unsigned int n)
     return -1; // Input is too big
 }
 
-void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y,
-        int nblocks_win_x, int nblocks_win_y)
+void cv::ocl::device::hog::set_up_constants(int nbins,
+                                            int block_stride_x, int block_stride_y,
+                                            int nblocks_win_x, int nblocks_win_y)
 {
     cnbins = nbins;
     cblock_stride_x = block_stride_x;
@@ -1560,106 +1613,184 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b
     int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
     cblock_hist_size = block_hist_size;
 
-    int block_hist_size_2up = power_2up(block_hist_size);
-    cblock_hist_size_2up = block_hist_size_2up;
-
     int descr_width = nblocks_win_x * block_hist_size;
     cdescr_width = descr_width;
+    cdescr_height = nblocks_win_y;
 
     int descr_size = descr_width * nblocks_win_y;
     cdescr_size = descr_size;
 }
 
-void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y,
-        int height, int width, const cv::ocl::oclMat &grad,
-        const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists)
+void cv::ocl::device::hog::compute_hists(int nbins,
+                                         int block_stride_x, int block_stride_y,
+                                         int height, int width,
+                                         const cv::ocl::oclMat &grad,
+                                         const cv::ocl::oclMat &qangle,
+                                         const cv::ocl::oclMat &gauss_w_lut,
+                                         cv::ocl::oclMat &block_hists)
 {
     Context *clCxt = Context::getContext();
-    String kernelName = "compute_hists_kernel";
     std::vector< std::pair<size_t, const void *> > args;
+    String kernelName = "compute_hists_lut_kernel";
 
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
-    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
-
-    size_t globalThreads[3] = { img_block_width * 32, img_block_height * 2, 1 };
-    size_t localThreads[3] = { 32, 2, 1 };
+    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
+        / block_stride_x;
+    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y)
+        / block_stride_y;
+    int blocks_total = img_block_width * img_block_height;
 
     int grad_quadstep = grad.step >> 2;
     int qangle_step = qangle.step;
 
-    // Precompute gaussian spatial window parameter
-    float scale = 1.f / (2.f * sigma * sigma);
+    int blocks_in_group = 4;
+    size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
+    size_t globalThreads[3] = {
+        divUp(img_block_width * img_block_height, blocks_in_group) * localThreads[0], 2, 1 };
 
     int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float);
     int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float);
-    int smem = hists_size + final_hists_size;
 
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
+    int smem = (hists_size + final_hists_size) * blocks_in_group;
+
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_stride_x));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_stride_y));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&blocks_in_group));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&blocks_total));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&grad_quadstep));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
     args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
     args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
-    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
+    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data));
     args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
     args.push_back( std::make_pair( smem, (void *)NULL));
 
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+    if(hog_device_cpu)
+    {
+        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+            localThreads, args, -1, -1, "-D CPU");
+    }else
+    {
+        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
+        int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+        char opt[32] = {0};
+        sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
+        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+            localThreads, args, -1, -1, opt);
+    }
 }
 
-void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y,
-        int height, int width, cv::ocl::oclMat &block_hists, float threshold)
+void cv::ocl::device::hog::normalize_hists(int nbins,
+                                           int block_stride_x, int block_stride_y,
+                                           int height, int width,
+                                           cv::ocl::oclMat &block_hists,
+                                           float threshold)
 {
     Context *clCxt = Context::getContext();
-    String kernelName = "normalize_hists_kernel";
     std::vector< std::pair<size_t, const void *> > args;
+    String kernelName;
 
     int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
-    int nthreads = power_2up(block_hist_size);
+    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
+        / block_stride_x;
+    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y)
+        / block_stride_y;
+    int nthreads;
+    size_t globalThreads[3] = { 1, 1, 1  };
+    size_t localThreads[3] = { 1, 1, 1  };
 
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
-    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
-    size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 };
-    size_t localThreads[3] = { nthreads, 1, 1  };
+    if ( nbins == 9 )
+    {
+        /* optimized for the case of 9 bins */
+        kernelName = "normalize_hists_36_kernel";
+        int blocks_in_group = NTHREADS / block_hist_size;
+        nthreads = blocks_in_group * block_hist_size;
+        int num_groups = divUp( img_block_width * img_block_height, blocks_in_group);
+        globalThreads[0] = nthreads * num_groups;
+        localThreads[0] = nthreads;
+    }
+    else
+    {
+        kernelName = "normalize_hists_kernel";
+        nthreads = power_2up(block_hist_size);
+        globalThreads[0] = img_block_width * nthreads;
+        globalThreads[1] = img_block_height;
+        localThreads[0] = nthreads;
 
-    if ((nthreads < 32) || (nthreads > 512) )
-        cv::error(Error::StsBadArg, "normalize_hists: histogram's size is too small or too big", "cv::ocl::device::hog::normalize_hists", __FILE__, __LINE__);
+        if ((nthreads < 32) || (nthreads > 512) )
+            cv::error(Error::StsBadArg, "normalize_hists: histogram's size is too small or too big",
+                "normalize_hists", __FILE__, __LINE__);
+
+        args.push_back( std::make_pair( sizeof(cl_int), (void *)&nthreads));
+        args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_hist_size));
+        args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
+    }
 
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nthreads));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_hist_size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
     args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
     args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
     args.push_back( std::make_pair( nthreads * sizeof(float), (void *)NULL));
 
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+    if(hog_device_cpu)
+        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+                             localThreads, args, -1, -1, "-D CPU");
+    else
+    {
+        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
+        int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+        char opt[32] = {0};
+        sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
+        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+                             localThreads, args, -1, -1, opt);
+    }
 }
 
-void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y,
-        int block_stride_x, int win_stride_y, int win_stride_x, int height,
-        int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef,
-        float threshold, cv::ocl::oclMat &labels)
+void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
+                                          int block_stride_y, int block_stride_x,
+                                          int win_stride_y, int win_stride_x,
+                                          int height, int width,
+                                          const cv::ocl::oclMat &block_hists,
+                                          const cv::ocl::oclMat &coefs,
+                                          float free_coef, float threshold,
+                                          cv::ocl::oclMat &labels)
 {
     Context *clCxt = Context::getContext();
-    String kernelName = "classify_hists_kernel";
     std::vector< std::pair<size_t, const void *> > args;
 
+    int nthreads;
+    String kernelName;
+    switch (cdescr_width)
+    {
+    case 180:
+        nthreads = 180;
+        kernelName = "classify_hists_180_kernel";
+        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
+        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
+        break;
+    case 252:
+        nthreads = 256;
+        kernelName = "classify_hists_252_kernel";
+        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
+        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
+        break;
+    default:
+        nthreads = 256;
+        kernelName = "classify_hists_kernel";
+        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
+        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
+    }
+
     int win_block_stride_x = win_stride_x / block_stride_x;
     int win_block_stride_y = win_stride_y / block_stride_y;
     int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
     int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
-
-    size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
-    size_t localThreads[3] = { NTHREADS, 1, 1 };
+    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
+        block_stride_x;
 
+    size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 };
+    size_t localThreads[3] = { nthreads, 1, 1 };
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
-    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_win_width));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
@@ -1670,12 +1801,26 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo
     args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
     args.push_back( std::make_pair( sizeof(cl_mem), (void *)&labels.data));
 
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+    if(hog_device_cpu)
+        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+                             localThreads, args, -1, -1, "-D CPU");
+    else
+    {
+        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
+        int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+        char opt[32] = {0};
+        sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
+        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+                             localThreads, args, -1, -1, opt);
+    }
 }
 
-void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
-        int win_stride_y, int win_stride_x, int height, int width,
-        const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors)
+void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
+                                                  int block_stride_y, int block_stride_x,
+                                                  int win_stride_y, int win_stride_x,
+                                                  int height, int width,
+                                                  const cv::ocl::oclMat &block_hists,
+                                                  cv::ocl::oclMat &descriptors)
 {
     Context *clCxt = Context::getContext();
     String kernelName = "extract_descrs_by_rows_kernel";
@@ -1685,7 +1830,8 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
     int win_block_stride_y = win_stride_y / block_stride_y;
     int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
     int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
+    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
+        block_stride_x;
     int descriptors_quadstep = descriptors.step >> 2;
 
     size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
@@ -1701,12 +1847,16 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
     args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
     args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
 
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+        localThreads, args, -1, -1);
 }
 
-void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
-        int win_stride_y, int win_stride_x, int height, int width,
-        const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors)
+void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
+                                                  int block_stride_y, int block_stride_x,
+                                                  int win_stride_y, int win_stride_x,
+                                                  int height, int width,
+                                                  const cv::ocl::oclMat &block_hists,
+                                                  cv::ocl::oclMat &descriptors)
 {
     Context *clCxt = Context::getContext();
     String kernelName = "extract_descrs_by_cols_kernel";
@@ -1716,7 +1866,8 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
     int win_block_stride_y = win_stride_y / block_stride_y;
     int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
     int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
-    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
+    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
+        block_stride_x;
     int descriptors_quadstep = descriptors.step >> 2;
 
     size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
@@ -1733,16 +1884,16 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
     args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
     args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
 
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+        localThreads, args, -1, -1);
 }
 
-static inline int divUp(int total, int grain)
-{
-    return (total + grain - 1) / grain;
-}
-
-void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
-        float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma)
+void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
+                                                  const cv::ocl::oclMat &img,
+                                                  float angle_scale,
+                                                  cv::ocl::oclMat &grad,
+                                                  cv::ocl::oclMat &qangle,
+                                                  bool correct_gamma)
 {
     Context *clCxt = Context::getContext();
     String kernelName = "compute_gradients_8UC1_kernel";
@@ -1767,11 +1918,16 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c
     args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
 
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+        localThreads, args, -1, -1);
 }
 
-void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
-        float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma)
+void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
+                                                  const cv::ocl::oclMat &img,
+                                                  float angle_scale,
+                                                  cv::ocl::oclMat &grad,
+                                                  cv::ocl::oclMat &qangle,
+                                                  bool correct_gamma)
 {
     Context *clCxt = Context::getContext();
     String kernelName = "compute_gradients_8UC4_kernel";
@@ -1797,37 +1953,6 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c
     args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
     args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
 
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
-{
-    CV_Assert( (src.channels() == dst.channels()) );
-    Context *clCxt = Context::getContext();
-
-    String kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel";
-    size_t blkSizeX = 16, blkSizeY = 16;
-    size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX;
-    size_t glbSizeY = sz.height % blkSizeY == 0 ? sz.height : (sz.height / blkSizeY + 1) * blkSizeY;
-    size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
-    size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
-
-    float ifx = (float)src.cols / sz.width;
-    float ify = (float)src.rows / sz.height;
-
-    std::vector< std::pair<size_t, const void *> > args;
-    args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-    args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
-    args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.offset));
-    args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.offset));
-    args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step));
-    args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.step));
-    args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
-    args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
-    args.push_back( std::make_pair(sizeof(cl_int), (void *)&sz.width));
-    args.push_back( std::make_pair(sizeof(cl_int), (void *)&sz.height));
-    args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx));
-    args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify));
-
-    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+        localThreads, args, -1, -1);
 }
diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
index 3366cf726..0dc7fe9ce 100644
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -1585,14 +1585,14 @@ namespace cv
 
         namespace
         {
-            class CLAHE_Impl : public cv::ocl::CLAHE
+            class CLAHE_Impl : public cv::CLAHE
             {
             public:
                 CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
 
                 cv::AlgorithmInfo* info() const;
 
-                void apply(const oclMat &src, oclMat &dst);
+                void apply(cv::InputArray src, cv::OutputArray dst);
 
                 void setClipLimit(double clipLimit);
                 double getClipLimit() const;
@@ -1610,14 +1610,19 @@ namespace cv
                 oclMat srcExt_;
                 oclMat lut_;
             };
-
             CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
             clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
             {
             }
 
-            void CLAHE_Impl::apply(const oclMat &src, oclMat &dst)
+            CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE_OCL",
+                obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
+                obj.info()->addParam(obj, "tilesX", obj.tilesX_);
+                obj.info()->addParam(obj, "tilesY", obj.tilesY_))
+            void CLAHE_Impl::apply(cv::InputArray src_raw, cv::OutputArray dst_raw)
             {
+                oclMat& src = getOclMatRef(src_raw);
+                oclMat& dst = getOclMatRef(dst_raw);
                 CV_Assert( src.type() == CV_8UC1 );
 
                 dst.create( src.size(), src.type() );
@@ -1685,9 +1690,9 @@ namespace cv
             }
         }
 
-        cv::Ptr<cv::ocl::CLAHE> createCLAHE(double clipLimit, cv::Size tileGridSize)
+        cv::Ptr<cv::CLAHE> createCLAHE(double clipLimit, cv::Size tileGridSize)
         {
-            return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
+            return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height);
         }
 
         //////////////////////////////////bilateralFilter////////////////////////////////////////////////////
diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp
index 5d02423a2..827b7d495 100644
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@ -65,6 +65,7 @@ namespace cv
     namespace ocl
     {
         extern void fft_teardown();
+        extern void clBlasTeardown();
         /*
          * The binary caching system to eliminate redundant program source compilation.
          * Strictly, this is not a cache because we do not implement evictions right now.
@@ -120,9 +121,6 @@ namespace cv
             codeCache.clear();
             cacheSize = 0;
         }
-
-        // not to be exported to dynamic lib
-        void setBinaryDiskCacheImpl(int mode, String path, Info::Impl * impl);
         struct Info::Impl
         {
             cl_platform_id oclplatform;
@@ -141,9 +139,6 @@ namespace cv
             char extra_options[512];
             int  double_support;
             int unified_memory; //1 means integrated GPU, otherwise this value is 0
-            bool enable_disk_cache; 
-            bool update_disk_cache;
-            String binpath;
             int refcounter;
 
             Impl();
@@ -171,6 +166,16 @@ namespace cv
             void releaseResources();
         };
 
+        // global variables to hold binary cache properties
+        static int enable_disk_cache =
+#ifdef _DEBUG
+            false;
+#else
+            true;
+#endif
+        static int update_disk_cache = false;
+        static String binpath = "";
+
         Info::Impl::Impl()
             :oclplatform(0),
             oclcontext(0),
@@ -181,13 +186,9 @@ namespace cv
             maxComputeUnits(0),
             double_support(0),
             unified_memory(0),
-            enable_disk_cache(false),
-            update_disk_cache(false),
-            binpath("./"),
             refcounter(1)
         {
             memset(extra_options, 0, 512);
-            setBinaryDiskCacheImpl(CACHE_RELEASE, String("./"), this);
         }
 
         void Info::Impl::releaseResources()
@@ -196,7 +197,8 @@ namespace cv
 
             if(clCmdQueue)
             {
-                openCLSafeCall(clReleaseCommandQueue(clCmdQueue));
+                //temporarily disable command queue release as it causes program hang at exit
+                //openCLSafeCall(clReleaseCommandQueue(clCmdQueue));
                 clCmdQueue = 0;
             }
 
@@ -318,8 +320,7 @@ namespace cv
             char clVersion[256];
             for (unsigned i = 0; i < numPlatforms; ++i)
             {
-
-                cl_uint numsdev;
+                cl_uint numsdev = 0;
                 cl_int status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev);
                 if(status != CL_DEVICE_NOT_FOUND)
                     openCLVerifyCall(status);
@@ -508,29 +509,30 @@ namespace cv
             return openCLGetKernelFromSource(clCxt, source, kernelName, NULL);
         }
 
-        void setBinaryDiskCacheImpl(int mode, String path, Info::Impl * impl)
+        void setBinaryDiskCache(int mode, String path)
         {
-            impl->update_disk_cache = (mode & CACHE_UPDATE) == CACHE_UPDATE;
-            impl->enable_disk_cache = 
-#ifdef _DEBUG 
+            if(mode == CACHE_NONE)
+            {
+                update_disk_cache = 0;
+                enable_disk_cache = 0;
+                return;
+            }
+            update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE;
+            enable_disk_cache |=
+#ifdef _DEBUG
                 (mode & CACHE_DEBUG)   == CACHE_DEBUG;
 #else
                 (mode & CACHE_RELEASE) == CACHE_RELEASE;
 #endif
-            if(impl->enable_disk_cache && !path.empty())
+            if(enable_disk_cache && !path.empty())
             {
-                impl->binpath = path;
+                binpath = path;
             }
         }
-        void setBinaryDiskCache(int mode, cv::String path)
-        {
-            setBinaryDiskCacheImpl(mode, path, Context::getContext()->impl);
-        }
 
         void setBinpath(const char *path)
         {
-            Context *clcxt = Context::getContext();
-            clcxt->impl->binpath = path;
+            binpath = path;
         }
 
         int savetofile(const Context*,  cl_program &program, const char *fileName)
@@ -598,15 +600,15 @@ namespace cv
                     strcat(all_build_options, build_options);
                 if(all_build_options != NULL)
                 {
-                    filename = clCxt->impl->binpath  + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + all_build_options + ".clb";
+                    filename = binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + all_build_options + ".clb";
                 }
                 else
                 {
-                    filename = clCxt->impl->binpath  + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + ".clb";
+                    filename = binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + ".clb";
                 }
 
-                FILE *fp = clCxt->impl->enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL;
-                if(fp == NULL || clCxt->impl->update_disk_cache)
+                FILE *fp = enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL;
+                if(fp == NULL || update_disk_cache)
                 {
                     if(fp != NULL)
                         fclose(fp);
@@ -615,7 +617,7 @@ namespace cv
                                   clCxt->impl->oclcontext, 1, source, NULL, &status);
                     openCLVerifyCall(status);
                     status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL);
-                    if(status == CL_SUCCESS && clCxt->impl->enable_disk_cache)
+                    if(status == CL_SUCCESS && enable_disk_cache)
                         savetofile(clCxt, program, filename.c_str());
                 }
                 else
@@ -952,8 +954,8 @@ namespace cv
 
         bool initialized()
         {
-            return *((volatile int*)&Context::val) != 0 && 
-                Context::clCxt->impl->clCmdQueue != NULL&& 
+            return *((volatile int*)&Context::val) != 0 &&
+                Context::clCxt->impl->clCmdQueue != NULL&&
                 Context::clCxt->impl->oclcontext != NULL;
         }
 
@@ -1057,6 +1059,7 @@ namespace cv
         void Info::release()
         {
             fft_teardown();
+            clBlasTeardown();
             impl->release();
             impl = new Impl;
             DeviceName.clear();
@@ -1066,6 +1069,7 @@ namespace cv
         Info::~Info()
         {
             fft_teardown();
+            clBlasTeardown();
             impl->release();
         }
 
@@ -1087,26 +1091,3 @@ namespace cv
     }//namespace ocl
 
 }//namespace cv
-
-#if defined BUILD_SHARED_LIBS && defined CVAPI_EXPORTS && defined WIN32 && !defined WINCE
-#include <windows.h>
-BOOL WINAPI DllMain( HINSTANCE, DWORD  fdwReason, LPVOID );
-
-BOOL WINAPI DllMain( HINSTANCE, DWORD  fdwReason, LPVOID )
-{
-    if( fdwReason == DLL_PROCESS_DETACH )
-    {
-        // application hangs if call clReleaseCommandQueue here, so release context only
-        // without context release application hangs as well
-        context_tear_down = 1;
-        Context* cv_ctx = Context::getContext();
-        if(cv_ctx)
-        {
-            cl_context ctx = cv_ctx->impl->oclcontext;
-            if(ctx)
-                openCLSafeCall(clReleaseContext(ctx));
-        }
-    }
-    return TRUE;
-}
-#endif
diff --git a/modules/ocl/src/interpolate_frames.cpp b/modules/ocl/src/interpolate_frames.cpp
index 387f58fc3..deca8c8a3 100644
--- a/modules/ocl/src/interpolate_frames.cpp
+++ b/modules/ocl/src/interpolate_frames.cpp
@@ -235,4 +235,3 @@ void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
     }
     texture = bindTexture(img);
 }
-
diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp
new file mode 100644
index 000000000..0866117a6
--- /dev/null
+++ b/modules/ocl/src/kmeans.cpp
@@ -0,0 +1,437 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//     Xiaopeng Fu, fuxiaopeng2222@163.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include <iomanip>
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace ocl;
+
+namespace cv
+{
+namespace ocl
+{
+////////////////////////////////////OpenCL kernel strings//////////////////////////
+extern const char *kmeans_kernel;
+}
+}
+
+static void generateRandomCenter(const std::vector<Vec2f>& box, float* center, RNG& rng)
+{
+    size_t j, dims = box.size();
+    float margin = 1.f/dims;
+    for( j = 0; j < dims; j++ )
+        center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0];
+}
+
+// This class is copied from matrix.cpp in core module.
+class KMeansPPDistanceComputer : public ParallelLoopBody
+{
+public:
+    KMeansPPDistanceComputer( float *_tdist2,
+                              const float *_data,
+                              const float *_dist,
+                              int _dims,
+                              size_t _step,
+                              size_t _stepci )
+        : tdist2(_tdist2),
+          data(_data),
+          dist(_dist),
+          dims(_dims),
+          step(_step),
+          stepci(_stepci) { }
+
+    void operator()( const cv::Range& range ) const
+    {
+        const int begin = range.start;
+        const int end = range.end;
+
+        for ( int i = begin; i<end; i++ )
+        {
+            tdist2[i] = std::min(normL2Sqr_(data + step*i, data + stepci, dims), dist[i]);
+        }
+    }
+
+private:
+    KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // to quiet MSVC
+
+    float *tdist2;
+    const float *data;
+    const float *dist;
+    const int dims;
+    const size_t step;
+    const size_t stepci;
+};
+/*
+k-means center initialization using the following algorithm:
+Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding
+*/
+static void generateCentersPP(const Mat& _data, Mat& _out_centers,
+                              int K, RNG& rng, int trials)
+{
+    int i, j, k, dims = _data.cols, N = _data.rows;
+    const float* data = (float*)_data.data;
+    size_t step = _data.step/sizeof(data[0]);
+    std::vector<int> _centers(K);
+    int* centers = &_centers[0];
+    std::vector<float> _dist(N*3);
+    float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N;
+    double sum0 = 0;
+
+    centers[0] = (unsigned)rng % N;
+
+    for( i = 0; i < N; i++ )
+    {
+        dist[i] = normL2Sqr_(data + step*i, data + step*centers[0], dims);
+        sum0 += dist[i];
+    }
+
+    for( k = 1; k < K; k++ )
+    {
+        double bestSum = DBL_MAX;
+        int bestCenter = -1;
+
+        for( j = 0; j < trials; j++ )
+        {
+            double p = (double)rng*sum0, s = 0;
+            for( i = 0; i < N-1; i++ )
+                if( (p -= dist[i]) <= 0 )
+                    break;
+            int ci = i;
+
+            parallel_for_(Range(0, N),
+                          KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci));
+            for( i = 0; i < N; i++ )
+            {
+                s += tdist2[i];
+            }
+
+            if( s < bestSum )
+            {
+                bestSum = s;
+                bestCenter = ci;
+                std::swap(tdist, tdist2);
+            }
+        }
+        centers[k] = bestCenter;
+        sum0 = bestSum;
+        std::swap(dist, tdist);
+    }
+
+    for( k = 0; k < K; k++ )
+    {
+        const float* src = data + step*centers[k];
+        float* dst = _out_centers.ptr<float>(k);
+        for( j = 0; j < dims; j++ )
+            dst[j] = src[j];
+    }
+}
+
+void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers)
+{
+    //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+    //{
+    //    CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
+    //    return;
+    //}
+
+    Context  *clCxt = src.clCxt;
+    int labels_step = (int)(labels.step/labels.elemSize());
+    String kernelname = "distanceToCenters";
+    int threadNum = src.rows > 256 ? 256 : src.rows;
+    size_t localThreads[3]  = {1, threadNum, 1};
+    size_t globalThreads[3] = {1, src.rows, 1};
+
+    std::vector<std::pair<size_t, const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&labels_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers.rows));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&labels.data));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&centers.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dists.data));
+
+    openCLExecuteKernel(clCxt, &kmeans_kernel, kernelname, globalThreads, localThreads, args, -1, -1, NULL);
+}
+///////////////////////////////////k - means /////////////////////////////////////////////////////////
+double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
+                       TermCriteria criteria, int attempts, int flags, oclMat &_centers)
+{
+    const int SPP_TRIALS = 3;
+    bool isrow = _src.rows == 1 && _src.oclchannels() > 1;
+    int N = !isrow ? _src.rows : _src.cols;
+    int dims = (!isrow ? _src.cols : 1) * _src.oclchannels();
+    int type = _src.depth();
+
+    attempts = std::max(attempts, 1);
+    CV_Assert(type == CV_32F && K > 0 );
+    CV_Assert( N >= K );
+
+    Mat _labels;
+    if( flags & KMEANS_USE_INITIAL_LABELS )
+    {
+        CV_Assert( (_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
+                   _bestLabels.cols * _bestLabels.rows == N &&
+                   _bestLabels.type() == CV_32S );
+        _bestLabels.download(_labels);
+    }
+    else
+    {
+        if( !((_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
+                _bestLabels.cols * _bestLabels.rows == N &&
+                _bestLabels.type() == CV_32S &&
+                _bestLabels.isContinuous()))
+            _bestLabels.create(N, 1, CV_32S);
+        _labels.create(_bestLabels.size(), _bestLabels.type());
+    }
+    int* labels = _labels.ptr<int>();
+
+    Mat data;
+    _src.download(data);
+    Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type);
+    std::vector<int> counters(K);
+    std::vector<Vec2f> _box(dims);
+    Vec2f* box = &_box[0];
+    double best_compactness = DBL_MAX, compactness = 0;
+    RNG& rng = theRNG();
+    int a, iter, i, j, k;
+
+    if( criteria.type & TermCriteria::EPS )
+        criteria.epsilon = std::max(criteria.epsilon, 0.);
+    else
+        criteria.epsilon = FLT_EPSILON;
+    criteria.epsilon *= criteria.epsilon;
+
+    if( criteria.type & TermCriteria::COUNT )
+        criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100);
+    else
+        criteria.maxCount = 100;
+
+    if( K == 1 )
+    {
+        attempts = 1;
+        criteria.maxCount = 2;
+    }
+
+    const float* sample = data.ptr<float>();
+    for( j = 0; j < dims; j++ )
+        box[j] = Vec2f(sample[j], sample[j]);
+
+    for( i = 1; i < N; i++ )
+    {
+        sample = data.ptr<float>(i);
+        for( j = 0; j < dims; j++ )
+        {
+            float v = sample[j];
+            box[j][0] = std::min(box[j][0], v);
+            box[j][1] = std::max(box[j][1], v);
+        }
+    }
+
+    for( a = 0; a < attempts; a++ )
+    {
+        double max_center_shift = DBL_MAX;
+        for( iter = 0;; )
+        {
+            swap(centers, old_centers);
+
+            if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) )
+            {
+                if( flags & KMEANS_PP_CENTERS )
+                    generateCentersPP(data, centers, K, rng, SPP_TRIALS);
+                else
+                {
+                    for( k = 0; k < K; k++ )
+                        generateRandomCenter(_box, centers.ptr<float>(k), rng);
+                }
+            }
+            else
+            {
+                if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) )
+                {
+                    for( i = 0; i < N; i++ )
+                        CV_Assert( (unsigned)labels[i] < (unsigned)K );
+                }
+
+                // compute centers
+                centers = Scalar(0);
+                for( k = 0; k < K; k++ )
+                    counters[k] = 0;
+
+                for( i = 0; i < N; i++ )
+                {
+                    sample = data.ptr<float>(i);
+                    k = labels[i];
+                    float* center = centers.ptr<float>(k);
+                    j=0;
+#if CV_ENABLE_UNROLLED
+                    for(; j <= dims - 4; j += 4 )
+                    {
+                        float t0 = center[j] + sample[j];
+                        float t1 = center[j+1] + sample[j+1];
+
+                        center[j] = t0;
+                        center[j+1] = t1;
+
+                        t0 = center[j+2] + sample[j+2];
+                        t1 = center[j+3] + sample[j+3];
+
+                        center[j+2] = t0;
+                        center[j+3] = t1;
+                    }
+#endif
+                    for( ; j < dims; j++ )
+                        center[j] += sample[j];
+                    counters[k]++;
+                }
+
+                if( iter > 0 )
+                    max_center_shift = 0;
+
+                for( k = 0; k < K; k++ )
+                {
+                    if( counters[k] != 0 )
+                        continue;
+
+                    // if some cluster appeared to be empty then:
+                    //   1. find the biggest cluster
+                    //   2. find the farthest from the center point in the biggest cluster
+                    //   3. exclude the farthest point from the biggest cluster and form a new 1-point cluster.
+                    int max_k = 0;
+                    for( int k1 = 1; k1 < K; k1++ )
+                    {
+                        if( counters[max_k] < counters[k1] )
+                            max_k = k1;
+                    }
+
+                    double max_dist = 0;
+                    int farthest_i = -1;
+                    float* new_center =  centers.ptr<float>(k);
+                    float* old_center =  centers.ptr<float>(max_k);
+                    float* _old_center = temp.ptr<float>(); // normalized
+                    float scale = 1.f/counters[max_k];
+                    for( j = 0; j < dims; j++ )
+                        _old_center[j] = old_center[j]*scale;
+
+                    for( i = 0; i < N; i++ )
+                    {
+                        if( labels[i] != max_k )
+                            continue;
+                        sample = data.ptr<float>(i);
+                        double dist = normL2Sqr_(sample, _old_center, dims);
+
+                        if( max_dist <= dist )
+                        {
+                            max_dist = dist;
+                            farthest_i = i;
+                        }
+                    }
+
+                    counters[max_k]--;
+                    counters[k]++;
+                    labels[farthest_i] = k;
+                    sample = data.ptr<float>(farthest_i);
+
+                    for( j = 0; j < dims; j++ )
+                    {
+                        old_center[j] -= sample[j];
+                        new_center[j] += sample[j];
+                    }
+                }
+
+                for( k = 0; k < K; k++ )
+                {
+                    float* center = centers.ptr<float>(k);
+                    CV_Assert( counters[k] != 0 );
+
+                    float scale = 1.f/counters[k];
+                    for( j = 0; j < dims; j++ )
+                        center[j] *= scale;
+
+                    if( iter > 0 )
+                    {
+                        double dist = 0;
+                        const float* old_center = old_centers.ptr<float>(k);
+                        for( j = 0; j < dims; j++ )
+                        {
+                            double t = center[j] - old_center[j];
+                            dist += t*t;
+                        }
+                        max_center_shift = std::max(max_center_shift, dist);
+                    }
+                }
+            }
+
+            if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon )
+                break;
+
+            // assign labels
+            oclMat _dists(1, N, CV_64F);
+
+            _bestLabels.upload(_labels);
+            _centers.upload(centers);
+            distanceToCenters(_dists, _bestLabels, _src, _centers);
+
+            Mat dists;
+            _dists.download(dists);
+            _bestLabels.download(_labels);
+
+            double* dist = dists.ptr<double>(0);
+            compactness = 0;
+            for( i = 0; i < N; i++ )
+            {
+                compactness += dist[i];
+            }
+        }
+
+        if( compactness < best_compactness )
+        {
+            best_compactness = compactness;
+        }
+    }
+
+    return best_compactness;
+}
diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp
index 4f23789b3..ddbd76db4 100644
--- a/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@@ -73,6 +73,7 @@ namespace cv
     }
 }
 
+
 ////////////////////////////////////////////////////////////////////////
 // convert_C3C4
 static void convert_C3C4(const cl_mem &src, oclMat &dst)
@@ -188,7 +189,7 @@ void cv::ocl::oclMat::upload(const Mat &m)
             temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
                                   (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
             openCLVerifyCall(err);
-            openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, 
+            openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step,
                            wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
         }
         else{
@@ -197,7 +198,7 @@ void cv::ocl::oclMat::upload(const Mat &m)
             openCLVerifyCall(err);
         }
 
-        
+
         convert_C3C4(temp, *this);
         openCLSafeCall(clReleaseMemObject(temp));
     }
@@ -215,6 +216,34 @@ void cv::ocl::oclMat::upload(const Mat &m)
     offset = ofs.y * step + ofs.x * elemSize();
 }
 
+cv::ocl::oclMat::operator cv::_InputArray()
+{
+    _InputArray newInputArray;
+    newInputArray.flags = cv::_InputArray::OCL_MAT;
+    newInputArray.obj   = reinterpret_cast<void *>(this);
+    return newInputArray;
+}
+
+cv::ocl::oclMat::operator cv::_OutputArray()
+{
+    _OutputArray newOutputArray;
+    newOutputArray.flags = cv::_InputArray::OCL_MAT;
+    newOutputArray.obj   = reinterpret_cast<void *>(this);
+    return newOutputArray;
+}
+
+cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src)
+{
+    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
+    return *reinterpret_cast<oclMat*>(src.obj);
+}
+
+cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
+{
+    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
+    return *reinterpret_cast<oclMat*>(src.obj);
+}
+
 void cv::ocl::oclMat::download(cv::Mat &m) const
 {
     CV_DbgAssert(!this->empty());
@@ -382,7 +411,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be
     if( rtype < 0 )
         rtype = type();
     else
-        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), oclchannels());
+        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
 
     //int scn = channels();
     int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);
diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp
index 27f8d26ec..c520cb8d4 100644
--- a/modules/ocl/src/mcwutil.cpp
+++ b/modules/ocl/src/mcwutil.cpp
@@ -80,7 +80,7 @@ namespace cv
         // provide additional methods for the user to interact with the command queue after a task is fired
         static void openCLExecuteKernel_2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
                                    size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
-                                   int depth, char *build_options, FLUSH_MODE finish_mode)
+                                   int depth, const char *build_options, FLUSH_MODE finish_mode)
         {
             //construct kernel name
             //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
@@ -133,7 +133,7 @@ namespace cv
         }
         void openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName,
                                   size_t globalThreads[3], size_t localThreads[3],
-                                  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
+                                  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, FLUSH_MODE finish_mode)
 
         {
             openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
@@ -149,7 +149,7 @@ namespace cv
             cl_image_format format;
             int err;
             int depth    = mat.depth();
-            int channels = mat.channels();
+            int channels = mat.oclchannels();
 
             switch(depth)
             {
@@ -246,7 +246,7 @@ namespace cv
 
         Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
         {
-            return Ptr<TextureCL>(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type()));
+            return makePtr<TextureCL>(bindTexture(mat), mat.rows, mat.cols, mat.type());
         }
         void releaseTexture(cl_mem& texture)
         {
diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp
index 66a1a3355..60d6ad7c5 100644
--- a/modules/ocl/src/moments.cpp
+++ b/modules/ocl/src/moments.cpp
@@ -16,7 +16,7 @@
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
-//    Sen Liu, sen@multicorewareinc.com
+//    Sen Liu, swjtuls1987@126.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -46,6 +46,8 @@
 #include "precomp.hpp"
 #include <iostream>
 
+#include "opencv2/imgproc/types_c.h"
+#include "opencv2/imgproc/imgproc_c.h"
 
 namespace cv
 {
@@ -53,7 +55,6 @@ namespace ocl
 {
 extern const char *moments;
 
-#if 0
 // The function calculates center of gravity and the central second order moments
 static void icvCompleteMomentState( CvMoments* moments )
 {
@@ -145,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
         cl_int dst_step = (cl_int)dst_a.step;
         args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step ));
 
-        openCLExecuteKernel(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1);
+        openCLExecuteKernel2(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1);
 
         cv::Mat dst(dst_a);
         a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
@@ -280,8 +281,8 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
         blocky = size.height/TILE_SIZE;
     else
         blocky = size.height/TILE_SIZE + 1;
-    cv::ocl::oclMat dst_m(blocky * 10, blockx, CV_64FC1);
-    cl_mem sum = openCLCreateBuffer(src.clCxt,CL_MEM_READ_WRITE,10*sizeof(double));
+    oclMat dst_m(blocky * 10, blockx, CV_64FC1);
+    oclMat sum(1, 10, CV_64FC1);
     int tile_width  = std::min(size.width,TILE_SIZE);
     int tile_height = std::min(size.height,TILE_SIZE);
     size_t localThreads[3]  = { tile_height, 1, 1};
@@ -291,19 +292,16 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&tileSize.width ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&tileSize.height ));
     args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.step ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&blocky ));
-    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&type ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&depth ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cn ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&coi ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&binary ));
     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
-    openCLExecuteKernel(dst_m.clCxt, &moments, "CvMoments", globalThreads, localThreads, args, -1, depth);
+    openCLExecuteKernel2(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, depth);
 
     size_t localThreadss[3]  = { 128, 1, 1};
     size_t globalThreadss[3] = { 128, 1, 1};
@@ -312,43 +310,34 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
     args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&tile_height ));
     args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&tile_width ));
     args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
-    args_sum.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum ));
+    args_sum.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data ));
     args_sum.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
     args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.step ));
-    openCLExecuteKernel(dst_m.clCxt, &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1);
-    double* dstsum = new double[10];
-    memset(dstsum,0,10*sizeof(double));
-    openCLReadBuffer(dst_m.clCxt,sum,(void *)dstsum,10*sizeof(double));
-    mom->m00 = dstsum[0];
-    mom->m10 = dstsum[1];
-    mom->m01 = dstsum[2];
-    mom->m20 = dstsum[3];
-    mom->m11 = dstsum[4];
-    mom->m02 = dstsum[5];
-    mom->m30 = dstsum[6];
-    mom->m21 = dstsum[7];
-    mom->m12 = dstsum[8];
-    mom->m03 = dstsum[9];
-    delete [] dstsum;
-    openCLSafeCall(clReleaseMemObject(sum));
+    openCLExecuteKernel2(Context::getContext(), &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1);
+
+    Mat dstsum(sum);
+    mom->m00 = dstsum.at<double>(0, 0);
+    mom->m10 = dstsum.at<double>(0, 1);
+    mom->m01 = dstsum.at<double>(0, 2);
+    mom->m20 = dstsum.at<double>(0, 3);
+    mom->m11 = dstsum.at<double>(0, 4);
+    mom->m02 = dstsum.at<double>(0, 5);
+    mom->m30 = dstsum.at<double>(0, 6);
+    mom->m21 = dstsum.at<double>(0, 7);
+    mom->m12 = dstsum.at<double>(0, 8);
+    mom->m03 = dstsum.at<double>(0, 9);
+
     icvCompleteMomentState( mom );
 }
 
-#endif
 
 Moments ocl_moments( InputArray _array, bool binaryImage )
 {
-#if 0
     CvMoments om;
     Mat arr = _array.getMat();
     CvMat c_array = arr;
     ocl_cvMoments(&c_array, &om, binaryImage);
     return om;
-#endif
-    CV_Error(Error::StsNotImplemented, "ocl_moments is not implemented");
-    (void)_array;
-    (void)binaryImage;
-    return Moments();
 }
 
 }
diff --git a/modules/ocl/src/opencl/arithm_add_scalar.cl b/modules/ocl/src/opencl/arithm_add_scalar.cl
index 0552fc8a7..cdb79f37e 100644
--- a/modules/ocl/src/opencl/arithm_add_scalar.cl
+++ b/modules/ocl/src/opencl/arithm_add_scalar.cl
@@ -67,7 +67,7 @@ __kernel void arithm_s_add_C1_D0 (__global   uchar *src1, int src1_step, int src
     if (x < cols && y < rows)
     {
         x = x << 2;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -110,7 +110,7 @@ __kernel void arithm_s_add_C1_D2 (__global   ushort *src1, int src1_step, int sr
     if (x < cols && y < rows)
     {
         x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -145,7 +145,7 @@ __kernel void arithm_s_add_C1_D3 (__global   short *src1, int src1_step, int src
     if (x < cols && y < rows)
     {
         x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -250,7 +250,7 @@ __kernel void arithm_s_add_C2_D0 (__global   uchar *src1, int src1_step, int src
     if (x < cols && y < rows)
     {
         x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
index 3dbd376ec..a0cb7dacb 100644
--- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
@@ -69,7 +69,7 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global   uchar *src1, int src1_ste
     if (x < cols && y < rows)
     {
         x = x << 2;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -122,7 +122,7 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global   ushort *src1, int src1_st
     if (x < cols && y < rows)
     {
         x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -160,7 +160,7 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global   short *src1, int src1_ste
     if (x < cols && y < rows)
     {
         x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -284,7 +284,7 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global   uchar *src1, int src1_ste
     if (x < cols && y < rows)
     {
         x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc.cl b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
index 761cd70f8..94cc14d25 100644
--- a/modules/ocl/src/opencl/arithm_minMaxLoc.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
@@ -377,4 +377,4 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
    }
 }
 
-*/
\ No newline at end of file
+*/
diff --git a/modules/ocl/src/opencl/arithm_mul.cl b/modules/ocl/src/opencl/arithm_mul.cl
index f85362943..bfbb5942e 100644
--- a/modules/ocl/src/opencl/arithm_mul.cl
+++ b/modules/ocl/src/opencl/arithm_mul.cl
@@ -300,4 +300,4 @@ __kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offs
 
         *((__global float *)((__global char *)dst + dst_index)) = tmp;
     }
-}
\ No newline at end of file
+}
diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl
new file mode 100644
index 000000000..2e269999a
--- /dev/null
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -0,0 +1,535 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if defined (CN1)
+#define T_FRAME uchar
+#define T_MEAN_VAR float
+#define CONVERT_TYPE convert_uchar_sat
+#define F_ZERO (0.0f)
+float cvt(uchar val)
+{
+    return val;
+}
+
+float sqr(float val)
+{
+    return val * val;
+}
+
+float sum(float val)
+{
+    return val;
+}
+
+float clamp1(float var, float learningRate, float diff, float minVar)
+{
+    return fmax(var + learningRate * (diff * diff - var), minVar);
+}
+#else
+#define T_FRAME uchar4
+#define T_MEAN_VAR float4
+#define CONVERT_TYPE convert_uchar4_sat
+#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+float4 cvt(const uchar4 val)
+{
+    float4 result;
+    result.x = val.x;
+    result.y = val.y;
+    result.z = val.z;
+    result.w = val.w;
+
+    return result;
+}
+
+float sqr(const float4 val)
+{
+    return val.x * val.x + val.y * val.y + val.z * val.z;
+}
+
+float sum(const float4 val)
+{
+    return (val.x + val.y + val.z);
+}
+
+float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
+{
+    float4 result;
+    result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
+    result.y = fmax(var.y + learningRate * (diff.y * diff.y - var.y), minVar);
+    result.z = fmax(var.z + learningRate * (diff.z * diff.z - var.z), minVar);
+    result.w = 0.0f;
+    return result;
+}
+#endif
+
+typedef struct
+{
+    float c_Tb;
+    float c_TB;
+    float c_Tg;
+    float c_varInit;
+    float c_varMin;
+    float c_varMax;
+    float c_tau;
+    uchar c_shadowVal;
+}con_srtuct_t;
+
+void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    float val = ptr[(k * rows + y) * ptr_step + x];
+    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
+    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
+}
+
+void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    float4 val = ptr[(k * rows + y) * ptr_step + x];
+    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
+    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
+}
+
+__kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
+    __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
+    int frame_row, int frame_col, int frame_step, int fgmask_step,
+    int weight_step, int mean_step, int var_step,
+    float varThreshold, float backgroundRatio, int fgmask_offset_x,
+    int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (x < frame_col && y < frame_row)
+    {
+        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
+
+        int kHit = -1;
+        int kForeground = -1;
+
+        for (int k = 0; k < (NMIXTURES); ++k)
+        {
+            if (weight[(k * frame_row + y) * weight_step + x] < 1.192092896e-07f)
+                break;
+
+            T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
+            T_MEAN_VAR _var = var[(k * frame_row + y) + var_step + x];
+
+            T_MEAN_VAR diff = pix - mu;
+
+            if (sqr(diff) < varThreshold * sum(_var))
+            {
+                kHit = k;
+                break;
+            }
+        }
+
+        if (kHit >= 0)
+        {
+            float wsum = 0.0f;
+            for (int k = 0; k < (NMIXTURES); ++k)
+            {
+                wsum += weight[(k * frame_row + y) * weight_step + x];
+
+                if (wsum > backgroundRatio)
+                {
+                    kForeground = k + 1;
+                    break;
+                }
+            }
+        }
+        if(kHit < 0 || kHit >= kForeground)
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (-1);
+        else
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (0);
+    }
+}
+
+__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgmask,
+    __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean,
+    __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step,
+    int weight_step, int sortKey_step, int mean_step, int var_step,
+    float varThreshold, float backgroundRatio, float learningRate, float minVar,
+    int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
+{
+    const float w0 = 0.05f;
+    const float sk0 = w0 / 30.0f;
+    const float var0 = 900.f;
+
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x >= frame_col || y >= frame_row) return;
+    float wsum = 0.0f;
+    int kHit = -1;
+    int kForeground = -1;
+    int k = 0;
+
+    T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
+
+    for (; k < (NMIXTURES); ++k)
+    {
+        float w = weight[(k * frame_row + y) * weight_step + x];
+        wsum += w;
+
+        if (w < 1.192092896e-07f)
+            break;
+
+        T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
+        T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
+
+        float sortKey_prev, weight_prev;
+        T_MEAN_VAR mean_prev, var_prev;
+        if (sqr(pix - mu) < varThreshold * sum(_var))
+        {
+            wsum -= w;
+            float dw = learningRate * (1.0f - w);
+
+            _var = clamp1(_var, learningRate, pix - mu, minVar);
+
+            sortKey_prev = w / sqr(sum(_var));
+            sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev;
+
+            weight_prev = w + dw;
+            weight[(k * frame_row + y) * weight_step + x] = weight_prev;
+
+            mean_prev = mu + learningRate * (pix - mu);
+            mean[(k * frame_row + y) * mean_step + x] = mean_prev;
+
+            var_prev = _var;
+            var[(k * frame_row + y) * var_step + x] = var_prev;
+        }
+
+        int k1 = k - 1;
+
+        if (k1 >= 0 && sqr(pix - mu) < varThreshold * sum(_var))
+        {
+            float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x];
+            float weight_next = weight[(k1 * frame_row + y) * weight_step + x];
+            T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x];
+            T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x];
+
+            for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
+            {
+                sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev;
+                sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next;
+
+                weight[(k1 * frame_row + y) * weight_step + x] = weight_prev;
+                weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next;
+
+                mean[(k1 * frame_row + y) * mean_step + x] = mean_prev;
+                mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next;
+
+                var[(k1 * frame_row + y) * var_step + x] = var_prev;
+                var[((k1 + 1) * frame_row + y) * var_step + x] = var_next;
+
+                sortKey_prev = sortKey_next;
+                sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f;
+
+                weight_prev = weight_next;
+                weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f;
+
+                mean_prev = mean_next;
+                mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO;
+
+                var_prev = var_next;
+                var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO;
+            }
+        }
+
+        kHit = k1 + 1;
+        break;
+    }
+
+    if (kHit < 0)
+    {
+        kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1);
+        wsum += w0 - weight[(k * frame_row + y) * weight_step + x];
+
+        weight[(k * frame_row + y) * weight_step + x] = w0;
+        mean[(k * frame_row + y) * mean_step + x] = pix;
+#if defined (CN1)
+        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0);
+#else
+        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0);
+#endif
+        sortKey[(k * frame_row + y) * sortKey_step + x] = sk0;
+    }
+    else
+    {
+        for( ; k < (NMIXTURES); k++)
+            wsum += weight[(k * frame_row + y) * weight_step + x];
+    }
+
+    float wscale = 1.0f / wsum;
+    wsum = 0;
+    for (k = 0; k < (NMIXTURES); ++k)
+    {
+        float w = weight[(k * frame_row + y) * weight_step + x];
+        w *= wscale;
+        wsum += w;
+
+        weight[(k * frame_row + y) * weight_step + x] = w;
+        sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale;
+
+        kForeground = select(kForeground, k + 1, wsum > backgroundRatio && kForeground < 0);
+    }
+    fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-(kHit >= kForeground));
+}
+
+
+__kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst,
+    int dst_row, int dst_col, int weight_step, int mean_step, int dst_step,
+    float backgroundRatio)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < dst_col && y < dst_row)
+    {
+        T_MEAN_VAR meanVal = (T_MEAN_VAR)F_ZERO;
+        float totalWeight = 0.0f;
+
+        for (int mode = 0; mode < (NMIXTURES); ++mode)
+        {
+            float _weight = weight[(mode * dst_row + y) * weight_step + x];
+
+            T_MEAN_VAR _mean = mean[(mode * dst_row + y) * mean_step + x];
+            meanVal = meanVal + _weight * _mean;
+
+            totalWeight += _weight;
+
+            if(totalWeight > backgroundRatio)
+                break;
+        }
+        meanVal = meanVal * (1.f / totalWeight);
+        dst[y * dst_step + x] = CONVERT_TYPE(meanVal);
+    }
+}
+
+__kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __global float* weight, __global T_MEAN_VAR * mean,
+        __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step,
+        int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune,
+        int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < frame_col && y < frame_row)
+    {
+        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + x + frame_offset_x]);
+
+        bool background = false; // true - the pixel classified as background
+
+        bool fitsPDF = false; //if it remains zero a new GMM mode will be added
+
+        int nmodes = modesUsed[y * modesUsed_step + x];
+        int nNewModes = nmodes; //current number of modes in GMM
+
+        float totalWeight = 0.0f;
+
+        for (int mode = 0; mode < nmodes; ++mode)
+        {
+            float _weight = alpha1 * weight[(mode * frame_row + y) * weight_step + x] + prune;
+
+            if (!fitsPDF)
+            {
+                float var = variance[(mode * frame_row + y) * var_step + x];
+
+                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
+
+                T_MEAN_VAR diff = _mean - pix;
+                float dist2 = sqr(diff);
+
+                if (totalWeight < constants -> c_TB && dist2 < constants -> c_Tb * var)
+                    background = true;
+
+                if (dist2 < constants -> c_Tg * var)
+                {
+                    fitsPDF = true;
+                    _weight += alphaT;
+                    float k = alphaT / _weight;
+                    mean[(mode * frame_row + y) * mean_step + x] = _mean - k * diff;
+                    float varnew = var + k * (dist2 - var);
+                    varnew = fmax(varnew, constants -> c_varMin);
+                    varnew = fmin(varnew, constants -> c_varMax);
+
+                    variance[(mode * frame_row + y) * var_step + x] = varnew;
+                    for (int i = mode; i > 0; --i)
+                    {
+                        if (_weight < weight[((i - 1) * frame_row + y) * weight_step + x])
+                            break;
+                        swap(weight, x, y, i - 1, frame_row, weight_step);
+                        swap(variance, x, y, i - 1, frame_row, var_step);
+                        #if defined (CN1)
+                        swap(mean, x, y, i - 1, frame_row, mean_step);
+                        #else
+                        swap4(mean, x, y, i - 1, frame_row, mean_step);
+                        #endif
+                    }
+                }
+            } // !fitsPDF
+
+            if (_weight < -prune)
+            {
+                _weight = 0.0;
+                nmodes--;
+            }
+
+            weight[(mode * frame_row + y) * weight_step + x] = _weight; //update weight by the calculated value
+            totalWeight += _weight;
+        }
+
+        totalWeight = 1.f / totalWeight;
+        for (int mode = 0; mode < nmodes; ++mode)
+            weight[(mode * frame_row + y) * weight_step + x] *= totalWeight;
+
+        nmodes = nNewModes;
+
+        if (!fitsPDF)
+        {
+            int mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++;
+
+            if (nmodes == 1)
+                weight[(mode * frame_row + y) * weight_step + x] = 1.f;
+            else
+            {
+                weight[(mode * frame_row + y) * weight_step + x] = alphaT;
+
+                for (int i = 0; i < nmodes - 1; ++i)
+                    weight[(i * frame_row + y) * weight_step + x] *= alpha1;
+            }
+
+            mean[(mode * frame_row + y) * mean_step + x] = pix;
+            variance[(mode * frame_row + y) * var_step + x] = constants -> c_varInit;
+
+            for (int i = nmodes - 1; i > 0; --i)
+            {
+                // check one up
+                if (alphaT < weight[((i - 1) * frame_row + y) * weight_step + x])
+                    break;
+
+                swap(weight, x, y, i - 1, frame_row, weight_step);
+                swap(variance, x, y, i - 1, frame_row, var_step);
+                #if defined (CN1)
+                swap(mean, x, y, i - 1, frame_row, mean_step);
+                #else
+                swap4(mean, x, y, i - 1, frame_row, mean_step);
+                #endif
+            }
+        }
+
+        modesUsed[y * modesUsed_step + x] = nmodes;
+
+        bool isShadow = false;
+        if (detectShadows_flag && !background)
+        {
+            float tWeight = 0.0f;
+
+            for (int mode = 0; mode < nmodes; ++mode)
+            {
+                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
+
+                T_MEAN_VAR pix_mean = pix * _mean;
+
+                float numerator = sum(pix_mean);
+                float denominator = sqr(_mean);
+
+                if (denominator == 0)
+                    break;
+
+                if (numerator <= denominator && numerator >= constants -> c_tau * denominator)
+                {
+                    float a = numerator / denominator;
+
+                    T_MEAN_VAR dD = a * _mean - pix;
+
+                    if (sqr(dD) < constants -> c_Tb * variance[(mode * frame_row + y) * var_step + x] * a * a)
+                    {
+                        isShadow = true;
+                        break;
+                    }
+                }
+
+                tWeight += weight[(mode * frame_row + y) * weight_step + x];
+                if (tWeight > constants -> c_TB)
+                    break;
+            }
+        }
+
+        fgmask[(y + fgmask_offset_y) * fgmask_step + x + fgmask_offset_x] = background ? 0 : isShadow ? constants -> c_shadowVal : 255;
+    }
+}
+
+__kernel void getBackgroundImage2_kernel(__global int* modesUsed, __global float* weight, __global T_MEAN_VAR* mean,
+    __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step,
+    int mean_step, int dst_step, int dst_x, int dst_y)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < modesUsed_col && y < modesUsed_row)
+    {
+        int nmodes = modesUsed[y * modesUsed_step + x];
+
+        T_MEAN_VAR meanVal = (T_MEAN_VAR)F_ZERO;
+
+        float totalWeight = 0.0f;
+
+        for (int mode = 0; mode < nmodes; ++mode)
+        {
+            float _weight = weight[(mode * modesUsed_row + y) * weight_step + x];
+
+            T_MEAN_VAR _mean = mean[(mode * modesUsed_row + y) * mean_step + x];
+            meanVal = meanVal + _weight * _mean;
+
+            totalWeight += _weight;
+
+            if(totalWeight > c_TB)
+                break;
+        }
+
+        meanVal = meanVal * (1.f / totalWeight);
+        dst[(y + dst_y) * dst_step + x + dst_x] = CONVERT_TYPE(meanVal);
+    }
+}
diff --git a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
new file mode 100644
index 000000000..a8e0fd17e
--- /dev/null
+++ b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
@@ -0,0 +1,424 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Harris Gasparakis, harris.gasparakis@amd.com
+//    Xiaopeng Fu, fuxiaopeng2222@163.com
+//    Yao Wang, bitwangyaoyao@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifdef BORDER_REPLICATE
+//BORDER_REPLICATE:     aaaaaa|abcdefgh|hhhhhhh
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (l_edge)   : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (r_edge)-1 : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (t_edge)   :(i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (b_edge)-1 :(addr))
+#endif
+
+#ifdef BORDER_REFLECT
+//BORDER_REFLECT:       fedcba|abcdefgh|hgfedcb
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)-1               : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)-1 : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
+#endif
+
+#ifdef BORDER_REFLECT_101
+//BORDER_REFLECT_101:   gfedcb|abcdefgh|gfedcba
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)                 : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)                 : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
+#endif
+
+//blur function does not support BORDER_WRAP
+#ifdef BORDER_WRAP
+//BORDER_WRAP:          cdefgh|abcdefgh|abcdefg
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (i)+(r_edge) : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (i)+(b_edge) : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
+#endif
+
+__kernel void
+edgeEnhancingFilter_C4_D0(
+    __global const uchar4 * restrict src,
+    __global uchar4 *dst,
+    float alpha,
+    int src_offset,
+    int src_whole_rows,
+    int src_whole_cols,
+    int src_step,
+    int dst_offset,
+    int dst_rows,
+    int dst_cols,
+    int dst_step,
+    __global const float* lut,
+    int lut_step)
+{
+    int col = get_local_id(0);
+    const int gX = get_group_id(0);
+    const int gY = get_group_id(1);
+
+    int src_x_off = (src_offset % src_step) >> 2;
+    int src_y_off = src_offset / src_step;
+    int dst_x_off = (dst_offset % dst_step) >> 2;
+    int dst_y_off = dst_offset / dst_step;
+
+    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
+    int startY = (gY * (1+EXTRA)) - anY + src_y_off;
+
+    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
+    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
+
+    int posX = dst_startX - dst_x_off + col;
+    int posY = (gY * (1+EXTRA))	;
+
+    __local uchar4 data[ksY+EXTRA][THREADS];
+
+    float4 tmp_sum[1+EXTRA];
+    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
+    {
+        tmp_sum[tmpint] = (float4)(0,0,0,0);
+    }
+
+#ifdef BORDER_CONSTANT
+    bool con;
+    uchar4 ss;
+    for(int j = 0;	j < ksY+EXTRA; j++)
+    {
+        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
+
+        int cur_col = clamp(startX + col, 0, src_whole_cols);
+        if(con)
+        {
+            ss = src[(startY+j)*(src_step>>2) + cur_col];
+        }
+
+        data[j][col] = con ? ss : (uchar4)0;
+    }
+#else
+    for(int j= 0; j < ksY+EXTRA; j++)
+    {
+        int selected_row;
+        int selected_col;
+        selected_row = ADDR_H(startY+j, 0, src_whole_rows);
+        selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
+
+        selected_col = ADDR_L(startX+col, 0, src_whole_cols);
+        selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
+
+        data[j][col] = src[selected_row * (src_step>>2) + selected_col];
+    }
+#endif
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float4 var[1+EXTRA];
+
+#if VAR_PER_CHANNEL
+    float4 weight;
+    float4 totalWeight = (float4)(0,0,0,0);
+#else
+    float weight;
+    float totalWeight = 0;
+#endif
+
+    int4 currValCenter;
+    int4 currWRTCenter;
+
+    int4 sumVal = 0;
+    int4 sumValSqr = 0;
+
+    if(col < (THREADS-(ksX-1)))
+    {
+        int4 currVal;
+
+        int howManyAll = (2*anX+1)*(ksY);
+
+        //find variance of all data
+        int startLMj;
+        int endLMj ;
+#if CALCVAR
+        // Top row: don't sum the very last element
+        for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
+        {
+            startLMj = extraCnt;
+            endLMj =  ksY+extraCnt-1;
+            sumVal =0;
+            sumValSqr=0;
+            for(int j = startLMj; j < endLMj; j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+                    currVal	= convert_int4(data[j][col+anX+i])	;
+
+                    sumVal += currVal;
+                    sumValSqr += mul24(currVal, currVal);
+                }
+            }
+            var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+#else
+        var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
+#endif
+        }
+
+        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
+        {
+
+            // top row: include the very first element, even on first time
+            startLMj = extraCnt;
+            // go all the way, unless this is the last local mem chunk,
+            // then stay within limits - 1
+            endLMj =  extraCnt + ksY;
+
+            // Top row: don't sum the very last element
+            currValCenter = convert_int4( data[ (startLMj + endLMj)/2][col+anX] );
+
+            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+#if FIXED_WEIGHT
+#if VAR_PER_CHANNEL
+                    weight.x = 1.0f;
+                    weight.y = 1.0f;
+                    weight.z = 1.0f;
+                    weight.w = 1.0f;
+#else
+                    weight = 1.0f;
+#endif
+#else
+                    currVal	= convert_int4(data[j][col+anX+i])	;
+                    currWRTCenter = currVal-currValCenter;
+
+#if VAR_PER_CHANNEL
+                    weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) * (float4)(lut[lut_j*lut_step+anX+i]);
+                    //weight.x = var[extraCnt].x / ( var[extraCnt].x + (float) mul24(currWRTCenter.x , currWRTCenter.x) ) ;
+                    //weight.y = var[extraCnt].y / ( var[extraCnt].y + (float) mul24(currWRTCenter.y , currWRTCenter.y) ) ;
+                    //weight.z = var[extraCnt].z / ( var[extraCnt].z + (float) mul24(currWRTCenter.z , currWRTCenter.z) ) ;
+                    //weight.w = 0;
+#else
+                    weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +  mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
+#endif
+#endif
+                    tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
+                    totalWeight += weight;
+                }
+            }
+
+            tmp_sum[extraCnt] /= totalWeight;
+
+            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
+            {
+                dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
+            }
+
+#if VAR_PER_CHANNEL
+            totalWeight = (float4)(0,0,0,0);
+#else
+            totalWeight = 0;
+#endif
+        }
+    }
+}
+
+
+__kernel void
+edgeEnhancingFilter_C1_D0(
+    __global const uchar * restrict src,
+    __global uchar *dst,
+    float alpha,
+    int src_offset,
+    int src_whole_rows,
+    int src_whole_cols,
+    int src_step,
+    int dst_offset,
+    int dst_rows,
+    int dst_cols,
+    int dst_step,
+    __global const float * lut,
+    int lut_step)
+{
+    int col = get_local_id(0);
+    const int gX = get_group_id(0);
+    const int gY = get_group_id(1);
+
+    int src_x_off = (src_offset % src_step);
+    int src_y_off = src_offset / src_step;
+    int dst_x_off = (dst_offset % dst_step);
+    int dst_y_off = dst_offset / dst_step;
+
+    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
+    int startY = (gY * (1+EXTRA)) - anY + src_y_off;
+
+    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
+    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
+
+    int posX = dst_startX - dst_x_off + col;
+    int posY = (gY * (1+EXTRA))	;
+
+    __local uchar data[ksY+EXTRA][THREADS];
+
+    float tmp_sum[1+EXTRA];
+    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
+    {
+        tmp_sum[tmpint] = (float)(0);
+    }
+
+#ifdef BORDER_CONSTANT
+    bool con;
+    uchar ss;
+    for(int j = 0;	j < ksY+EXTRA; j++)
+    {
+        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
+
+        int cur_col = clamp(startX + col, 0, src_whole_cols);
+        if(con)
+        {
+            ss = src[(startY+j)*(src_step) + cur_col];
+        }
+
+        data[j][col] = con ? ss : 0;
+    }
+#else
+    for(int j= 0; j < ksY+EXTRA; j++)
+    {
+        int selected_row;
+        int selected_col;
+        selected_row = ADDR_H(startY+j, 0, src_whole_rows);
+        selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
+
+        selected_col = ADDR_L(startX+col, 0, src_whole_cols);
+        selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
+
+        data[j][col] = src[selected_row * (src_step) + selected_col];
+    }
+#endif
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float var[1+EXTRA];
+
+    float weight;
+    float totalWeight = 0;
+
+    int currValCenter;
+    int currWRTCenter;
+
+    int sumVal = 0;
+    int sumValSqr = 0;
+
+    if(col < (THREADS-(ksX-1)))
+    {
+        int currVal;
+
+        int howManyAll = (2*anX+1)*(ksY);
+
+        //find variance of all data
+        int startLMj;
+        int endLMj;
+#if CALCVAR
+        // Top row: don't sum the very last element
+        for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
+        {
+            startLMj = extraCnt;
+            endLMj =  ksY+extraCnt-1;
+            sumVal = 0;
+            sumValSqr =0;
+            for(int j = startLMj; j < endLMj; j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+                    currVal	= (uint)(data[j][col+anX+i])	;
+
+                    sumVal += currVal;
+                    sumValSqr += mul24(currVal, currVal);
+                }
+            }
+            var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+#else
+        var[extraCnt] = (float)(900.0);
+#endif
+        }
+
+        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
+        {
+
+            // top row: include the very first element, even on first time
+            startLMj = extraCnt;
+            // go all the way, unless this is the last local mem chunk,
+            // then stay within limits - 1
+            endLMj =  extraCnt + ksY;
+
+            // Top row: don't sum the very last element
+            currValCenter = (int)( data[ (startLMj + endLMj)/2][col+anX] );
+
+            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+#if FIXED_WEIGHT
+                    weight = 1.0f;
+#else
+                    currVal	= (int)(data[j][col+anX+i])	;
+                    currWRTCenter = currVal-currValCenter;
+
+                    weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
+#endif
+                    tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
+                    totalWeight += weight;
+                }
+            }
+
+            tmp_sum[extraCnt] /= totalWeight;
+
+
+            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
+            {
+                dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
+            }
+
+            totalWeight = 0;
+        }
+    }
+}
diff --git a/modules/ocl/src/opencl/filtering_boxFilter.cl b/modules/ocl/src/opencl/filtering_boxFilter.cl
index 512e32997..d163ebe76 100644
--- a/modules/ocl/src/opencl/filtering_boxFilter.cl
+++ b/modules/ocl/src/opencl/filtering_boxFilter.cl
@@ -231,7 +231,7 @@ __kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global ucha
     {
         tmp_sum += (data[i]);
     }
-    
+
     int index = dst_startY * dst_step + dst_startX + (col-anX)*4;
 
     temp[0][col] = tmp_sum + (data[0]);
diff --git a/modules/ocl/src/opencl/filtering_laplacian.cl b/modules/ocl/src/opencl/filtering_laplacian.cl
index 8535eb1a5..f7430d533 100644
--- a/modules/ocl/src/opencl/filtering_laplacian.cl
+++ b/modules/ocl/src/opencl/filtering_laplacian.cl
@@ -15,7 +15,9 @@
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
+//    Pang Erping, erping@multicorewareinc.com
 //    Jia Haipeng, jiahaipeng95@gmail.com
+//    Peng Xiao, pengxiao@outlook.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -42,49 +44,195 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#define BORDER_REFLECT_101
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////Macro for border type////////////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////////////////////////
 #ifdef BORDER_REPLICATE
+
 //BORDER_REPLICATE:     aaaaaa|abcdefgh|hhhhhhh
 #define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (l_edge)   : (i))
 #define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (r_edge)-1 : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (t_edge)   :(i))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (t_edge)   : (i))
 #define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (b_edge)-1 :(addr))
 #endif
 
 #ifdef BORDER_REFLECT
-//BORDER_REFLECT:       fedcba|abcdefgh|hgfedcb
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)-1               : (i))
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? ((l_edge)<<1)-(i)-1                 : (i))
 #define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)-1 : (i))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? ((t_edge)<<1)-(i)-1                 : (i))
 #define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
 #endif
 
 #ifdef BORDER_REFLECT_101
 //BORDER_REFLECT_101:   gfedcb|abcdefgh|gfedcba
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)                 : (i))
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? ((l_edge)<<1)-(i)                 : (i))
 #define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)                 : (i))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? ((t_edge)<<1)-(i)                 : (i))
 #define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
 #endif
 
-#ifdef BORDER_WRAP
-//BORDER_WRAP:          cdefgh|abcdefgh|abcdefg
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (i)+(r_edge) : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (i)+(b_edge) : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
+#ifdef IMG_C_1_0
+#define T_IMG   uchar
+#define T_IMGx4 uchar4
+#define T_IMG_C1 uchar
+#define CONVERT_TYPE   convert_uchar_sat
+#define CONVERT_TYPEx4 convert_uchar4_sat
 #endif
+#ifdef IMG_C_4_0
+#define T_IMG   uchar4
+#define T_IMGx4 uchar16
+#define T_IMG_C1 uchar
+#define CONVERT_TYPE   convert_uchar4_sat
+#define CONVERT_TYPEx4 convert_uchar16_sat
+#endif
+#ifdef IMG_C_1_5
+#define T_IMG   float
+#define T_IMGx4 float4
+#define T_IMG_C1 float
+#define CONVERT_TYPE   convert_float
+#define CONVERT_TYPEx4 convert_float4
+#endif
+#ifdef IMG_C_4_5
+#define T_IMG   float4
+#define T_IMGx4 float16
+#define T_IMG_C1 float
+#define CONVERT_TYPE   convert_float4
+#define CONVERT_TYPEx4 convert_float16
+#endif
+
+#ifndef CN
+#define CN 1
+#endif
+
+#if CN == 1
+#define T_SUM   float
+#define T_SUMx4 float4
+#define CONVERT_TYPE_SUM   convert_float
+#define CONVERT_TYPE_SUMx4 convert_float4
+#define SUM_ZERO   (0.0f)
+#define SUM_ZEROx4 (0.0f, 0.0f, 0.0f, 0.0f)
+#define VLOAD4 vload4
+#define SX x
+#define SY y
+#define SZ z
+#define SW w
+#elif CN == 4
+#define T_SUM float4
+#define T_SUMx4 float16
+#define CONVERT_TYPE_SUM   convert_float4
+#define CONVERT_TYPE_SUMx4 convert_float16
+#define SUM_ZERO   (0.0f, 0.0f, 0.0f, 0.0f)
+#define SUM_ZEROx4 (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)
+#define VLOAD4 vload16
+#define SX s0123
+#define SY s4567
+#define SZ s89ab
+#define SW scdef
+#endif
+
+#ifndef FILTER_SIZE
+#define FILTER_SIZE 3
+#endif
+
+#define LOCAL_GROUP_SIZE 16
+
+#define LOCAL_WIDTH  ((FILTER_SIZE/2)*2 + LOCAL_GROUP_SIZE)
+#define LOCAL_HEIGHT ((FILTER_SIZE/2)*2 + LOCAL_GROUP_SIZE)
+
+#define FILTER_RADIUS (FILTER_SIZE >> 1)
+
+__kernel void filter2D(
+    __global T_IMG *src,
+    __global T_IMG *dst,
+    int src_step,
+    int dst_step,
+    __constant float *mat_kernel,
+    __local T_IMG *local_data,
+    int wholerows,
+    int wholecols,
+    int src_offset_x,
+    int src_offset_y,
+    int dst_offset_x,
+    int dst_offset_y,
+    int cols,
+    int rows,
+    int operate_cols
+)
+{
+    int groupStartCol = get_group_id(0) * get_local_size(0);
+    int groupStartRow = get_group_id(1) * get_local_size(1);
+
+    int localCol = get_local_id(0);
+    int localRow = get_local_id(1);
+    int globalCol = groupStartCol + localCol;
+    int globalRow = groupStartRow + localRow;
+    const int src_offset = mad24(src_offset_y, src_step, src_offset_x);
+    const int dst_offset = mad24(dst_offset_y, dst_step, dst_offset_x);
+#ifdef BORDER_CONSTANT
+    for(int i = localRow; i < LOCAL_HEIGHT; i += get_local_size(1))
+    {
+        int curRow = groupStartRow + i;
+        for(int j = localCol; j < LOCAL_WIDTH; j += get_local_size(0))
+        {
+            int curCol = groupStartCol + j;
+            if(curRow < FILTER_RADIUS - src_offset_y || (curRow - FILTER_RADIUS) >= wholerows - src_offset_y||
+                curCol < FILTER_RADIUS - src_offset_x || (curCol - FILTER_RADIUS) >= wholecols - src_offset_x)
+            {
+                local_data[(i) * LOCAL_WIDTH + j] = 0;
+            }
+            else
+            {
+                local_data[(i) * LOCAL_WIDTH + j] = src[(curRow - FILTER_RADIUS) * src_step + curCol - FILTER_RADIUS + src_offset];
+            }
+        }
+    }
+#else
+    for(int i = localRow; i < LOCAL_HEIGHT; i += get_local_size(1))
+    {
+        int curRow = groupStartRow + i;
+
+        curRow = ADDR_H(curRow, FILTER_RADIUS - src_offset_y, wholerows - src_offset_y);
+
+        curRow = ADDR_B(curRow - FILTER_RADIUS, wholerows - src_offset_y, curRow - FILTER_RADIUS);
+
+        for(int j = localCol; j < LOCAL_WIDTH; j += get_local_size(0))
+        {
+            int curCol = groupStartCol + j;
+            curCol = ADDR_L(curCol, FILTER_RADIUS - src_offset_x, wholecols - src_offset_x);
+            curCol = ADDR_R(curCol - FILTER_RADIUS, wholecols - src_offset_x, curCol - FILTER_RADIUS);
+            if(curRow < wholerows  && curCol < wholecols)
+            {
+                local_data[(i) * LOCAL_WIDTH + j] = src[(curRow) * src_step + curCol + src_offset];
+            }
+        }
+    }
+#endif
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(globalRow < rows && globalCol < cols)
+    {
+        T_SUM sum = (T_SUM)SUM_ZERO;
+        int filterIdx = 0;
+        for(int i = 0; i < FILTER_SIZE; i++)
+        {
+            int offset = (i + localRow) * LOCAL_WIDTH;
+
+            for(int j = 0; j < FILTER_SIZE; j++)
+            {
+                sum += CONVERT_TYPE_SUM(local_data[offset + j + localCol]) * mat_kernel[filterIdx++];
+            }
+        }
+        dst[(globalRow)*dst_step + (globalCol) + dst_offset] = CONVERT_TYPE(sum);
+    }
+}
+
+/// following is specific for 3x3 kernels
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////Macro for define elements number per thread/////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-//#define ANCHOR                  3
-//#define ANX                     1
-//#define ANY                     1
+#define ANX                     1
+#define ANY                     1
 
 #define ROWS_PER_GROUP          4
 #define ROWS_PER_GROUP_BITS     2
@@ -101,10 +249,23 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////////////8uC1////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x, int src_offset_y,
-                             __global uchar *dst, int dst_step, int dst_offset_x, int dst_offset_y,
-                             __constant int *mat_kernel __attribute__((max_constant_size (16384))),
-                             int cols,int rows, int operate_cols, int wholecols, int wholerows)
+__kernel void filter2D_3x3(
+    __global T_IMG *src,
+    __global T_IMG *dst,
+    int src_step,
+    int dst_step,
+    __constant float *mat_kernel,
+    __local T_IMG *local_data,
+    int wholerows,
+    int wholecols,
+    int src_offset_x,
+    int src_offset_y,
+    int dst_offset_x,
+    int dst_offset_y,
+    int cols,
+    int rows,
+    int operate_cols
+)
 {
     int gX = get_global_id(0);
     int gY = get_global_id(1);
@@ -118,7 +279,6 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x
     int cols_start_index_group = src_offset_x - dst_align + groupX_size * groupX_id - ANX;
     int rows_start_index       = src_offset_y + (gY << ROWS_PER_GROUP_BITS) - ANY;
 
-    __local uchar local_data[LOCAL_MEM_STEP * ROWS_FETCH];
     if((gY << 2) < rows)
     {
         for(int i = 0; i < ROWS_FETCH; ++i)
@@ -129,19 +289,19 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x
                 int selected_row  = rows_start_index + i;
                 int selected_cols = cols_start_index_group + lX;
 
-                uchar data = *(src + selected_row * src_step + selected_cols);
-                int con = selected_row >=0 && selected_row < wholerows && selected_cols >=0 && selected_cols < wholecols;
+                T_IMG data = src[mad24(selected_row, src_step, selected_cols)];
+                int con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
                 data = con ? data : 0;
-                local_data[i * LOCAL_MEM_STEP + lX ] =data;
+                local_data[mad24(i, LOCAL_MEM_STEP, lX)] = data;
 
                 if(lX < (ANX << 1))
                 {
                     selected_cols = cols_start_index_group + lX + groupX_size;
 
-                    data = *(src + selected_row * src_step + selected_cols);
-                    con = selected_row >=0 && selected_row < wholerows && selected_cols >=0 && selected_cols < wholecols;
+                    data  = src[mad24(selected_row, src_step, selected_cols)];
+                    con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
                     data = con ? data : 0;
-                    local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
+                    local_data[mad24(i, LOCAL_MEM_STEP, lX) + groupX_size] = data;
                 }
 #else
                 int selected_row = ADDR_H(rows_start_index + i,  0, wholerows);
@@ -150,17 +310,17 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x
                 int selected_cols = ADDR_L(cols_start_index_group + lX, 0, wholecols);
                 selected_cols     = ADDR_R(cols_start_index_group + lX, wholecols, selected_cols);
 
-                uchar data = *(src + selected_row * src_step + selected_cols);
+                T_IMG data = src[mad24(selected_row, src_step, selected_cols)];
 
-                local_data[i * LOCAL_MEM_STEP + lX ] =data;
+                local_data[mad24(i, LOCAL_MEM_STEP, lX)] = data;
 
                 if(lX < (ANX << 1))
                 {
                     selected_cols = cols_start_index_group + lX + groupX_size;
                     selected_cols = ADDR_R(selected_cols, wholecols, selected_cols);
 
-                    data = *(src + selected_row * src_step + selected_cols);
-                    local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
+                    data = src[mad24(selected_row, src_step, selected_cols)];
+                    local_data[mad24(i, LOCAL_MEM_STEP, lX) + groupX_size] = data;
                 }
 #endif
             }
@@ -177,355 +337,40 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x
 
         int dst_rows_end   = dst_offset_y + rows;
         int dst_rows_index = dst_offset_y + (gY << ROWS_PER_GROUP_BITS) + (lX >> THREADS_PER_ROW_BIT);
+        dst = dst + mad24(dst_rows_index, dst_step, dst_cols_index);
 
-        uchar4 dst_data = *((__global uchar4 *)(dst + dst_rows_index * dst_step + dst_cols_index));
+        T_IMGx4 dst_data = *(__global T_IMGx4 *)dst;
 
-        int4 sum = (int4)(0);
-        uchar4 data;
+        T_SUMx4 sum = (T_SUMx4)SUM_ZEROx4;
+        T_IMGx4 data;
 
-        for(int i = 0; i < ANCHOR; i++)
+        for(int i = 0; i < FILTER_SIZE; i++)
         {
 #pragma unroll
-            for(int j = 0; j < ANCHOR; j++)
+            for(int j = 0; j < FILTER_SIZE; j++)
             {
                 if(dst_rows_index < dst_rows_end)
                 {
                     int local_row = (lX >> THREADS_PER_ROW_BIT) + i;
                     int local_cols = ((lX % THREADS_PER_ROW) << ELEMENTS_PER_THREAD_BIT) + j;
 
-                    data = vload4(0, local_data+local_row * LOCAL_MEM_STEP + local_cols);
-                    sum = sum + (mat_kernel[i * ANCHOR + j] * convert_int4_sat(data));
+                    data = VLOAD4(0, (__local T_IMG_C1 *)(local_data + local_row * LOCAL_MEM_STEP + local_cols));
+                    sum = sum + (mat_kernel[i * FILTER_SIZE + j] * CONVERT_TYPE_SUMx4(data));
                 }
             }
         }
-
         if(dst_rows_index < dst_rows_end)
         {
-            sum.x = ((dst_cols_index + 0 >= dst_cols_start) && (dst_cols_index + 0 < dst_cols_end)) ? sum.x : dst_data.x;
-            sum.y = ((dst_cols_index + 1 >= dst_cols_start) && (dst_cols_index + 1 < dst_cols_end)) ? sum.y : dst_data.y;
-            sum.z = ((dst_cols_index + 2 >= dst_cols_start) && (dst_cols_index + 2 < dst_cols_end)) ? sum.z : dst_data.z;
-            sum.w = ((dst_cols_index + 3 >= dst_cols_start) && (dst_cols_index + 3 < dst_cols_end)) ? sum.w : dst_data.w;
-            *((__global uchar4 *)(dst + dst_rows_index * dst_step + dst_cols_index)) = convert_uchar4_sat(sum);
+            T_IMGx4 tmp_dst = CONVERT_TYPEx4(sum);
+            tmp_dst.SX = ((dst_cols_index + 0 >= dst_cols_start) && (dst_cols_index + 0 < dst_cols_end)) ?
+                         tmp_dst.SX : dst_data.SX;
+            tmp_dst.SY = ((dst_cols_index + 1 >= dst_cols_start) && (dst_cols_index + 1 < dst_cols_end)) ?
+                         tmp_dst.SY : dst_data.SY;
+            tmp_dst.SZ = ((dst_cols_index + 2 >= dst_cols_start) && (dst_cols_index + 2 < dst_cols_end)) ?
+                         tmp_dst.SZ : dst_data.SZ;
+            tmp_dst.SW = ((dst_cols_index + 3 >= dst_cols_start) && (dst_cols_index + 3 < dst_cols_end)) ?
+                         tmp_dst.SW : dst_data.SW;
+            *(__global T_IMGx4 *)dst = tmp_dst;
         }
     }
 }
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////32FC1////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x, int src_offset_y,
-                             __global float *dst, int dst_step, int dst_offset_x, int dst_offset_y,
-                             __constant int *mat_kernel __attribute__((max_constant_size (16384))),
-                             int cols,int rows, int operate_cols, int wholecols, int wholerows)
-{
-    int gX = get_global_id(0);
-    int gY = get_global_id(1);
-
-    int lX = get_local_id(0);
-
-    int groupX_size = get_local_size(0);
-    int groupX_id   = get_group_id(0);
-
-#define dst_align (dst_offset_x & 3)
-    int cols_start_index_group = src_offset_x - dst_align + groupX_size * groupX_id - ANX;
-    int rows_start_index       = src_offset_y + (gY << ROWS_PER_GROUP_BITS) - ANY;
-
-    __local float local_data[LOCAL_MEM_STEP * ROWS_FETCH];
-    if(((gY << 2) < rows))
-    {
-        for(int i = 0; i < ROWS_FETCH; ++i)
-        {
-            if((rows_start_index - src_offset_y) + i < rows + ANY)
-            {
-#ifdef BORDER_CONSTANT
-                int selected_row  = rows_start_index + i;
-                int selected_cols = cols_start_index_group + lX;
-
-                float data = *((__global float *)((__global char *)src + selected_row * src_step + (selected_cols << 2)));
-                int con = selected_row >=0 && selected_row < wholerows && selected_cols >=0 && selected_cols < wholecols;
-                data = con ? data : 0;
-                local_data[i * LOCAL_MEM_STEP + lX ] =data;
-
-                if(lX < (ANX << 1))
-                {
-                    selected_cols = cols_start_index_group + lX + groupX_size;
-
-                    data = *((__global float *)((__global char *)src + selected_row * src_step + (selected_cols << 2)));
-                    con = selected_row >=0 && selected_row < wholerows && selected_cols >=0 && selected_cols < wholecols;
-                    data = con ? data : 0;
-                    local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
-                }
-#else
-                int selected_row = ADDR_H(rows_start_index + i,  0, wholerows);
-                selected_row     = ADDR_B(rows_start_index + i, wholerows, selected_row);
-
-                int selected_cols = ADDR_L(cols_start_index_group + lX, 0, wholecols);
-                selected_cols     = ADDR_R(cols_start_index_group + lX, wholecols, selected_cols);
-
-                float data = *((__global float *)((__global char *)src + selected_row * src_step + (selected_cols << 2)));
-                local_data[i * LOCAL_MEM_STEP + lX] =data;
-
-                if(lX < (ANX << 1))
-                {
-                    selected_cols = cols_start_index_group + lX + groupX_size;
-                    selected_cols = ADDR_R(selected_cols, wholecols, selected_cols);
-
-                    data = *((__global float *)((__global char *)src + selected_row * src_step + (selected_cols << 2)));
-                    local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
-                }
-#endif
-            }
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int process_col = groupX_size * groupX_id + ((lX % THREADS_PER_ROW) << 2);
-    if(((gY << 2) < rows) && (process_col < operate_cols))
-    {
-        int dst_cols_start = dst_offset_x;
-        int dst_cols_end   = dst_offset_x + cols;
-        int dst_cols_index = (dst_offset_x + process_col) & 0xfffffffc;
-
-        int dst_rows_end   = dst_offset_y + rows;
-        int dst_rows_index = dst_offset_y + (gY << ROWS_PER_GROUP_BITS) + (lX >> THREADS_PER_ROW_BIT);
-
-        float4 dst_data = *((__global float4*)((__global char *)dst + dst_rows_index * dst_step + (dst_cols_index << 2)));
-
-        float4 sum = (float4)(0);
-        float4 data;
-
-        for(int i = 0; i < ANCHOR; i++)
-        {
-#pragma unroll
-            for(int j = 0; j < ANCHOR; j++)
-            {
-                if(dst_rows_index < dst_rows_end)
-                {
-                    int local_row = (lX >> THREADS_PER_ROW_BIT) + i;
-                    int local_cols = ((lX % THREADS_PER_ROW) << ELEMENTS_PER_THREAD_BIT) + j;
-
-                    data = vload4(0, local_data+local_row * LOCAL_MEM_STEP + local_cols);
-                    sum = sum + ((float)(mat_kernel[i * ANCHOR + j]) * data);
-                }
-            }
-        }
-
-        if(dst_rows_index < dst_rows_end)
-        {
-            sum.x = ((dst_cols_index + 0 >= dst_cols_start) && (dst_cols_index + 0 < dst_cols_end)) ? sum.x : dst_data.x;
-            sum.y = ((dst_cols_index + 1 >= dst_cols_start) && (dst_cols_index + 1 < dst_cols_end)) ? sum.y : dst_data.y;
-            sum.z = ((dst_cols_index + 2 >= dst_cols_start) && (dst_cols_index + 2 < dst_cols_end)) ? sum.z : dst_data.z;
-            sum.w = ((dst_cols_index + 3 >= dst_cols_start) && (dst_cols_index + 3 < dst_cols_end)) ? sum.w : dst_data.w;
-
-            *((__global float4 *)((__global char *)dst + dst_rows_index * dst_step + (dst_cols_index << 2))) = sum;
-        }
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////8uC4////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_x, int src_offset_y,
-                             __global uchar4 *dst, int dst_step, int dst_offset_x, int dst_offset_y,
-                             __constant int *mat_kernel __attribute__((max_constant_size (16384))),
-                             int cols,int rows, int operate_cols, int wholecols, int wholerows)
-{
-    int gX = get_global_id(0);
-    int gY = get_global_id(1);
-
-    int lX = get_local_id(0);
-
-    int groupX_size = get_local_size(0);
-    int groupX_id   = get_group_id(0);
-
-#define dst_align (dst_offset_x & 3)
-    int cols_start_index_group = src_offset_x - dst_align + groupX_size * groupX_id - ANX;
-    int rows_start_index       = src_offset_y + (gY << ROWS_PER_GROUP_BITS) - ANY;
-
-    __local uchar4 local_data[LOCAL_MEM_STEP * ROWS_FETCH];
-
-    if(((gY << 2) < rows))
-    {
-        for(int i = 0; i < ROWS_FETCH; ++i)
-        {
-            if((rows_start_index - src_offset_y) + i < rows + ANY)
-            {
-#ifdef BORDER_CONSTANT
-                int selected_row  = rows_start_index + i;
-                int selected_cols = cols_start_index_group + lX;
-
-                uchar4 data = *((__global uchar4*)((__global char*)src + selected_row * src_step + (selected_cols << 2)));
-                int con = selected_row >=0 && selected_row < wholerows && selected_cols >=0 && selected_cols < wholecols;
-                data = con ? data : 0;
-                local_data[i * LOCAL_MEM_STEP + lX ] =data;
-
-                if(lX < (ANX << 1))
-                {
-                    selected_cols = cols_start_index_group + lX + groupX_size;
-
-                    data = *((__global uchar4*)((__global char*)src + selected_row * src_step + (selected_cols << 2)));
-                    con = selected_row >=0 && selected_row < wholerows && selected_cols >=0 && selected_cols < wholecols;
-                    data = con ? data : 0;
-                    local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
-                }
-#else
-                int selected_row = ADDR_H(rows_start_index + i,  0, wholerows);
-                selected_row     = ADDR_B(rows_start_index + i, wholerows, selected_row);
-
-                int selected_cols = ADDR_L(cols_start_index_group + lX, 0, wholecols);
-                selected_cols     = ADDR_R(cols_start_index_group + lX, wholecols, selected_cols);
-
-                uchar4 data = *((__global uchar4*)((__global char*)src + selected_row * src_step + (selected_cols << 2)));
-
-                local_data[i * LOCAL_MEM_STEP + lX] =data;
-
-                if(lX < (ANX << 1))
-                {
-                    selected_cols = cols_start_index_group + lX + groupX_size;
-                    selected_cols = ADDR_R(selected_cols, wholecols, selected_cols);
-
-                    data = *((__global uchar4*)((__global char*)src + selected_row * src_step + (selected_cols << 2)));
-                    local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
-                }
-#endif
-            }
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int process_col = groupX_size * groupX_id + ((lX % THREADS_PER_ROW) << 2);
-    if(((gY << 2) < rows) && (process_col < operate_cols))
-    {
-        int dst_cols_start = dst_offset_x;
-        int dst_cols_end   = dst_offset_x + cols;
-        int dst_cols_index = (dst_offset_x + process_col) & 0xfffffffc;
-
-        int dst_rows_end   = dst_offset_y + rows;
-        int dst_rows_index = dst_offset_y + (gY << ROWS_PER_GROUP_BITS) + (lX >> THREADS_PER_ROW_BIT);
-
-        uchar16 dst_data;
-        dst_data = *((__global uchar16*)((__global char *)dst + dst_rows_index * dst_step + (dst_cols_index << 2)));
-
-        int16 sum = (int16)(0);
-        uchar16 data;
-
-        for(int i = 0; i < ANCHOR; i++)
-        {
-#pragma unroll
-            for(int j = 0; j < ANCHOR; j++)
-            {
-                if(dst_rows_index < dst_rows_end)
-                {
-                    int local_row = (lX >> THREADS_PER_ROW_BIT) + i;
-                    int local_cols = ((lX % THREADS_PER_ROW) << ELEMENTS_PER_THREAD_BIT) + j;
-
-                    data = vload16(0, (__local uchar *)(local_data+local_row * LOCAL_MEM_STEP + local_cols));
-                    sum = sum + (mat_kernel[i * ANCHOR + j] * convert_int16_sat(data));
-                }
-            }
-        }
-
-        if(dst_rows_index < dst_rows_end)
-        {
-            uchar16 sum1 = convert_uchar16_sat(sum);
-            sum1.s0123 = ((dst_cols_index + 0 >= dst_cols_start) && (dst_cols_index + 0 < dst_cols_end))?
-                         sum1.s0123 : dst_data.s0123;
-            sum1.s4567 = ((dst_cols_index + 1 >= dst_cols_start) && (dst_cols_index + 1 < dst_cols_end))?
-                         sum1.s4567 : dst_data.s4567;
-            sum1.s89ab = ((dst_cols_index + 2 >= dst_cols_start) && (dst_cols_index + 2 < dst_cols_end))?
-                         sum1.s89ab : dst_data.s89ab;
-            sum1.scdef = ((dst_cols_index + 3 >= dst_cols_start) && (dst_cols_index + 3 < dst_cols_end))?
-                         sum1.scdef : dst_data.scdef;
-
-            *((__global uchar16*)((__global char *)dst + dst_rows_index * dst_step + (dst_cols_index << 2))) = sum1;
-        }
-    }
-}
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////32FC4////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////
-#define ROWS_FETCH_C4              (1 + ANY + ANY)   //(ROWS_PER_GROUP + anY * 2)
-#define LOCAL_MEM_STEP_C4           260 //divup((get_local_size(0) + anX * 2), 4) * 4)
-__kernel void filter2D_C4_D5(__global float4 *src, int src_step, int src_offset_x, int src_offset_y,
-                             __global float4 *dst, int dst_step, int dst_offset_x, int dst_offset_y,
-                             __constant int *mat_kernel __attribute__((max_constant_size (16384))),
-                             int cols,int rows, int operate_cols, int wholecols, int wholerows)
-{
-    int gX = get_global_id(0);
-    int gY = get_global_id(1);
-
-    int lX = get_local_id(0);
-
-    int groupX_size = get_local_size(0);
-    int groupX_id   = get_group_id(0);
-
-    int cols_start_index_group = src_offset_x + groupX_size * groupX_id - ANX;
-    int rows_start_index       = src_offset_y + gY - ANY;
-
-    __local float4 local_data[LOCAL_MEM_STEP_C4 * ROWS_FETCH_C4];
-    if((gY < rows) && (gX < (operate_cols + ANX + ANX)))
-    {
-        for(int i = 0; i < ROWS_FETCH_C4; ++i)
-        {
-            if((rows_start_index - src_offset_y) + i < rows + ANY)
-            {
-#ifdef BORDER_CONSTANT
-                int selected_row  = rows_start_index + i;
-                int selected_cols = cols_start_index_group + lX;
-
-                float4 data = *((__global float4*)((__global char*)src + selected_row * src_step + (selected_cols << 4)));
-                int con = selected_row >=0 && selected_row < wholerows && selected_cols >=0 && selected_cols < wholecols;
-                data = con ? data : 0;
-                local_data[i * LOCAL_MEM_STEP + lX ] =data;
-
-                if(lX < (ANX << 1))
-                {
-                    selected_cols = cols_start_index_group + lX + groupX_size;
-
-                    data = *((__global float4*)((__global char*)src + selected_row * src_step + (selected_cols << 4)));
-                    con = selected_row >=0 && selected_row < wholerows && selected_cols >=0 && selected_cols < wholecols;
-                    data = con ? data : 0;
-                    local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
-                }
-#else
-                int selected_row = ADDR_H(rows_start_index + i,  0, wholerows);
-                selected_row     = ADDR_B(rows_start_index + i, wholerows, selected_row);
-
-                int selected_cols = ADDR_L(cols_start_index_group + lX, 0, wholecols);
-                selected_cols     = ADDR_R(cols_start_index_group + lX, wholecols, selected_cols);
-
-                float4 data = *((__global float4*)((__global char*)src + selected_row * src_step + (selected_cols << 4)));
-                local_data[i * LOCAL_MEM_STEP_C4 + lX] =data;
-
-                if(lX < (ANX << 1))
-                {
-                    selected_cols = cols_start_index_group + lX + groupX_size;
-                    selected_cols = ADDR_R(selected_cols, wholecols, selected_cols);
-
-                    data = *((__global float4*)((__global char*)src + selected_row * src_step + (selected_cols << 4)));
-                    local_data[i * LOCAL_MEM_STEP_C4 + lX + groupX_size] =data;
-                }
-#endif
-            }
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if((gY < rows) && (gX < operate_cols))
-    {
-        int dst_cols_index = dst_offset_x + gX;
-        int dst_rows_index = dst_offset_y + gY;
-
-        float4 sum = (float4)(0);
-
-        for(int i = 0; i < ANCHOR; i++)
-        {
-            for(int j = 0; j < ANCHOR; j++)
-            {
-                int local_cols = lX + j;
-                sum = sum + ((float)mat_kernel[i * ANCHOR + j] * local_data[i * LOCAL_MEM_STEP_C4 + local_cols]);
-            }
-        }
-
-        *((__global float4*)((__global char *)dst + dst_rows_index * dst_step + (dst_cols_index << 4))) = sum;
-    }
-}
diff --git a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
index 8507972ff..c12ab5950 100644
--- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
@@ -207,7 +207,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
                         - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
                                      sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
                         + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
-                        
+
                         bool passThres = classsum >= nodethreshold;
 
 #if STUMP_BASED
@@ -304,4 +304,3 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
     newnode[counter].alpha[1] = t1.alpha[1];
     newnode[counter].alpha[2] = t1.alpha[2];
 }
-
diff --git a/modules/ocl/src/opencl/imgproc_clahe.cl b/modules/ocl/src/opencl/imgproc_clahe.cl
index 0d010f7a5..49c709692 100644
--- a/modules/ocl/src/opencl/imgproc_clahe.cl
+++ b/modules/ocl/src/opencl/imgproc_clahe.cl
@@ -71,15 +71,15 @@ void reduce(volatile __local int* smem, int val, int tid)
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 128)
-    { 
+    {
         smem[tid] = val += smem[tid + 128];
-    } 
+    }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 64)
-    { 
+    {
         smem[tid] = val += smem[tid + 64];
-    } 
+    }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 32)
@@ -125,15 +125,15 @@ void reduce(__local volatile int* smem, int val, int tid)
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 128)
-    { 
+    {
         smem[tid] = val += smem[tid + 128];
-    } 
+    }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 64)
-    { 
+    {
         smem[tid] = val += smem[tid + 64];
-    } 
+    }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 32)
diff --git a/modules/ocl/src/opencl/imgproc_gfft.cl b/modules/ocl/src/opencl/imgproc_gftt.cl
similarity index 93%
rename from modules/ocl/src/opencl/imgproc_gfft.cl
rename to modules/ocl/src/opencl/imgproc_gftt.cl
index 5fa27ffc1..bfa8ee3f8 100644
--- a/modules/ocl/src/opencl/imgproc_gfft.cl
+++ b/modules/ocl/src/opencl/imgproc_gftt.cl
@@ -49,12 +49,12 @@
 
 __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
 
-inline float ELEM_INT2(image2d_t _eig, int _x, int _y) 
+inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
 {
     return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
 }
 
-inline float ELEM_FLT2(image2d_t _eig, float2 pt) 
+inline float ELEM_FLT2(image2d_t _eig, float2 pt)
 {
     return read_imagef(_eig, sampler, pt).x;
 }
@@ -132,7 +132,7 @@ __kernel
     const int pairDistance = 1 << (stage - passOfStage);
     const int blockWidth   = 2 * pairDistance;
 
-    const int leftId = min( (threadId % pairDistance) 
+    const int leftId = min( (threadId % pairDistance)
                    + (threadId / pairDistance) * blockWidth, count );
 
     const int rightId = min( leftId + pairDistance, count );
@@ -147,7 +147,7 @@ __kernel
 
     float2 greater = compareResult ? leftPt:rightPt;
     float2 lesser  = compareResult ? rightPt:leftPt;
-    
+
     corners[leftId]  = sortOrder ? lesser : greater;
     corners[rightId] = sortOrder ? greater : lesser;
 }
@@ -195,20 +195,20 @@ __kernel
     {
         pt2  = scratch[j];
         val2 = ELEM_FLT2(eig, pt2);
-        if(val2 > val1) 
+        if(val2 > val1)
             pos++;//calculate the rank of this element in this work group
-        else 
+        else
         {
             if(val1 > val2)
                 continue;
-            else 
+            else
             {
                 // val1 and val2 are same
                 same++;
             }
         }
     }
-    for (int j=0; j< same; j++)      
+    for (int j=0; j< same; j++)
         corners[pos + j] = pt1;
 }
 __kernel
@@ -240,15 +240,15 @@ __kernel
         for(int k=0; k<wg; k++)
         {
             pt2  = corners[j*wg + k];
-            val2 = ELEM_FLT2(eig, pt2); 
+            val2 = ELEM_FLT2(eig, pt2);
             if(val1 > val2)
                 break;
             else
             {
-                //Increment only if the value is not the same. 
+                //Increment only if the value is not the same.
                 if( val2 > val1 )
                     pos++;
-                else 
+                else
                     same++;
             }
         }
@@ -257,20 +257,19 @@ __kernel
     for(int k=0; k<remainder; k++)
     {
         pt2  = corners[(numOfGroups-1)*wg + k];
-        val2 = ELEM_FLT2(eig, pt2); 
+        val2 = ELEM_FLT2(eig, pt2);
         if(val1 > val2)
             break;
         else
         {
-            //Don't increment if the value is the same. 
+            //Don't increment if the value is the same.
             //Two elements are same if (*userComp)(jData, iData)  and (*userComp)(iData, jData) are both false
             if(val2 > val1)
                 pos++;
-            else 
+            else
                 same++;
         }
-    }  
-    for (int j=0; j< same; j++)      
+    }
+    for (int j=0; j< same; j++)
         corners[pos + j] = pt1;
 }
-
diff --git a/modules/ocl/src/opencl/imgproc_integral.cl b/modules/ocl/src/opencl/imgproc_integral.cl
index d279ef728..9ced01d02 100644
--- a/modules/ocl/src/opencl/imgproc_integral.cl
+++ b/modules/ocl/src/opencl/imgproc_integral.cl
@@ -490,4 +490,4 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
         }
         barrier(CLK_LOCAL_MEM_FENCE);
     }
-}
\ No newline at end of file
+}
diff --git a/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl b/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl
new file mode 100644
index 000000000..3c3eb98c8
--- /dev/null
+++ b/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl
@@ -0,0 +1,176 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
+
+#ifndef N   // number of radices
+#define N 4
+#endif
+
+#ifndef K_T
+#define K_T float
+#endif
+
+#ifndef V_T
+#define V_T float
+#endif
+
+#ifndef IS_GT
+#define IS_GT 0
+#endif
+
+
+// from Thrust::b40c, link:
+// https://github.com/thrust/thrust/blob/master/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h
+__inline uint convertKey(uint converted_key)
+{
+#ifdef K_FLT
+    unsigned int mask = (converted_key & 0x80000000) ? 0xffffffff : 0x80000000;
+    converted_key ^= mask;
+#elif defined(K_INT)
+    const uint SIGN_MASK = 1u << ((sizeof(int) * 8) - 1);
+    converted_key ^= SIGN_MASK;
+#else
+
+#endif
+    return converted_key;
+}
+
+//FIXME(pengx17):
+// exclusive scan, need to be optimized as this is too naive...
+kernel
+    void naiveScanAddition(
+    __global int * input,
+    __global int * output,
+    int size
+    )
+{
+    if(get_global_id(0) == 0)
+    {
+        output[0] = 0;
+        for(int i = 1; i < size; i ++)
+        {
+            output[i] = output[i - 1] + input[i - 1];
+        }
+    }
+}
+
+// following is ported from
+// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_uint_kernels.cl
+kernel
+    void histogramRadixN (
+    __global K_T* unsortedKeys,
+    __global int * buckets,
+    uint shiftCount
+    )
+{
+    const int RADIX_T     = N;
+    const int RADICES_T   = (1 << RADIX_T);
+    const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T;
+    const int MASK_T      = (1 << RADIX_T) - 1;
+    int localBuckets[16] = {0,0,0,0,0,0,0,0,
+                            0,0,0,0,0,0,0,0};
+    int globalId    = get_global_id(0);
+    int numOfGroups = get_num_groups(0);
+
+    /* Calculate thread-histograms */
+    for(int i = 0; i < NUM_OF_ELEMENTS_PER_WORK_ITEM_T; ++i)
+    {
+        uint value = convertKey(as_uint(unsortedKeys[mad24(globalId, NUM_OF_ELEMENTS_PER_WORK_ITEM_T, i)]));
+        value = (value >> shiftCount) & MASK_T;
+#if IS_GT
+        localBuckets[RADICES_T - value - 1]++;
+#else
+        localBuckets[value]++;
+#endif
+    }
+
+    for(int i = 0; i < NUM_OF_ELEMENTS_PER_WORK_ITEM_T; ++i)
+    {
+        buckets[mad24(i, RADICES_T * numOfGroups, globalId) ] = localBuckets[i];
+    }
+}
+
+kernel
+    void permuteRadixN (
+    __global K_T*  unsortedKeys,
+    __global V_T*  unsortedVals,
+    __global int* scanedBuckets,
+    uint shiftCount,
+    __global K_T*  sortedKeys,
+    __global V_T*  sortedVals
+    )
+{
+    const int RADIX_T     = N;
+    const int RADICES_T   = (1 << RADIX_T);
+    const int MASK_T = (1<<RADIX_T)  -1;
+
+    int globalId  = get_global_id(0);
+    int numOfGroups = get_num_groups(0);
+    const int NUM_OF_ELEMENTS_PER_WORK_GROUP_T = numOfGroups << N;
+    int  localIndex[16];
+
+    /*Load the index to local memory*/
+    for(int i = 0; i < RADICES_T; ++i)
+    {
+#if IS_GT
+        localIndex[i] = scanedBuckets[mad24(RADICES_T - i - 1, NUM_OF_ELEMENTS_PER_WORK_GROUP_T, globalId)];
+#else
+        localIndex[i] = scanedBuckets[mad24(i, NUM_OF_ELEMENTS_PER_WORK_GROUP_T, globalId)];
+#endif
+    }
+    /* Permute elements to appropriate location */
+    for(int i = 0; i < RADICES_T; ++i)
+    {
+        int old_idx = mad24(globalId, RADICES_T, i);
+        K_T  ovalue = unsortedKeys[old_idx];
+        uint value = convertKey(as_uint(ovalue));
+        uint maskedValue = (value >> shiftCount) & MASK_T;
+        uint index = localIndex[maskedValue];
+        sortedKeys[index] = ovalue;
+        sortedVals[index] = unsortedVals[old_idx];
+        localIndex[maskedValue] = index + 1;
+    }
+}
diff --git a/modules/ocl/src/opencl/kernel_sort_by_key.cl b/modules/ocl/src/opencl/kernel_sort_by_key.cl
new file mode 100644
index 000000000..2e85e5a88
--- /dev/null
+++ b/modules/ocl/src/opencl/kernel_sort_by_key.cl
@@ -0,0 +1,245 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef K_T
+#define K_T float
+#endif
+
+#ifndef V_T
+#define V_T float
+#endif
+
+#ifndef IS_GT
+#define IS_GT false
+#endif
+
+#if IS_GT
+#define my_comp(x,y) ((x) > (y))
+#else
+#define my_comp(x,y) ((x) < (y))
+#endif
+
+/////////////////////// Bitonic sort ////////////////////////////
+// ported from
+// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_by_key_kernels.cl
+__kernel
+    void bitonicSort
+    (
+        __global K_T * keys,
+        __global V_T * vals,
+        int count,
+        int stage,
+        int passOfStage
+    )
+{
+    const int threadId = get_global_id(0);
+    if(threadId >= count / 2)
+    {
+        return;
+    }
+    const int pairDistance = 1 << (stage - passOfStage);
+    const int blockWidth   = 2 * pairDistance;
+
+    int leftId = min( (threadId % pairDistance)
+                   + (threadId / pairDistance) * blockWidth, count );
+
+    int rightId = min( leftId + pairDistance, count );
+
+    int temp;
+
+    const V_T lval = vals[leftId];
+    const V_T rval = vals[rightId];
+
+    const K_T lkey = keys[leftId];
+    const K_T rkey = keys[rightId];
+
+    int sameDirectionBlockWidth = 1 << stage;
+
+    if((threadId/sameDirectionBlockWidth) % 2 == 1)
+    {
+        temp = rightId;
+        rightId = leftId;
+        leftId = temp;
+    }
+
+    const bool compareResult = my_comp(lkey, rkey);
+
+    if(compareResult)
+    {
+        keys[rightId] = rkey;
+        keys[leftId]  = lkey;
+        vals[rightId] = rval;
+        vals[leftId]  = lval;
+    }
+    else
+    {
+        keys[rightId] = lkey;
+        keys[leftId]  = rkey;
+        vals[rightId] = lval;
+        vals[leftId]  = rval;
+    }
+}
+
+/////////////////////// Selection sort ////////////////////////////
+//kernel is ported from Bolt library:
+//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
+__kernel
+    void selectionSortLocal
+    (
+        __global K_T * keys,
+        __global V_T * vals,
+        const int count,
+        __local  K_T * scratch
+    )
+{
+    int          i  = get_local_id(0); // index in workgroup
+    int numOfGroups = get_num_groups(0); // index in workgroup
+    int groupID     = get_group_id(0);
+    int         wg  = get_local_size(0); // workgroup size = block size
+    int n; // number of elements to be processed for this work group
+
+    int offset   = groupID * wg;
+    int same     = 0;
+
+    vals      += offset;
+    keys      += offset;
+    n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
+
+    int clamped_i= min(i, n - 1);
+
+    K_T key1 = keys[clamped_i], key2;
+    V_T val1 = vals[clamped_i];
+    scratch[i] = key1;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if(i >= n)
+    {
+        return;
+    }
+
+    int pos = 0;
+    for (int j=0;j<n;++j)
+    {
+        key2  = scratch[j];
+        if(my_comp(key2, key1))
+            pos++;//calculate the rank of this element in this work group
+        else
+        {
+            if(my_comp(key1, key2))
+                continue;
+            else
+            {
+                // key1 and key2 are same
+                same++;
+            }
+        }
+    }
+    for (int j=0; j< same; j++)
+    {
+        vals[pos + j] = val1;
+        keys[pos + j] = key1;
+    }
+}
+__kernel
+    void selectionSortFinal
+    (
+        __global K_T * keys,
+        __global V_T * vals,
+        const int count
+    )
+{
+    const int          i  = get_local_id(0); // index in workgroup
+    const int numOfGroups = get_num_groups(0); // index in workgroup
+    const int groupID     = get_group_id(0);
+    const int         wg  = get_local_size(0); // workgroup size = block size
+    int pos = 0, same = 0;
+    const int offset = get_group_id(0) * wg;
+    const int remainder = count - wg*(numOfGroups-1);
+
+    if((offset + i ) >= count)
+        return;
+    V_T val1 = vals[offset + i];
+
+    K_T key1 = keys[offset + i];
+    K_T key2;
+
+    for(int j=0; j<numOfGroups-1; j++ )
+    {
+        for(int k=0; k<wg; k++)
+        {
+            key2 = keys[j*wg + k];
+            if(my_comp(key1, key2))
+                break;
+            else
+            {
+                //Increment only if the value is not the same.
+                if(my_comp(key2, key1))
+                    pos++;
+                else
+                    same++;
+            }
+        }
+    }
+
+    for(int k=0; k<remainder; k++)
+    {
+        key2 = keys[(numOfGroups-1)*wg + k];
+        if(my_comp(key1, key2))
+            break;
+        else
+        {
+            //Don't increment if the value is the same.
+            if(my_comp(key2, key1))
+                pos++;
+            else
+                same++;
+        }
+    }
+    for (int j=0; j< same; j++)
+    {
+        vals[pos + j] = val1;
+        keys[pos + j] = key1;
+    }
+}
diff --git a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
new file mode 100644
index 000000000..9596d8cc8
--- /dev/null
+++ b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
@@ -0,0 +1,296 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef K_T
+#define K_T float
+#endif
+
+#ifndef V_T
+#define V_T float
+#endif
+
+#ifndef IS_GT
+#define IS_GT false
+#endif
+
+#if IS_GT
+#define my_comp(x,y) ((x) > (y))
+#else
+#define my_comp(x,y) ((x) < (y))
+#endif
+
+///////////// parallel merge sort ///////////////
+// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
+uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
+{
+    //  The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
+    uint firstIndex = left;
+    uint lastIndex = right;
+
+    //  This loops through [firstIndex, lastIndex)
+    //  Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
+    //  this while loop will be divergent within a wavefront
+    while( firstIndex < lastIndex )
+    {
+        K_T dataVal = data[ firstIndex ];
+
+        //  This branch will create divergent wavefronts
+        if( my_comp( dataVal, searchVal ) )
+        {
+            firstIndex = firstIndex+1;
+        }
+        else
+        {
+            break;
+        }
+    }
+
+    return firstIndex;
+}
+
+//  This implements a binary search routine to look for an 'insertion point' in a sequence, denoted
+//  by a base pointer and left and right index for a particular candidate value.  The comparison operator is
+//  passed as a functor parameter my_comp
+//  This function returns an index that is the first index whos value would be equal to the searched value
+uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
+{
+    //  The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
+    uint firstIndex = left;
+    uint lastIndex = right;
+
+    //  This loops through [firstIndex, lastIndex)
+    //  Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
+    //  this while loop will be divergent within a wavefront
+    while( firstIndex < lastIndex )
+    {
+        //  midIndex is the average of first and last, rounded down
+        uint midIndex = ( firstIndex + lastIndex ) / 2;
+        K_T midValue = data[ midIndex ];
+
+        //  This branch will create divergent wavefronts
+        if( my_comp( midValue, searchVal ) )
+        {
+            firstIndex = midIndex+1;
+            // printf( "lowerBound: lastIndex[ %i ]=%i\n", get_local_id( 0 ), lastIndex );
+        }
+        else
+        {
+            lastIndex = midIndex;
+            // printf( "lowerBound: firstIndex[ %i ]=%i\n", get_local_id( 0 ), firstIndex );
+        }
+    }
+
+    return firstIndex;
+}
+
+//  This implements a binary search routine to look for an 'insertion point' in a sequence, denoted
+//  by a base pointer and left and right index for a particular candidate value.  The comparison operator is
+//  passed as a functor parameter my_comp
+//  This function returns an index that is the first index whos value would be greater than the searched value
+//  If the search value is not found in the sequence, upperbound returns the same result as lowerbound
+uint upperBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
+{
+    uint upperBound = lowerBoundBinary( data, left, right, searchVal );
+
+    // printf( "upperBoundBinary: upperBound[ %i, %i ]= %i\n", left, right, upperBound );
+    //  If upperBound == right, then  searchVal was not found in the sequence.  Just return.
+    if( upperBound != right )
+    {
+        //  While the values are equal i.e. !(x < y) && !(y < x) increment the index
+        K_T upperValue = data[ upperBound ];
+        while( !my_comp( upperValue, searchVal ) && !my_comp( searchVal, upperValue) && (upperBound != right) )
+        {
+            upperBound++;
+            upperValue = data[ upperBound ];
+        }
+    }
+
+    return upperBound;
+}
+
+//  This kernel implements merging of blocks of sorted data.  The input to this kernel most likely is
+//  the output of blockInsertionSortTemplate.  It is expected that the source array contains multiple
+//  blocks, each block is independently sorted.  The goal is to write into the output buffer half as
+//  many blocks, of double the size.  The even and odd blocks are stably merged together to form
+//  a new sorted block of twice the size.  The algorithm is out-of-place.
+kernel void merge(
+    global K_T*   iKey_ptr,
+    global V_T*   iValue_ptr,
+    global K_T*   oKey_ptr,
+    global V_T*   oValue_ptr,
+    const uint    srcVecSize,
+    const uint    srcLogicalBlockSize,
+    local K_T*    key_lds,
+    local V_T*    val_lds
+)
+{
+    size_t globalID     = get_global_id( 0 );
+    size_t groupID      = get_group_id( 0 );
+    size_t localID      = get_local_id( 0 );
+    size_t wgSize       = get_local_size( 0 );
+
+    //  Abort threads that are passed the end of the input vector
+    if( globalID >= srcVecSize )
+        return; // on SI this doesn't mess-up barriers
+
+    //  For an element in sequence A, find the lowerbound index for it in sequence B
+    uint srcBlockNum   = globalID / srcLogicalBlockSize;
+    uint srcBlockIndex = globalID % srcLogicalBlockSize;
+
+    // printf( "mergeTemplate: srcBlockNum[%i]=%i\n", srcBlockNum, srcBlockIndex );
+
+    //  Pairs of even-odd blocks will be merged together
+    //  An even block should search for an insertion point in the next odd block,
+    //  and the odd block should look for an insertion point in the corresponding previous even block
+    uint dstLogicalBlockSize = srcLogicalBlockSize<<1;
+    uint leftBlockIndex = globalID & ~((dstLogicalBlockSize) - 1 );
+    leftBlockIndex += (srcBlockNum & 0x1) ? 0 : srcLogicalBlockSize;
+    leftBlockIndex = min( leftBlockIndex, srcVecSize );
+    uint rightBlockIndex = min( leftBlockIndex + srcLogicalBlockSize, srcVecSize );
+
+    // if( localID == 0 )
+    // {
+    // printf( "mergeTemplate: wavefront[ %i ] logicalBlock[ %i ] logicalIndex[ %i ] leftBlockIndex[ %i ] <=> rightBlockIndex[ %i ]\n", groupID, srcBlockNum, srcBlockIndex, leftBlockIndex, rightBlockIndex );
+    // }
+
+    //  For a particular element in the input array, find the lowerbound index for it in the search sequence given by leftBlockIndex & rightBlockIndex
+    // uint insertionIndex = lowerBoundLinear( iKey_ptr, leftBlockIndex, rightBlockIndex, iKey_ptr[ globalID ], my_comp ) - leftBlockIndex;
+    uint insertionIndex = 0;
+    if( (srcBlockNum & 0x1) == 0 )
+    {
+        insertionIndex = lowerBoundBinary( iKey_ptr, leftBlockIndex, rightBlockIndex, iKey_ptr[ globalID ] ) - leftBlockIndex;
+    }
+    else
+    {
+        insertionIndex = upperBoundBinary( iKey_ptr, leftBlockIndex, rightBlockIndex, iKey_ptr[ globalID ] ) - leftBlockIndex;
+    }
+
+    //  The index of an element in the result sequence is the summation of it's indixes in the two input
+    //  sequences
+    uint dstBlockIndex = srcBlockIndex + insertionIndex;
+    uint dstBlockNum = srcBlockNum/2;
+
+    // if( (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex == 395 )
+    // {
+    // printf( "mergeTemplate: (dstBlockNum[ %i ] * dstLogicalBlockSize[ %i ]) + dstBlockIndex[ %i ] = srcBlockIndex[ %i ] + insertionIndex[ %i ]\n", dstBlockNum, dstLogicalBlockSize, dstBlockIndex, srcBlockIndex, insertionIndex );
+    // printf( "mergeTemplate: dstBlockIndex[ %i ] = iKey_ptr[ %i ] ( %i )\n", (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex, globalID, iKey_ptr[ globalID ] );
+    // }
+    oKey_ptr[ (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex ] = iKey_ptr[ globalID ];
+    oValue_ptr[ (dstBlockNum*dstLogicalBlockSize)+dstBlockIndex ] = iValue_ptr[ globalID ];
+    // printf( "mergeTemplate: leftResultIndex[ %i ]=%i + %i\n", leftResultIndex, srcBlockIndex, leftInsertionIndex );
+}
+
+kernel void blockInsertionSort(
+    global K_T*   key_ptr,
+    global V_T*   value_ptr,
+    const uint    vecSize,
+    local K_T*    key_lds,
+    local V_T*    val_lds
+)
+{
+    size_t gloId    = get_global_id( 0 );
+    size_t groId    = get_group_id( 0 );
+    size_t locId    = get_local_id( 0 );
+    size_t wgSize   = get_local_size( 0 );
+
+    bool in_range = gloId < vecSize;
+    K_T key;
+    V_T val;
+    //  Abort threads that are passed the end of the input vector
+    if (in_range)
+    {
+        //  Make a copy of the entire input array into fast local memory
+        key = key_ptr[ gloId ];
+        val = value_ptr[ gloId ];
+        key_lds[ locId ] = key;
+        val_lds[ locId ] = val;
+    }
+    barrier( CLK_LOCAL_MEM_FENCE );
+    //  Sorts a workgroup using a naive insertion sort
+    //  The sort uses one thread within a workgroup to sort the entire workgroup
+    if( locId == 0 && in_range )
+    {
+        //  The last workgroup may have an irregular size, so we calculate a per-block endIndex
+        //  endIndex is essentially emulating a mod operator with subtraction and multiply
+        size_t endIndex = vecSize - ( groId * wgSize );
+        endIndex = min( endIndex, wgSize );
+
+        // printf( "Debug: endIndex[%i]=%i\n", groId, endIndex );
+
+        //  Indices are signed because the while loop will generate a -1 index inside of the max function
+        for( int currIndex = 1; currIndex < endIndex; ++currIndex )
+        {
+            key = key_lds[ currIndex ];
+            val = val_lds[ currIndex ];
+            int scanIndex = currIndex;
+            K_T ldsKey = key_lds[scanIndex - 1];
+            while( scanIndex > 0 && my_comp( key, ldsKey ) )
+            {
+                V_T ldsVal = val_lds[scanIndex - 1];
+
+                //  If the keys are being swapped, make sure the values are swapped identicaly
+                key_lds[ scanIndex ] = ldsKey;
+                val_lds[ scanIndex ] = ldsVal;
+
+                scanIndex = scanIndex - 1;
+                ldsKey = key_lds[ max( 0, scanIndex - 1 ) ];  // scanIndex-1 may be -1
+            }
+            key_lds[ scanIndex ] = key;
+            val_lds[ scanIndex ] = val;
+        }
+    }
+    barrier( CLK_LOCAL_MEM_FENCE );
+
+    if(in_range)
+    {
+        key = key_lds[ locId ];
+        key_ptr[ gloId ] = key;
+
+        val = val_lds[ locId ];
+        value_ptr[ gloId ] = val;
+    }
+}
+
+///////////// Radix sort from b40c library /////////////
diff --git a/modules/ocl/test/test_columnsum.cpp b/modules/ocl/src/opencl/kmeans_kernel.cl
similarity index 67%
rename from modules/ocl/test/test_columnsum.cpp
rename to modules/ocl/src/opencl/kmeans_kernel.cl
index 231f0657b..c6af0ad24 100644
--- a/modules/ocl/test/test_columnsum.cpp
+++ b/modules/ocl/src/opencl/kmeans_kernel.cl
@@ -15,8 +15,7 @@
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
-//	   Chunpeng Zhang chunpeng@multicorewareinc.com
-//
+//    Xiaopeng Fu, fuxiaopeng2222@163.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -26,12 +25,12 @@
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
+//     and/or other GpuMaterials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
-// This software is provided by the copyright holders and contributors "as is" and
+// This software is provided by the copyright holders and contributors as is and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
@@ -44,51 +43,42 @@
 //
 //M*/
 
-#include "precomp.hpp"
-#include <iomanip>
-
-#ifdef HAVE_OPENCL
-
-PARAM_TEST_CASE(ColumnSum, cv::Size)
+__kernel void distanceToCenters(
+    int label_step, int K,
+    __global float *src,
+    __global int *labels, int dims, int rows,
+    __global float *centers,
+    __global float *dists)
 {
-    cv::Size size;
-    cv::Mat src;
+    int gid = get_global_id(1);
 
-    virtual void SetUp()
+    float dist, euDist, min;
+    int minCentroid;
+
+    if(gid >= rows)
+        return;
+
+    for(int i = 0 ; i < K; i++)
     {
-        size = GET_PARAM(0);
-    }
-};
-
-TEST_P(ColumnSum, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_32FC1);
-    cv::ocl::oclMat d_dst;
-    cv::ocl::oclMat d_src(src);
-
-    cv::ocl::columnSum(d_src, d_dst);
-
-    cv::Mat dst(d_dst);
-
-    for (int j = 0; j < src.cols; ++j)
-    {
-        float gold = src.at<float>(0, j);
-        float res = dst.at<float>(0, j);
-        ASSERT_NEAR(res, gold, 1e-5);
-    }
-
-    for (int i = 1; i < src.rows; ++i)
-    {
-        for (int j = 0; j < src.cols; ++j)
+        euDist = 0;
+        for(int j = 0; j < dims; j++)
         {
-            float gold = src.at<float>(i, j) += src.at<float>(i - 1, j);
-            float res = dst.at<float>(i, j);
-            ASSERT_NEAR(res, gold, 1e-5);
+            dist = (src[j + gid * dims]
+                    - centers[j + i * dims]);
+            euDist += dist * dist;
+        }
+
+        if(i == 0)
+        {
+            min = euDist;
+            minCentroid = 0;
+        }
+        else if(euDist < min)
+        {
+            min = euDist;
+            minCentroid = i;
         }
     }
+    dists[gid] = min;
+    labels[label_step * gid] = minCentroid;
 }
-
-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
-
-
-#endif
diff --git a/modules/ocl/src/opencl/moments.cl b/modules/ocl/src/opencl/moments.cl
index c86ae494d..5ea94e20c 100644
--- a/modules/ocl/src/opencl/moments.cl
+++ b/modules/ocl/src/opencl/moments.cl
@@ -173,10 +173,10 @@ __kernel void dst_sum(int src_rows, int src_cols, int tile_height, int tile_widt
             sum[i] = dst_sum[i][0];
 }
 
-__kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
+__kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_cols, int src_step,
                            __global F* dst_m,
                            int dst_cols, int dst_step, int blocky,
-                           int type, int depth, int cn, int coi, int binary, int TILE_SIZE)
+                           int depth, int cn, int coi, int binary, int TILE_SIZE)
 {
     uchar tmp_coi[16]; // get the coi data
     uchar16 tmp[16];
@@ -192,35 +192,43 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
     int x = wgidx*TILE_SIZE;  // vector length of uchar
     int kcn = (cn==2)?2:4;
     int rstep = min(src_step, TILE_SIZE);
-    tileSize_height = min(TILE_SIZE, src_rows - y);
-    tileSize_width = min(TILE_SIZE, src_cols - x);
+    int tileSize_height = min(TILE_SIZE, src_rows - y);
+    int tileSize_width = min(TILE_SIZE, src_cols - x);
+
+    if ( y+lidy < src_rows )
+    {
+        if( tileSize_width < TILE_SIZE )
+            for(int i = tileSize_width; i < rstep && (x+i) < src_cols; i++ )
+                *((__global uchar*)src_data+(y+lidy)*src_step+x+i) = 0;
+
+        if( coi > 0 )	//channel of interest
+            for(int i = 0; i < tileSize_width; i += VLEN_C)
+            {
+                for(int j=0; j<VLEN_C; j++)
+                    tmp_coi[j] = *((__global uchar*)src_data+(y+lidy)*src_step+(x+i+j)*kcn+coi-1);
+                tmp[i/VLEN_C] = (uchar16)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7],
+                                          tmp_coi[8],tmp_coi[9],tmp_coi[10],tmp_coi[11],tmp_coi[12],tmp_coi[13],tmp_coi[14],tmp_coi[15]);
+            }
+        else
+            for(int i=0; i < tileSize_width; i+=VLEN_C)
+                tmp[i/VLEN_C] = *(src_data+(y+lidy)*src_step/VLEN_C+(x+i)/VLEN_C);
+    }
 
-    if( tileSize_width < TILE_SIZE )
-        for(int i = tileSize_width; i < rstep; i++ )
-            *((__global uchar*)src_data+(y+lidy)*src_step+x+i) = 0;
-    if( coi > 0 )   //channel of interest
-        for(int i = 0; i < tileSize_width; i += VLEN_C)
-        {
-            for(int j=0; j<VLEN_C; j++)
-                tmp_coi[j] = *((__global uchar*)src_data+(y+lidy)*src_step+(x+i+j)*kcn+coi-1);
-            tmp[i/VLEN_C] = (uchar16)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7],
-                                      tmp_coi[8],tmp_coi[9],tmp_coi[10],tmp_coi[11],tmp_coi[12],tmp_coi[13],tmp_coi[14],tmp_coi[15]);
-        }
-    else
-        for(int i=0; i < tileSize_width; i+=VLEN_C)
-            tmp[i/VLEN_C] = *(src_data+(y+lidy)*src_step/VLEN_C+(x+i)/VLEN_C);
     uchar16 zero = (uchar16)(0);
     uchar16 full = (uchar16)(255);
     if( binary )
         for(int i=0; i < tileSize_width; i+=VLEN_C)
             tmp[i/VLEN_C] = (tmp[i/VLEN_C]!=zero)?full:zero;
+
     F mom[10];
     __local int m[10][128];
-    if(lidy == 0)
+    if(lidy < 128)
+    {
         for(int i=0; i<10; i++)
-            for(int j=0; j<128; j++)
-                m[i][j]=0;
+            m[i][lidy]=0;
+    }
     barrier(CLK_LOCAL_MEM_FENCE);
+
     int lm[10] = {0};
     int16 x0 = (int16)(0);
     int16 x1 = (int16)(0);
@@ -281,6 +289,7 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
                 m[i][lidy-j/2] = lm[i];
         barrier(CLK_LOCAL_MEM_FENCE);
     }
+
     if(lidy == 0&&lidx == 0)
     {
         for( int mt = 0; mt < 10; mt++ )
@@ -328,10 +337,10 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
     }
 }
 
-__kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
+__kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_cols, int src_step,
                            __global F* dst_m,
                            int dst_cols, int dst_step, int blocky,
-                           int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
+                           int depth, int cn, int coi, int binary, const int TILE_SIZE)
 {
     ushort tmp_coi[8]; // get the coi data
     ushort8 tmp[32];
@@ -346,21 +355,26 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
     int x = wgidx*TILE_SIZE;  // real X index of pixel
     int kcn = (cn==2)?2:4;
     int rstep = min(src_step/2, TILE_SIZE);
-    tileSize_height = min(TILE_SIZE, src_rows - y);
-    tileSize_width = min(TILE_SIZE, src_cols -x);
-    if(src_cols > TILE_SIZE && tileSize_width < TILE_SIZE)
-        for(int i=tileSize_width; i < rstep; i++ )
-            *((__global ushort*)src_data+(y+lidy)*src_step/2+x+i) = 0;
-    if( coi > 0 )
-        for(int i=0; i < tileSize_width; i+=VLEN_US)
-        {
-            for(int j=0; j<VLEN_US; j++)
-                tmp_coi[j] = *((__global ushort*)src_data+(y+lidy)*(int)src_step/2+(x+i+j)*kcn+coi-1);
-            tmp[i/VLEN_US] = (ushort8)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]);
-        }
-    else
-        for(int i=0; i < tileSize_width; i+=VLEN_US)
-            tmp[i/VLEN_US] = *(src_data+(y+lidy)*src_step/(2*VLEN_US)+(x+i)/VLEN_US);
+    int tileSize_height = min(TILE_SIZE, src_rows - y);
+    int tileSize_width = min(TILE_SIZE, src_cols -x);
+
+    if ( y+lidy < src_rows )
+    {
+        if(src_cols > TILE_SIZE && tileSize_width < TILE_SIZE)
+            for(int i=tileSize_width; i < rstep && (x+i) < src_cols; i++ )
+                *((__global ushort*)src_data+(y+lidy)*src_step/2+x+i) = 0;
+        if( coi > 0 )
+            for(int i=0; i < tileSize_width; i+=VLEN_US)
+            {
+                for(int j=0; j<VLEN_US; j++)
+                    tmp_coi[j] = *((__global ushort*)src_data+(y+lidy)*(int)src_step/2+(x+i+j)*kcn+coi-1);
+                tmp[i/VLEN_US] = (ushort8)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]);
+            }
+        else
+            for(int i=0; i < tileSize_width; i+=VLEN_US)
+                tmp[i/VLEN_US] = *(src_data+(y+lidy)*src_step/(2*VLEN_US)+(x+i)/VLEN_US);
+    }
+
     ushort8 zero = (ushort8)(0);
     ushort8 full = (ushort8)(255);
     if( binary )
@@ -368,11 +382,11 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
             tmp[i/VLEN_US] = (tmp[i/VLEN_US]!=zero)?full:zero;
     F mom[10];
     __local long m[10][128];
-    if(lidy == 0)
+    if(lidy < 128)
         for(int i=0; i<10; i++)
-            for(int j=0; j<128; j++)
-                m[i][j]=0;
+            m[i][lidy]=0;
     barrier(CLK_LOCAL_MEM_FENCE);
+
     long lm[10] = {0};
     int8 x0 = (int8)(0);
     int8 x1 = (int8)(0);
@@ -422,17 +436,22 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
         lm[0] = x0.s0;             // m00
     }
     barrier(CLK_LOCAL_MEM_FENCE);
+
     for( int j = TILE_SIZE/2; j >= 1; j = j/2 )
     {
         if(lidy < j)
             for( int i = 0; i < 10; i++ )
                 lm[i] = lm[i] + m[i][lidy];
-        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for( int j = TILE_SIZE/2; j >= 1; j = j/2 )
+    {
         if(lidy >= j/2&&lidy < j)
             for( int i = 0; i < 10; i++ )
                 m[i][lidy-j/2] = lm[i];
-        barrier(CLK_LOCAL_MEM_FENCE);
     }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
     if(lidy == 0&&lidx == 0)
     {
         for(int mt = 0; mt < 10; mt++ )
@@ -482,10 +501,10 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
     }
 }
 
-__kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
+__kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols, int src_step,
                            __global F* dst_m,
                            int dst_cols, int dst_step, int blocky,
-                           int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
+                           int depth, int cn, int coi, int binary, const int TILE_SIZE)
 {
     short tmp_coi[8]; // get the coi data
     short8 tmp[32];
@@ -500,21 +519,26 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
     int x = wgidx*TILE_SIZE;  // real X index of pixel
     int kcn = (cn==2)?2:4;
     int rstep = min(src_step/2, TILE_SIZE);
-    tileSize_height = min(TILE_SIZE, src_rows - y);
-    tileSize_width = min(TILE_SIZE, src_cols -x);
-    if(tileSize_width < TILE_SIZE)
-        for(int i = tileSize_width; i < rstep; i++ )
-            *((__global short*)src_data+(y+lidy)*src_step/2+x+i) = 0;
-    if( coi > 0 )
-        for(int i=0; i < tileSize_width; i+=VLEN_S)
-        {
-            for(int j=0; j<VLEN_S; j++)
-                tmp_coi[j] = *((__global short*)src_data+(y+lidy)*src_step/2+(x+i+j)*kcn+coi-1);
-            tmp[i/VLEN_S] = (short8)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]);
-        }
-    else
-        for(int i=0; i < tileSize_width; i+=VLEN_S)
-            tmp[i/VLEN_S] = *(src_data+(y+lidy)*src_step/(2*VLEN_S)+(x+i)/VLEN_S);
+    int tileSize_height = min(TILE_SIZE, src_rows - y);
+    int tileSize_width = min(TILE_SIZE, src_cols -x);
+
+    if ( y+lidy < src_rows )
+    {
+        if(tileSize_width < TILE_SIZE)
+            for(int i = tileSize_width; i < rstep && (x+i) < src_cols; i++ )
+                *((__global short*)src_data+(y+lidy)*src_step/2+x+i) = 0;
+        if( coi > 0 )
+            for(int i=0; i < tileSize_width; i+=VLEN_S)
+            {
+                for(int j=0; j<VLEN_S; j++)
+                    tmp_coi[j] = *((__global short*)src_data+(y+lidy)*src_step/2+(x+i+j)*kcn+coi-1);
+                tmp[i/VLEN_S] = (short8)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]);
+            }
+        else
+            for(int i=0; i < tileSize_width; i+=VLEN_S)
+                tmp[i/VLEN_S] = *(src_data+(y+lidy)*src_step/(2*VLEN_S)+(x+i)/VLEN_S);
+    }
+
     short8 zero = (short8)(0);
     short8 full = (short8)(255);
     if( binary )
@@ -523,10 +547,9 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
 
     F mom[10];
     __local long m[10][128];
-    if(lidy == 0)
+    if(lidy < 128)
         for(int i=0; i<10; i++)
-            for(int j=0; j<128; j++)
-                m[i][j]=0;
+            m[i][lidy]=0;
     barrier(CLK_LOCAL_MEM_FENCE);
     long lm[10] = {0};
     int8 x0 = (int8)(0);
@@ -637,10 +660,10 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
     }
 }
 
-__kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
+__kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols, int src_step,
                             __global F* dst_m,
                             int dst_cols, int dst_step, int blocky,
-                            int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
+                            int depth, int cn, int coi, int binary, const int TILE_SIZE)
 {
     float tmp_coi[4]; // get the coi data
     float4 tmp[64] ;
@@ -654,33 +677,30 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
     int y = wgidy*TILE_SIZE;  // real Y index of pixel
     int x = wgidx*TILE_SIZE;  // real X index of pixel
     int kcn = (cn==2)?2:4;
-    src_step /= sizeof(*src_data);
-    int rstep = min(src_step, TILE_SIZE);
-    tileSize_height = min(TILE_SIZE, src_rows - y);
-    tileSize_width = min(TILE_SIZE, src_cols -x);
+    int rstep = min(src_step/4, TILE_SIZE);
+    int tileSize_height = min(TILE_SIZE, src_rows - y);
+    int tileSize_width = min(TILE_SIZE, src_cols -x);
     int maxIdx = mul24(src_rows, src_cols);
     int yOff = (y+lidy)*src_step;
     int index;
-    if(tileSize_width < TILE_SIZE && yOff < src_rows)
-        for(int i = tileSize_width; i < rstep && (yOff+x+i) < maxIdx; i++ )
-            *(src_data+yOff+x+i) = 0;
-    if( coi > 0 )
-        for(int i=0; i < tileSize_width; i+=VLEN_F)
-        {
-#pragma unroll
-            for(int j=0; j<4; j++)
+
+    if ( y+lidy < src_rows )
+    {
+        if(tileSize_width < TILE_SIZE)
+            for(int i = tileSize_width; i < rstep && (x+i) < src_cols; i++ )
+                *((__global float*)src_data+(y+lidy)*src_step/4+x+i) = 0;
+        if( coi > 0 )
+            for(int i=0; i < tileSize_width; i+=VLEN_F)
             {
-                index = yOff+(x+i+j)*kcn+coi-1;
-                if (index < maxIdx)
-                    tmp_coi[j] = *(src_data+index);
-                else
-                    tmp_coi[j] = 0;
+                for(int j=0; j<4; j++)
+                    tmp_coi[j] = *(src_data+(y+lidy)*src_step/4+(x+i+j)*kcn+coi-1);
+                tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
             }
-            tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
-        }
-    else
-        for(int i=0; i < tileSize_width && (yOff+x+i) < maxIdx; i+=VLEN_F)
-            tmp[i/VLEN_F] = (*(__global float4 *)(src_data+yOff+x+i));
+        else
+            for(int i=0; i < tileSize_width; i+=VLEN_F)
+                tmp[i/VLEN_F] = (float4)(*(src_data+(y+lidy)*src_step/4+x+i),*(src_data+(y+lidy)*src_step/4+x+i+1),*(src_data+(y+lidy)*src_step/4+x+i+2),*(src_data+(y+lidy)*src_step/4+x+i+3));
+    }
+
     float4 zero = (float4)(0);
     float4 full = (float4)(255);
     if( binary )
@@ -688,10 +708,9 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
             tmp[i/VLEN_F] = (tmp[i/VLEN_F]!=zero)?full:zero;
     F mom[10];
     __local F m[10][128];
-    if(lidy == 0)
+    if(lidy < 128)
         for(int i = 0; i < 10; i ++)
-            for(int j = 0; j < 128; j ++)
-                m[i][j] = 0;
+            m[i][lidy] = 0;
     barrier(CLK_LOCAL_MEM_FENCE);
     F lm[10] = {0};
     F4 x0 = (F4)(0);
@@ -729,185 +748,6 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
         m[0][lidy-bheight] = x0.s0;             // m00
     }
 
-    else if(lidy < bheight)
-    {
-        lm[9] = ((F)py) * sy;  // m03
-        lm[8] = ((F)x1.s0) * sy;  // m12
-        lm[7] = ((F)x2.s0) * lidy;  // m21
-        lm[6] = x3.s0;             // m30
-        lm[5] = x0.s0 * sy;        // m02
-        lm[4] = x1.s0 * lidy;         // m11
-        lm[3] = x2.s0;             // m20
-        lm[2] = py;             // m01
-        lm[1] = x1.s0;             // m10
-        lm[0] = x0.s0;             // m00
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    for( int j = TILE_SIZE/2; j >= 1; j = j/2 )
-    {
-        if(lidy < j)
-            for( int i = 0; i < 10; i++ )
-                lm[i] = lm[i] + m[i][lidy];
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if(lidy >= j/2&&lidy < j)
-            for( int i = 0; i < 10; i++ )
-                m[i][lidy-j/2] = lm[i];
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-    if(lidy == 0&&lidx == 0)
-    {
-        for( int mt = 0; mt < 10; mt++ )
-            mom[mt] = (F)lm[mt];
-        if(binary)
-        {
-            F s = 1./255;
-            for( int mt = 0; mt < 10; mt++ )
-                mom[mt] *= s;
-        }
-
-        F xm = x * mom[0], ym = y * mom[0];
-
-        // accumulate moments computed in each tile
-        dst_step /= sizeof(F);
-
-        int dst_x_off = mad24(wgidy, dst_cols, wgidx);
-        int dst_off = 0;
-        int max_dst_index = 10 * blocky * get_global_size(1);
-
-        // + m00 ( = m00' )
-        dst_off = mad24(DST_ROW_00 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[0];
-
-        // + m10 ( = m10' + x*m00' )
-        dst_off = mad24(DST_ROW_10 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[1] + xm;
-
-        // + m01 ( = m01' + y*m00' )
-        dst_off = mad24(DST_ROW_01 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[2] + ym;
-
-        // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
-        dst_off = mad24(DST_ROW_20 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[3] + x * (mom[1] * 2 + xm);
-
-        // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
-        dst_off = mad24(DST_ROW_11 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[4] + x * (mom[2] + ym) + y * mom[1];
-
-        // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
-        dst_off = mad24(DST_ROW_02 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[5] + y * (mom[2] * 2 + ym);
-
-        // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
-        dst_off = mad24(DST_ROW_30 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
-
-        // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
-        dst_off = mad24(DST_ROW_21 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
-
-        // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
-        dst_off = mad24(DST_ROW_12 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
-
-        // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
-        dst_off = mad24(DST_ROW_03 * blocky, dst_step, dst_x_off);
-        if (dst_off < max_dst_index)
-            *(dst_m + dst_off) = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
-    }
-}
-
-__kernel void CvMoments_D6(__global F* src_data,  int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
-                           __global F* dst_m,
-                           int dst_cols, int dst_step, int blocky,
-                           int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
-{
-    F tmp_coi[4]; // get the coi data
-    F4 tmp[64];
-    int VLEN_D = 4; // length of vetor
-    int gidy = get_global_id(0);
-    int gidx = get_global_id(1);
-    int wgidy = get_group_id(0);
-    int wgidx = get_group_id(1);
-    int lidy = get_local_id(0);
-    int lidx = get_local_id(1);
-    int y = wgidy*TILE_SIZE;  // real Y index of pixel
-    int x = wgidx*TILE_SIZE;  // real X index of pixel
-    int kcn = (cn==2)?2:4;
-    int rstep = min(src_step/8, TILE_SIZE);
-    tileSize_height = min(TILE_SIZE,  src_rows - y);
-    tileSize_width = min(TILE_SIZE, src_cols - x);
-
-    if(tileSize_width < TILE_SIZE)
-        for(int i = tileSize_width; i < rstep; i++ )
-            *((__global F*)src_data+(y+lidy)*src_step/8+x+i) = 0;
-    if( coi > 0 )
-        for(int i=0; i < tileSize_width; i+=VLEN_D)
-        {
-            for(int j=0; j<4; j++)
-                tmp_coi[j] = *(src_data+(y+lidy)*src_step/8+(x+i+j)*kcn+coi-1);
-            tmp[i/VLEN_D] = (F4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
-        }
-    else
-        for(int i=0; i < tileSize_width; i+=VLEN_D)
-            tmp[i/VLEN_D] = (F4)(*(src_data+(y+lidy)*src_step/8+x+i),*(src_data+(y+lidy)*src_step/8+x+i+1),*(src_data+(y+lidy)*src_step/8+x+i+2),*(src_data+(y+lidy)*src_step/8+x+i+3));
-    F4 zero = (F4)(0);
-    F4 full = (F4)(255);
-    if( binary )
-        for(int i=0; i < tileSize_width; i+=VLEN_D)
-            tmp[i/VLEN_D] = (tmp[i/VLEN_D]!=zero)?full:zero;
-    F mom[10];
-    __local F m[10][128];
-    if(lidy == 0)
-        for(int i=0; i<10; i++)
-            for(int j=0; j<128; j++)
-                m[i][j]=0;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    F lm[10] = {0};
-    F4 x0 = (F4)(0);
-    F4 x1 = (F4)(0);
-    F4 x2 = (F4)(0);
-    F4 x3 = (F4)(0);
-    for( int xt = 0 ; xt < tileSize_width; xt+=VLEN_D )
-    {
-        F4 v_xt = (F4)(xt, xt+1, xt+2, xt+3);
-        F4 p = tmp[xt/VLEN_D];
-        F4 xp = v_xt * p, xxp = xp * v_xt;
-        x0 += p;
-        x1 += xp;
-        x2 += xxp;
-        x3 += xxp *v_xt;
-    }
-    x0.s0 += x0.s1 + x0.s2 + x0.s3;
-    x1.s0 += x1.s1 + x1.s2 + x1.s3;
-    x2.s0 += x2.s1 + x2.s2 + x2.s3;
-    x3.s0 += x3.s1 + x3.s2 + x3.s3;
-
-    F py = lidy * x0.s0, sy = lidy*lidy;
-    int bheight = min(tileSize_height, TILE_SIZE/2);
-    if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height)
-    {
-        m[9][lidy-bheight] = ((F)py) * sy;  // m03
-        m[8][lidy-bheight] = ((F)x1.s0) * sy;  // m12
-        m[7][lidy-bheight] = ((F)x2.s0) * lidy;  // m21
-        m[6][lidy-bheight] = x3.s0;             // m30
-        m[5][lidy-bheight] = x0.s0 * sy;        // m02
-        m[4][lidy-bheight] = x1.s0 * lidy;         // m11
-        m[3][lidy-bheight] = x2.s0;             // m20
-        m[2][lidy-bheight] = py;             // m01
-        m[1][lidy-bheight] = x1.s0;             // m10
-        m[0][lidy-bheight] = x0.s0;             // m00
-    }
-
     else if(lidy < bheight)
     {
         lm[9] = ((F)py) * sy;  // m03
@@ -980,3 +820,161 @@ __kernel void CvMoments_D6(__global F* src_data,  int src_rows, int src_cols, in
         *(dst_m + mad24(DST_ROW_03 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
     }
 }
+
+__kernel void CvMoments_D6(__global F* src_data,  int src_rows, int src_cols, int src_step,
+                           __global F* dst_m,
+                           int dst_cols, int dst_step, int blocky,
+                           int depth, int cn, int coi, int binary, const int TILE_SIZE)
+{
+    F tmp_coi[4]; // get the coi data
+    F4 tmp[64];
+    int VLEN_D = 4; // length of vetor
+    int gidy = get_global_id(0);
+    int gidx = get_global_id(1);
+    int wgidy = get_group_id(0);
+    int wgidx = get_group_id(1);
+    int lidy = get_local_id(0);
+    int lidx = get_local_id(1);
+    int y = wgidy*TILE_SIZE;  // real Y index of pixel
+    int x = wgidx*TILE_SIZE;  // real X index of pixel
+    int kcn = (cn==2)?2:4;
+    int rstep = min(src_step/8, TILE_SIZE);
+    int tileSize_height = min(TILE_SIZE,  src_rows - y);
+    int tileSize_width = min(TILE_SIZE, src_cols - x);
+
+    if ( y+lidy < src_rows )
+    {
+        if(tileSize_width < TILE_SIZE)
+            for(int i = tileSize_width; i < rstep && (x+i) < src_cols; i++ )
+                *((__global F*)src_data+(y+lidy)*src_step/8+x+i) = 0;
+        if( coi > 0 )
+            for(int i=0; i < tileSize_width; i+=VLEN_D)
+            {
+                for(int j=0; j<4 && ((x+i+j)*kcn+coi-1)<src_cols; j++)
+                    tmp_coi[j] = *(src_data+(y+lidy)*src_step/8+(x+i+j)*kcn+coi-1);
+                tmp[i/VLEN_D] = (F4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
+            }
+        else
+            for(int i=0; i < tileSize_width && (x+i+3) < src_cols; i+=VLEN_D)
+                tmp[i/VLEN_D] = (F4)(*(src_data+(y+lidy)*src_step/8+x+i),*(src_data+(y+lidy)*src_step/8+x+i+1),*(src_data+(y+lidy)*src_step/8+x+i+2),*(src_data+(y+lidy)*src_step/8+x+i+3));
+    }
+
+    F4 zero = (F4)(0);
+    F4 full = (F4)(255);
+    if( binary )
+        for(int i=0; i < tileSize_width; i+=VLEN_D)
+            tmp[i/VLEN_D] = (tmp[i/VLEN_D]!=zero)?full:zero;
+    F mom[10];
+    __local F m[10][128];
+    if(lidy < 128)
+        for(int i=0; i<10; i++)
+            m[i][lidy]=0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    F lm[10] = {0};
+    F4 x0 = (F4)(0);
+    F4 x1 = (F4)(0);
+    F4 x2 = (F4)(0);
+    F4 x3 = (F4)(0);
+    for( int xt = 0 ; xt < tileSize_width; xt+=VLEN_D )
+    {
+        F4 v_xt = (F4)(xt, xt+1, xt+2, xt+3);
+        F4 p = tmp[xt/VLEN_D];
+        F4 xp = v_xt * p, xxp = xp * v_xt;
+        x0 += p;
+        x1 += xp;
+        x2 += xxp;
+        x3 += xxp *v_xt;
+    }
+    x0.s0 += x0.s1 + x0.s2 + x0.s3;
+    x1.s0 += x1.s1 + x1.s2 + x1.s3;
+    x2.s0 += x2.s1 + x2.s2 + x2.s3;
+    x3.s0 += x3.s1 + x3.s2 + x3.s3;
+
+    F py = lidy * x0.s0, sy = lidy*lidy;
+    int bheight = min(tileSize_height, TILE_SIZE/2);
+    if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height)
+    {
+        m[9][lidy-bheight] = ((F)py) * sy;  // m03
+        m[8][lidy-bheight] = ((F)x1.s0) * sy;  // m12
+        m[7][lidy-bheight] = ((F)x2.s0) * lidy;  // m21
+        m[6][lidy-bheight] = x3.s0;             // m30
+        m[5][lidy-bheight] = x0.s0 * sy;        // m02
+        m[4][lidy-bheight] = x1.s0 * lidy;         // m11
+        m[3][lidy-bheight] = x2.s0;             // m20
+        m[2][lidy-bheight] = py;             // m01
+        m[1][lidy-bheight] = x1.s0;             // m10
+        m[0][lidy-bheight] = x0.s0;             // m00
+    }
+    else if(lidy < bheight)
+    {
+        lm[9] = ((F)py) * sy;  // m03
+        lm[8] = ((F)x1.s0) * sy;  // m12
+        lm[7] = ((F)x2.s0) * lidy;  // m21
+        lm[6] = x3.s0;             // m30
+        lm[5] = x0.s0 * sy;        // m02
+        lm[4] = x1.s0 * lidy;         // m11
+        lm[3] = x2.s0;             // m20
+        lm[2] = py;             // m01
+        lm[1] = x1.s0;             // m10
+        lm[0] = x0.s0;             // m00
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for( int j = TILE_SIZE/2; j >= 1; j = j/2 )
+    {
+        if(lidy < j)
+            for( int i = 0; i < 10; i++ )
+                lm[i] = lm[i] + m[i][lidy];
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lidy >= j/2&&lidy < j)
+            for( int i = 0; i < 10; i++ )
+                m[i][lidy-j/2] = lm[i];
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if(lidy == 0&&lidx == 0)
+    {
+        for( int mt = 0; mt < 10; mt++ )
+            mom[mt] = (F)lm[mt];
+        if(binary)
+        {
+            F s = 1./255;
+            for( int mt = 0; mt < 10; mt++ )
+                mom[mt] *= s;
+        }
+
+        F xm = x * mom[0], ym = y * mom[0];
+
+        // accumulate moments computed in each tile
+        dst_step /= sizeof(F);
+
+        // + m00 ( = m00' )
+        *(dst_m + mad24(DST_ROW_00 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[0];
+
+        // + m10 ( = m10' + x*m00' )
+        *(dst_m + mad24(DST_ROW_10 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[1] + xm;
+
+        // + m01 ( = m01' + y*m00' )
+        *(dst_m + mad24(DST_ROW_01 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[2] + ym;
+
+        // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
+        *(dst_m + mad24(DST_ROW_20 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[3] + x * (mom[1] * 2 + xm);
+
+        // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
+        *(dst_m + mad24(DST_ROW_11 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[4] + x * (mom[2] + ym) + y * mom[1];
+
+        // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
+        *(dst_m + mad24(DST_ROW_02 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[5] + y * (mom[2] * 2 + ym);
+
+        // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
+        *(dst_m + mad24(DST_ROW_30 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
+
+        // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
+        *(dst_m + mad24(DST_ROW_21 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
+
+        // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
+        *(dst_m + mad24(DST_ROW_12 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+
+        // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
+        *(dst_m + mad24(DST_ROW_03 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
+    }
+}
diff --git a/modules/ocl/src/opencl/objdetect_hog.cl b/modules/ocl/src/opencl/objdetect_hog.cl
index 64ae3ea98..685eccf68 100644
--- a/modules/ocl/src/opencl/objdetect_hog.cl
+++ b/modules/ocl/src/opencl/objdetect_hog.cl
@@ -43,7 +43,6 @@
 //
 //M*/
 
-
 #define CELL_WIDTH 8
 #define CELL_HEIGHT 8
 #define CELLS_PER_BLOCK_X 2
@@ -54,13 +53,15 @@
 //----------------------------------------------------------------------------
 // Histogram computation
 // 12 threads for a cell, 12x4 threads per block
-__kernel void compute_hists_kernel(
+// Use pre-computed gaussian and interp_weight lookup tables
+__kernel void compute_hists_lut_kernel(
     const int cblock_stride_x, const int cblock_stride_y,
     const int cnbins, const int cblock_hist_size, const int img_block_width,
     const int blocks_in_group, const int blocks_total,
     const int grad_quadstep, const int qangle_step,
     __global const float* grad, __global const uchar* qangle,
-    const float scale, __global float* block_hists, __local float* smem)
+    __global const float* gauss_w_lut,
+    __global float* block_hists, __local float* smem)
 {
     const int lx = get_local_id(0);
     const int lp = lx / 24; /* local group id */
@@ -107,10 +108,10 @@ __kernel void compute_hists_kernel(
 
         int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
 
-        float gaussian = exp(-(dist_center_y * dist_center_y + dist_center_x *
-            dist_center_x) * scale);
-        float interp_weight = (8.f - fabs(dist_y + 0.5f)) *
-            (8.f - fabs(dist_x + 0.5f)) / 64.f;
+        int idx = (dist_center_y + 8) * 16 + (dist_center_x + 8);
+        float gaussian = gauss_w_lut[idx];
+        idx = (dist_y + 8) * 16 + (dist_x + 8);
+        float interp_weight = gauss_w_lut[256+idx];
 
         hist[bin.x * 48] += gaussian * interp_weight * vote.x;
         hist[bin.y * 48] += gaussian * interp_weight * vote.y;
@@ -125,14 +126,14 @@ __kernel void compute_hists_kernel(
         barrier(CLK_LOCAL_MEM_FENCE);
         if (cell_thread_x < 3)
             hist_[0] += hist_[3];
-#ifdef WAVE_SIZE_1
+#ifdef CPU
         barrier(CLK_LOCAL_MEM_FENCE);
 #endif
         if (cell_thread_x == 0)
             final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
                 hist_[0] + hist_[1] + hist_[2];
     }
-#ifdef WAVE_SIZE_1
+#ifdef CPU
     barrier(CLK_LOCAL_MEM_FENCE);
 #endif
 
@@ -145,6 +146,57 @@ __kernel void compute_hists_kernel(
     }
 }
 
+//-------------------------------------------------------------
+//  Normalization of histograms via L2Hys_norm
+//  optimized for the case of 9 bins
+__kernel void normalize_hists_36_kernel(__global float* block_hists,
+                                        const float threshold, __local float *squares)
+{
+    const int tid = get_local_id(0);
+    const int gid = get_global_id(0);
+    const int bid = tid / 36;      /* block-hist id, (0 - 6) */
+    const int boffset = bid * 36;  /* block-hist offset in the work-group */
+    const int hid = tid - boffset; /* histogram bin id, (0 - 35) */
+
+    float elem = block_hists[gid];
+    squares[tid] = elem * elem;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    __local float* smem = squares + boffset;
+    float sum = smem[hid];
+    if (hid < 18)
+        smem[hid] = sum = sum + smem[hid + 18];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (hid < 9)
+        smem[hid] = sum = sum + smem[hid + 9];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (hid < 4)
+        smem[hid] = sum + smem[hid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
+
+    elem = elem / (sqrt(sum) + 3.6f);
+    elem = min(elem, threshold);
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+    squares[tid] = elem * elem;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    sum = smem[hid];
+    if (hid < 18)
+      smem[hid] = sum = sum + smem[hid + 18];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (hid < 9)
+        smem[hid] = sum = sum + smem[hid + 9];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (hid < 4)
+        smem[hid] = sum + smem[hid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
+
+    block_hists[gid] = elem / (sqrt(sum) + 1e-3f);
+}
+
 //-------------------------------------------------------------
 //  Normalization of histograms via L2Hys_norm
 //
@@ -153,76 +205,54 @@ float reduce_smem(volatile __local float* smem, int size)
     unsigned int tid = get_local_id(0);
     float sum = smem[tid];
 
-    if (size >= 512)
-    {
-        if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-    if (size >= 256)
-    {
-        if (tid < 128) smem[tid] = sum = sum + smem[tid + 128];
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-    if (size >= 128)
-    {
-        if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
+    if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+#ifdef CPU
+    if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+#else
     if (tid < 32)
     {
         if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
-#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1)
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16)
-    {
+#if WAVE_SIZE < 32
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16) {
 #endif
         if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
-#ifdef WAVE_SIZE_1
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 8)
-    {
-#endif
         if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
-#ifdef WAVE_SIZE_1
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 4)
-    {
-#endif
         if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
-#ifdef WAVE_SIZE_1
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 2)
-    {
-#endif
         if (size >= 4) smem[tid] = sum = sum + smem[tid + 2];
-#ifdef WAVE_SIZE_1
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 1)
-    {
-#endif
         if (size >= 2) smem[tid] = sum = sum + smem[tid + 1];
     }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    sum = smem[0];
+#endif
 
     return sum;
 }
 
-__kernel void normalize_hists_kernel(const int nthreads, const int block_hist_size, const int img_block_width,
-                                     __global float* block_hists, const float threshold, __local float *squares)
+__kernel void normalize_hists_kernel(
+    const int nthreads, const int block_hist_size, const int img_block_width,
+    __global float* block_hists, const float threshold, __local float *squares)
 {
     const int tid = get_local_id(0);
     const int gidX = get_group_id(0);
     const int gidY = get_group_id(1);
 
-    __global float* hist = block_hists + (gidY * img_block_width + gidX) * block_hist_size + tid;
+    __global float* hist = block_hists + (gidY * img_block_width + gidX) *
+        block_hist_size + tid;
 
     float elem = 0.f;
     if (tid < block_hist_size)
@@ -249,25 +279,101 @@ __kernel void normalize_hists_kernel(const int nthreads, const int block_hist_si
 
 //---------------------------------------------------------------------
 //  Linear SVM based classification
-//
-__kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr_size, const int cdescr_width,
-                                    const int img_win_width, const int img_block_width,
-                                    const int win_block_stride_x, const int win_block_stride_y,
-                                    __global const float * block_hists, __global const float* coefs,
-                                    float free_coef, float threshold, __global uchar* labels)
+//  48x96 window, 9 bins and default parameters
+//  180 threads, each thread corresponds to a bin in a row
+__kernel void classify_hists_180_kernel(
+    const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
+    const int img_win_width, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float * block_hists, __global const float* coefs,
+    float free_coef, float threshold, __global uchar* labels)
 {
     const int tid = get_local_id(0);
     const int gidX = get_group_id(0);
     const int gidY = get_group_id(1);
 
-    __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
+    __global const float* hist = block_hists + (gidY * win_block_stride_y *
+        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
 
     float product = 0.f;
-    for (int i = tid; i < cdescr_size; i += NTHREADS)
+
+    for (int i = 0; i < cdescr_height; i++)
     {
-        int offset_y = i / cdescr_width;
-        int offset_x = i - offset_y * cdescr_width;
-        product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
+        product += coefs[i * cdescr_width + tid] *
+            hist[i * img_block_width * cblock_hist_size + tid];
+    }
+
+    __local float products[180];
+
+    products[tid] = product;
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 90) products[tid] = product = product + products[tid + 90];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 45) products[tid] = product = product + products[tid + 45];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    volatile __local float* smem = products;
+#ifdef CPU
+    if (tid < 13) smem[tid] = product = product + smem[tid + 32];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16) smem[tid] = product = product + smem[tid + 16];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<8) smem[tid] = product = product + smem[tid + 8];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<4) smem[tid] = product = product + smem[tid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<2) smem[tid] = product = product + smem[tid + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
+#else
+    if (tid < 13)
+    {
+        smem[tid] = product = product + smem[tid + 32];
+    }
+#if WAVE_SIZE < 32
+    barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+    if (tid < 16)
+    {
+        smem[tid] = product = product + smem[tid + 16];
+        smem[tid] = product = product + smem[tid + 8];
+        smem[tid] = product = product + smem[tid + 4];
+        smem[tid] = product = product + smem[tid + 2];
+    }
+#endif
+
+    if (tid == 0){
+        product = product + smem[tid + 1];
+        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
+    }
+}
+
+//---------------------------------------------------------------------
+//  Linear SVM based classification
+//  64x128 window, 9 bins and default parameters
+//  256 threads, 252 of them are used
+__kernel void classify_hists_252_kernel(
+    const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
+    const int img_win_width, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float * block_hists, __global const float* coefs,
+    float free_coef, float threshold, __global uchar* labels)
+{
+    const int tid = get_local_id(0);
+    const int gidX = get_group_id(0);
+    const int gidY = get_group_id(1);
+
+    __global const float* hist = block_hists + (gidY * win_block_stride_y *
+        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
+
+    float product = 0.f;
+    if (tid < cdescr_width)
+    {
+        for (int i = 0; i < cdescr_height; i++)
+            product += coefs[i * cdescr_width + tid] *
+                hist[i * img_block_width * cblock_hist_size + tid];
     }
 
     __local float products[NTHREADS];
@@ -283,66 +389,127 @@ __kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr
     barrier(CLK_LOCAL_MEM_FENCE);
 
     volatile __local float* smem = products;
+#ifdef CPU
+    if(tid<32) smem[tid] = product = product + smem[tid + 32];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<16) smem[tid] = product = product + smem[tid + 16];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<8) smem[tid] = product = product + smem[tid + 8];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<4) smem[tid] = product = product + smem[tid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<2) smem[tid] = product = product + smem[tid + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
+#else
     if (tid < 32)
     {
         smem[tid] = product = product + smem[tid + 32];
-#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1)
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16)
-    {
+#if WAVE_SIZE < 32
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16) {
 #endif
         smem[tid] = product = product + smem[tid + 16];
-#ifdef WAVE_SIZE_1
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 8)
-    {
-#endif
         smem[tid] = product = product + smem[tid + 8];
-#ifdef WAVE_SIZE_1
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 4)
-    {
-#endif
         smem[tid] = product = product + smem[tid + 4];
-#ifdef WAVE_SIZE_1
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 2)
-    {
-#endif
         smem[tid] = product = product + smem[tid + 2];
-#ifdef WAVE_SIZE_1
     }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 1)
-    {
 #endif
-        smem[tid] = product = product + smem[tid + 1];
+    if (tid == 0){
+        product = product + smem[tid + 1];
+        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
+    }
+}
+
+//---------------------------------------------------------------------
+//  Linear SVM based classification
+//  256 threads
+__kernel void classify_hists_kernel(
+    const int cdescr_size, const int cdescr_width, const int cblock_hist_size,
+    const int img_win_width, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float * block_hists, __global const float* coefs,
+    float free_coef, float threshold, __global uchar* labels)
+{
+    const int tid = get_local_id(0);
+    const int gidX = get_group_id(0);
+    const int gidY = get_group_id(1);
+
+    __global const float* hist = block_hists + (gidY * win_block_stride_y *
+        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
+
+    float product = 0.f;
+    for (int i = tid; i < cdescr_size; i += NTHREADS)
+    {
+        int offset_y = i / cdescr_width;
+        int offset_x = i - offset_y * cdescr_width;
+        product += coefs[i] *
+            hist[offset_y * img_block_width * cblock_hist_size + offset_x];
     }
 
-    if (tid == 0)
+    __local float products[NTHREADS];
+
+    products[tid] = product;
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 128) products[tid] = product = product + products[tid + 128];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 64) products[tid] = product = product + products[tid + 64];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    volatile __local float* smem = products;
+#ifdef CPU
+    if(tid<32) smem[tid] = product = product + smem[tid + 32];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<16) smem[tid] = product = product + smem[tid + 16];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<8) smem[tid] = product = product + smem[tid + 8];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<4) smem[tid] = product = product + smem[tid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<2) smem[tid] = product = product + smem[tid + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
+#else
+    if (tid < 32)
+    {
+        smem[tid] = product = product + smem[tid + 32];
+#if WAVE_SIZE < 32
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16) {
+#endif
+        smem[tid] = product = product + smem[tid + 16];
+        smem[tid] = product = product + smem[tid + 8];
+        smem[tid] = product = product + smem[tid + 4];
+        smem[tid] = product = product + smem[tid + 2];
+    }
+#endif
+    if (tid == 0){
+        smem[tid] = product = product + smem[tid + 1];
         labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
+    }
 }
 
 //----------------------------------------------------------------------------
 // Extract descriptors
 
-__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width,
-        const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
-        __global const float* block_hists, __global float* descriptors)
+__kernel void extract_descrs_by_rows_kernel(
+    const int cblock_hist_size, const int descriptors_quadstep,
+    const int cdescr_size, const int cdescr_width, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float* block_hists, __global float* descriptors)
 {
     int tid = get_local_id(0);
     int gidX = get_group_id(0);
     int gidY = get_group_id(1);
 
     // Get left top corner of the window in src
-    __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
+    __global const float* hist = block_hists + (gidY * win_block_stride_y *
+        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
 
     // Get left top corner of the window in dst
-    __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
+    __global float* descriptor = descriptors +
+        (gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
 
     // Copy elements from src to dst
     for (int i = tid; i < cdescr_size; i += NTHREADS)
@@ -353,19 +520,23 @@ __kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const in
     }
 }
 
-__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size,
-        const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x,
-        const int win_block_stride_y, __global const float* block_hists, __global float* descriptors)
+__kernel void extract_descrs_by_cols_kernel(
+    const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size,
+    const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float* block_hists, __global float* descriptors)
 {
     int tid = get_local_id(0);
     int gidX = get_group_id(0);
     int gidY = get_group_id(1);
 
     // Get left top corner of the window in src
-    __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
+    __global const float* hist = block_hists +  (gidY * win_block_stride_y *
+        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
 
     // Get left top corner of the window in dst
-    __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
+    __global float* descriptor = descriptors +
+        (gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
 
     // Copy elements from src to dst
     for (int i = tid; i < cdescr_size; i += NTHREADS)
@@ -376,16 +547,19 @@ __kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const in
         int y = block_idx / cnblocks_win_x;
         int x = block_idx - y * cnblocks_win_x;
 
-        descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = hist[(y * img_block_width  + x) * cblock_hist_size + idx_in_block];
+        descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] =
+            hist[(y * img_block_width  + x) * cblock_hist_size + idx_in_block];
     }
 }
 
 //----------------------------------------------------------------------------
 // Gradients computation
 
-__kernel void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step,
-        const __global uchar4 * img, __global float * grad, __global uchar * qangle,
-        const float angle_scale, const char correct_gamma, const int cnbins)
+__kernel void compute_gradients_8UC4_kernel(
+    const int height, const int width,
+    const int img_step, const int grad_quadstep, const int qangle_step,
+    const __global uchar4 * img, __global float * grad, __global uchar * qangle,
+    const float angle_scale, const char correct_gamma, const int cnbins)
 {
     const int x = get_global_id(0);
     const int tid = get_local_id(0);
@@ -426,8 +600,10 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c
     barrier(CLK_LOCAL_MEM_FENCE);
     if (x < width)
     {
-        float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], sh_row[tid + 2 * (NTHREADS + 2)]);
-        float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], sh_row[tid + 2 + 2 * (NTHREADS + 2)]);
+        float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)],
+            sh_row[tid + 2 * (NTHREADS + 2)]);
+        float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)],
+            sh_row[tid + 2 + 2 * (NTHREADS + 2)]);
 
         float3 dx;
         if (correct_gamma == 1)
@@ -482,9 +658,11 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c
     }
 }
 
-__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step,
-        __global const uchar * img, __global float * grad, __global uchar * qangle,
-        const float angle_scale, const char correct_gamma, const int cnbins)
+__kernel void compute_gradients_8UC1_kernel(
+    const int height, const int width,
+    const int img_step, const int grad_quadstep, const int qangle_step,
+    __global const uchar * img, __global float * grad, __global uchar * qangle,
+    const float angle_scale, const char correct_gamma, const int cnbins)
 {
     const int x = get_global_id(0);
     const int tid = get_local_id(0);
@@ -540,42 +718,3 @@ __kernel void compute_gradients_8UC1_kernel(const int height, const int width, c
         grad[ ((gidY * grad_quadstep + x) << 1) + 1 ]   = mag * ang;
     }
 }
-
-//----------------------------------------------------------------------------
-// Resize
-
-__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src,
-                                 int dst_offset, int src_offset, int dst_step, int src_step,
-                                 int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    int sx = (int)floor(dx*ifx+0.5f);
-    int sy = (int)floor(dy*ify+0.5f);
-    sx = min(sx, src_cols-1);
-    sy = min(sy, src_rows-1);
-    int dpos = (dst_offset>>2) + dy * (dst_step>>2) + dx;
-    int spos = (src_offset>>2) + sy * (src_step>>2) + sx;
-
-    if(dx<dst_cols && dy<dst_rows)
-        dst[dpos] = src[spos];
-}
-
-__kernel void resize_8UC1_kernel(__global uchar * dst, __global const uchar * src,
-                                 int dst_offset, int src_offset, int dst_step, int src_step,
-                                 int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    int sx = (int)floor(dx*ifx+0.5f);
-    int sy = (int)floor(dy*ify+0.5f);
-    sx = min(sx, src_cols-1);
-    sy = min(sy, src_rows-1);
-    int dpos = dst_offset + dy * dst_step + dx;
-    int spos = src_offset + sy * src_step + sx;
-
-    if(dx<dst_cols && dy<dst_rows)
-        dst[dpos] = src[spos];
-}
\ No newline at end of file
diff --git a/modules/ocl/src/opencl/optical_flow_farneback.cl b/modules/ocl/src/opencl/optical_flow_farneback.cl
new file mode 100644
index 000000000..7cc564ede
--- /dev/null
+++ b/modules/ocl/src/opencl/optical_flow_farneback.cl
@@ -0,0 +1,450 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Sen Liu, swjtuls1987@126.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#define tx  get_local_id(0)
+#define ty  get_local_id(1)
+#define bx  get_group_id(0)
+#define bdx get_local_size(0)
+
+#define BORDER_SIZE 5
+#define MAX_KSIZE_HALF 100
+
+#ifndef polyN
+#define polyN 5
+#endif
+
+__kernel void polynomialExpansion(__global float * dst,
+                                  __global __const float * src,
+                                  __global __const float * c_g,
+                                  __global __const float * c_xg,
+                                  __global __const float * c_xxg,
+                                  __local float * smem,
+                                  const float4 ig,
+                                  const int height, const int width,
+                                  int dstStep, int srcStep)
+{
+    const int y = get_global_id(1);
+    const int x = bx * (bdx - 2*polyN) + tx - polyN;
+
+    dstStep /= sizeof(*dst);
+    srcStep /= sizeof(*src);
+
+    int xWarped;
+    __local float *row = smem + tx;
+
+    if (y < height && y >= 0)
+    {
+        xWarped = min(max(x, 0), width - 1);
+
+        row[0] = src[mad24(y, srcStep, xWarped)] * c_g[0];
+        row[bdx] = 0.f;
+        row[2*bdx] = 0.f;
+
+#pragma unroll
+        for (int k = 1; k <= polyN; ++k)
+        {
+            float t0 = src[mad24(max(y - k, 0), srcStep, xWarped)];
+            float t1 = src[mad24(min(y + k, height - 1), srcStep, xWarped)];
+
+            row[0] += c_g[k] * (t0 + t1);
+            row[bdx] += c_xg[k] * (t1 - t0);
+            row[2*bdx] += c_xxg[k] * (t0 + t1);
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (y < height && y >= 0 && tx >= polyN && tx + polyN < bdx && x < width)
+    {
+        float b1 = c_g[0] * row[0];
+        float b3 = c_g[0] * row[bdx];
+        float b5 = c_g[0] * row[2*bdx];
+        float b2 = 0, b4 = 0, b6 = 0;
+
+#pragma unroll
+        for (int k = 1; k <= polyN; ++k)
+        {
+            b1 += (row[k] + row[-k]) * c_g[k];
+            b4 += (row[k] + row[-k]) * c_xxg[k];
+            b2 += (row[k] - row[-k]) * c_xg[k];
+            b3 += (row[k + bdx] + row[-k + bdx]) * c_g[k];
+            b6 += (row[k + bdx] - row[-k + bdx]) * c_xg[k];
+            b5 += (row[k + 2*bdx] + row[-k + 2*bdx]) * c_g[k];
+        }
+
+        dst[mad24(y, dstStep, xWarped)] = b3*ig.s0;
+        dst[mad24(height + y, dstStep, xWarped)] = b2*ig.s0;
+        dst[mad24(2*height + y, dstStep, xWarped)] = b1*ig.s1 + b5*ig.s2;
+        dst[mad24(3*height + y, dstStep, xWarped)] = b1*ig.s1 + b4*ig.s2;
+        dst[mad24(4*height + y, dstStep, xWarped)] = b6*ig.s3;
+    }
+}
+
+inline int idx_row_low(const int y, const int last_row)
+{
+    return abs(y) % (last_row + 1);
+}
+
+inline int idx_row_high(const int y, const int last_row)
+{
+    return abs(last_row - abs(last_row - y)) % (last_row + 1);
+}
+
+inline int idx_row(const int y, const int last_row)
+{
+    return idx_row_low(idx_row_high(y, last_row), last_row);
+}
+
+inline int idx_col_low(const int x, const int last_col)
+{
+    return abs(x) % (last_col + 1);
+}
+
+inline int idx_col_high(const int x, const int last_col)
+{
+    return abs(last_col - abs(last_col - x)) % (last_col + 1);
+}
+
+inline int idx_col(const int x, const int last_col)
+{
+    return idx_col_low(idx_col_high(x, last_col), last_col);
+}
+
+__kernel void gaussianBlur(__global float * dst,
+                           __global const float * src,
+                           __global const float * c_gKer,
+                           __local float * smem,
+                           const int height,  const int width,
+                           int dstStep, int srcStep,
+                           const int ksizeHalf)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    dstStep /= sizeof(*dst);
+    srcStep /= sizeof(*src);
+
+    __local float *row = smem + ty * (bdx + 2*ksizeHalf);
+
+    if (y < height)
+    {
+        // Vertical pass
+        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
+        {
+            int xExt = (int)(bx * bdx) + i - ksizeHalf;
+            xExt = idx_col(xExt, width - 1);
+            row[i] = src[mad24(y, srcStep, xExt)] * c_gKer[0];
+            for (int j = 1; j <= ksizeHalf; ++j)
+                row[i] += (src[mad24(idx_row_low(y - j, height - 1), srcStep, xExt)]
+                           + src[mad24(idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j];
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (y < height && y >= 0 && x < width && x >= 0)
+    {
+        // Horizontal pass
+        row += tx + ksizeHalf;
+        float res = row[0] * c_gKer[0];
+        for (int i = 1; i <= ksizeHalf; ++i)
+            res += (row[-i] + row[i]) * c_gKer[i];
+
+        dst[mad24(y, dstStep, x)] = res;
+    }
+}
+
+__constant float c_border[BORDER_SIZE + 1] = { 0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f };
+
+__kernel void updateMatrices(__global float * M,
+                             __global const float * flowx, __global const float * flowy,
+                             __global const float * R0, __global const float * R1,
+                             const int height, const int width,
+                             int mStep, int xStep,  int yStep, int R0Step, int R1Step)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    mStep /= sizeof(*M);
+    xStep /= sizeof(*flowx);
+    yStep /= sizeof(*flowy);
+    R0Step /= sizeof(*R0);
+    R1Step /= sizeof(*R1);
+
+    if (y < height && y >= 0 && x < width && x >= 0)
+    {
+        float dx = flowx[mad24(y, xStep, x)];
+        float dy = flowy[mad24(y, yStep, x)];
+        float fx = x + dx;
+        float fy = y + dy;
+
+        int x1 = convert_int(floor(fx));
+        int y1 = convert_int(floor(fy));
+        fx -= x1;
+        fy -= y1;
+
+        float r2, r3, r4, r5, r6;
+
+        if (x1 >= 0 && y1 >= 0 && x1 < width - 1 && y1 < height - 1)
+        {
+            float a00 = (1.f - fx) * (1.f - fy);
+            float a01 = fx * (1.f - fy);
+            float a10 = (1.f - fx) * fy;
+            float a11 = fx * fy;
+
+            r2 = a00 * R1[mad24(y1, R1Step, x1)] +
+                 a01 * R1[mad24(y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(y1 + 1, R1Step, x1 + 1)];
+
+            r3 = a00 * R1[mad24(height + y1, R1Step, x1)] +
+                 a01 * R1[mad24(height + y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(height + y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(height + y1 + 1, R1Step, x1 + 1)];
+
+            r4 = a00 * R1[mad24(2*height + y1, R1Step, x1)] +
+                 a01 * R1[mad24(2*height + y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(2*height + y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(2*height + y1 + 1, R1Step, x1 + 1)];
+
+            r5 = a00 * R1[mad24(3*height + y1, R1Step, x1)] +
+                 a01 * R1[mad24(3*height + y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(3*height + y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(3*height + y1 + 1, R1Step, x1 + 1)];
+
+            r6 = a00 * R1[mad24(4*height + y1, R1Step, x1)] +
+                 a01 * R1[mad24(4*height + y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(4*height + y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(4*height + y1 + 1, R1Step, x1 + 1)];
+
+            r4 = (R0[mad24(2*height + y, R0Step, x)] + r4) * 0.5f;
+            r5 = (R0[mad24(3*height + y, R0Step, x)] + r5) * 0.5f;
+            r6 = (R0[mad24(4*height + y, R0Step, x)] + r6) * 0.25f;
+        }
+        else
+        {
+            r2 = r3 = 0.f;
+            r4 = R0[mad24(2*height + y, R0Step, x)];
+            r5 = R0[mad24(3*height + y, R0Step, x)];
+            r6 = R0[mad24(4*height + y, R0Step, x)] * 0.5f;
+        }
+
+        r2 = (R0[mad24(y, R0Step, x)] - r2) * 0.5f;
+        r3 = (R0[mad24(height + y, R0Step, x)] - r3) * 0.5f;
+
+        r2 += r4*dy + r6*dx;
+        r3 += r6*dy + r5*dx;
+
+        float scale =
+            c_border[min(x, BORDER_SIZE)] *
+            c_border[min(y, BORDER_SIZE)] *
+            c_border[min(width - x - 1, BORDER_SIZE)] *
+            c_border[min(height - y - 1, BORDER_SIZE)];
+
+        r2 *= scale;
+        r3 *= scale;
+        r4 *= scale;
+        r5 *= scale;
+        r6 *= scale;
+
+        M[mad24(y, mStep, x)] = r4*r4 + r6*r6;
+        M[mad24(height + y, mStep, x)] = (r4 + r5)*r6;
+        M[mad24(2*height + y, mStep, x)] = r5*r5 + r6*r6;
+        M[mad24(3*height + y, mStep, x)] = r4*r2 + r6*r3;
+        M[mad24(4*height + y, mStep, x)] = r6*r2 + r5*r3;
+    }
+}
+
+__kernel void boxFilter5(__global float * dst,
+                         __global const float * src,
+                         __local float * smem,
+                         const int height,  const int width,
+                         int dstStep, int srcStep,
+                         const int ksizeHalf)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    const float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
+    const int smw = bdx + 2*ksizeHalf; // shared memory "width"
+    __local float *row = smem + 5 * ty * smw;
+
+    dstStep /= sizeof(*dst);
+    srcStep /= sizeof(*src);
+
+    if (y < height)
+    {
+        // Vertical pass
+        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
+        {
+            int xExt = (int)(bx * bdx) + i - ksizeHalf;
+            xExt = min(max(xExt, 0), width - 1);
+
+#pragma unroll
+            for (int k = 0; k < 5; ++k)
+                row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)];
+
+            for (int j = 1; j <= ksizeHalf; ++j)
+#pragma unroll
+                for (int k = 0; k < 5; ++k)
+                    row[k*smw + i] +=
+                        src[mad24(k*height + max(y - j, 0), srcStep, xExt)] +
+                        src[mad24(k*height + min(y + j, height - 1), srcStep, xExt)];
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (y < height && y >= 0 && x < width && x >= 0)
+    {
+        // Horizontal pass
+
+        row += tx + ksizeHalf;
+        float res[5];
+
+#pragma unroll
+        for (int k = 0; k < 5; ++k)
+            res[k] = row[k*smw];
+
+        for (int i = 1; i <= ksizeHalf; ++i)
+#pragma unroll
+            for (int k = 0; k < 5; ++k)
+                res[k] += row[k*smw - i] + row[k*smw + i];
+
+#pragma unroll
+        for (int k = 0; k < 5; ++k)
+            dst[mad24(k*height + y, dstStep, x)] = res[k] * boxAreaInv;
+    }
+}
+
+__kernel void updateFlow(__global float4 * flowx, __global float4 * flowy,
+                         __global const float4 * M,
+                         const int height, const int width,
+                         int xStep, int yStep, int mStep)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    xStep /= sizeof(*flowx);
+    yStep /= sizeof(*flowy);
+    mStep /= sizeof(*M);
+
+    if (y < height && y >= 0 && x < width && x >= 0)
+    {
+        float4 g11 = M[mad24(y, mStep, x)];
+        float4 g12 = M[mad24(height + y, mStep, x)];
+        float4 g22 = M[mad24(2*height + y, mStep, x)];
+        float4 h1 =  M[mad24(3*height + y, mStep, x)];
+        float4 h2 =  M[mad24(4*height + y, mStep, x)];
+
+        float4 detInv = (float4)(1.f) / (g11*g22 - g12*g12 + (float4)(1e-3f));
+
+        flowx[mad24(y, xStep, x)] = (g11*h2 - g12*h1) * detInv;
+        flowy[mad24(y, yStep, x)] = (g22*h1 - g12*h2) * detInv;
+    }
+}
+
+__kernel void gaussianBlur5(__global float * dst,
+                            __global const float * src,
+                            __global const float * c_gKer,
+                            __local float * smem,
+                            const int height,  const int width,
+                            int dstStep, int srcStep,
+                            const int ksizeHalf)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    const int smw = bdx + 2*ksizeHalf; // shared memory "width"
+    __local volatile float *row = smem + 5 * ty * smw;
+
+    dstStep /= sizeof(*dst);
+    srcStep /= sizeof(*src);
+
+    if (y < height)
+    {
+        // Vertical pass
+        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
+        {
+            int xExt = (int)(bx * bdx) + i - ksizeHalf;
+            xExt = idx_col(xExt, width - 1);
+
+#pragma unroll
+            for (int k = 0; k < 5; ++k)
+                row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)] * c_gKer[0];
+
+            for (int j = 1; j <= ksizeHalf; ++j)
+#pragma unroll
+                for (int k = 0; k < 5; ++k)
+                    row[k*smw + i] +=
+                        (src[mad24(k*height + idx_row_low(y - j, height - 1), srcStep, xExt)] +
+                         src[mad24(k*height + idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j];
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (y < height && y >= 0 && x < width && x >= 0)
+    {
+        // Horizontal pass
+
+        row += tx + ksizeHalf;
+        float res[5];
+
+#pragma unroll
+        for (int k = 0; k < 5; ++k)
+            res[k] = row[k*smw] * c_gKer[0];
+
+        for (int i = 1; i <= ksizeHalf; ++i)
+#pragma unroll
+            for (int k = 0; k < 5; ++k)
+                res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i];
+
+#pragma unroll
+        for (int k = 0; k < 5; ++k)
+            dst[mad24(k*height + y, dstStep, x)] = res[k];
+    }
+}
diff --git a/modules/ocl/src/opencl/pyrlk.cl b/modules/ocl/src/opencl/pyrlk.cl
index 40a199395..02cf3afa4 100644
--- a/modules/ocl/src/opencl/pyrlk.cl
+++ b/modules/ocl/src/opencl/pyrlk.cl
@@ -17,6 +17,7 @@
 // @Authors
 //    Dachuan Zhao, dachuan@multicorewareinc.com
 //    Yao Wang, bitwangyaoyao@gmail.com
+//    Xiaopeng Fu, fuxiaopeng2222@163.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -47,6 +48,7 @@
 //#pragma OPENCL EXTENSION cl_amd_printf : enable
 
 #define	BUFFER	64
+#define	BUFFER2	BUFFER>>1
 #ifndef WAVE_SIZE
 #define WAVE_SIZE 1
 #endif
@@ -58,53 +60,16 @@ void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local
     smem3[tid] = val3;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    if (tid < 32)
+    for(int i = BUFFER2; i > 0; i >>= 1)
     {
-        smem1[tid] += smem1[tid + 32];
-        smem2[tid] += smem2[tid + 32];
-        smem3[tid] += smem3[tid + 32];
+        if(tid < i)
+        {
+            smem1[tid] += smem1[tid + i];
+            smem2[tid] += smem2[tid + i];
+            smem3[tid] += smem3[tid + i];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
     }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 16)
-    {
-        smem1[tid] += smem1[tid + 16];
-        smem2[tid] += smem2[tid + 16];
-        smem3[tid] += smem3[tid + 16];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 8)
-    {
-        smem1[tid] += smem1[tid + 8];
-        smem2[tid] += smem2[tid + 8];
-        smem3[tid] += smem3[tid + 8];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 4)
-    {
-        smem1[tid] += smem1[tid + 4];
-        smem2[tid] += smem2[tid + 4];
-        smem3[tid] += smem3[tid + 4];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 2)
-    {
-        smem1[tid] += smem1[tid + 2];
-        smem2[tid] += smem2[tid + 2];
-        smem3[tid] += smem3[tid + 2];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 1)
-    {
-        smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
-        smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
-        smem3[BUFFER] = smem3[tid] + smem3[tid + 1];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
 }
 
 void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
@@ -113,47 +78,15 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
     smem2[tid] = val2;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    if (tid < 32)
+    for(int i = BUFFER2; i > 0; i >>= 1)
     {
-        smem1[tid] += smem1[tid + 32];
-        smem2[tid] += smem2[tid + 32];
+        if(tid < i)
+        {
+            smem1[tid] += smem1[tid + i];
+            smem2[tid] += smem2[tid + i];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
     }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 16)
-    {
-        smem1[tid] += smem1[tid + 16];
-        smem2[tid] += smem2[tid + 16];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 8)
-    {
-        smem1[tid] += smem1[tid + 8];
-        smem2[tid] += smem2[tid + 8];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 4)
-    {
-        smem1[tid] += smem1[tid + 4];
-        smem2[tid] += smem2[tid + 4];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 2)
-    {
-        smem1[tid] += smem1[tid + 2];
-        smem2[tid] += smem2[tid + 2];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 1)
-    {
-        smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
-        smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
 }
 
 void reduce1(float val1, volatile __local float* smem1, int tid)
@@ -161,45 +94,18 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
     smem1[tid] = val1;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    if (tid < 32)
+    for(int i = BUFFER2; i > 0; i >>= 1)
     {
-        smem1[tid] += smem1[tid + 32];
+        if(tid < i)
+        {
+            smem1[tid] += smem1[tid + i];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
     }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 16)
-    {
-        smem1[tid] += smem1[tid + 16];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 8)
-    {
-        smem1[tid] += smem1[tid + 8];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 4)
-    {
-        smem1[tid] += smem1[tid + 4];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 2)
-    {
-        smem1[tid] += smem1[tid + 2];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (tid < 1)
-    {
-        smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
 }
 #else
-void reduce3(float val1, float val2, float val3, 
-__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
+void reduce3(float val1, float val2, float val3,
+             __local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
 {
     smem1[tid] = val1;
     smem2[tid] = val2;
@@ -212,15 +118,19 @@ __local volatile float* smem1, __local volatile float* smem2, __local volatile f
         smem2[tid] += smem2[tid + 32];
         smem3[tid] += smem3[tid + 32];
 #if WAVE_SIZE < 32
-	} barrier(CLK_LOCAL_MEM_FENCE);
-	if (tid < 16) {
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16)
+    {
 #endif
         smem1[tid] += smem1[tid + 16];
         smem2[tid] += smem2[tid + 16];
         smem3[tid] += smem3[tid + 16];
 #if WAVE_SIZE <16
-	} barrier(CLK_LOCAL_MEM_FENCE);
-	if (tid < 8) {
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 8)
+    {
 #endif
         smem1[tid] += smem1[tid + 8];
         smem2[tid] += smem2[tid + 8];
@@ -238,6 +148,7 @@ __local volatile float* smem1, __local volatile float* smem2, __local volatile f
         smem2[tid] += smem2[tid + 1];
         smem3[tid] += smem3[tid + 1];
     }
+    barrier(CLK_LOCAL_MEM_FENCE);
 }
 
 void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
@@ -251,14 +162,18 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
         smem1[tid] += smem1[tid + 32];
         smem2[tid] += smem2[tid + 32];
 #if WAVE_SIZE < 32
-	} barrier(CLK_LOCAL_MEM_FENCE);
-	if (tid < 16) {
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16)
+    {
 #endif
         smem1[tid] += smem1[tid + 16];
         smem2[tid] += smem2[tid + 16];
 #if WAVE_SIZE <16
-	} barrier(CLK_LOCAL_MEM_FENCE);
-	if (tid < 8) {
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 8)
+    {
 #endif
         smem1[tid] += smem1[tid + 8];
         smem2[tid] += smem2[tid + 8];
@@ -272,6 +187,7 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
         smem1[tid] += smem1[tid + 1];
         smem2[tid] += smem2[tid + 1];
     }
+    barrier(CLK_LOCAL_MEM_FENCE);
 }
 
 void reduce1(float val1, __local volatile float* smem1, int tid)
@@ -283,19 +199,24 @@ void reduce1(float val1, __local volatile float* smem1, int tid)
     {
         smem1[tid] += smem1[tid + 32];
 #if WAVE_SIZE < 32
-	} barrier(CLK_LOCAL_MEM_FENCE);
-	if (tid < 16) {
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16)
+    {
 #endif
         smem1[tid] += smem1[tid + 16];
 #if WAVE_SIZE <16
-	} barrier(CLK_LOCAL_MEM_FENCE);
-	if (tid < 8) {
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 8)
+    {
 #endif
         smem1[tid] += smem1[tid + 8];
         smem1[tid] += smem1[tid + 4];
         smem1[tid] += smem1[tid + 2];
         smem1[tid] += smem1[tid + 1];
     }
+    barrier(CLK_LOCAL_MEM_FENCE);
 }
 #endif
 
@@ -306,106 +227,100 @@ void reduce1(float val1, __local volatile float* smem1, int tid)
 __constant sampler_t sampler    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
 
 void SetPatch(image2d_t I, float x, float y,
-                                float* Pch, float* Dx, float* Dy,
-                                float* A11, float* A12, float* A22)
+              float* Pch, float* Dx, float* Dy,
+              float* A11, float* A12, float* A22)
 {
-            *Pch = read_imagef(I, sampler, (float2)(x, y)).x;
+    *Pch = read_imagef(I, sampler, (float2)(x, y)).x;
 
-            float dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
-                             (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x);
+    float dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
+                 (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x);
 
-            float dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
-                            (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x);
+    float dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
+                 (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x);
 
 
-            *Dx = dIdx;
-            *Dy = dIdy;
+    *Dx = dIdx;
+    *Dy = dIdy;
 
-            *A11 += dIdx * dIdx;
-            *A12 += dIdx * dIdy;
-            *A22 += dIdy * dIdy;
+    *A11 += dIdx * dIdx;
+    *A12 += dIdx * dIdy;
+    *A22 += dIdy * dIdy;
 }
 
 void GetPatch(image2d_t J, float x, float y,
-                                float* Pch, float* Dx, float* Dy,
-                                float* b1, float* b2)
+              float* Pch, float* Dx, float* Dy,
+              float* b1, float* b2)
 {
-                float J_val = read_imagef(J, sampler, (float2)(x, y)).x;
-                float diff = (J_val - *Pch) * 32.0f;
-                *b1 += diff**Dx;
-                *b2 += diff**Dy;
+    float J_val = read_imagef(J, sampler, (float2)(x, y)).x;
+    float diff = (J_val - *Pch) * 32.0f;
+    *b1 += diff**Dx;
+    *b2 += diff**Dy;
 }
 
 void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval)
 {
-        float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch;
-        *errval += fabs(diff);
+    float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch;
+    *errval += fabs(diff);
 }
 
 void SetPatch4(image2d_t I, const float x, const float y,
-                                float4* Pch, float4* Dx, float4* Dy,
-                                float* A11, float* A12, float* A22)
+               float4* Pch, float4* Dx, float4* Dy,
+               float* A11, float* A12, float* A22)
 {
-            *Pch = read_imagef(I, sampler, (float2)(x, y));
+    *Pch = read_imagef(I, sampler, (float2)(x, y));
 
-            float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
-                             (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)));
+    float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
+                  (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)));
 
-            float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
-                            (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)));
+    float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
+                  (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)));
 
 
-            *Dx = dIdx;
-            *Dy = dIdy;
-                        float4 sqIdx = dIdx * dIdx;
-                        *A11 += sqIdx.x + sqIdx.y + sqIdx.z;
-                        sqIdx = dIdx * dIdy;
-                        *A12 += sqIdx.x + sqIdx.y + sqIdx.z;
-                        sqIdx = dIdy * dIdy;
-                        *A22 += sqIdx.x + sqIdx.y + sqIdx.z;
+    *Dx = dIdx;
+    *Dy = dIdy;
+    float4 sqIdx = dIdx * dIdx;
+    *A11 += sqIdx.x + sqIdx.y + sqIdx.z;
+    sqIdx = dIdx * dIdy;
+    *A12 += sqIdx.x + sqIdx.y + sqIdx.z;
+    sqIdx = dIdy * dIdy;
+    *A22 += sqIdx.x + sqIdx.y + sqIdx.z;
 }
 
 void GetPatch4(image2d_t J, const float x, const float y,
-                                const float4* Pch, const float4* Dx, const float4* Dy,
-                                float* b1, float* b2)
+               const float4* Pch, const float4* Dx, const float4* Dy,
+               float* b1, float* b2)
 {
-                float4 J_val = read_imagef(J, sampler, (float2)(x, y));
-                float4 diff = (J_val - *Pch) * 32.0f;
-                                float4 xdiff = diff* *Dx;
-                                *b1 += xdiff.x + xdiff.y + xdiff.z;
-                                xdiff = diff* *Dy;
-                                *b2 += xdiff.x + xdiff.y + xdiff.z;
+    float4 J_val = read_imagef(J, sampler, (float2)(x, y));
+    float4 diff = (J_val - *Pch) * 32.0f;
+    float4 xdiff = diff* *Dx;
+    *b1 += xdiff.x + xdiff.y + xdiff.z;
+    xdiff = diff* *Dy;
+    *b2 += xdiff.x + xdiff.y + xdiff.z;
 }
 
 void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval)
 {
-        float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch;
-        *errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
+    float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch;
+    *errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
 }
 
 #define	GRIDSIZE	3
 __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
-    __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
-        const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
+                             __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
+                             const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
 {
-#ifdef CPU
-    __local float smem1[BUFFER+1];
-    __local float smem2[BUFFER+1];
-    __local float smem3[BUFFER+1];
-#else
     __local float smem1[BUFFER];
     __local float smem2[BUFFER];
     __local float smem3[BUFFER];
-#endif
 
-        unsigned int xid=get_local_id(0);
-        unsigned int yid=get_local_id(1);
-        unsigned int gid=get_group_id(0);
-        unsigned int xsize=get_local_size(0);
-        unsigned int ysize=get_local_size(1);
-        int xBase, yBase, i, j, k;
+    unsigned int xid=get_local_id(0);
+    unsigned int yid=get_local_id(1);
+    unsigned int gid=get_group_id(0);
+    unsigned int xsize=get_local_size(0);
+    unsigned int ysize=get_local_size(1);
+    int xBase, yBase, i, j, k;
 
-        float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
+    float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
 
     const int tid = mad24(yid, xsize, xid);
 
@@ -432,77 +347,71 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
     float dIdx_patch[GRIDSIZE][GRIDSIZE];
     float dIdy_patch[GRIDSIZE][GRIDSIZE];
 
-        yBase=yid;
-        {
-                xBase=xid;
-                SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                        &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
-                                        &A11, &A12, &A22);
+    yBase=yid;
+    {
+        xBase=xid;
+        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                 &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
+                 &A11, &A12, &A22);
 
 
-                xBase+=xsize;
-                SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                        &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                 &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
+                 &A11, &A12, &A22);
 
-                xBase+=xsize;
-                if(xBase<c_winSize_x)
-                SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                        &I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
-                                        &A11, &A12, &A22);
-        }
-        yBase+=ysize;
-        {
-                xBase=xid;
-                SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                        &I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        if(xBase<c_winSize_x)
+            SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
+                     &A11, &A12, &A22);
+    }
+    yBase+=ysize;
+    {
+        xBase=xid;
+        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                 &I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
+                 &A11, &A12, &A22);
 
 
-                xBase+=xsize;
-                SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                        &I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                 &I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
+                 &A11, &A12, &A22);
 
-                xBase+=xsize;
-                if(xBase<c_winSize_x)
-                SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                        &I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
-                                        &A11, &A12, &A22);
-        }
-        yBase+=ysize;
-        if(yBase<c_winSize_y)
-        {
-                xBase=xid;
-                SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                        &I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        if(xBase<c_winSize_x)
+            SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
+                     &A11, &A12, &A22);
+    }
+    yBase+=ysize;
+    if(yBase<c_winSize_y)
+    {
+        xBase=xid;
+        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                 &I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
+                 &A11, &A12, &A22);
 
 
-                xBase+=xsize;
-                SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                        &I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                 &I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
+                 &A11, &A12, &A22);
 
-                xBase+=xsize;
-                if(xBase<c_winSize_x)
-                SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                        &I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
-                                        &A11, &A12, &A22);
-        }
+        xBase+=xsize;
+        if(xBase<c_winSize_x)
+            SetPatch(I, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
+                     &A11, &A12, &A22);
+    }
 
     reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
-    barrier(CLK_LOCAL_MEM_FENCE);
 
-#ifdef CPU
-    A11 = smem1[BUFFER];
-    A12 = smem2[BUFFER];
-    A22 = smem3[BUFFER];
-#else
     A11 = smem1[0];
     A12 = smem2[0];
     A22 = smem3[0];
-#endif
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     float D = A11 * A22 - A12 * A12;
 
@@ -532,81 +441,76 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
         float b1 = 0;
         float b2 = 0;
 
-                yBase=yid;
-                {
-                        xBase=xid;
-                        GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
-                                                &b1, &b2);
+        yBase=yid;
+        {
+            xBase=xid;
+            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
+                     &b1, &b2);
 
 
-                        xBase+=xsize;
-                        GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
-                                                &b1, &b2);
+            xBase+=xsize;
+            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
+                     &b1, &b2);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
-                                                &b1, &b2);
-                }
-                yBase+=ysize;
-                {
-                        xBase=xid;
-                        GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
-                                                &b1, &b2);
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                         &I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
+                         &b1, &b2);
+        }
+        yBase+=ysize;
+        {
+            xBase=xid;
+            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
+                     &b1, &b2);
 
 
-                        xBase+=xsize;
-                        GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
-                                                &b1, &b2);
+            xBase+=xsize;
+            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
+                     &b1, &b2);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
-                                                &b1, &b2);
-                }
-                yBase+=ysize;
-                if(yBase<c_winSize_y)
-                {
-                        xBase=xid;
-                        GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
-                                                &b1, &b2);
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                         &I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
+                         &b1, &b2);
+        }
+        yBase+=ysize;
+        if(yBase<c_winSize_y)
+        {
+            xBase=xid;
+            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
+                     &b1, &b2);
 
 
-                        xBase+=xsize;
-                        GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
-                                                &b1, &b2);
+            xBase+=xsize;
+            GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
+                     &b1, &b2);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
-                                                &b1, &b2);
-                }
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                         &I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
+                         &b1, &b2);
+        }
 
         reduce2(b1, b2, smem1, smem2, tid);
-        barrier(CLK_LOCAL_MEM_FENCE);
 
-#ifdef CPU
-        b1 = smem1[BUFFER];
-        b2 = smem2[BUFFER];
-#else
         b1 = smem1[0];
         b2 = smem2[0];
-#endif
+        barrier(CLK_LOCAL_MEM_FENCE);
 
         float2 delta;
         delta.x = A12 * b2 - A22 * b1;
         delta.y = A12 * b1 - A11 * b2;
 
-                prevPt += delta;
+        prevPt += delta;
 
         if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
             break;
@@ -615,97 +519,87 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
     D = 0.0f;
     if (calcErr)
     {
-                yBase=yid;
-                {
-                        xBase=xid;
-                        GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[0][0], &D);
+        yBase=yid;
+        {
+            xBase=xid;
+            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[0][0], &D);
 
 
-                        xBase+=xsize;
-                        GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[0][1], &D);
+            xBase+=xsize;
+            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[0][1], &D);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[0][2], &D);
-                }
-                yBase+=ysize;
-                {
-                        xBase=xid;
-                        GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[1][0], &D);
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                         &I_patch[0][2], &D);
+        }
+        yBase+=ysize;
+        {
+            xBase=xid;
+            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[1][0], &D);
 
 
-                        xBase+=xsize;
-                        GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[1][1], &D);
+            xBase+=xsize;
+            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[1][1], &D);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[1][2], &D);
-                }
-                yBase+=ysize;
-                if(yBase<c_winSize_y)
-                {
-                        xBase=xid;
-                        GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[2][0], &D);
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                         &I_patch[1][2], &D);
+        }
+        yBase+=ysize;
+        if(yBase<c_winSize_y)
+        {
+            xBase=xid;
+            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[2][0], &D);
 
 
-                        xBase+=xsize;
-                        GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[2][1], &D);
+            xBase+=xsize;
+            GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                     &I_patch[2][1], &D);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
-                                                &I_patch[2][2], &D);
-                }
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,
+                         &I_patch[2][2], &D);
+        }
 
         reduce1(D, smem1, tid);
     }
 
     if (tid == 0)
     {
-                prevPt += c_halfWin;
+        prevPt += c_halfWin;
 
         nextPts[gid] = prevPt;
 
         if (calcErr)
-#ifdef CPU
-            err[gid] = smem1[BUFFER] / (float)(c_winSize_x * c_winSize_y);
-#else
             err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);
-#endif
     }
 }
 
 
 __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
-    __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
-        const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
+                             __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
+                             const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
 {
-#ifdef CPU
-     __local float smem1[BUFFER+1];
-     __local float smem2[BUFFER+1];
-     __local float smem3[BUFFER+1];
-#else
-     __local float smem1[BUFFER];
-     __local float smem2[BUFFER];
-     __local float smem3[BUFFER];
-#endif
+    __local float smem1[BUFFER];
+    __local float smem2[BUFFER];
+    __local float smem3[BUFFER];
 
-        unsigned int xid=get_local_id(0);
-        unsigned int yid=get_local_id(1);
-        unsigned int gid=get_group_id(0);
-        unsigned int xsize=get_local_size(0);
-        unsigned int ysize=get_local_size(1);
-        int xBase, yBase, i, j, k;
+    unsigned int xid=get_local_id(0);
+    unsigned int yid=get_local_id(1);
+    unsigned int gid=get_group_id(0);
+    unsigned int xsize=get_local_size(0);
+    unsigned int ysize=get_local_size(1);
+    int xBase, yBase, i, j, k;
 
-        float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
+    float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
 
     const int tid = mad24(yid, xsize, xid);
 
@@ -721,7 +615,7 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
         return;
     }
 
-        nextPt -= c_halfWin;
+    nextPt -= c_halfWin;
 
     // extract the patch from the first image, compute covariation matrix of derivatives
 
@@ -732,80 +626,74 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
     float4 I_patch[8];
     float4 dIdx_patch[8];
     float4 dIdy_patch[8];
-        float4 I_add,Dx_add,Dy_add;
+    float4 I_add,Dx_add,Dy_add;
 
-        yBase=yid;
-        {
-                xBase=xid;
-                SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                        &I_patch[0], &dIdx_patch[0], &dIdy_patch[0],
-                                        &A11, &A12, &A22);
+    yBase=yid;
+    {
+        xBase=xid;
+        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                  &I_patch[0], &dIdx_patch[0], &dIdy_patch[0],
+                  &A11, &A12, &A22);
 
 
-                xBase+=xsize;
-                SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                        &I_patch[1], &dIdx_patch[1], &dIdy_patch[1],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                  &I_patch[1], &dIdx_patch[1], &dIdy_patch[1],
+                  &A11, &A12, &A22);
 
-                xBase+=xsize;
-                if(xBase<c_winSize_x)
-                SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                        &I_patch[2], &dIdx_patch[2], &dIdy_patch[2],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        if(xBase<c_winSize_x)
+            SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[2], &dIdx_patch[2], &dIdy_patch[2],
+                      &A11, &A12, &A22);
 
-        }
-        yBase+=ysize;
-        {
-                xBase=xid;
-                SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                        &I_patch[3], &dIdx_patch[3], &dIdy_patch[3],
-                                        &A11, &A12, &A22);
+    }
+    yBase+=ysize;
+    {
+        xBase=xid;
+        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                  &I_patch[3], &dIdx_patch[3], &dIdy_patch[3],
+                  &A11, &A12, &A22);
 
 
-                xBase+=xsize;
-                SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                        &I_patch[4], &dIdx_patch[4], &dIdy_patch[4],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                  &I_patch[4], &dIdx_patch[4], &dIdy_patch[4],
+                  &A11, &A12, &A22);
 
-                xBase+=xsize;
-                if(xBase<c_winSize_x)
-                SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                        &I_patch[5], &dIdx_patch[5], &dIdy_patch[5],
-                                        &A11, &A12, &A22);
-        }
-        yBase+=ysize;
-        if(yBase<c_winSize_y)
-        {
-                xBase=xid;
-                SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                        &I_patch[6], &dIdx_patch[6], &dIdy_patch[6],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        if(xBase<c_winSize_x)
+            SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[5], &dIdx_patch[5], &dIdy_patch[5],
+                      &A11, &A12, &A22);
+    }
+    yBase+=ysize;
+    if(yBase<c_winSize_y)
+    {
+        xBase=xid;
+        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                  &I_patch[6], &dIdx_patch[6], &dIdy_patch[6],
+                  &A11, &A12, &A22);
 
 
-                xBase+=xsize;
-                SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                        &I_patch[7], &dIdx_patch[7], &dIdy_patch[7],
-                                        &A11, &A12, &A22);
+        xBase+=xsize;
+        SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                  &I_patch[7], &dIdx_patch[7], &dIdy_patch[7],
+                  &A11, &A12, &A22);
 
-                xBase+=xsize;
-                if(xBase<c_winSize_x)
-                SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                        &I_add, &Dx_add, &Dy_add,
-                                        &A11, &A12, &A22);
-        }
+        xBase+=xsize;
+        if(xBase<c_winSize_x)
+            SetPatch4(I, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_add, &Dx_add, &Dy_add,
+                      &A11, &A12, &A22);
+    }
 
     reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
-    barrier(CLK_LOCAL_MEM_FENCE);
 
-#ifdef CPU
-    A11 = smem1[BUFFER];
-    A12 = smem2[BUFFER];
-    A22 = smem3[BUFFER];
-#else
     A11 = smem1[0];
     A12 = smem2[0];
     A22 = smem3[0];
-#endif
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     float D = A11 * A22 - A12 * A12;
 
@@ -821,7 +709,7 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
     A12 /= D;
     A22 /= D;
 
-        nextPt = nextPts[gid] * 2.0f - c_halfWin;
+    nextPt = nextPts[gid] * 2.0f - c_halfWin;
 
     for (k = 0; k < c_iters; ++k)
     {
@@ -835,81 +723,76 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
         float b1 = 0;
         float b2 = 0;
 
-                yBase=yid;
-                {
-                        xBase=xid;
-                        GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[0], &dIdx_patch[0], &dIdy_patch[0],
-                                                &b1, &b2);
+        yBase=yid;
+        {
+            xBase=xid;
+            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[0], &dIdx_patch[0], &dIdy_patch[0],
+                      &b1, &b2);
 
 
-                        xBase+=xsize;
-                        GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[1], &dIdx_patch[1], &dIdy_patch[1],
-                                                &b1, &b2);
+            xBase+=xsize;
+            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[1], &dIdx_patch[1], &dIdy_patch[1],
+                      &b1, &b2);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[2], &dIdx_patch[2], &dIdy_patch[2],
-                                                &b1, &b2);
-                }
-                yBase+=ysize;
-                {
-                        xBase=xid;
-                        GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[3], &dIdx_patch[3], &dIdy_patch[3],
-                                                &b1, &b2);
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                          &I_patch[2], &dIdx_patch[2], &dIdy_patch[2],
+                          &b1, &b2);
+        }
+        yBase+=ysize;
+        {
+            xBase=xid;
+            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[3], &dIdx_patch[3], &dIdy_patch[3],
+                      &b1, &b2);
 
 
-                        xBase+=xsize;
-                        GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[4], &dIdx_patch[4], &dIdy_patch[4],
-                                                &b1, &b2);
+            xBase+=xsize;
+            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[4], &dIdx_patch[4], &dIdy_patch[4],
+                      &b1, &b2);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[5], &dIdx_patch[5], &dIdy_patch[5],
-                                                &b1, &b2);
-                }
-                yBase+=ysize;
-                if(yBase<c_winSize_y)
-                {
-                        xBase=xid;
-                        GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[6], &dIdx_patch[6], &dIdy_patch[6],
-                                                &b1, &b2);
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                          &I_patch[5], &dIdx_patch[5], &dIdy_patch[5],
+                          &b1, &b2);
+        }
+        yBase+=ysize;
+        if(yBase<c_winSize_y)
+        {
+            xBase=xid;
+            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[6], &dIdx_patch[6], &dIdy_patch[6],
+                      &b1, &b2);
 
 
-                        xBase+=xsize;
-                        GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[7], &dIdx_patch[7], &dIdy_patch[7],
-                                                &b1, &b2);
+            xBase+=xsize;
+            GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[7], &dIdx_patch[7], &dIdy_patch[7],
+                      &b1, &b2);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_add, &Dx_add, &Dy_add,
-                                                &b1, &b2);
-                }
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetPatch4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                          &I_add, &Dx_add, &Dy_add,
+                          &b1, &b2);
+        }
 
         reduce2(b1, b2, smem1, smem2, tid);
-        barrier(CLK_LOCAL_MEM_FENCE);
 
-#ifdef CPU
-        b1 = smem1[BUFFER];
-        b2 = smem2[BUFFER];
-#else
         b1 = smem1[0];
         b2 = smem2[0];
-#endif
+        barrier(CLK_LOCAL_MEM_FENCE);
 
         float2 delta;
         delta.x = A12 * b2 - A22 * b1;
         delta.y = A12 * b1 - A11 * b2;
 
-                nextPt +=delta;
+        nextPt +=delta;
 
         if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
             break;
@@ -918,78 +801,74 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
     D = 0.0f;
     if (calcErr)
     {
-                yBase=yid;
-                {
-                        xBase=xid;
-                        GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[0], &D);
+        yBase=yid;
+        {
+            xBase=xid;
+            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[0], &D);
 
 
-                        xBase+=xsize;
-                        GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[1], &D);
+            xBase+=xsize;
+            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[1], &D);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[2], &D);
-                }
-                yBase+=ysize;
-                {
-                        xBase=xid;
-                        GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[3], &D);
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                          &I_patch[2], &D);
+        }
+        yBase+=ysize;
+        {
+            xBase=xid;
+            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[3], &D);
 
 
-                        xBase+=xsize;
-                        GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[4], &D);
+            xBase+=xsize;
+            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[4], &D);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[5], &D);
-                }
-                yBase+=ysize;
-                if(yBase<c_winSize_y)
-                {
-                        xBase=xid;
-                        GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[6], &D);
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                          &I_patch[5], &D);
+        }
+        yBase+=ysize;
+        if(yBase<c_winSize_y)
+        {
+            xBase=xid;
+            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[6], &D);
 
 
-                        xBase+=xsize;
-                        GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_patch[7], &D);
+            xBase+=xsize;
+            GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                      &I_patch[7], &D);
 
-                        xBase+=xsize;
-                        if(xBase<c_winSize_x)
-                        GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
-                                                &I_add, &D);
-                }
+            xBase+=xsize;
+            if(xBase<c_winSize_x)
+                GetError4(J, nextPt.x + xBase + 0.5f, nextPt.y + yBase + 0.5f,
+                          &I_add, &D);
+        }
 
         reduce1(D, smem1, tid);
     }
 
     if (tid == 0)
     {
-                nextPt += c_halfWin;
+        nextPt += c_halfWin;
         nextPts[gid] = nextPt;
 
         if (calcErr)
-#ifdef CPU
-            err[gid] = smem1[BUFFER] / (float)(3 * c_winSize_x * c_winSize_y);
-#else
             err[gid] = smem1[0] / (float)(3 * c_winSize_x * c_winSize_y);
-#endif
     }
 }
 
 __kernel void lkDense_C1_D0(image2d_t I, image2d_t J, __global float* u, int uStep, __global float* v, int vStep, __global const float* prevU, int prevUStep, __global const float* prevV, int prevVStep,
-    const int rows, const int cols, /*__global float* err, int errStep, int cn,*/ int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
+                            const int rows, const int cols, /*__global float* err, int errStep, int cn,*/ int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
 {
-        int c_halfWin_x = (c_winSize_x - 1) / 2;
-        int c_halfWin_y = (c_winSize_y - 1) / 2;
+    int c_halfWin_x = (c_winSize_x - 1) / 2;
+    int c_halfWin_y = (c_winSize_y - 1) / 2;
 
     const int patchWidth  = get_local_size(0) + 2 * c_halfWin_x;
     const int patchHeight = get_local_size(1) + 2 * c_halfWin_y;
@@ -1003,7 +882,7 @@ __kernel void lkDense_C1_D0(image2d_t I, image2d_t J, __global float* u, int uSt
     const int xBase = get_group_id(0) * get_local_size(0);
     const int yBase = get_group_id(1) * get_local_size(1);
 
-        sampler_t sampleri    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+    sampler_t sampleri    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
 
     for (int i = get_local_id(1); i < patchHeight; i += get_local_size(1))
     {
@@ -1017,10 +896,10 @@ __kernel void lkDense_C1_D0(image2d_t I, image2d_t J, __global float* u, int uSt
             // Sharr Deriv
 
             dIdx_patch[i * patchWidth + j] = 3 * read_imagei(I, sampleri, (float2)(x+1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x+1, y)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y+1)).x -
-                                            (3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x-1, y)).x + 3 * read_imagei(I, sampleri, (float2)(x-1, y+1)).x);
+                                             (3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x-1, y)).x + 3 * read_imagei(I, sampleri, (float2)(x-1, y+1)).x);
 
             dIdy_patch[i * patchWidth + j] = 3 * read_imagei(I, sampleri, (float2)(x-1, y+1)).x + 10 * read_imagei(I, sampleri, (float2)(x, y+1)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y+1)).x -
-                                            (3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x, y-1)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y-1)).x);
+                                             (3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x, y-1)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y-1)).x);
         }
     }
     barrier(CLK_LOCAL_MEM_FENCE);
diff --git a/modules/ocl/src/opencl/stereobm.cl b/modules/ocl/src/opencl/stereobm.cl
index bd86a7f3f..56f445e42 100644
--- a/modules/ocl/src/opencl/stereobm.cl
+++ b/modules/ocl/src/opencl/stereobm.cl
@@ -162,8 +162,8 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
     int y_tex;
     int x_tex = X - radius;
 
-    if (x_tex >= cwidth)
-        return;
+    //if (x_tex >= cwidth)
+    //    return;
 
     for(int d = STEREO_MIND; d < maxdisp; d += STEREO_DISP_STEP)
     {
@@ -190,7 +190,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
         {
             int idx1 = y_tex * img_step + x_tex;
             int idx2 = min(y_tex + ((radius << 1) + 1), cheight - 1) * img_step + x_tex;
-            
+
             barrier(CLK_LOCAL_MEM_FENCE);
 
             StepDown(idx1, idx2, left, right, d, col_ssd);
@@ -258,27 +258,13 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
 
 float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
 {
-    float cache = 0;
-    float cache2 = 0;
-    int winsz2 = winsz/2;
+    unsigned int cache = cols[0];
 
-    int x = get_local_id(0);
-    int group_size_x = get_local_size(0);
-
-    for(int i = 1; i <= winsz2; i++)
+#pragma unroll
+    for(int i = 1; i <= winsz; i++)
         cache += cols[i];
 
-    cols_cache[0] = cache;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (x < group_size_x - winsz2)
-        cache2 = cols_cache[winsz2];
-    else
-        for(int i = winsz2 + 1; i < winsz; i++)
-            cache2 += cols[i];
-
-    return cols[0] + cache + cache2;
+    return cache;
 }
 
 #define RpT (2 * ROWSperTHREAD)  // got experimentally
@@ -301,8 +287,7 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in
     int beg_row = group_id_y * RpT;
     int end_row = min(beg_row + RpT, disp_rows);
 
-//   if (x < disp_cols)
-//   {
+
     int y = beg_row;
 
     float sum = 0;
@@ -340,11 +325,15 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in
         }
 
         barrier(CLK_LOCAL_MEM_FENCE);
-        float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
-        if (sum_win < threshold)
-            disp[y * disp_step + x] = 0;
+
+        if (x < disp_cols)
+        {
+            float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
+            if (sum_win < threshold)
+                disp[y * disp_step + x] = 0;
+        }
 
         barrier(CLK_LOCAL_MEM_FENCE);
     }
-    //  }
+
 }
diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl
index 03e42876e..1d523e788 100644
--- a/modules/ocl/src/opencl/stereobp.cl
+++ b/modules/ocl/src/opencl/stereobp.cl
@@ -56,8 +56,10 @@
 
 #ifdef T_FLOAT
 #define T float
+#define T4 float4
 #else
 #define T short
+#define T4 short4
 #endif
 
 ///////////////////////////////////////////////////////////////
@@ -71,6 +73,14 @@ T saturate_cast(float v){
 #endif
 }
 
+T4 saturate_cast4(float4 v){
+#ifdef T_SHORT
+    return convert_short4_sat_rte(v);
+#else
+    return v;
+#endif
+}
+
 #define FLOAT_MAX 3.402823466e+38f
 typedef struct
 {
@@ -84,29 +94,14 @@ typedef struct
 ////////////////////////// comp data //////////////////////////
 ///////////////////////////////////////////////////////////////
 
-float pix_diff_1(__global const uchar *ls, __global const uchar *rs)
+inline float pix_diff_1(const uchar4 l, __global const uchar *rs)
 {
-    return abs((int)(*ls) - *rs);
+    return abs((int)(l.x) - *rs);
 }
 
-float pix_diff_3(__global const uchar *ls, __global const uchar *rs)
+float pix_diff_4(const uchar4 l, __global const uchar *rs)
 {
-    const float tr = 0.299f;
-    const float tg = 0.587f;
-    const float tb = 0.114f;
-
-    float val;
-
-    val =  tb * abs((int)ls[0] - rs[0]);
-    val += tg * abs((int)ls[1] - rs[1]);
-    val += tr * abs((int)ls[2] - rs[2]);
-
-    return val;
-}
-float pix_diff_4(__global const uchar *ls, __global const uchar *rs)
-{
-    uchar4 l, r;
-    l = *((__global uchar4 *)ls);
+    uchar4 r;
     r = *((__global uchar4 *)rs);
 
     const float tr = 0.299f;
@@ -122,11 +117,19 @@ float pix_diff_4(__global const uchar *ls, __global const uchar *rs)
     return val;
 }
 
+inline float pix_diff_3(const uchar4 l, __global const uchar *rs)
+{
+    return pix_diff_4(l, rs);
+}
 
 #ifndef CN
 #define CN 4
 #endif
 
+#ifndef CNDISP
+#define CNDISP 64
+#endif
+
 #define CAT(X,Y) X##Y
 #define CAT2(X,Y) CAT(X,Y)
 
@@ -149,19 +152,20 @@ __kernel void comp_data(__global uchar *left,  int left_rows,  int left_cols,  i
         __global T *ds = data + y * data_step + x;
 
         const unsigned int disp_step = data_step * left_rows;
+        const float weightXterm = con_st -> cdata_weight * con_st -> cmax_data_term;
+        const uchar4 ls_data = vload4(0, ls);
 
         for (int disp = 0; disp < con_st -> cndisp; disp++)
         {
             if (x - disp >= 1)
             {
                 float val = 0;
-                val = PIX_DIFF(ls, rs - disp * CN);
-                ds[disp * disp_step] =  saturate_cast(fmin(con_st -> cdata_weight * val,
-                    con_st -> cdata_weight * con_st -> cmax_data_term));
+                val = PIX_DIFF(ls_data, rs - disp * CN);
+                ds[disp * disp_step] =  saturate_cast(fmin(con_st -> cdata_weight * val, weightXterm));
             }
             else
             {
-                ds[disp * disp_step] =  saturate_cast(con_st -> cdata_weight * con_st -> cmax_data_term);
+                ds[disp * disp_step] =  saturate_cast(weightXterm);
             }
         }
     }
@@ -182,13 +186,20 @@ __kernel void data_step_down(__global T *src, int src_rows,
     {
         src_step /= sizeof(T);
         dst_step /= sizeof(T);
+        int4 coor_step = (int4)(src_rows * src_step);
+        int4 coor = (int4)(min(2*y+0, src_rows-1) * src_step + 2*x+0,
+                           min(2*y+1, src_rows-1) * src_step + 2*x+0,
+                           min(2*y+0, src_rows-1) * src_step + 2*x+1,
+                           min(2*y+1, src_rows-1) * src_step + 2*x+1);
+
         for (int d = 0; d < cndisp; ++d)
         {
             float dst_reg;
-            dst_reg  = src[(d * src_rows + min(2*y+0, src_rows-1)) * src_step + 2*x+0];
-            dst_reg += src[(d * src_rows + min(2*y+1, src_rows-1)) * src_step + 2*x+0];
-            dst_reg += src[(d * src_rows + min(2*y+0, src_rows-1)) * src_step + 2*x+1];
-            dst_reg += src[(d * src_rows + min(2*y+1, src_rows-1)) * src_step + 2*x+1];
+            dst_reg  = src[coor.x];
+            dst_reg += src[coor.y];
+            dst_reg += src[coor.z];
+            dst_reg += src[coor.w];
+            coor += coor_step;
 
             dst[(d * dst_rows + y) * dst_step + x] = saturate_cast(dst_reg);
         }
@@ -224,85 +235,95 @@ __kernel void level_up_message(__global T *src, int src_rows, int src_step,
 ///////////////////////////////////////////////////////////////
 ////////////////////  calc all iterations /////////////////////
 ///////////////////////////////////////////////////////////////
-void calc_min_linear_penalty(__global T * dst, int disp_step,
-                             int cndisp, float cdisc_single_jump)
+void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
+              const __global T *dt,
+              int u_step, int msg_disp_step, int data_disp_step,
+              float4 cmax_disc_term, float4 cdisc_single_jump)
 {
-    float prev = dst[0];
-    float cur;
+    __global T *us = us_ + u_step;
+    __global T *ds = ds_ - u_step;
+    __global T *ls = ls_ + 1;
+    __global T *rs = rs_ - 1;
 
-    for (int disp = 1; disp < cndisp; ++disp)
+    float4 minimum = (float4)(FLOAT_MAX);
+
+    T4 t_dst[CNDISP];
+    float4 dst_reg;
+    float4 prev;
+    float4 cur;
+
+    T t_us = us[0];
+    T t_ds = ds[0];
+    T t_ls = ls[0];
+    T t_rs = rs[0];
+    T t_dt = dt[0];
+
+    prev = (float4)(t_us + t_ls + t_rs + t_dt,
+                    t_ds + t_ls + t_rs + t_dt,
+                    t_us + t_ds + t_rs + t_dt,
+                    t_us + t_ds + t_ls + t_dt);
+
+    minimum = min(prev, minimum);
+
+    t_dst[0] = saturate_cast4(prev);
+
+    for(int i = 1, idx = msg_disp_step; i < CNDISP; ++i, idx+=msg_disp_step)
     {
+        t_us = us[idx];
+        t_ds = ds[idx];
+        t_ls = ls[idx];
+        t_rs = rs[idx];
+        t_dt = dt[data_disp_step * i];
+
+        dst_reg = (float4)(t_us + t_ls + t_rs + t_dt,
+                           t_ds + t_ls + t_rs + t_dt,
+                           t_us + t_ds + t_rs + t_dt,
+                           t_us + t_ds + t_ls + t_dt);
+
+        minimum = min(dst_reg, minimum);
+
         prev += cdisc_single_jump;
-        cur = dst[disp_step * disp];
+        prev = min(prev, dst_reg);
 
-        if (prev < cur)
-        {
-            cur = prev;
-            dst[disp_step * disp] = saturate_cast(prev);
-        }
-
-        prev = cur;
+        t_dst[i] = saturate_cast4(prev);
     }
 
-    prev = dst[(cndisp - 1) * disp_step];
-    for (int disp = cndisp - 2; disp >= 0; disp--)
-    {
-        prev += cdisc_single_jump;
-        cur = dst[disp_step * disp];
-
-        if (prev < cur)
-        {
-            cur = prev;
-            dst[disp_step * disp] = saturate_cast(prev);
-        }
-        prev = cur;
-    }
-}
-void message(const __global T *msg1, const __global T *msg2,
-             const __global T *msg3, const __global T *data, __global T *dst,
-             int msg_disp_step, int data_disp_step, int cndisp, float cmax_disc_term, float cdisc_single_jump)
-{
-    float minimum = FLOAT_MAX;
-
-    for(int i = 0; i < cndisp; ++i)
-    {
-        float dst_reg;
-        dst_reg  = msg1[msg_disp_step * i];
-        dst_reg += msg2[msg_disp_step * i];
-        dst_reg += msg3[msg_disp_step * i];
-        dst_reg += data[data_disp_step * i];
-
-        if (dst_reg < minimum)
-            minimum = dst_reg;
-
-        dst[msg_disp_step * i] = saturate_cast(dst_reg);
-    }
-
-    calc_min_linear_penalty(dst, msg_disp_step, cndisp, cdisc_single_jump);
-
     minimum += cmax_disc_term;
 
-    float sum = 0;
-    for(int i = 0; i < cndisp; ++i)
+    float4 sum = 0;
+    prev = convert_float4(t_dst[CNDISP - 1]);
+    for (int disp = CNDISP - 2; disp >= 0; disp--)
     {
-        float dst_reg = dst[msg_disp_step * i];
-        if (dst_reg > minimum)
-        {
-            dst_reg = minimum;
-            dst[msg_disp_step * i] = saturate_cast(minimum);
-        }
-        sum += dst_reg;
-    }
-    sum /= cndisp;
+        prev += cdisc_single_jump;
+        cur = convert_float4(t_dst[disp]);
+        prev = min(prev, cur);
+        cur = min(prev, minimum);
+        sum += cur;
 
-    for(int i = 0; i < cndisp; ++i)
-        dst[msg_disp_step * i] -= sum;
+        t_dst[disp] = saturate_cast4(cur);
+    }
+
+    dst_reg = convert_float4(t_dst[CNDISP - 1]);
+    dst_reg = min(dst_reg, minimum);
+    t_dst[CNDISP - 1] = saturate_cast4(dst_reg);
+    sum += dst_reg;
+
+    sum /= CNDISP;
+#pragma unroll
+    for(int i = 0, idx = 0; i < CNDISP; ++i, idx+=msg_disp_step)
+    {
+        T4 dst = t_dst[i];
+        us_[idx] = dst.x - sum.x;
+        ds_[idx] = dst.y - sum.y;
+        rs_[idx] = dst.z - sum.z;
+        ls_[idx] = dst.w - sum.w;
+    }
 }
 __kernel void one_iteration(__global T *u,    int u_step,
                             __global T *data, int data_step,
                             __global T *d,    __global T *l, __global T *r,
                             int t, int cols, int rows,
-                            int cndisp, float cmax_disc_term, float cdisc_single_jump)
+                            float cmax_disc_term, float cdisc_single_jump)
 {
     const int y = get_global_id(1);
     const int x = ((get_global_id(0)) << 1) + ((y + t) & 1);
@@ -321,15 +342,9 @@ __kernel void one_iteration(__global T *u,    int u_step,
         int msg_disp_step = u_step * rows;
         int data_disp_step = data_step * rows;
 
-        message(us + u_step, ls      + 1, rs - 1, dt, us, msg_disp_step, data_disp_step, cndisp,
-            cmax_disc_term, cdisc_single_jump);
-        message(ds - u_step, ls      + 1, rs - 1, dt, ds, msg_disp_step, data_disp_step, cndisp,
-            cmax_disc_term, cdisc_single_jump);
-
-        message(us + u_step, ds - u_step, rs - 1, dt, rs, msg_disp_step, data_disp_step, cndisp,
-            cmax_disc_term, cdisc_single_jump);
-        message(us + u_step, ds - u_step, ls + 1, dt, ls, msg_disp_step, data_disp_step, cndisp,
-            cmax_disc_term, cdisc_single_jump);
+        message(us, ds, ls, rs, dt,
+                u_step, msg_disp_step, data_disp_step,
+                (float4)(cmax_disc_term), (float4)(cdisc_single_jump));
     }
 }
 
diff --git a/modules/ocl/src/opencl/stereocsbp.cl b/modules/ocl/src/opencl/stereocsbp.cl
index ea7af62b2..89f2bb8dc 100644
--- a/modules/ocl/src/opencl/stereocsbp.cl
+++ b/modules/ocl/src/opencl/stereocsbp.cl
@@ -129,7 +129,7 @@ __kernel void get_first_k_initial_global_1(__global  float *data_cost_selected_,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void get_first_k_initial_local_0(__global  short *data_cost_selected_, __global short *selected_disp_pyr, 
+__kernel void get_first_k_initial_local_0(__global  short *data_cost_selected_, __global short *selected_disp_pyr,
     __global  short *ctemp,int h, int w, int nr_plane,
     int cmsg_step1, int cdisp_step1, int cndisp)
 {
@@ -187,7 +187,7 @@ __kernel void get_first_k_initial_local_0(__global  short *data_cost_selected_,
     }
 }
 
-__kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, __global float *selected_disp_pyr, 
+__kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
     __global float *ctemp,int h, int w, int nr_plane,
     int cmsg_step1,  int cdisp_step1, int cndisp)
 {
@@ -257,20 +257,20 @@ float compute_3(__global uchar* left, __global uchar* right,
 
     return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
 }
-float compute_1(__global uchar* left, __global uchar* right, 
+float compute_1(__global uchar* left, __global uchar* right,
     float cdata_weight,  float cmax_data_term)
 {
     return fmin(cdata_weight * abs((int)*left - (int)*right), cdata_weight * cmax_data_term);
 }
 short round_short(float v){
-    return convert_short_sat_rte(v); 
+    return convert_short_sat_rte(v);
 }
 ///////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////init_data_cost///////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright, 
+__kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
     int h, int w, int level, int channels,
-    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1, 
+    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
     int cth, int cimg_step, int cndisp)
 {
     int x = get_global_id(0);
@@ -312,9 +312,9 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
         }
     }
 }
-__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright, 
+__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
     int h, int w, int level, int channels,
-    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1, 
+    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
     int cth, int cimg_step, int cndisp)
 {
     int x = get_global_id(0);
@@ -361,13 +361,13 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
 __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
     __local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
-    int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth, 
+    int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
     int cdisp_step1, int cmsg_step1)
 {
     int x_out = get_group_id(0);
     int y_out = get_group_id(1) % h;
     //int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
-    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2); 
+    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
 
     int tid = get_local_id(0);
 
@@ -411,39 +411,39 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
     if(d < cndisp)
     {
         __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 256) 
+        if (winsz >= 256)
         {
-            if (tid < 128) 
-                dline[tid] += dline[tid + 128]; 
+            if (tid < 128)
+                dline[tid] += dline[tid + 128];
         }
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 128) 
+        if (winsz >= 128)
         {
-            if (tid <  64) 
-                dline[tid] += dline[tid + 64]; 
+            if (tid <  64)
+                dline[tid] += dline[tid + 64];
         }
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64) 
-            if (tid < 32) 
+        if (winsz >= 64)
+            if (tid < 32)
                 vdline[tid] += vdline[tid + 32];
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32) 
-            if (tid < 16) 
+        if (winsz >= 32)
+            if (tid < 16)
                 vdline[tid] += vdline[tid + 16];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -452,7 +452,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
         if (winsz >= 16)
-            if (tid <  8) 
+            if (tid <  8)
                 vdline[tid] += vdline[tid + 8];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -461,7 +461,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
         if (winsz >= 8)
-            if (tid <  4) 
+            if (tid <  4)
                 vdline[tid] += vdline[tid + 4];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -470,7 +470,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
         if (winsz >= 4)
-            if (tid <  2) 
+            if (tid <  2)
                 vdline[tid] += vdline[tid + 2];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -479,7 +479,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
         if (winsz >= 2)
-            if (tid <  1) 
+            if (tid <  1)
                 vdline[tid] += vdline[tid + 1];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -500,7 +500,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
 {
     int x_out = get_group_id(0);
     int y_out = get_group_id(1) % h;
-    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2); 
+    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
 
     int tid = get_local_id(0);
 
@@ -545,74 +545,74 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
     if(d < cndisp)
     {
         __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 256) 
-            if (tid < 128) 
-                dline[tid] += dline[tid + 128]; 
+        if (winsz >= 256)
+            if (tid < 128)
+                dline[tid] += dline[tid + 128];
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 128) 
-            if (tid < 64) 
-                dline[tid] += dline[tid + 64]; 
+        if (winsz >= 128)
+            if (tid < 64)
+                dline[tid] += dline[tid + 64];
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64) 
-            if (tid < 32) 
-                vdline[tid] += vdline[tid + 32]; 
+        if (winsz >= 64)
+            if (tid < 32)
+                vdline[tid] += vdline[tid + 32];
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32) 
-            if (tid < 16) 
-                vdline[tid] += vdline[tid + 16]; 
+        if (winsz >= 32)
+            if (tid < 16)
+                vdline[tid] += vdline[tid + 16];
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16) 
-            if (tid < 8) 
-                vdline[tid] += vdline[tid + 8]; 
+        if (winsz >= 16)
+            if (tid < 8)
+                vdline[tid] += vdline[tid + 8];
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 8) 
-            if (tid < 4) 
-                vdline[tid] += vdline[tid + 4]; 
+        if (winsz >= 8)
+            if (tid < 4)
+                vdline[tid] += vdline[tid + 4];
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 4) 
-            if (tid < 2) 
-                vdline[tid] += vdline[tid + 2]; 
+        if (winsz >= 4)
+            if (tid < 2)
+                vdline[tid] += vdline[tid + 2];
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 2) 
-            if (tid < 1) 
-                vdline[tid] += vdline[tid + 1]; 
+        if (winsz >= 2)
+            if (tid < 1)
+                vdline[tid] += vdline[tid + 1];
     }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     if(d < cndisp)
     {
@@ -626,10 +626,10 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
 ///////////////////////////////////////////////////////////////
 ////////////////////// compute data cost //////////////////////
 ///////////////////////////////////////////////////////////////
-__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_, 
+__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
     __global uchar *cleft, __global uchar *cright,
     int h, int w, int level, int nr_plane, int channels,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight, 
+    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
     float cmax_data_term, int cimg_step, int cth)
 {
 
@@ -676,10 +676,10 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
         }
     }
 }
-__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_, 
+__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
     __global uchar *cleft, __global uchar *cright,
     int h, int w, int level, int nr_plane, int channels,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight, 
+    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
     float cmax_data_term, int cimg_step, int cth)
 {
 
@@ -728,11 +728,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_, 
+__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
     __global uchar *cleft, __global uchar *cright,__local float *smem,
-    int level, int rows, int cols, int h, int nr_plane, 
+    int level, int rows, int cols, int h, int nr_plane,
     int channels, int winsz,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, 
+    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
     float cdata_weight,  float cmax_data_term, int cimg_step,int cth)
 
 {
@@ -788,9 +788,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64) 
+        if (winsz >= 64)
         {
-            if (tid < 32) 
+            if (tid < 32)
                 vdline[tid] += vdline[tid + 32];
         }
     }
@@ -799,9 +799,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32) 
+        if (winsz >= 32)
         {
-            if (tid < 16) 
+            if (tid < 16)
                 vdline[tid] += vdline[tid + 16];
         }
     }
@@ -810,9 +810,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16) 
+        if (winsz >= 16)
         {
-            if (tid < 8) 
+            if (tid < 8)
                 vdline[tid] += vdline[tid + 8];
         }
     }
@@ -821,9 +821,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 8) 
+        if (winsz >= 8)
         {
-            if (tid < 4) 
+            if (tid < 4)
                 vdline[tid] += vdline[tid + 4];
         }
     }
@@ -832,9 +832,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 4) 
+        if (winsz >= 4)
         {
-            if (tid < 2) 
+            if (tid < 2)
                 vdline[tid] += vdline[tid + 2];
         }
     }
@@ -843,9 +843,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 2) 
+        if (winsz >= 2)
         {
-            if (tid < 1) 
+            if (tid < 1)
                 vdline[tid] += vdline[tid + 1];
         }
     }
@@ -859,11 +859,11 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
     }
 }
 
-__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_, 
+__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
     __global uchar *cleft, __global uchar *cright, __local float *smem,
-    int level, int rows, int cols, int h, int nr_plane, 
+    int level, int rows, int cols, int h, int nr_plane,
     int channels, int winsz,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight, 
+    int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
     float cmax_data_term, int cimg_step, int cth)
 
 {
@@ -918,21 +918,21 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64) 
+        if (winsz >= 64)
         {
-            if (tid < 32) 
+            if (tid < 32)
                 vdline[tid] += vdline[tid + 32];
         }
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
 
-    if(d < nr_plane)	
+    if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32) 
+        if (winsz >= 32)
         {
-            if (tid < 16) 
+            if (tid < 16)
                 vdline[tid] += vdline[tid + 16];
         }
     }
@@ -941,9 +941,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16) 
+        if (winsz >= 16)
         {
-            if (tid <  8) 
+            if (tid <  8)
                 vdline[tid] += vdline[tid + 8];
         }
     }
@@ -952,9 +952,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >=  8) 
+        if (winsz >=  8)
         {
-            if (tid <  4) 
+            if (tid <  4)
                 vdline[tid] += vdline[tid + 4];
         }
     }
@@ -963,9 +963,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >=  4) 
+        if (winsz >=  4)
         {
-            if (tid <  2) 
+            if (tid <  2)
                 vdline[tid] += vdline[tid + 2];
         }
     }
@@ -974,9 +974,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
     if(d < nr_plane)
     {
         __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >=  2) 
+        if (winsz >=  2)
         {
-            if (tid <  1) 
+            if (tid <  1)
                 vdline[tid] += vdline[tid + 1];
         }
     }
@@ -993,11 +993,11 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
 ///////////////////////////////////////////////////////////////
 //////////////////////// init message /////////////////////////
 ///////////////////////////////////////////////////////////////
-void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new, 
-    __global short *r_new, __global const short *u_cur, __global const short *d_cur, 
-    __global const short *l_cur, __global const short *r_cur, 
-    __global short *data_cost_selected, __global short *disparity_selected_new, 
-    __global short *data_cost_new, __global const short* data_cost_cur, 
+void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
+    __global short *r_new, __global const short *u_cur, __global const short *d_cur,
+    __global const short *l_cur, __global const short *r_cur,
+    __global short *data_cost_selected, __global short *disparity_selected_new,
+    __global short *data_cost_new, __global const short* data_cost_cur,
     __global const short *disparity_selected_cur,
     int nr_plane, int nr_plane2,
     int cdisp_step1, int cdisp_step2)
@@ -1027,11 +1027,11 @@ void get_first_k_element_increase_0(__global short* u_new, __global short *d_new
         data_cost_new[id * cdisp_step1] = SHRT_MAX;
     }
 }
-void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new, 
-    __global float *r_new, __global const float *u_cur, __global const float *d_cur, 
+void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
+    __global float *r_new, __global const float *u_cur, __global const float *d_cur,
     __global const float *l_cur, __global const float *r_cur,
-    __global float *data_cost_selected, __global float *disparity_selected_new, 
-    __global float *data_cost_new, __global const float *data_cost_cur, 
+    __global float *data_cost_selected, __global float *disparity_selected_new,
+    __global float *data_cost_new, __global const float *data_cost_cur,
     __global const float *disparity_selected_cur,
     int nr_plane, int nr_plane2,
     int cdisp_step1, int cdisp_step2)
@@ -1057,13 +1057,13 @@ void get_first_k_element_increase_1(__global float *u_new, __global float *d_new
         u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
         d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
         l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
-        r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];   
+        r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
         data_cost_new[id * cdisp_step1] = FLT_MAX;
 
     }
 }
 __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
-    __global short *r_new_, __global  short *u_cur_, __global const short *d_cur_, 
+    __global short *r_new_, __global  short *u_cur_, __global const short *d_cur_,
     __global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
     __global short *selected_disp_pyr_new, __global const short *selected_disp_pyr_cur,
     __global short *data_cost_selected_, __global const short *data_cost_,
@@ -1113,7 +1113,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
     }
 }
 __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
-    __global float *r_new_, __global const float *u_cur_, __global const float *d_cur_, 
+    __global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
     __global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
     __global float *selected_disp_pyr_new, __global const float *selected_disp_pyr_cur,
     __global float *data_cost_selected_, __global const float *data_cost_,
@@ -1176,28 +1176,28 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
                     id = j;
                 }
             }
-            data_cost_selected[i * cdisp_step1] = data_cost[id * cdisp_step1];	
+            data_cost_selected[i * cdisp_step1] = data_cost[id * cdisp_step1];
             disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
             u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
             d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
             l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
-            r_new[i * cdisp_step1] = r_cur[id * cdisp_step2]; 
+            r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
             data_cost_new[id * cdisp_step1] = FLT_MAX;
-        }  
+        }
     }
 }
 
 ///////////////////////////////////////////////////////////////
 ////////////////////  calc all iterations /////////////////////
 ///////////////////////////////////////////////////////////////
-void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1, 
+void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
     __global const short *msg2, __global const short *msg3,
-    __global const short *dst_disp, __global const short *src_disp, 
+    __global const short *dst_disp, __global const short *src_disp,
     int nr_plane, __global short *temp,
     float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
 {
     short minimum = SHRT_MAX;
-    for(int d = 0; d < nr_plane; d++)     
+    for(int d = 0; d < nr_plane; d++)
     {
         int idx = d * cdisp_step1;
         short val  = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
@@ -1215,7 +1215,7 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
         short src_disp_reg = src_disp[d * cdisp_step1];
 
         for(int d2 = 0; d2 < nr_plane; d2++)
-            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] + 
+            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
             cdisc_single_jump * abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));
 
         temp[d * cdisp_step1] = convert_short_sat_rte(cost_min);
@@ -1226,14 +1226,14 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
     for(int d = 0; d < nr_plane; d++)
         msg_dst[d * cdisp_step1] = convert_short_sat_rte(temp[d * cdisp_step1] - sum);
 }
-void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1, 
+void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
     __global const float *msg2, __global const float *msg3,
-    __global const float *dst_disp, __global const float *src_disp, 
+    __global const float *dst_disp, __global const float *src_disp,
     int nr_plane, __global float *temp,
     float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
 {
     float minimum = FLT_MAX;
-    for(int d = 0; d < nr_plane; d++)     
+    for(int d = 0; d < nr_plane; d++)
     {
         int idx = d * cdisp_step1;
         float val  = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
@@ -1251,7 +1251,7 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
         float src_disp_reg = src_disp[d * cdisp_step1];
 
         for(int d2 = 0; d2 < nr_plane; d2++)
-            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] + 
+            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
             cdisc_single_jump * fabs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));
 
         temp[d * cdisp_step1] = cost_min;
@@ -1262,9 +1262,9 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
     for(int d = 0; d < nr_plane; d++)
         msg_dst[d * cdisp_step1] = temp[d * cdisp_step1] - sum;
 }
-__kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_, 
-    __global const short *data_cost_selected, __global const short *selected_disp_pyr_cur, 
-    __global short *ctemp, int h, int w, int nr_plane, int i, 
+__kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_,
+    __global const short *data_cost_selected, __global const short *selected_disp_pyr_cur,
+    __global short *ctemp, int h, int w, int nr_plane, int i,
     float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
 {
     int y = get_global_id(1);
@@ -1283,7 +1283,7 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
 
         __global short *temp = ctemp + y * cmsg_step1 + x;
 
-        message_per_pixel_0(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp, 
+        message_per_pixel_0(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
             cmax_disc_term, cdisp_step1, cdisc_single_jump);
         message_per_pixel_0(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
             cmax_disc_term, cdisp_step1, cdisc_single_jump);
@@ -1293,9 +1293,9 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
             cmax_disc_term, cdisp_step1, cdisc_single_jump);
     }
 }
-__kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_, 
-    __global const float *data_cost_selected, __global const float *selected_disp_pyr_cur, 
-    __global float *ctemp, int h, int w, int nr_plane, int i, 
+__kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_,
+    __global const float *data_cost_selected, __global const float *selected_disp_pyr_cur,
+    __global float *ctemp, int h, int w, int nr_plane, int i,
     float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
 {
     int y = get_global_id(1);
@@ -1313,7 +1313,7 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
         __global const float *disp = selected_disp_pyr_cur + y * cmsg_step1 + x;
         __global float *temp = ctemp + y * cmsg_step1 + x;
 
-        message_per_pixel_1(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp, 
+        message_per_pixel_1(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
             cmax_disc_term, cdisp_step1, cdisc_single_jump);
         message_per_pixel_1(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
             cmax_disc_term, cdisp_step1, cdisc_single_jump);
@@ -1327,10 +1327,10 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
 ///////////////////////////////////////////////////////////////
 /////////////////////////// output ////////////////////////////
 ///////////////////////////////////////////////////////////////
-__kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_, 
-    __global const short *r_, __global const short * data_cost_selected, 
+__kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_,
+    __global const short *r_, __global const short * data_cost_selected,
     __global const short *disp_selected_pyr,
-    __global short* disp, 
+    __global short* disp,
     int res_step, int cols, int rows, int nr_plane,
     int cmsg_step1, int cdisp_step1)
 {
@@ -1364,10 +1364,10 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
         disp[res_step * y + x] = best;
     }
 }
-__kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_, 
-    __global const float *r_, __global const float *data_cost_selected, 
+__kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_,
+    __global const float *r_, __global const float *data_cost_selected,
     __global const float *disp_selected_pyr,
-    __global short *disp, 
+    __global short *disp,
     int res_step, int cols, int rows, int nr_plane,
     int cmsg_step1, int cdisp_step1)
 {
diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl
index e0ff7307b..095b339f8 100644
--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ b/modules/ocl/src/opencl/tvl1flow.cl
@@ -43,7 +43,7 @@
 //
 //M*/
 
-__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step, 
+__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
 __global float* dx, __global float* dy, int dx_step)
 {
     int x = get_global_id(0);
@@ -51,9 +51,9 @@ __global float* dx, __global float* dy, int dx_step)
 
     if((x < src_col)&&(y < src_row))
     {
-		int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
-     	int src_x2 = (x - 1) > 0 ? (x -1) : 0;
-        
+        int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
+        int src_x2 = (x - 1) > 0 ? (x -1) : 0;
+
         //if(src[y * src_step + src_x1] == src[y * src_step+ src_x2])
         //{
         //    printf("y = %d\n", y);
@@ -61,8 +61,8 @@ __global float* dx, __global float* dy, int dx_step)
         //    printf("src_x2 = %d\n", src_x2);
         //}
         dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]);
-        
-		int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
+
+        int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
         int src_y2 = (y - 1) > 0 ? (y - 1) : 0;
         dy[y * dx_step+ x] = 0.5f * (src[src_y1 * src_step + x] - src[src_y2 * src_step+ x]);
     }
@@ -89,20 +89,20 @@ float bicubicCoeff(float x_)
 }
 
 __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
-	image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,  
-    __global const float* u1, int u1_step, 
+    image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,
+    __global const float* u1, int u1_step,
     __global const float* u2,
     __global float* I1w,
-	__global float* I1wx, /*int I1wx_step,*/
-	__global float* I1wy, /*int I1wy_step,*/
-	__global float* grad, /*int grad_step,*/
-	__global float* rho,
-	int I1w_step,
-	int u2_step,
-	int u1_offset_x,
-	int u1_offset_y,
-	int u2_offset_x,
-	int u2_offset_y)
+    __global float* I1wx, /*int I1wx_step,*/
+    __global float* I1wy, /*int I1wy_step,*/
+    __global float* grad, /*int grad_step,*/
+    __global float* rho,
+    int I1w_step,
+    int u2_step,
+    int u1_offset_x,
+    int u1_offset_y,
+    int u2_offset_x,
+    int u2_offset_y)
 {
     const int x = get_global_id(0);
     const int y = get_global_id(1);
@@ -136,7 +136,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
                 const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
 
                 //sum  += w * tex2D(tex_I1 , cx, cy);
-				int2 cood = (int2)(cx, cy);
+                int2 cood = (int2)(cx, cy);
                 sum += w * read_imagef(tex_I1, sampleri, cood).x;
                 //sumx += w * tex2D(tex_I1x, cx, cy);
                 sumx += w * read_imagef(tex_I1x, sampleri, cood).x;
@@ -181,18 +181,18 @@ float readImage(__global const float *image,  const int x,  const int y,  const
 }
 
 __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row,
-	__global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,  
-    __global const float* u1, int u1_step, 
+    __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
+    __global const float* u1, int u1_step,
     __global const float* u2,
     __global float* I1w,
-	__global float* I1wx, /*int I1wx_step,*/
-	__global float* I1wy, /*int I1wy_step,*/
-	__global float* grad, /*int grad_step,*/
-	__global float* rho,
-	int I1w_step,
-	int u2_step,
-	int I1_step,
-	int I1x_step)
+    __global float* I1wx, /*int I1wx_step,*/
+    __global float* I1wy, /*int I1wy_step,*/
+    __global float* grad, /*int grad_step,*/
+    __global float* rho,
+    int I1w_step,
+    int u2_step,
+    int I1_step,
+    int I1x_step)
 {
     const int x = get_global_id(0);
     const int y = get_global_id(1);
@@ -224,7 +224,7 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
             {
                 const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
 
-				int2 cood = (int2)(cx, cy);
+                int2 cood = (int2)(cx, cy);
                 sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);
                 sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step);
                 sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step);
@@ -256,18 +256,18 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
 }
 
 
-__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step, 
-    __global const float* u2, 
-    __global float* p11, int p11_step, 
+__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,
+    __global const float* u2,
+    __global float* p11, int p11_step,
     __global float* p12,
     __global float* p21,
-    __global float* p22, 
+    __global float* p22,
     const float taut,
-	int u2_step,
-	int u1_offset_x,
-	int u1_offset_y,
-	int u2_offset_x,
-	int u2_offset_y)
+    int u2_step,
+    int u1_offset_x,
+    int u1_offset_y,
+    int u2_offset_x,
+    int u2_offset_y)
 {
 
     //const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -277,16 +277,16 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
 
     if(x < u1_col && y < u1_row)
     {
-		int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
+        int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
         const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-        
-		int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
+
+        int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
         const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
 
-		int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
+        int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
         const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
 
-		int src_y2 = (y + 1) <  (u1_row - 1) ? (y + 1) : (u1_row - 1);
+        int src_y2 = (y + 1) <  (u1_row - 1) ? (y + 1) : (u1_row - 1);
         const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
 
         const float g1 = hypot(u1x, u1y);
@@ -329,19 +329,20 @@ float divergence(__global const float* v1, __global const float* v2, int y, int
 
 __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step,
     __global const float* I1wy, /*int I1wy_step,*/
-    __global const float* grad, /*int grad_step,*/ 
+    __global const float* grad, /*int grad_step,*/
     __global const float* rho_c, /*int rho_c_step,*/
     __global const float* p11, /*int p11_step,*/
     __global const float* p12, /*int p12_step,*/
     __global const float* p21, /*int p21_step,*/
     __global const float* p22, /*int p22_step,*/
-    __global float* u1, int u1_step, 
-    __global float* u2, 
+    __global float* u1, int u1_step,
+    __global float* u2,
     __global float* error, const float l_t, const float theta, int u2_step,
-	int u1_offset_x,
-	int u1_offset_y,
-	int u2_offset_x,
-	int u2_offset_y)
+    int u1_offset_x,
+    int u1_offset_y,
+    int u2_offset_x,
+    int u2_offset_y,
+    char calc_error)
 {
 
     //const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -399,9 +400,12 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
         u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
         u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
 
-        const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
-        const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
-        error[y * I1wx_step + x] = n1 + n2;
+        if(calc_error)
+        {
+            const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+            const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+            error[y * I1wx_step + x] = n1 + n2;
+        }
     }
 
 }
diff --git a/modules/ocl/src/optical_flow_farneback.cpp b/modules/ocl/src/optical_flow_farneback.cpp
new file mode 100644
index 000000000..e622446bb
--- /dev/null
+++ b/modules/ocl/src/optical_flow_farneback.cpp
@@ -0,0 +1,540 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//      Sen Liu, swjtuls1987@126.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+#include "opencv2/video/tracking.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+#define MIN_SIZE 32
+
+namespace cv
+{
+namespace ocl
+{
+///////////////////////////OpenCL kernel strings///////////////////////////
+extern const char *optical_flow_farneback;
+}
+}
+
+namespace cv {
+namespace ocl {
+namespace optflow_farneback
+{
+oclMat g;
+oclMat xg;
+oclMat xxg;
+oclMat gKer;
+
+float ig[4];
+
+inline int divUp(int total, int grain)
+{
+    return (total + grain - 1) / grain;
+}
+
+inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
+{
+    cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
+    gKer.upload(t_gKer);
+}
+
+static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
+{
+    string kernelName("gaussianBlur");
+    size_t localThreads[3] = { 256, 1, 1 };
+    size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], src.rows, 1 };
+    int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
+
+    CV_Assert(dst.size() == src.size());
+    std::vector< std::pair<size_t, const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
+    args.push_back(std::make_pair(smem_size, (void *)NULL));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
+
+    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+                        globalThreads, localThreads, args, -1, -1);
+}
+
+static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
+{
+    string kernelName("polynomialExpansion");
+    size_t localThreads[3] = { 256, 1, 1 };
+    size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
+    int smem_size = 3 * localThreads[0] * sizeof(float);
+
+    std::vector< std::pair<size_t, const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data));
+    args.push_back(std::make_pair(smem_size, (void *)NULL));
+    args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+
+    char opt [128];
+    sprintf(opt, "-D polyN=%d", polyN);
+
+    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+                        globalThreads, localThreads, args, -1, -1, opt);
+}
+
+static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M)
+{
+    string kernelName("updateMatrices");
+    size_t localThreads[3] = { 32, 8, 1 };
+    size_t globalThreads[3] = { divUp(flowx.cols, localThreads[0]) * localThreads[0],
+                                divUp(flowx.rows, localThreads[1]) * localThreads[1],
+                                1
+                              };
+
+    std::vector< std::pair<size_t, const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step));
+
+    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+                        globalThreads, localThreads, args, -1, -1);
+}
+
+static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
+{
+    string kernelName("boxFilter5");
+    int height = src.rows / 5;
+    size_t localThreads[3] = { 256, 1, 1 };
+    size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], height, 1 };
+    int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
+
+    std::vector< std::pair<size_t, const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+    args.push_back(std::make_pair(smem_size, (void *)NULL));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
+
+    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+                        globalThreads, localThreads, args, -1, -1);
+}
+
+static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
+{
+    string kernelName("updateFlow");
+    int cols = divUp(flowx.cols, 4);
+    size_t localThreads[3] = { 32, 8, 1 };
+    size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
+                                divUp(flowx.rows, localThreads[1]) * localThreads[0],
+                                1
+                              };
+
+    std::vector< std::pair<size_t, const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
+
+    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+                        globalThreads, localThreads, args, -1, -1);
+}
+
+static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
+{
+    string kernelName("gaussianBlur5");
+    int height = src.rows / 5;
+    int width = src.cols;
+    size_t localThreads[3] = { 256, 1, 1 };
+    size_t globalThreads[3] = { divUp(width, localThreads[0]) * localThreads[0], height, 1 };
+    int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
+
+    std::vector< std::pair<size_t, const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
+    args.push_back(std::make_pair(smem_size, (void *)NULL));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&width));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
+
+    openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+                        globalThreads, localThreads, args, -1, -1);
+}
+}
+}
+} // namespace cv { namespace ocl { namespace optflow_farneback
+
+static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
+{
+    if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
+        return mat(Rect(0, 0, cols, rows));
+    return mat = oclMat(rows, cols, type);
+}
+
+cv::ocl::FarnebackOpticalFlow::FarnebackOpticalFlow()
+{
+    numLevels = 5;
+    pyrScale = 0.5;
+    fastPyramids = false;
+    winSize = 13;
+    numIters = 10;
+    polyN = 5;
+    polySigma = 1.1;
+    flags = 0;
+}
+
+void cv::ocl::FarnebackOpticalFlow::releaseMemory()
+{
+    frames_[0].release();
+    frames_[1].release();
+    pyrLevel_[0].release();
+    pyrLevel_[1].release();
+    M_.release();
+    bufM_.release();
+    R_[0].release();
+    R_[1].release();
+    blurredFrame_[0].release();
+    blurredFrame_[1].release();
+    pyramid0_.clear();
+    pyramid1_.clear();
+}
+
+void cv::ocl::FarnebackOpticalFlow::prepareGaussian(
+    int n, double sigma, float *g, float *xg, float *xxg,
+    double &ig11, double &ig03, double &ig33, double &ig55)
+{
+    double s = 0.;
+    for (int x = -n; x <= n; x++)
+    {
+        g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
+        s += g[x];
+    }
+
+    s = 1./s;
+    for (int x = -n; x <= n; x++)
+    {
+        g[x] = (float)(g[x]*s);
+        xg[x] = (float)(x*g[x]);
+        xxg[x] = (float)(x*x*g[x]);
+    }
+
+    Mat_<double> G(6, 6);
+    G.setTo(0);
+
+    for (int y = -n; y <= n; y++)
+    {
+        for (int x = -n; x <= n; x++)
+        {
+            G(0,0) += g[y]*g[x];
+            G(1,1) += g[y]*g[x]*x*x;
+            G(3,3) += g[y]*g[x]*x*x*x*x;
+            G(5,5) += g[y]*g[x]*x*x*y*y;
+        }
+    }
+
+    //G[0][0] = 1.;
+    G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
+    G(4,4) = G(3,3);
+    G(3,4) = G(4,3) = G(5,5);
+
+    // invG:
+    // [ x        e  e    ]
+    // [    y             ]
+    // [       y          ]
+    // [ e        z       ]
+    // [ e           z    ]
+    // [                u ]
+    Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
+
+    ig11 = invG(1,1);
+    ig03 = invG(0,3);
+    ig33 = invG(3,3);
+    ig55 = invG(5,5);
+}
+
+void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
+{
+    vector<float> buf(n*6 + 3);
+    float* g = &buf[0] + n;
+    float* xg = g + n*2 + 1;
+    float* xxg = xg + n*2 + 1;
+
+    if (sigma < FLT_EPSILON)
+        sigma = n*0.3;
+
+    double ig11, ig03, ig33, ig55;
+    prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
+
+    cv::Mat t_g(1, n + 1, CV_32FC1, g);
+    cv::Mat t_xg(1, n + 1, CV_32FC1, xg);
+    cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg);
+
+    optflow_farneback::g.upload(t_g);
+    optflow_farneback::xg.upload(t_xg);
+    optflow_farneback::xxg.upload(t_xxg);
+
+    optflow_farneback::ig[0] = static_cast<float>(ig11);
+    optflow_farneback::ig[1] = static_cast<float>(ig03);
+    optflow_farneback::ig[2] = static_cast<float>(ig33);
+    optflow_farneback::ig[3] = static_cast<float>(ig55);
+}
+
+void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter(
+    const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
+    oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
+{
+    optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM);
+
+    swap(M, bufM);
+
+    finish();
+
+    optflow_farneback::updateFlowOcl(M, flowx, flowy);
+
+    if (updateMatrices)
+        optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
+}
+
+
+void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur(
+    const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
+    oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
+{
+    optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM);
+
+    swap(M, bufM);
+
+    optflow_farneback::updateFlowOcl(M, flowx, flowy);
+
+    if (updateMatrices)
+        optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
+}
+
+
+void cv::ocl::FarnebackOpticalFlow::operator ()(
+    const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy)
+{
+    CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
+    CV_Assert(frame0.size() == frame1.size());
+    CV_Assert(polyN == 5 || polyN == 7);
+    CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
+
+    Size size = frame0.size();
+    oclMat prevFlowX, prevFlowY, curFlowX, curFlowY;
+
+    flowx.create(size, CV_32F);
+    flowy.create(size, CV_32F);
+    oclMat flowx0 = flowx;
+    oclMat flowy0 = flowy;
+
+    // Crop unnecessary levels
+    double scale = 1;
+    int numLevelsCropped = 0;
+    for (; numLevelsCropped < numLevels; numLevelsCropped++)
+    {
+        scale *= pyrScale;
+        if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
+            break;
+    }
+
+    frame0.convertTo(frames_[0], CV_32F);
+    frame1.convertTo(frames_[1], CV_32F);
+
+    if (fastPyramids)
+    {
+        // Build Gaussian pyramids using pyrDown()
+        pyramid0_.resize(numLevelsCropped + 1);
+        pyramid1_.resize(numLevelsCropped + 1);
+        pyramid0_[0] = frames_[0];
+        pyramid1_[0] = frames_[1];
+        for (int i = 1; i <= numLevelsCropped; ++i)
+        {
+            pyrDown(pyramid0_[i - 1], pyramid0_[i]);
+            pyrDown(pyramid1_[i - 1], pyramid1_[i]);
+        }
+    }
+
+    setPolynomialExpansionConsts(polyN, polySigma);
+
+    for (int k = numLevelsCropped; k >= 0; k--)
+    {
+        scale = 1;
+        for (int i = 0; i < k; i++)
+            scale *= pyrScale;
+
+        double sigma = (1./scale - 1) * 0.5;
+        int smoothSize = cvRound(sigma*5) | 1;
+        smoothSize = std::max(smoothSize, 3);
+
+        int width = cvRound(size.width*scale);
+        int height = cvRound(size.height*scale);
+
+        if (fastPyramids)
+        {
+            width = pyramid0_[k].cols;
+            height = pyramid0_[k].rows;
+        }
+
+        if (k > 0)
+        {
+            curFlowX.create(height, width, CV_32F);
+            curFlowY.create(height, width, CV_32F);
+        }
+        else
+        {
+            curFlowX = flowx0;
+            curFlowY = flowy0;
+        }
+
+        if (!prevFlowX.data)
+        {
+            if (flags & cv::OPTFLOW_USE_INITIAL_FLOW)
+            {
+                resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
+                resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
+                multiply(scale, curFlowX, curFlowX);
+                multiply(scale, curFlowY, curFlowY);
+            }
+            else
+            {
+                curFlowX.setTo(0);
+                curFlowY.setTo(0);
+            }
+        }
+        else
+        {
+            resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
+            resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
+            multiply(1./pyrScale, curFlowX, curFlowX);
+            multiply(1./pyrScale, curFlowY, curFlowY);
+        }
+
+        oclMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
+        oclMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
+        oclMat R[2] =
+        {
+            allocMatFromBuf(5*height, width, CV_32F, R_[0]),
+            allocMatFromBuf(5*height, width, CV_32F, R_[1])
+        };
+
+        if (fastPyramids)
+        {
+            optflow_farneback::polynomialExpansionOcl(pyramid0_[k], polyN, R[0]);
+            optflow_farneback::polynomialExpansionOcl(pyramid1_[k], polyN, R[1]);
+        }
+        else
+        {
+            oclMat blurredFrame[2] =
+            {
+                allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
+                allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
+            };
+            oclMat pyrLevel[2] =
+            {
+                allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
+                allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
+            };
+
+            Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
+            optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
+
+            for (int i = 0; i < 2; i++)
+            {
+                optflow_farneback::gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]);
+                resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR);
+                optflow_farneback::polynomialExpansionOcl(pyrLevel[i], polyN, R[i]);
+            }
+        }
+
+        optflow_farneback::updateMatricesOcl(curFlowX, curFlowY, R[0], R[1], M);
+
+        if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
+        {
+            Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
+            optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
+        }
+        for (int i = 0; i < numIters; i++)
+        {
+            if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
+                updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
+            else
+                updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
+        }
+
+        prevFlowX = curFlowX;
+        prevFlowY = curFlowY;
+    }
+
+    flowx = curFlowX;
+    flowy = curFlowY;
+}
diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp
index 3b75f303d..cf8743fcc 100644
--- a/modules/ocl/src/precomp.hpp
+++ b/modules/ocl/src/precomp.hpp
@@ -64,14 +64,12 @@
 #undef OPENCV_NOSTL
 
 #include "opencv2/imgproc.hpp"
-#include "opencv2/objdetect.hpp"
+#include "opencv2/objdetect/objdetect_c.h"
 #include "opencv2/ocl.hpp"
 
 #include "opencv2/core/utility.hpp"
 #include "opencv2/core/private.hpp"
 
-//#include "opencv2/highgui.hpp"
-
 #define __ATI__
 
 #if defined (HAVE_OPENCL)
diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp
index fc2b706a8..801a29f25 100644
--- a/modules/ocl/src/pyrdown.cpp
+++ b/modules/ocl/src/pyrdown.cpp
@@ -109,4 +109,3 @@ void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)
 
     pyrdown_run(src, dst);
 }
-
diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp
index 0034a1cce..1f80f4fb3 100644
--- a/modules/ocl/src/pyrup.cpp
+++ b/modules/ocl/src/pyrup.cpp
@@ -85,4 +85,4 @@ namespace cv
             openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
         }
     }
-}
\ No newline at end of file
+}
diff --git a/modules/ocl/src/sort_by_key.cpp b/modules/ocl/src/sort_by_key.cpp
new file mode 100644
index 000000000..0025f0d91
--- /dev/null
+++ b/modules/ocl/src/sort_by_key.cpp
@@ -0,0 +1,454 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include <iomanip>
+#include "precomp.hpp"
+
+namespace cv
+{
+namespace ocl
+{
+
+extern const char * kernel_sort_by_key;
+extern const char * kernel_stablesort_by_key;
+extern const char * kernel_radix_sort_by_key;
+
+void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, int method, bool isGreaterThan);
+
+//TODO(pengx17): change this value depending on device other than a constant
+const static unsigned int GROUP_SIZE = 256;
+
+const char * depth_strings[] =
+{
+    "uchar",   //CV_8U
+    "char",    //CV_8S
+    "ushort",  //CV_16U
+    "short",   //CV_16S
+    "int",     //CV_32S
+    "float",   //CV_32F
+    "double"   //CV_64F
+};
+
+void static genSortBuildOption(const oclMat& keys, const oclMat& vals, bool isGreaterThan, char * build_opt_buf)
+{
+    sprintf(build_opt_buf, "-D IS_GT=%d -D K_T=%s -D V_T=%s",
+            isGreaterThan?1:0, depth_strings[keys.depth()], depth_strings[vals.depth()]);
+    if(vals.oclchannels() > 1)
+    {
+        sprintf( build_opt_buf + strlen(build_opt_buf), "%d", vals.oclchannels());
+    }
+}
+inline bool isSizePowerOf2(size_t size)
+{
+    return ((size - 1) & (size)) == 0;
+}
+
+namespace bitonic_sort
+{
+static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
+{
+    CV_Assert(isSizePowerOf2(vecSize));
+
+    Context * cxt = Context::getContext();
+    size_t globalThreads[3] = {vecSize / 2, 1, 1};
+    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
+
+    // 2^numStages should be equal to vecSize or the output is invalid
+    int numStages = 0;
+    for(int i = vecSize; i > 1; i >>= 1)
+    {
+        ++numStages;
+    }
+    char build_opt_buf [100];
+    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
+    const int argc = 5;
+    std::vector< std::pair<size_t, const void *> > args(argc);
+    String kernelname = "bitonicSort";
+
+    args[0] = std::make_pair(sizeof(cl_mem), (void *)&keys.data);
+    args[1] = std::make_pair(sizeof(cl_mem), (void *)&vals.data);
+    args[2] = std::make_pair(sizeof(cl_int), (void *)&vecSize);
+
+    for(int stage = 0; stage < numStages; ++stage)
+    {
+        args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
+        for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
+        {
+            args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
+            openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
+        }
+    }
+}
+}  /* bitonic_sort */
+
+namespace selection_sort
+{
+// FIXME:
+// This function cannot sort arrays with duplicated keys
+static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
+{
+    CV_Error(-1, "This function is incorrect at the moment.");
+    Context * cxt = Context::getContext();
+
+    size_t globalThreads[3] = {vecSize, 1, 1};
+    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
+
+    std::vector< std::pair<size_t, const void *> > args;
+    char build_opt_buf [100];
+    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
+
+    //local
+    String kernelname = "selectionSortLocal";
+    int lds_size = GROUP_SIZE * keys.elemSize();
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&keys.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&vals.data));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize));
+    args.push_back(std::make_pair(lds_size,       (void*)NULL));
+
+    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
+
+    //final
+    kernelname = "selectionSortFinal";
+    args.pop_back();
+    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
+}
+
+}  /* selection_sort */
+
+
+namespace radix_sort
+{
+//FIXME(pengx17):
+// exclusive scan, need to be optimized as this is too naive...
+//void naive_scan_addition(oclMat& input, oclMat& output)
+//{
+//    Context * cxt = Context::getContext();
+//    size_t vecSize = input.cols;
+//    size_t globalThreads[3] = {1, 1, 1};
+//    size_t localThreads[3]  = {1, 1, 1};
+//
+//    String kernelname = "naiveScanAddition";
+//
+//    std::vector< std::pair<size_t, const void *> > args;
+//    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&input.data));
+//    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&output.data));
+//    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize));
+//    openCLExecuteKernel(cxt, &kernel_radix_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1);
+//}
+
+void static naive_scan_addition_cpu(oclMat& input, oclMat& output)
+{
+    Mat m_input = input, m_output(output.size(), output.type());
+    MatIterator_<int> i_mit = m_input.begin<int>();
+    MatIterator_<int> o_mit = m_output.begin<int>();
+    *o_mit = 0;
+    ++i_mit;
+    ++o_mit;
+    for(; i_mit != m_input.end<int>(); ++i_mit, ++o_mit)
+    {
+        *o_mit = *(o_mit - 1) + *(i_mit - 1);
+    }
+    output = m_output;
+}
+
+
+//radix sort ported from Bolt
+static void sortByKey(oclMat& keys, oclMat& vals, size_t origVecSize, bool isGreaterThan)
+{
+    CV_Assert(keys.depth() == CV_32S || keys.depth() == CV_32F); // we assume keys are 4 bytes
+
+    bool isKeyFloat = keys.type() == CV_32F;
+
+    const int RADIX = 4; //Now you cannot replace this with Radix 8 since there is a
+                         //local array of 16 elements in the histogram kernel.
+    const int RADICES = (1 << RADIX); //Values handeled by each work-item?
+
+    bool  newBuffer = false;
+    size_t vecSize = origVecSize;
+
+    unsigned int groupSize  = RADICES;
+
+    size_t mulFactor = groupSize * RADICES;
+
+    oclMat buffer_keys, buffer_vals;
+
+    if(origVecSize % mulFactor != 0)
+    {
+        vecSize = ((vecSize + mulFactor) / mulFactor) * mulFactor;
+        buffer_keys.create(1, vecSize, keys.type());
+        buffer_vals.create(1, vecSize, vals.type());
+        Scalar padding_value;
+        oclMat roi_buffer_vals = buffer_vals(Rect(0,0,origVecSize,1));
+
+        if(isGreaterThan)
+        {
+            switch(buffer_keys.depth())
+            {
+            case CV_32F:
+                padding_value = Scalar::all(-FLT_MAX);
+                break;
+            case CV_32S:
+                padding_value = Scalar::all(INT_MIN);
+                break;
+            }
+        }
+        else
+        {
+            switch(buffer_keys.depth())
+            {
+            case CV_32F:
+                padding_value = Scalar::all(FLT_MAX);
+                break;
+            case CV_32S:
+                padding_value = Scalar::all(INT_MAX);
+                break;
+            }
+        }
+        ocl::copyMakeBorder(
+            keys(Rect(0,0,origVecSize,1)), buffer_keys,
+            0, 0, 0, vecSize - origVecSize,
+            BORDER_CONSTANT, padding_value);
+        vals(Rect(0,0,origVecSize,1)).copyTo(roi_buffer_vals);
+        newBuffer = true;
+    }
+    else
+    {
+        buffer_keys = keys;
+        buffer_vals = vals;
+        newBuffer = false;
+    }
+    oclMat swap_input_keys(1, vecSize, keys.type());
+    oclMat swap_input_vals(1, vecSize, vals.type());
+    oclMat hist_bin_keys(1, vecSize, CV_32SC1);
+    oclMat hist_bin_dest_keys(1, vecSize, CV_32SC1);
+
+    Context * cxt = Context::getContext();
+
+    size_t globalThreads[3] = {vecSize / RADICES, 1, 1};
+    size_t localThreads[3]  = {groupSize, 1, 1};
+
+    std::vector< std::pair<size_t, const void *> > args;
+    char build_opt_buf [100];
+    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
+
+    //additional build option for radix sort
+    sprintf(build_opt_buf + strlen(build_opt_buf), " -D K_%s", isKeyFloat?"FLT":"INT");
+
+    String kernelnames[2] = {String("histogramRadixN"), String("permuteRadixN")};
+
+    int swap = 0;
+    for(int bits = 0; bits < (static_cast<int>(keys.elemSize()) * 8); bits += RADIX)
+    {
+        args.clear();
+        //Do a histogram pass locally
+        if(swap == 0)
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_keys.data));
+        }
+        else
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_keys.data));
+        }
+        args.push_back(std::make_pair(sizeof(cl_mem), (void *)&hist_bin_keys.data));
+        args.push_back(std::make_pair(sizeof(cl_int), (void *)&bits));
+        openCLExecuteKernel(cxt, &kernel_radix_sort_by_key, kernelnames[0], globalThreads, localThreads,
+            args, -1, -1, build_opt_buf);
+
+        args.clear();
+        //Perform a global scan
+        naive_scan_addition_cpu(hist_bin_keys, hist_bin_dest_keys);
+        // end of scan
+        if(swap == 0)
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_keys.data));
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_vals.data));
+        }
+        else
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_keys.data));
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_vals.data));
+        }
+        args.push_back(std::make_pair(sizeof(cl_mem), (void *)&hist_bin_dest_keys.data));
+        args.push_back(std::make_pair(sizeof(cl_int), (void *)&bits));
+
+        if(swap == 0)
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_keys.data));
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&swap_input_vals.data));
+        }
+        else
+        {
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_keys.data));
+            args.push_back(std::make_pair(sizeof(cl_mem), (void *)&buffer_vals.data));
+        }
+        openCLExecuteKernel(cxt, &kernel_radix_sort_by_key, kernelnames[1], globalThreads, localThreads,
+            args, -1, -1, build_opt_buf);
+        swap = swap ? 0 : 1;
+    }
+    if(newBuffer)
+    {
+        buffer_keys(Rect(0,0,origVecSize,1)).copyTo(keys);
+        buffer_vals(Rect(0,0,origVecSize,1)).copyTo(vals);
+    }
+}
+
+}  /* radix_sort */
+
+namespace merge_sort
+{
+static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
+{
+    Context * cxt = Context::getContext();
+
+    size_t globalThreads[3] = {vecSize, 1, 1};
+    size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
+
+    std::vector< std::pair<size_t, const void *> > args;
+    char build_opt_buf [100];
+    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
+
+    String kernelname[] = {String("blockInsertionSort"), String("merge")};
+    int keylds_size = GROUP_SIZE * keys.elemSize();
+    int vallds_size = GROUP_SIZE * vals.elemSize();
+    args.push_back(std::make_pair(sizeof(cl_mem),  (void *)&keys.data));
+    args.push_back(std::make_pair(sizeof(cl_mem),  (void *)&vals.data));
+    args.push_back(std::make_pair(sizeof(cl_uint), (void *)&vecSize));
+    args.push_back(std::make_pair(keylds_size,     (void*)NULL));
+    args.push_back(std::make_pair(vallds_size,     (void*)NULL));
+
+    openCLExecuteKernel(cxt, &kernel_stablesort_by_key, kernelname[0], globalThreads, localThreads, args, -1, -1, build_opt_buf);
+
+    //  Early exit for the case of no merge passes, values are already in destination vector
+    if(vecSize <= GROUP_SIZE)
+    {
+        return;
+    }
+
+    //  An odd number of elements requires an extra merge pass to sort
+    size_t numMerges = 0;
+    //  Calculate the log2 of vecSize, taking into acvecSize our block size from kernel 1 is 64
+    //  this is how many merge passes we want
+    size_t log2BlockSize = vecSize >> 6;
+    for( ; log2BlockSize > 1; log2BlockSize >>= 1 )
+    {
+        ++numMerges;
+    }
+    //  Check to see if the input vector size is a power of 2, if not we will need last merge pass
+    numMerges += isSizePowerOf2(vecSize)? 1: 0;
+
+    //  Allocate a flipflop buffer because the merge passes are out of place
+    oclMat tmpKeyBuffer(keys.size(), keys.type());
+    oclMat tmpValBuffer(vals.size(), vals.type());
+    args.resize(8);
+
+    args[4] = std::make_pair(sizeof(cl_uint), (void *)&vecSize);
+    args[6] = std::make_pair(keylds_size,    (void*)NULL);
+    args[7] = std::make_pair(vallds_size,    (void*)NULL);
+
+    for(size_t pass = 1; pass <= numMerges; ++pass )
+    {
+        //  For each pass, flip the input-output buffers
+        if( pass & 0x1 )
+        {
+            args[0] = std::make_pair(sizeof(cl_mem), (void *)&keys.data);
+            args[1] = std::make_pair(sizeof(cl_mem), (void *)&vals.data);
+            args[2] = std::make_pair(sizeof(cl_mem), (void *)&tmpKeyBuffer.data);
+            args[3] = std::make_pair(sizeof(cl_mem), (void *)&tmpValBuffer.data);
+        }
+        else
+        {
+            args[0] = std::make_pair(sizeof(cl_mem), (void *)&tmpKeyBuffer.data);
+            args[1] = std::make_pair(sizeof(cl_mem), (void *)&tmpValBuffer.data);
+            args[2] = std::make_pair(sizeof(cl_mem), (void *)&keys.data);
+            args[3] = std::make_pair(sizeof(cl_mem), (void *)&vals.data);
+        }
+        //  For each pass, the merge window doubles
+        unsigned int srcLogicalBlockSize = static_cast<unsigned int>( localThreads[0] << (pass-1) );
+        args[5] = std::make_pair(sizeof(cl_uint), (void *)&srcLogicalBlockSize);
+        openCLExecuteKernel(cxt, &kernel_stablesort_by_key, kernelname[1], globalThreads, localThreads, args, -1, -1, build_opt_buf);
+    }
+    //  If there are an odd number of merges, then the output data is sitting in the temp buffer.  We need to copy
+    //  the results back into the input array
+    if( numMerges & 1 )
+    {
+        tmpKeyBuffer.copyTo(keys);
+        tmpValBuffer.copyTo(vals);
+    }
+}
+}  /* merge_sort */
+
+}
+} /* namespace cv { namespace ocl */
+
+
+void cv::ocl::sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, int method, bool isGreaterThan)
+{
+    CV_Assert( keys.rows == 1 ); // we only allow one dimensional input
+    CV_Assert( keys.channels() == 1 ); // we only allow one channel keys
+    CV_Assert( vecSize <= static_cast<size_t>(keys.cols) );
+    switch(method)
+    {
+    case SORT_BITONIC:
+        bitonic_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
+        break;
+    case SORT_SELECTION:
+        selection_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
+        break;
+    case SORT_RADIX:
+        radix_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
+        break;
+    case SORT_MERGE:
+        merge_sort::sortByKey(keys, vals, vecSize, isGreaterThan);
+        break;
+    }
+}
+
+void cv::ocl::sortByKey(oclMat& keys, oclMat& vals, int method, bool isGreaterThan)
+{
+    CV_Assert( keys.size() == vals.size() );
+    CV_Assert( keys.rows == 1 ); // we only allow one dimensional input
+    size_t vecSize = static_cast<size_t>(keys.cols);
+    sortByKey(keys, vals, vecSize, method, isGreaterThan);
+}
diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp
index 50072c2d5..fa534893c 100644
--- a/modules/ocl/src/stereobp.cpp
+++ b/modules/ocl/src/stereobp.cpp
@@ -235,13 +235,13 @@ namespace cv
                 args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t));
                 args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols));
                 args.push_back( std::make_pair( sizeof(cl_int) , (void *)&rows));
-                args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cndisp));
                 args.push_back( std::make_pair( sizeof(cl_float) , (void *)&cmax_disc_term));
                 args.push_back( std::make_pair( sizeof(cl_float) , (void *)&cdisc_single_jump));
 
                 size_t gt[3] = {cols, rows, 1}, lt[3] = {16, 16, 1};
-                const char* t_opt  = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT";
-                openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt);
+                char opt[80] = "";
+                sprintf(opt, "-D %s -D CNDISP=%d", data_type == CV_16S ? "T_SHORT":"T_FLOAT", cndisp);
+                openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, opt);
             }
 
             static void calc_all_iterations_calls(int cols, int rows, int iters, oclMat &u,
diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp
index c2e85b6ae..daf3a2295 100644
--- a/modules/ocl/src/tvl1flow.cpp
+++ b/modules/ocl/src/tvl1flow.cpp
@@ -172,7 +172,7 @@ namespace ocl_tvl1flow
     void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
         oclMat &rho_c, oclMat &p11, oclMat &p12,
         oclMat &p21, oclMat &p22, oclMat &u1,
-        oclMat &u2, oclMat &error, float l_t, float theta);
+        oclMat &u2, oclMat &error, float l_t, float theta, char calc_error);
 
     void estimateDualVariables(oclMat &u1, oclMat &u2,
         oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut);
@@ -229,18 +229,29 @@ void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const ocl
         warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
 
         double error = numeric_limits<double>::max();
+        double prev_error = 0;
         for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
         {
+            // some tweaks to make sum operation less frequently
+            char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
             estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
-                u1, u2, diff, l_t, static_cast<float>(theta));
-
-            error = ocl::sum(diff)[0];
-
+                      u1, u2, diff, l_t, static_cast<float>(theta), calc_error);
+            if(calc_error)
+            {
+                error = ocl::sum(diff)[0];
+                prev_error = error;
+            }
+            else
+            {
+                error = numeric_limits<double>::max();
+                prev_error -= scaledEpsilon;
+            }
             estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
 
         }
     }
 
+
 }
 
 void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage()
@@ -348,7 +359,7 @@ void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oc
 void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
     oclMat &rho_c, oclMat &p11, oclMat &p12,
     oclMat &p21, oclMat &p22, oclMat &u1,
-    oclMat &u2, oclMat &error, float l_t, float theta)
+    oclMat &u2, oclMat &error, float l_t, float theta, char calc_error)
 {
     Context* clCxt = I1wx.clCxt;
 
@@ -401,6 +412,7 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+    args.push_back( make_pair( sizeof(cl_char), (void*)&calc_error));
 
     openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
 }
diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp
index 3b16cffdd..2a7be4900 100644
--- a/modules/ocl/test/main.cpp
+++ b/modules/ocl/test/main.cpp
@@ -39,15 +39,16 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
 #ifdef HAVE_OPENCL
 
-using namespace std;
 using namespace cv;
 using namespace cv::ocl;
 using namespace cvtest;
 using namespace testing;
+using std::cout;
+using std::endl;
 
 void print_info()
 {
@@ -73,14 +74,12 @@ void print_info()
 #endif
 
 }
-std::string workdir;
 int main(int argc, char **argv)
 {
-    TS::ptr()->init("ocl");
+    TS::ptr()->init(".");
     InitGoogleTest(&argc, argv);
     const char *keys =
         "{ h | false              | print help message }"
-		"{ w | ../../../samples/c/| set working directory i.e. -w=C:\\}"
         "{ t | gpu                | set device type:i.e. -t=cpu or gpu}"
         "{ p | 0                  | set platform id i.e. -p=0}"
         "{ d | 0                  | set device id i.e. -d=0}";
@@ -88,12 +87,11 @@ int main(int argc, char **argv)
     CommandLineParser cmd(argc, argv, keys);
     if (cmd.get<string>("h")=="true")
     {
-        cout << "Avaible options besides goole test option:" << endl;
+        cout << "Available options besides google test options:" << endl;
         cmd.printMessage();
         return 0;
     }
-    workdir = cmd.get<string>("w");
-    string type = cmd.get<string>("t");
+    string type = cmd.get<String>("t");
     unsigned int pid = cmd.get<unsigned int>("p");
     int device = cmd.get<int>("d");
 
@@ -117,6 +115,8 @@ int main(int argc, char **argv)
     }
 
     setDevice(oclinfo[pid], device);
+    setBinaryDiskCache(CACHE_UPDATE);
+
     cout << "Platform name:" << oclinfo[pid].PlatformName << endl;
     cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
     return RUN_ALL_TESTS();
diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp
index d12cef0b2..fa9d09999 100644
--- a/modules/ocl/test/test_arithm.cpp
+++ b/modules/ocl/test/test_arithm.cpp
@@ -54,7 +54,7 @@
 //#define PRINT_TIME
 
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 #include <iomanip>
 
 #ifdef HAVE_OPENCL
@@ -178,7 +178,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
     }
 
     void Near1(double threshold = 0.)
-    {     
+    {
         EXPECT_MAT_NEAR(dst1, Mat(gdst1_whole), threshold);
     }
 
diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp
new file mode 100644
index 000000000..d7492d600
--- /dev/null
+++ b/modules/ocl/test/test_bgfg.cpp
@@ -0,0 +1,227 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma, jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+extern string workdir;
+//////////////////////////////////////////////////////
+// MOG
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(UseGray, bool)
+    IMPLEMENT_PARAM_CLASS(LearningRate, double)
+}
+
+PARAM_TEST_CASE(mog, UseGray, LearningRate, bool)
+{
+    bool useGray;
+    double learningRate;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        useGray = GET_PARAM(0);
+
+        learningRate = GET_PARAM(1);
+
+        useRoi = GET_PARAM(2);
+    }
+};
+
+TEST_P(mog, Update)
+{
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::ocl::MOG mog;
+    cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi);
+
+    Ptr<cv::BackgroundSubtractorMOG> mog_gold = createBackgroundSubtractorMOG();
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (useGray)
+        {
+            cv::Mat temp;
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            cv::swap(temp, frame);
+        }
+
+        mog(loadMat_ocl(frame, useRoi), foreground, (float)learningRate);
+
+        mog_gold->apply(frame, foreground_gold, learningRate);
+
+        EXPECT_MAT_NEAR(foreground_gold, foreground, 0.0);
+    }
+}
+INSTANTIATE_TEST_CASE_P(OCL_Video, mog, testing::Combine(
+    testing::Values(UseGray(false), UseGray(true)),
+    testing::Values(LearningRate(0.0), LearningRate(0.01)),
+    Values(true, false)));
+
+//////////////////////////////////////////////////////
+// MOG2
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(DetectShadow, bool)
+}
+
+PARAM_TEST_CASE(mog2, UseGray, DetectShadow, bool)
+{
+    bool useGray;
+    bool detectShadow;
+    bool useRoi;
+    virtual void SetUp()
+    {
+        useGray = GET_PARAM(0);
+        detectShadow = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+    }
+};
+
+TEST_P(mog2, Update)
+{
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::ocl::MOG2 mog2;
+    mog2.bShadowDetection = detectShadow;
+    cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi);
+
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = createBackgroundSubtractorMOG2();
+    mog2_gold->set("detectShadows", detectShadow);
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (useGray)
+        {
+            cv::Mat temp;
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            cv::swap(temp, frame);
+        }
+
+        mog2(loadMat_ocl(frame, useRoi), foreground);
+
+        mog2_gold->apply(frame, foreground_gold);
+
+        if (detectShadow)
+            EXPECT_MAT_SIMILAR(foreground_gold, foreground, 15e-3)
+        else
+            EXPECT_MAT_NEAR(foreground_gold, foreground, 0)
+    }
+}
+
+TEST_P(mog2, getBackgroundImage)
+{
+    if (useGray)
+        return;
+
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cv::ocl::MOG2 mog2;
+    mog2.bShadowDetection = detectShadow;
+    cv::ocl::oclMat foreground;
+
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = createBackgroundSubtractorMOG2();
+    mog2_gold->set("detectShadows", detectShadow);
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        mog2(loadMat_ocl(frame, useRoi), foreground);
+
+        mog2_gold->apply(frame, foreground_gold);
+    }
+
+    cv::ocl::oclMat background = createMat_ocl(frame.size(), frame.type(), useRoi);
+    mog2.getBackgroundImage(background);
+
+    cv::Mat background_gold;
+    mog2_gold->getBackgroundImage(background_gold);
+
+    EXPECT_MAT_NEAR(background_gold, background, 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, mog2, testing::Combine(
+    testing::Values(UseGray(true), UseGray(false)),
+    testing::Values(DetectShadow(true), DetectShadow(false)),
+    Values(true, false)));
+
+#endif
diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp
index fa1aea172..ae0cbd41b 100644
--- a/modules/ocl/test/test_blend.cpp
+++ b/modules/ocl/test/test_blend.cpp
@@ -42,7 +42,7 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 #include <iomanip>
 
 using namespace cv;
@@ -116,4 +116,4 @@ INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Blend, Combine(
                             DIFFERENT_SIZES,
                             testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
                         ));
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/ocl/test/test_brute_force_matcher.cpp b/modules/ocl/test/test_brute_force_matcher.cpp
index 84e5d4c77..4d0b45fb7 100644
--- a/modules/ocl/test/test_brute_force_matcher.cpp
+++ b/modules/ocl/test/test_brute_force_matcher.cpp
@@ -43,7 +43,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 #ifdef HAVE_OPENCL
 namespace
 {
diff --git a/modules/ocl/test/test_calib3d.cpp b/modules/ocl/test/test_calib3d.cpp
index 14fb31f53..7e5c4a419 100644
--- a/modules/ocl/test/test_calib3d.cpp
+++ b/modules/ocl/test/test_calib3d.cpp
@@ -43,14 +43,13 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 #include <iomanip>
 
 #ifdef HAVE_OPENCL
 
 using namespace cv;
 
-extern std::string workdir;
 PARAM_TEST_CASE(StereoMatchBM, int, int)
 {
     int n_disp;
@@ -66,9 +65,9 @@ PARAM_TEST_CASE(StereoMatchBM, int, int)
 TEST_P(StereoMatchBM, Regression)
 {
 
-    Mat left_image  = readImage("stereobm/aloe-L.png", IMREAD_GRAYSCALE);
-    Mat right_image = readImage("stereobm/aloe-R.png", IMREAD_GRAYSCALE);
-    Mat disp_gold   = readImage("stereobm/aloe-disp.png", IMREAD_GRAYSCALE);
+    Mat left_image  = readImage("gpu/stereobm/aloe-L.png", IMREAD_GRAYSCALE);
+    Mat right_image = readImage("gpu/stereobm/aloe-R.png", IMREAD_GRAYSCALE);
+    Mat disp_gold   = readImage("gpu/stereobm/aloe-disp.png", IMREAD_GRAYSCALE);
     ocl::oclMat d_left, d_right;
     ocl::oclMat d_disp(left_image.size(), CV_8U);
     Mat  disp;
@@ -113,9 +112,9 @@ PARAM_TEST_CASE(StereoMatchBP, int, int, int, float, float, float, float)
 };
 TEST_P(StereoMatchBP, Regression)
 {
-    Mat left_image  = readImage("stereobp/aloe-L.png");
-    Mat right_image = readImage("stereobp/aloe-R.png");
-    Mat disp_gold   = readImage("stereobp/aloe-disp.png", IMREAD_GRAYSCALE);
+    Mat left_image  = readImage("gpu/stereobp/aloe-L.png");
+    Mat right_image = readImage("gpu/stereobp/aloe-R.png");
+    Mat disp_gold   = readImage("gpu/stereobp/aloe-disp.png", IMREAD_GRAYSCALE);
     ocl::oclMat d_left, d_right;
     ocl::oclMat d_disp;
     Mat  disp;
@@ -166,9 +165,9 @@ PARAM_TEST_CASE(StereoMatchConstSpaceBP, int, int, int, int, float, float, float
 };
 TEST_P(StereoMatchConstSpaceBP, Regression)
 {
-    Mat left_image  = readImage("csstereobp/aloe-L.png");
-    Mat right_image = readImage("csstereobp/aloe-R.png");
-    Mat disp_gold   = readImage("csstereobp/aloe-disp.png", IMREAD_GRAYSCALE);
+    Mat left_image  = readImage("gpu/csstereobp/aloe-L.png");
+    Mat right_image = readImage("gpu/csstereobp/aloe-R.png");
+    Mat disp_gold   = readImage("gpu/csstereobp/aloe-disp.png", IMREAD_GRAYSCALE);
 
     ocl::oclMat d_left, d_right;
     ocl::oclMat d_disp;
diff --git a/modules/ocl/test/test_canny.cpp b/modules/ocl/test/test_canny.cpp
index 10032e897..5d8d108e7 100644
--- a/modules/ocl/test/test_canny.cpp
+++ b/modules/ocl/test/test_canny.cpp
@@ -43,12 +43,11 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 #ifdef HAVE_OPENCL
 
 ////////////////////////////////////////////////////////
 // Canny
-extern std::string workdir;
 IMPLEMENT_PARAM_CLASS(AppertureSize, int);
 IMPLEMENT_PARAM_CLASS(L2gradient, bool);
 
@@ -67,7 +66,7 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient)
 
 TEST_P(Canny, Accuracy)
 {
-    cv::Mat img = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE);
+    cv::Mat img = readImage("cv/shared/fruits.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
     double low_thresh = 50.0;
@@ -87,4 +86,4 @@ TEST_P(Canny, Accuracy)
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
                             testing::Values(AppertureSize(3), AppertureSize(5)),
                             testing::Values(L2gradient(false), L2gradient(true))));
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index efc96de23..b0a223091 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -43,7 +43,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
 using namespace cv;
 
diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp
index 3d80bc814..29a2b815a 100644
--- a/modules/ocl/test/test_fft.cpp
+++ b/modules/ocl/test/test_fft.cpp
@@ -43,7 +43,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 using namespace std;
 #ifdef HAVE_CLAMDFFT
 ////////////////////////////////////////////////////////////////////////////
@@ -132,7 +132,7 @@ TEST_P(MulSpectrums, Simple)
     cv::Mat c_gold;
     cv::mulSpectrums(a, b, c_gold, flag, ccorr);
 
-    EXPECT_MAT_NEAR(c_gold, c, 1e-2, "");
+    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
 }
 
 TEST_P(MulSpectrums, Scaled)
@@ -146,7 +146,7 @@ TEST_P(MulSpectrums, Scaled)
     cv::mulSpectrums(a, b, c_gold, flag, ccorr);
     c_gold.convertTo(c_gold, c_gold.type(), scale);
 
-    EXPECT_MAT_NEAR(c_gold, c, 1e-2, "");
+    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
 }
 
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(
@@ -230,7 +230,7 @@ TEST_P(Convolve_DFT, Accuracy)
     cv::Mat dst_gold;
     convolveDFT(src, kernel, dst_gold, ccorr);
 
-    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1, "");
+    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
 }
 #define DIFFERENT_CONVOLVE_SIZES testing::Values(cv::Size(251, 257), cv::Size(113, 113), cv::Size(200, 480), cv::Size(1300, 1300))
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Convolve_DFT, testing::Combine(
diff --git a/modules/ocl/test/test_filters.cpp b/modules/ocl/test/test_filters.cpp
index cfd57413e..4a22ec503 100644
--- a/modules/ocl/test/test_filters.cpp
+++ b/modules/ocl/test/test_filters.cpp
@@ -48,7 +48,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
 #ifdef HAVE_OPENCL
 
@@ -57,8 +57,8 @@ using namespace testing;
 using namespace std;
 
 
-PARAM_TEST_CASE(FilterTestBase, 
-                MatType, 
+PARAM_TEST_CASE(FilterTestBase,
+                MatType,
                 cv::Size, // kernel size
                 cv::Size, // dx,dy
                 int       // border type, or iteration
@@ -324,6 +324,98 @@ TEST_P(GaussianBlur, Mat)
 
 
 
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Filter2D
+struct Filter2D : FilterTestBase
+{
+    int type;
+    cv::Size ksize;
+    int bordertype;
+    Point anchor;
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(3);
+        Init(type);
+        anchor = Point(-1,-1);
+    }
+};
+
+TEST_P(Filter2D, Mat)
+{
+    cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0);
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();
+        cv::filter2D(mat1_roi, dst_roi, -1, kernel, anchor, 0.0, bordertype);
+        cv::ocl::filter2D(gmat1, gdst, -1, kernel, anchor, bordertype);
+        Near(1);
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Bilateral
+struct Bilateral : FilterTestBase
+{
+    int type;
+    cv::Size ksize;
+    int bordertype;
+    double sigmacolor, sigmaspace;
+
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(3);
+        Init(type);
+        cv::RNG &rng = TS::ptr()->get_rng();
+        sigmacolor = rng.uniform(20, 100);
+        sigmaspace = rng.uniform(10, 40);
+    }
+};
+
+TEST_P(Bilateral, Mat)
+{
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();
+        cv::bilateralFilter(mat1_roi, dst_roi, ksize.width, sigmacolor, sigmaspace, bordertype);
+        cv::ocl::bilateralFilter(gmat1, gdst, ksize.width, sigmacolor, sigmaspace, bordertype);
+        Near(1);
+    }
+
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// AdaptiveBilateral
+struct AdaptiveBilateral : FilterTestBase
+{
+    int type;
+    cv::Size ksize;
+    int bordertype;
+    Point anchor;
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(3);
+        Init(type);
+        anchor = Point(-1,-1);
+    }
+};
+
+TEST_P(AdaptiveBilateral, Mat)
+{
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();
+        cv::adaptiveBilateralFilter(mat1_roi, dst_roi, ksize, 5, anchor, bordertype);
+        cv::ocl::adaptiveBilateralFilter(gmat1, gdst, ksize, 5, anchor, bordertype);
+        Near(1);
+    }
+
+}
+
 INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
                         Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
                         Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
@@ -331,14 +423,14 @@ INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
                         Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
 
 
-INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
+INSTANTIATE_TEST_CASE_P(Filter, Laplacian, Combine(
                         Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
                         Values(Size(3, 3)),
                         Values(Size(0, 0)), //not use
                         Values(0)));        //not use
 
 INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(
-                        Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), 
+                        Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
                         Values(Size(0, 0)), //not use
                         Values(Size(0, 0)), //not use
                         Values(1)));
@@ -354,7 +446,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(
 INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
                         Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
                         Values(Size(0, 0)), //not use
-                        Values(Size(0, 1), Size(1, 0)), 
+                        Values(Size(0, 1), Size(1, 0)),
                         Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
 
 INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
@@ -365,4 +457,23 @@ INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
 
 
 
+INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
+                        Values(CV_8UC1, CV_32FC1, CV_32FC4),
+                        Values(Size(3, 3), Size(15, 15), Size(25, 25)),
+                        Values(Size(0, 0)), //not use
+                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT)));
+
+INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
+                        Values(CV_8UC1, CV_8UC3),
+                        Values(Size(5, 5), Size(9, 9)),
+                        Values(Size(0, 0)), //not use
+                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE,
+                               (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_WRAP, (MatType)cv::BORDER_REFLECT_101)));
+
+INSTANTIATE_TEST_CASE_P(Filter, AdaptiveBilateral, Combine(
+                        Values(CV_8UC1, CV_8UC3),
+                        Values(Size(5, 5), Size(9, 9)),
+                        Values(Size(0, 0)), //not use
+                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE,
+                               (MatType)cv::BORDER_REFLECT,  (MatType)cv::BORDER_REFLECT_101)));
 #endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_gemm.cpp b/modules/ocl/test/test_gemm.cpp
index 554845656..00d428770 100644
--- a/modules/ocl/test/test_gemm.cpp
+++ b/modules/ocl/test/test_gemm.cpp
@@ -43,7 +43,7 @@
 //M*/
 
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 using namespace std;
 #ifdef HAVE_CLAMDBLAS
 ////////////////////////////////////////////////////////////////////////////
diff --git a/modules/ocl/test/test_haar.cpp b/modules/ocl/test/test_haar.cpp
deleted file mode 100644
index fa6dd6807..000000000
--- a/modules/ocl/test/test_haar.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jia Haipeng, jiahaipeng95@gmail.com
-//    Sen Liu, swjutls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "opencv2/objdetect.hpp"
-#include "precomp.hpp"
-
-#if 0 //def HAVE_OPENCL
-
-using namespace cvtest;
-using namespace testing;
-using namespace std;
-using namespace cv;
-extern string workdir;
-
-namespace
-{
-IMPLEMENT_PARAM_CLASS(CascadeName, std::string);
-CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
-CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
-struct getRect
-{
-    Rect operator ()(const CvAvgComp &e) const
-    {
-        return e.rect;
-    }
-};
-}
-
-PARAM_TEST_CASE(Haar, double, int, CascadeName)
-{
-    cv::ocl::OclCascadeClassifier cascade, nestedCascade;
-    cv::CascadeClassifier cpucascade, cpunestedCascade;
-
-    double scale;
-    int flags;
-    std::string cascadeName;
-
-    virtual void SetUp()
-    {
-        scale = GET_PARAM(0);
-        flags = GET_PARAM(1);
-        cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(2));
-
-        if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) )
-        {
-            cout << "ERROR: Could not load classifier cascade" << endl;
-            return;
-        }
-    }
-};
-
-////////////////////////////////faceDetect/////////////////////////////////////////////////
-TEST_P(Haar, FaceDetect)
-{
-    string imgName = workdir + "lena.jpg";
-    Mat img = imread( imgName, 1 );
-
-    if(img.empty())
-    {
-        std::cout << "Couldn't read " << imgName << std::endl;
-        return ;
-    }
-
-    vector<Rect> faces, oclfaces;
-
-    Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
-    MemStorage storage(cvCreateMemStorage(0));
-    cvtColor( img, gray, COLOR_BGR2GRAY );
-    resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
-    equalizeHist( smallImg, smallImg );
-
-    cv::ocl::oclMat image;
-    CvSeq *_objects;
-    image.upload(smallImg);
-    _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
-                   3, flags, Size(30, 30), Size(0, 0) );
-    vector<CvAvgComp> vecAvgComp;
-    Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
-    oclfaces.resize(vecAvgComp.size());
-    std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
-    
-    cpucascade.detectMultiScale( smallImg, faces,  1.1, 3,
-                                 flags,
-                                 Size(30, 30), Size(0, 0) );
-    EXPECT_EQ(faces.size(), oclfaces.size());
-}
-
-TEST_P(Haar, FaceDetectUseBuf)
-{
-    string imgName = workdir + "lena.jpg";
-    Mat img = imread( imgName, 1 );
-
-    if(img.empty())
-    {
-        std::cout << "Couldn't read " << imgName << std::endl;
-        return ;
-    }
-
-    vector<Rect> faces, oclfaces;
-
-    Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
-    cvtColor( img, gray, CV_BGR2GRAY );
-    resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
-    equalizeHist( smallImg, smallImg );
-
-    cv::ocl::oclMat image;
-    image.upload(smallImg);
-
-    cv::ocl::OclCascadeClassifierBuf cascadebuf;
-    if( !cascadebuf.load( cascadeName ) )
-    {
-        cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << endl;
-        return;
-    }
-    cascadebuf.detectMultiScale( image, oclfaces,  1.1, 3,
-                                 flags,
-                                 Size(30, 30), Size(0, 0) );
-
-    cpucascade.detectMultiScale( smallImg, faces,  1.1, 3,
-                                 flags,
-                                 Size(30, 30), Size(0, 0) );
-    EXPECT_EQ(faces.size(), oclfaces.size());
-
-    // intentionally run ocl facedetect again and check if it still works after the first run
-    cascadebuf.detectMultiScale( image, oclfaces,  1.1, 3,
-        flags,
-        Size(30, 30));
-    cascadebuf.release();
-    EXPECT_EQ(faces.size(), oclfaces.size());
-}
-
-INSTANTIATE_TEST_CASE_P(FaceDetect, Haar,
-    Combine(Values(1.0),
-            Values(CV_HAAR_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2)));
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_hough.cpp b/modules/ocl/test/test_hough.cpp
index 365e0dadb..9e45076cd 100644
--- a/modules/ocl/test/test_hough.cpp
+++ b/modules/ocl/test/test_hough.cpp
@@ -40,7 +40,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
 #ifdef HAVE_OPENCL
 
diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index 85e6052df..4cad2fabe 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -51,7 +51,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
 #ifdef HAVE_OPENCL
 
@@ -448,7 +448,7 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo
     {
         cv::Mat cpu_cldst;
         cldst.download(cpu_cldst);
-        EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);       
+        EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);
     }
 };
 ////////////////////////////////equalizeHist//////////////////////////////////////////
@@ -475,56 +475,6 @@ TEST_P(equalizeHist, Mat)
 }
 
 
-
-
-
-////////////////////////////////bilateralFilter////////////////////////////////////////////
-
-struct bilateralFilter : ImgprocTestBase {};
-
-TEST_P(bilateralFilter, Mat)
-{
-    double sigmacolor = 50.0;
-    int radius = 9;
-    int d = 2 * radius + 1;
-    double sigmaspace = 20.0;
-    int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
-    //const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
-
-    if (mat1.depth() != CV_8U || mat1.type() != dst.type())
-    {
-        cout << "Unsupported type" << endl;
-        EXPECT_DOUBLE_EQ(0.0, 0.0);
-    }
-    else
-    {
-        for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++)
-            for(int j = 0; j < LOOP_TIMES; j++)
-            {
-                random_roi();
-                if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
-                {
-                    continue;
-                }
-                //if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
-                //{
-                //	dst_roi.adjustROI(radius, radius, radius, radius);
-                //	cldst_roi.adjustROI(radius, radius, radius, radius);
-                //}
-                //else
-                //{
-                //	continue;
-                //}
-
-                cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
-                cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
-                Near(1.);
-            }
-    }
-}
-
-
-
 ////////////////////////////////copyMakeBorder////////////////////////////////////////////
 
 struct CopyMakeBorder : ImgprocTestBase {};
@@ -1396,14 +1346,10 @@ TEST_P(calcHist, Mat)
 }
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // CLAHE
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(ClipLimit, double)
-}
 
-PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)
+PARAM_TEST_CASE(CLAHE, cv::Size, double)
 {
-    cv::Size size;
+    cv::Size gridSize;
     double clipLimit;
 
     cv::Mat src;
@@ -1414,22 +1360,22 @@ PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)
 
     virtual void SetUp()
     {
-        size = GET_PARAM(0);
+        gridSize = GET_PARAM(0);
         clipLimit = GET_PARAM(1);
 
         cv::RNG &rng = TS::ptr()->get_rng();
-        src = randomMat(rng, size, CV_8UC1, 0, 256, false);
+        src = randomMat(rng, cv::Size(MWIDTH, MHEIGHT), CV_8UC1, 0, 256, false);
         g_src.upload(src);
     }
 };
 
 TEST_P(CLAHE, Accuracy)
 {
-    cv::Ptr<cv::ocl::CLAHE> clahe = cv::ocl::createCLAHE(clipLimit);
+    cv::Ptr<cv::CLAHE> clahe = cv::ocl::createCLAHE(clipLimit, gridSize);
     clahe->apply(g_src, g_dst);
     cv::Mat dst(g_dst);
 
-    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit);
+    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit, gridSize);
     clahe_gold->apply(src, dst_gold);
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
@@ -1573,6 +1519,47 @@ TEST_P(Convolve, Mat)
     }
 }
 
+//////////////////////////////// ColumnSum //////////////////////////////////////
+PARAM_TEST_CASE(ColumnSum, cv::Size)
+{
+    cv::Size size;
+    cv::Mat src;
+
+    virtual void SetUp()
+    {
+        size = GET_PARAM(0);
+    }
+};
+
+TEST_P(ColumnSum, Accuracy)
+{
+    cv::Mat src = randomMat(size, CV_32FC1);
+    cv::ocl::oclMat d_dst;
+    cv::ocl::oclMat d_src(src);
+
+    cv::ocl::columnSum(d_src, d_dst);
+
+    cv::Mat dst(d_dst);
+
+    for (int j = 0; j < src.cols; ++j)
+    {
+        float gold = src.at<float>(0, j);
+        float res = dst.at<float>(0, j);
+        ASSERT_NEAR(res, gold, 1e-5);
+    }
+
+    for (int i = 1; i < src.rows; ++i)
+    {
+        for (int j = 0; j < src.cols; ++j)
+        {
+            float gold = src.at<float>(i, j) += src.at<float>(i - 1, j);
+            float res = dst.at<float>(i, j);
+            ASSERT_NEAR(res, gold, 1e-5);
+        }
+    }
+}
+/////////////////////////////////////////////////////////////////////////////////////
+
 INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
                             ONE_TYPE(CV_8UC1),
                             NULL_TYPE,
@@ -1581,21 +1568,6 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
                             NULL_TYPE,
                             Values(false))); // Values(false) is the reserved parameter
 
-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
-//	ONE_TYPE(CV_8UC1),
-//	NULL_TYPE,
-//	ONE_TYPE(CV_8UC1),
-//	NULL_TYPE,
-//	NULL_TYPE,
-//	Values(false))); // Values(false) is the reserved parameter
-INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
-                            Values(CV_8UC1, CV_8UC3),
-                            NULL_TYPE,
-                            Values(CV_8UC1, CV_8UC3),
-                            NULL_TYPE,
-                            NULL_TYPE,
-                            Values(false))); // Values(false) is the reserved parameter
-
 
 INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
                             Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
@@ -1684,11 +1656,10 @@ INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
                             ONE_TYPE(CV_32SC1) //no use
                         ));
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine(
-                        Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)),
-                        Values(0.0, 40.0)));
+INSTANTIATE_TEST_CASE_P(Imgproc, CLAHE, Combine(
+                        Values(cv::Size(4, 4), cv::Size(32, 8), cv::Size(8, 64)),
+                        Values(0.0, 10.0, 62.0, 300.0)));
+
+INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, DIFFERENT_SIZES);
 
-//INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine(
-//                            Values(CV_32FC1, CV_32FC1),
-//                            Values(false))); // Values(false) is the reserved parameter
 #endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_kmeans.cpp b/modules/ocl/test/test_kmeans.cpp
new file mode 100644
index 000000000..008acfaa4
--- /dev/null
+++ b/modules/ocl/test/test_kmeans.cpp
@@ -0,0 +1,162 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Erping Pang,   pang_er_ping@163.com
+//    Xiaopeng Fu,   fuxiaopeng2222@163.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+using namespace cv;
+
+#define OCL_KMEANS_USE_INITIAL_LABELS 1
+#define OCL_KMEANS_PP_CENTERS         2
+
+PARAM_TEST_CASE(Kmeans, int, int, int)
+{
+    int type;
+    int K;
+    int flags;
+    cv::Mat src ;
+    ocl::oclMat d_src, d_dists;
+
+    Mat labels, centers;
+    ocl::oclMat d_labels, d_centers;
+    cv::RNG rng ;
+    virtual void SetUp(){
+        K = GET_PARAM(0);
+        type = GET_PARAM(1);
+        flags = GET_PARAM(2);
+        rng = TS::ptr()->get_rng();
+
+        // MWIDTH=256, MHEIGHT=256. defined in utility.hpp
+        cv::Size size = cv::Size(MWIDTH, MHEIGHT);
+        src.create(size, type);
+        int row_idx = 0;
+        const int max_neighbour = MHEIGHT / K - 1;
+        CV_Assert(K <= MWIDTH);
+        for(int i = 0; i < K; i++ )
+        {
+            Mat center_row_header = src.row(row_idx);
+            center_row_header.setTo(0);
+            int nchannel = center_row_header.channels();
+            for(int j = 0; j < nchannel; j++)
+                center_row_header.at<float>(0, i*nchannel+j) = 50000.0;
+
+            for(int j = 0; (j < max_neighbour) ||
+                           (i == K-1 && j < max_neighbour + MHEIGHT%K); j ++)
+            {
+                Mat cur_row_header = src.row(row_idx + 1 + j);
+                center_row_header.copyTo(cur_row_header);
+                Mat tmpmat = randomMat(rng, cur_row_header.size(), cur_row_header.type(), -200, 200, false);
+                cur_row_header += tmpmat;
+            }
+            row_idx += 1 + max_neighbour;
+        }
+    }
+};
+TEST_P(Kmeans, Mat){
+
+    if(flags & KMEANS_USE_INITIAL_LABELS)
+    {
+        // inital a given labels
+        labels.create(src.rows, 1, CV_32S);
+        int *label = labels.ptr<int>();
+        for(int i = 0; i < src.rows; i++)
+            label[i] = rng.uniform(0, K);
+        d_labels.upload(labels);
+    }
+    d_src.upload(src);
+
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        kmeans(src, K, labels,
+            TermCriteria( TermCriteria::EPS + TermCriteria::MAX_ITER, 100, 0),
+            1, flags, centers);
+
+        ocl::kmeans(d_src, K, d_labels,
+            TermCriteria( TermCriteria::EPS + TermCriteria::MAX_ITER, 100, 0),
+            1, flags, d_centers);
+
+        Mat dd_labels(d_labels);
+        Mat dd_centers(d_centers);
+        if(flags & KMEANS_USE_INITIAL_LABELS)
+        {
+            EXPECT_MAT_NEAR(labels, dd_labels, 0);
+            EXPECT_MAT_NEAR(centers, dd_centers, 1e-3);
+        }
+        else
+        {
+            int row_idx = 0;
+            for(int i = 0; i < K; i++)
+            {
+                // verify lables with ground truth resutls
+                int label = labels.at<int>(row_idx);
+                int header_label = dd_labels.at<int>(row_idx);
+                for(int j = 0; (j < MHEIGHT/K)||(i == K-1 && j < MHEIGHT/K+MHEIGHT%K); j++)
+                {
+                    ASSERT_NEAR(labels.at<int>(row_idx+j), label, 0);
+                    ASSERT_NEAR(dd_labels.at<int>(row_idx+j), header_label, 0);
+                }
+
+                // verify centers
+                float *center = centers.ptr<float>(label);
+                float *header_center = dd_centers.ptr<float>(header_label);
+                for(int t = 0; t < centers.cols; t++)
+                    ASSERT_NEAR(center[t], header_center[t], 1e-3);
+
+                row_idx += MHEIGHT/K;
+            }
+        }
+    }
+}
+INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine(
+    Values(3, 5, 8),
+    Values(CV_32FC1, CV_32FC2, CV_32FC4),
+    Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/)));
+
+#endif
diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp
index a393abdeb..551c9ff12 100644
--- a/modules/ocl/test/test_match_template.cpp
+++ b/modules/ocl/test/test_match_template.cpp
@@ -43,7 +43,7 @@
 //M*/
 
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
 #ifdef HAVE_OPENCL
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp
index 92d810818..e8b502232 100644
--- a/modules/ocl/test/test_matrix_operation.cpp
+++ b/modules/ocl/test/test_matrix_operation.cpp
@@ -44,7 +44,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
 #ifdef HAVE_OPENCL
 
diff --git a/modules/ocl/test/test_moments.cpp b/modules/ocl/test/test_moments.cpp
index 23a9a8eb2..50b8379d8 100644
--- a/modules/ocl/test/test_moments.cpp
+++ b/modules/ocl/test/test_moments.cpp
@@ -1,4 +1,4 @@
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 #include <iomanip>
 
 #ifdef HAVE_OPENCL
@@ -7,8 +7,7 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace cvtest;
 using namespace testing;
-using namespace std;
-extern string workdir;
+
 PARAM_TEST_CASE(MomentsTest, MatType, bool)
 {
     int type;
@@ -44,12 +43,12 @@ TEST_P(MomentsTest, Mat)
     {
         if(test_contours)
         {
-            Mat src = imread( workdir + "../cpp/pic3.png", 1 );
-            Mat src_gray, canny_output;
-            cvtColor( src, src_gray, COLOR_BGR2GRAY );
+            Mat src = readImage( "cv/shared/pic3.png", IMREAD_GRAYSCALE );
+            ASSERT_FALSE(src.empty());
+            Mat canny_output;
             vector<vector<Point> > contours;
             vector<Vec4i> hierarchy;
-            Canny( src_gray, canny_output, 100, 200, 3 );
+            Canny( src, canny_output, 100, 200, 3 );
             findContours( canny_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0) );
             for( size_t i = 0; i < contours.size(); i++ )
             {
@@ -63,9 +62,9 @@ TEST_P(MomentsTest, Mat)
         cv::Moments oclMom = cv::ocl::ocl_moments(_array, binaryImage);
 
         Compare(CvMom, oclMom);
-
     }
 }
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MomentsTest, Combine(
                             Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_64FC1), Values(true,false)));
+
 #endif // HAVE_OPENCL
diff --git a/modules/ocl/test/precomp.cpp b/modules/ocl/test/test_norm.cpp
similarity index 76%
rename from modules/ocl/test/precomp.cpp
rename to modules/ocl/test/test_norm.cpp
index 7d287004e..2bd847068 100644
--- a/modules/ocl/test/precomp.cpp
+++ b/modules/ocl/test/test_norm.cpp
@@ -39,6 +39,25 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
+typedef ::testing::TestWithParam<cv::Size> normFixture;
 
+TEST_P(normFixture, DISABLED_accuracy)
+{
+    const cv::Size srcSize = GetParam();
+
+    cv::Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1);
+    cv::randu(src1, 0, 2);
+    cv::randu(src2, 0, 2);
+
+    cv::ocl::oclMat oclSrc1(src1), oclSrc2(src2);
+
+    double value = cv::norm(src1, src2, cv::NORM_INF);
+    double oclValue = cv::ocl::norm(oclSrc1, oclSrc2, cv::NORM_INF);
+
+    ASSERT_EQ(value, oclValue);
+}
+
+INSTANTIATE_TEST_CASE_P(oclNormTest, normFixture,
+                        ::testing::Values(cv::Size(500, 500), cv::Size(1000, 1000)));
diff --git a/modules/ocl/test/test_hog.cpp b/modules/ocl/test/test_objdetect.cpp
similarity index 51%
rename from modules/ocl/test/test_hog.cpp
rename to modules/ocl/test/test_objdetect.cpp
index e968d0444..295ae642b 100644
--- a/modules/ocl/test/test_hog.cpp
+++ b/modules/ocl/test/test_objdetect.cpp
@@ -15,7 +15,7 @@
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
-//		Wenju He, wenju@multicorewareinc.com
+//		Yao Wang, bitwangyaoyao@gmail.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -43,53 +43,55 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
+#include "opencv2/objdetect.hpp"
+
+using namespace cv;
+using namespace testing;
 
-using namespace std;
 #ifdef HAVE_OPENCL
 
-extern string workdir;
-PARAM_TEST_CASE(HOG, cv::Size, int)
+///////////////////// HOG /////////////////////////////
+PARAM_TEST_CASE(HOG, Size, int)
 {
-    cv::Size winSize;
+    Size winSize;
     int type;
+    Mat img_rgb;
     virtual void SetUp()
     {
         winSize = GET_PARAM(0);
         type = GET_PARAM(1);
+        img_rgb = readImage("gpu/hog/road.png");
+        ASSERT_FALSE(img_rgb.empty());
     }
 };
 
 TEST_P(HOG, GetDescriptors)
 {
-    // Load image
-    cv::Mat img_rgb = readImage(workdir + "lena.jpg");
-    ASSERT_FALSE(img_rgb.empty());
-
     // Convert image
-    cv::Mat img;
+    Mat img;
     switch (type)
     {
     case CV_8UC1:
-        cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
+        cvtColor(img_rgb, img, COLOR_BGR2GRAY);
         break;
     case CV_8UC4:
     default:
-        cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
+        cvtColor(img_rgb, img, COLOR_BGR2BGRA);
         break;
     }
-    cv::ocl::oclMat d_img(img);
+    ocl::oclMat d_img(img);
 
     // HOGs
-    cv::ocl::HOGDescriptor ocl_hog;
+    ocl::HOGDescriptor ocl_hog;
     ocl_hog.gamma_correction = true;
-    cv::HOGDescriptor hog;
+    HOGDescriptor hog;
     hog.gammaCorrection = true;
 
     // Compute descriptor
-    cv::ocl::oclMat d_descriptors;
+    ocl::oclMat d_descriptors;
     ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL);
-    cv::Mat down_descriptors;
+    Mat down_descriptors;
     d_descriptors.download(down_descriptors);
     down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows);
 
@@ -105,148 +107,123 @@ TEST_P(HOG, GetDescriptors)
         hog.compute(img_rgb, descriptors, ocl_hog.win_size);
         break;
     }
-    cv::Mat cpu_descriptors(descriptors);
+    Mat cpu_descriptors(descriptors);
 
     EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2);
 }
 
-
-bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
-{
-    return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) &&
-            (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold));
-}
-
 TEST_P(HOG, Detect)
 {
-    // Load image
-    cv::Mat img_rgb = readImage(workdir + "lena.jpg");
-    ASSERT_FALSE(img_rgb.empty());
-
     // Convert image
-    cv::Mat img;
+    Mat img;
     switch (type)
     {
     case CV_8UC1:
-        cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
+        cvtColor(img_rgb, img, COLOR_BGR2GRAY);
         break;
     case CV_8UC4:
     default:
-        cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
+        cvtColor(img_rgb, img, COLOR_BGR2BGRA);
         break;
     }
-    cv::ocl::oclMat d_img(img);
+    ocl::oclMat d_img(img);
 
     // HOGs
-    if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128)))
-        winSize = cv::Size(64, 128);
-    cv::ocl::HOGDescriptor ocl_hog(winSize);
+    if ((winSize != Size(48, 96)) && (winSize != Size(64, 128)))
+        winSize = Size(64, 128);
+    ocl::HOGDescriptor ocl_hog(winSize);
     ocl_hog.gamma_correction = true;
 
-    cv::HOGDescriptor hog;
+    HOGDescriptor hog;
     hog.winSize = winSize;
     hog.gammaCorrection = true;
 
     if (winSize.width == 48 && winSize.height == 96)
     {
         // daimler's base
-        ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96());
+        ocl_hog.setSVMDetector(hog.getDaimlerPeopleDetector());
         hog.setSVMDetector(hog.getDaimlerPeopleDetector());
     }
     else if (winSize.width == 64 && winSize.height == 128)
     {
-        ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128());
+        ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector());
         hog.setSVMDetector(hog.getDefaultPeopleDetector());
     }
     else
     {
-        ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
+        ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector());
         hog.setSVMDetector(hog.getDefaultPeopleDetector());
     }
 
     // OpenCL detection
-    std::vector<cv::Rect> d_found;
-    ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
+    std::vector<Rect> d_found;
+    ocl_hog.detectMultiScale(d_img, d_found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
 
     // CPU detection
-    std::vector<cv::Rect> found;
+    std::vector<Rect> found;
     switch (type)
     {
     case CV_8UC1:
-        hog.detectMultiScale(img, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
+        hog.detectMultiScale(img, found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
         break;
     case CV_8UC4:
     default:
-        hog.detectMultiScale(img_rgb, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
+        hog.detectMultiScale(img_rgb, found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
         break;
     }
 
-    // Ground-truth rectangular people window
-    cv::Rect win1_64x128(231, 190, 72, 144);
-    cv::Rect win2_64x128(621, 156, 97, 194);
-    cv::Rect win1_48x96(238, 198, 63, 126);
-    cv::Rect win2_48x96(619, 161, 92, 185);
-    cv::Rect win3_48x96(488, 136, 56, 112);
-
-    // Compare whether ground-truth windows are detected and compare the number of windows detected.
-    std::vector<int> d_comp(4);
-    std::vector<int> comp(4);
-    for(int i = 0; i < (int)d_comp.size(); i++)
-    {
-        d_comp[i] = 0;
-        comp[i] = 0;
-    }
-
-    int threshold = 10;
-    int val = 32;
-    d_comp[0] = (int)d_found.size();
-    comp[0] = (int)found.size();
-    if (winSize == cv::Size(48, 96))
-    {
-        for(int i = 0; i < (int)d_found.size(); i++)
-        {
-            if (match_rect(d_found[i], win1_48x96, threshold))
-                d_comp[1] = val;
-            if (match_rect(d_found[i], win2_48x96, threshold))
-                d_comp[2] = val;
-            if (match_rect(d_found[i], win3_48x96, threshold))
-                d_comp[3] = val;
-        }
-        for(int i = 0; i < (int)found.size(); i++)
-        {
-            if (match_rect(found[i], win1_48x96, threshold))
-                comp[1] = val;
-            if (match_rect(found[i], win2_48x96, threshold))
-                comp[2] = val;
-            if (match_rect(found[i], win3_48x96, threshold))
-                comp[3] = val;
-        }
-    }
-    else if (winSize == cv::Size(64, 128))
-    {
-        for(int i = 0; i < (int)d_found.size(); i++)
-        {
-            if (match_rect(d_found[i], win1_64x128, threshold))
-                d_comp[1] = val;
-            if (match_rect(d_found[i], win2_64x128, threshold))
-                d_comp[2] = val;
-        }
-        for(int i = 0; i < (int)found.size(); i++)
-        {
-            if (match_rect(found[i], win1_64x128, threshold))
-                comp[1] = val;
-            if (match_rect(found[i], win2_64x128, threshold))
-                comp[2] = val;
-        }
-    }
-
-    EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3);
+    EXPECT_LT(checkRectSimilarity(img.size(), found, d_found), 1.0);
 }
 
 
 INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
-                            testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
+                            testing::Values(Size(64, 128), Size(48, 96)),
                             testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
 
 
+///////////////////////////// Haar //////////////////////////////
+IMPLEMENT_PARAM_CLASS(CascadeName, std::string);
+CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
+CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
+
+PARAM_TEST_CASE(Haar, int, CascadeName)
+{
+    ocl::OclCascadeClassifier cascade, nestedCascade;
+    CascadeClassifier cpucascade, cpunestedCascade;
+
+    int flags;
+    std::string cascadeName;
+    std::vector<Rect> faces, oclfaces;
+    Mat img;
+    ocl::oclMat d_img;
+
+    virtual void SetUp()
+    {
+        flags = GET_PARAM(0);
+        cascadeName = (std::string(cvtest::TS::ptr()->get_data_path()) + "cv/cascadeandhog/cascades/").append(GET_PARAM(1));
+        ASSERT_TRUE(cascade.load( cascadeName ));
+        ASSERT_TRUE(cpucascade.load(cascadeName));
+        img = readImage("cv/shared/lena.png", IMREAD_GRAYSCALE);
+        ASSERT_FALSE(img.empty());
+        equalizeHist(img, img);
+        d_img.upload(img);
+    }
+};
+
+TEST_P(Haar, FaceDetect)
+{
+    cascade.detectMultiScale(d_img, oclfaces, 1.1, 3,
+                             flags, Size(30, 30));
+
+    cpucascade.detectMultiScale(img, faces, 1.1, 3,
+                                flags, Size(30, 30));
+
+    EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, Haar,
+    Combine(Values((int)CASCADE_SCALE_IMAGE, 0),
+            Values(cascade_frontalface_alt, cascade_frontalface_alt2)));
+
+
 #endif //HAVE_OPENCL
diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp
index 0121be8f9..3b6c48050 100644
--- a/modules/ocl/test/test_optflow.cpp
+++ b/modules/ocl/test/test_optflow.cpp
@@ -43,7 +43,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 #include <iomanip>
 
 #ifdef HAVE_OPENCL
@@ -52,10 +52,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace cvtest;
 using namespace testing;
-using namespace std;
-
-extern string workdir;
-
 
 //////////////////////////////////////////////////////
 // GoodFeaturesToTrack
@@ -75,7 +71,7 @@ PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance)
 
 TEST_P(GoodFeaturesToTrack, Accuracy)
 {
-    cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame.empty());
 
     int maxCorners = 1000;
@@ -89,7 +85,7 @@ TEST_P(GoodFeaturesToTrack, Accuracy)
     ASSERT_FALSE(d_pts.empty());
 
     std::vector<cv::Point2f> pts(d_pts.cols);
-    
+
     detector.downloadPoints(d_pts, pts);
 
     std::vector<cv::Point2f> pts_gold;
@@ -129,7 +125,7 @@ TEST_P(GoodFeaturesToTrack, EmptyCorners)
     ASSERT_TRUE(corners.empty());
 }
 
-INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack, 
+INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
     testing::Values(MinDistance(0.0), MinDistance(3.0)));
 
 //////////////////////////////////////////////////////////////////////////
@@ -144,12 +140,12 @@ PARAM_TEST_CASE(TVL1, bool)
 
 };
 
-TEST_P(TVL1, Accuracy)
+TEST_P(TVL1, DISABLED_Accuracy) // TODO implementations of TV1 in video module are different in 2.4 and master branches
 {
-    cv::Mat frame0 = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage(workdir + "../gpu/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
     cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
@@ -168,7 +164,7 @@ TEST_P(TVL1, Accuracy)
     EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3);
     EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3);
 }
-INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(true, false));
+INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(false, true));
 
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
@@ -188,10 +184,10 @@ PARAM_TEST_CASE(Sparse, bool, bool)
 
 TEST_P(Sparse, Mat)
 {
-    cv::Mat frame0 = readImage(workdir + "../gpu/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+    cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage(workdir + "../gpu/rubberwhale2.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+    cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame1.empty());
 
     cv::Mat gray_frame;
@@ -272,6 +268,77 @@ TEST_P(Sparse, Mat)
 INSTANTIATE_TEST_CASE_P(OCL_Video, Sparse, Combine(
     Values(false, true),
     Values(false, true)));
+//////////////////////////////////////////////////////
+// FarnebackOpticalFlow
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(PyrScale, double)
+        IMPLEMENT_PARAM_CLASS(PolyN, int)
+        CV_FLAGS(FarnebackOptFlowFlags, 0, OPTFLOW_FARNEBACK_GAUSSIAN)
+        IMPLEMENT_PARAM_CLASS(UseInitFlow, bool)
+}
+
+PARAM_TEST_CASE(Farneback, PyrScale, PolyN, FarnebackOptFlowFlags, UseInitFlow)
+{
+    double pyrScale;
+    int polyN;
+    int flags;
+    bool useInitFlow;
+
+    virtual void SetUp()
+    {
+        pyrScale = GET_PARAM(0);
+        polyN = GET_PARAM(1);
+        flags = GET_PARAM(2);
+        useInitFlow = GET_PARAM(3);
+    }
+};
+
+TEST_P(Farneback, Accuracy)
+{
+    cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0.empty());
+
+    cv::Mat frame1 = readImage("gpu/opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1.empty());
+
+    double polySigma = polyN <= 5 ? 1.1 : 1.5;
+
+    cv::ocl::FarnebackOpticalFlow farn;
+    farn.pyrScale = pyrScale;
+    farn.polyN = polyN;
+    farn.polySigma = polySigma;
+    farn.flags = flags;
+
+    cv::ocl::oclMat d_flowx, d_flowy;
+    farn(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
+
+    cv::Mat flow;
+    if (useInitFlow)
+    {
+        cv::Mat flowxy[] = {cv::Mat(d_flowx), cv::Mat(d_flowy)};
+        cv::merge(flowxy, 2, flow);
+
+        farn.flags |= cv::OPTFLOW_USE_INITIAL_FLOW;
+        farn(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
+    }
+
+    cv::calcOpticalFlowFarneback(
+        frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
+        farn.numIters, farn.polyN, farn.polySigma, farn.flags);
+
+    std::vector<cv::Mat> flowxy;
+    cv::split(flow, flowxy);
+
+    EXPECT_MAT_SIMILAR(flowxy[0], d_flowx, 0.1);
+    EXPECT_MAT_SIMILAR(flowxy[1], d_flowy, 0.1);
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, Farneback, testing::Combine(
+    testing::Values(PyrScale(0.3), PyrScale(0.5), PyrScale(0.8)),
+    testing::Values(PolyN(5), PolyN(7)),
+    testing::Values(FarnebackOptFlowFlags(0), FarnebackOptFlowFlags(cv::OPTFLOW_FARNEBACK_GAUSSIAN)),
+    testing::Values(UseInitFlow(false), UseInitFlow(true))));
 
 #endif // HAVE_OPENCL
-
diff --git a/modules/ocl/test/precomp.hpp b/modules/ocl/test/test_precomp.hpp
similarity index 100%
rename from modules/ocl/test/precomp.hpp
rename to modules/ocl/test/test_precomp.hpp
diff --git a/modules/ocl/test/test_pyrdown.cpp b/modules/ocl/test/test_pyramids.cpp
similarity index 75%
rename from modules/ocl/test/test_pyrdown.cpp
rename to modules/ocl/test/test_pyramids.cpp
index 6d00fb5e4..58179ac18 100644
--- a/modules/ocl/test/test_pyrdown.cpp
+++ b/modules/ocl/test/test_pyramids.cpp
@@ -15,7 +15,6 @@
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
-//    Dachuan Zhao, dachuan@multicorewareinc.com
 //    Yao Wang yao@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -45,7 +44,7 @@
 //M*/
 
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 #include <iomanip>
 
 #ifdef HAVE_OPENCL
@@ -56,11 +55,12 @@ using namespace cvtest;
 using namespace testing;
 using namespace std;
 
-PARAM_TEST_CASE(PyrDown, MatType, int)
+PARAM_TEST_CASE(PyrBase, MatType, int)
 {
     int type;
     int channels;
-
+    Mat dst_cpu;
+    oclMat gdst;
     virtual void SetUp()
     {
         type = GET_PARAM(0);
@@ -69,19 +69,19 @@ PARAM_TEST_CASE(PyrDown, MatType, int)
 
 };
 
+/////////////////////// PyrDown //////////////////////////
+struct PyrDown : PyrBase {};
 
 TEST_P(PyrDown, Mat)
 {
     for(int j = 0; j < LOOP_TIMES; j++)
     {
-        cv::Size size(MWIDTH, MHEIGHT);
-        cv::RNG &rng = TS::ptr()->get_rng();
-        cv::Mat src = randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false);
+        Size size(MWIDTH, MHEIGHT);
+        Mat src = randomMat(size, CV_MAKETYPE(type, channels));
+        oclMat gsrc(src);
 
-        cv::ocl::oclMat gsrc(src), gdst;
-        cv::Mat dst_cpu;
-        cv::pyrDown(src, dst_cpu);
-        cv::ocl::pyrDown(gsrc, gdst);
+        pyrDown(src, dst_cpu);
+        pyrDown(gsrc, gdst);
 
         EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 1e-4f : 1.0f);
     }
@@ -90,5 +90,27 @@ TEST_P(PyrDown, Mat)
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine(
                             Values(CV_8U, CV_32F), Values(1, 3, 4)));
 
+/////////////////////// PyrUp //////////////////////////
 
+struct PyrUp : PyrBase {};
+
+TEST_P(PyrUp, Accuracy)
+{
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        Size size(MWIDTH, MHEIGHT);
+        Mat src = randomMat(size, CV_MAKETYPE(type, channels));
+        oclMat gsrc(src);
+
+        pyrUp(src, dst_cpu);
+        pyrUp(gsrc, gdst);
+
+        EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (type == CV_32F ? 1e-4f : 1.0));
+    }
+
+}
+
+
+INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine(
+                            Values(CV_8U, CV_32F), Values(1, 3, 4)));
 #endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_sort.cpp b/modules/ocl/test/test_sort.cpp
new file mode 100644
index 000000000..d30366556
--- /dev/null
+++ b/modules/ocl/test/test_sort.cpp
@@ -0,0 +1,244 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include <map>
+#include <functional>
+#include "test_precomp.hpp"
+
+using namespace std;
+using namespace cvtest;
+using namespace testing;
+using namespace cv;
+
+
+namespace
+{
+IMPLEMENT_PARAM_CLASS(IsGreaterThan, bool)
+IMPLEMENT_PARAM_CLASS(InputSize, int)
+IMPLEMENT_PARAM_CLASS(SortMethod, int)
+
+
+template<class T>
+struct KV_CVTYPE{ static int toType() {return 0;} };
+
+template<> struct KV_CVTYPE<int>  { static int toType() {return CV_32SC1;} };
+template<> struct KV_CVTYPE<float>{ static int toType() {return CV_32FC1;} };
+template<> struct KV_CVTYPE<Vec2i>{ static int toType() {return CV_32SC2;} };
+template<> struct KV_CVTYPE<Vec2f>{ static int toType() {return CV_32FC2;} };
+
+template<class key_type, class val_type>
+bool kvgreater(pair<key_type, val_type> p1, pair<key_type, val_type> p2)
+{
+    return p1.first > p2.first;
+}
+
+template<class key_type, class val_type>
+bool kvless(pair<key_type, val_type> p1, pair<key_type, val_type> p2)
+{
+    return p1.first < p2.first;
+}
+
+template<class key_type, class val_type>
+void toKVPair(
+    MatConstIterator_<key_type> kit,
+    MatConstIterator_<val_type> vit,
+    int vecSize,
+    vector<pair<key_type, val_type> >& kvres
+    )
+{
+    kvres.clear();
+    for(int i = 0; i < vecSize; i ++)
+    {
+        kvres.push_back(make_pair(*kit, *vit));
+        ++kit;
+        ++vit;
+    }
+}
+
+template<class key_type, class val_type>
+void kvquicksort(Mat& keys, Mat& vals, bool isGreater = false)
+{
+    vector<pair<key_type, val_type> > kvres;
+    toKVPair(keys.begin<key_type>(), vals.begin<val_type>(), keys.cols, kvres);
+
+    if(isGreater)
+    {
+        std::sort(kvres.begin(), kvres.end(), kvgreater<key_type, val_type>);
+    }
+    else
+    {
+        std::sort(kvres.begin(), kvres.end(), kvless<key_type, val_type>);
+    }
+    key_type * kptr = keys.ptr<key_type>();
+    val_type * vptr = vals.ptr<val_type>();
+    for(int i = 0; i < keys.cols; i ++)
+    {
+        kptr[i] = kvres[i].first;
+        vptr[i] = kvres[i].second;
+    }
+}
+
+class SortByKey_STL
+{
+public:
+    static void sort(cv::Mat&, cv::Mat&, bool is_gt);
+private:
+    typedef void (*quick_sorter)(cv::Mat&, cv::Mat&, bool);
+    SortByKey_STL();
+    quick_sorter quick_sorters[CV_64FC4][CV_64FC4];
+    static SortByKey_STL instance;
+};
+
+SortByKey_STL SortByKey_STL::instance = SortByKey_STL();
+
+SortByKey_STL::SortByKey_STL()
+{
+    memset(instance.quick_sorters, 0, sizeof(quick_sorters));
+#define NEW_SORTER(KT, VT) \
+    instance.quick_sorters[KV_CVTYPE<KT>::toType()][KV_CVTYPE<VT>::toType()] = kvquicksort<KT, VT>;
+
+    NEW_SORTER(int, int);
+    NEW_SORTER(int, Vec2i);
+    NEW_SORTER(int, float);
+    NEW_SORTER(int, Vec2f);
+
+    NEW_SORTER(float, int);
+    NEW_SORTER(float, Vec2i);
+    NEW_SORTER(float, float);
+    NEW_SORTER(float, Vec2f);
+#undef NEW_SORTER
+}
+
+void SortByKey_STL::sort(cv::Mat& keys, cv::Mat& vals, bool is_gt)
+{
+    instance.quick_sorters[keys.type()][vals.type()](keys, vals, is_gt);
+}
+
+bool checkUnstableSorterResult(const Mat& gkeys_, const Mat& gvals_,
+                               const Mat& /*dkeys_*/, const Mat& dvals_)
+{
+    int cn_val = gvals_.channels();
+    int count  = gkeys_.cols;
+
+    //for convenience we convert depth to float and channels to 1
+    Mat gkeys, gvals, dkeys, dvals;
+    gkeys_.reshape(1).convertTo(gkeys, CV_32F);
+    gvals_.reshape(1).convertTo(gvals, CV_32F);
+    //dkeys_.reshape(1).convertTo(dkeys, CV_32F);
+    dvals_.reshape(1).convertTo(dvals, CV_32F);
+    float * gkptr = gkeys.ptr<float>();
+    float * gvptr = gvals.ptr<float>();
+    //float * dkptr = dkeys.ptr<float>();
+    float * dvptr = dvals.ptr<float>();
+
+    for(int i = 0; i < count - 1; ++i)
+    {
+        int iden_count = 0;
+        // firstly calculate the number of identical keys
+        while(gkptr[i + iden_count] == gkptr[i + 1 + iden_count])
+        {
+            ++ iden_count;
+        }
+
+        // sort dv and gv
+        int num_of_val = (iden_count + 1) * cn_val;
+        std::sort(gvptr + i * cn_val, gvptr + i * cn_val + num_of_val);
+        std::sort(dvptr + i * cn_val, dvptr + i * cn_val + num_of_val);
+
+        // then check if [i, i + iden_count) is the same
+        for(int j = 0; j < num_of_val; ++j)
+        {
+            if(gvptr[i + j] != dvptr[i + j])
+            {
+                return false;
+            }
+        }
+        i += iden_count;
+    }
+    return true;
+}
+}
+
+#define INPUT_SIZES  Values(InputSize(0x10), InputSize(0x100), InputSize(0x10000)) //2^4, 2^8, 2^16
+#define KEY_TYPES    Values(MatType(CV_32SC1), MatType(CV_32FC1))
+#define VAL_TYPES    Values(MatType(CV_32SC1), MatType(CV_32SC2), MatType(CV_32FC1), MatType(CV_32FC2))
+#define SORT_METHODS Values(SortMethod(cv::ocl::SORT_BITONIC),SortMethod(cv::ocl::SORT_MERGE),SortMethod(cv::ocl::SORT_RADIX)/*,SortMethod(cv::ocl::SORT_SELECTION)*/)
+#define F_OR_T       Values(IsGreaterThan(false), IsGreaterThan(true))
+
+PARAM_TEST_CASE(SortByKey, InputSize, MatType, MatType, SortMethod, IsGreaterThan)
+{
+    InputSize input_size;
+    MatType key_type, val_type;
+    SortMethod method;
+    IsGreaterThan is_gt;
+
+    Mat mat_key, mat_val;
+    virtual void SetUp()
+    {
+        input_size = GET_PARAM(0);
+        key_type   = GET_PARAM(1);
+        val_type   = GET_PARAM(2);
+        method     = GET_PARAM(3);
+        is_gt      = GET_PARAM(4);
+
+        using namespace cv;
+        // fill key and val
+        mat_key = randomMat(Size(input_size, 1), key_type, INT_MIN, INT_MAX);
+        mat_val = randomMat(Size(input_size, 1), val_type, INT_MIN, INT_MAX);
+    }
+};
+
+TEST_P(SortByKey, Accuracy)
+{
+    using namespace cv;
+    ocl::oclMat oclmat_key(mat_key);
+    ocl::oclMat oclmat_val(mat_val);
+
+    ocl::sortByKey(oclmat_key, oclmat_val, method, is_gt);
+    SortByKey_STL::sort(mat_key, mat_val, is_gt);
+
+    EXPECT_MAT_NEAR(mat_key, oclmat_key, 0.0);
+    EXPECT_TRUE(checkUnstableSorterResult(mat_key, mat_val, oclmat_key, oclmat_val));
+}
+INSTANTIATE_TEST_CASE_P(OCL_SORT, SortByKey, Combine(INPUT_SIZES, KEY_TYPES, VAL_TYPES, SORT_METHODS, F_OR_T));
diff --git a/modules/ocl/test/test_split_merge.cpp b/modules/ocl/test/test_split_merge.cpp
index 854ce309c..9663f5321 100644
--- a/modules/ocl/test/test_split_merge.cpp
+++ b/modules/ocl/test/test_split_merge.cpp
@@ -44,7 +44,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 
 #ifdef HAVE_OPENCL
 
diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp
index 9873a8855..f8d585cd4 100644
--- a/modules/ocl/test/utility.cpp
+++ b/modules/ocl/test/utility.cpp
@@ -39,7 +39,7 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#include "test_precomp.hpp"
 #define VARNAME(A) #A
 using namespace std;
 using namespace cv;
@@ -100,12 +100,44 @@ Mat randomMat(Size size, int type, double minVal, double maxVal)
     return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false);
 }
 
+cv::ocl::oclMat createMat_ocl(Size size, int type, bool useRoi)
+{
+    Size size0 = size;
 
+    if (useRoi)
+    {
+        size0.width += randomInt(5, 15);
+        size0.height += randomInt(5, 15);
+    }
 
+    cv::ocl::oclMat d_m(size0, type);
 
+    if (size0 != size)
+        d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height));
 
+    return d_m;
+}
 
+cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi)
+{
+    CV_Assert(m.type() == CV_8UC1 || m.type() == CV_8UC3);
+    cv::ocl::oclMat d_m;
+    d_m = createMat_ocl(m.size(), m.type(), useRoi);
 
+    Size ls;
+    Point pt;
+
+    d_m.locateROI(ls, pt);
+
+    Rect roi(pt.x, pt.y, d_m.size().width, d_m.size().height);
+
+    cv::ocl::oclMat m_ocl(m);
+
+    cv::ocl::oclMat d_m_roi(d_m, roi);
+
+    m_ocl.copyTo(d_m);
+    return d_m;
+}
 /*
 void showDiff(InputArray gold_, InputArray actual_, double eps)
 {
@@ -137,58 +169,7 @@ void showDiff(InputArray gold_, InputArray actual_, double eps)
 }
 */
 
-/*
-bool supportFeature(const DeviceInfo& info, FeatureSet feature)
-{
-    return TargetArchs::builtWith(feature) && info.supports(feature);
-}
 
-const vector<DeviceInfo>& devices()
-{
-    static vector<DeviceInfo> devs;
-    static bool first = true;
-
-    if (first)
-    {
-        int deviceCount = getCudaEnabledDeviceCount();
-
-        devs.reserve(deviceCount);
-
-        for (int i = 0; i < deviceCount; ++i)
-        {
-            DeviceInfo info(i);
-            if (info.isCompatible())
-                devs.push_back(info);
-        }
-
-        first = false;
-    }
-
-    return devs;
-}
-
-vector<DeviceInfo> devices(FeatureSet feature)
-{
-    const vector<DeviceInfo>& d = devices();
-
-    vector<DeviceInfo> devs_filtered;
-
-    if (TargetArchs::builtWith(feature))
-    {
-        devs_filtered.reserve(d.size());
-
-        for (size_t i = 0, size = d.size(); i < size; ++i)
-        {
-            const DeviceInfo& info = d[i];
-
-            if (info.supports(feature))
-                devs_filtered.push_back(info);
-        }
-    }
-
-    return devs_filtered;
-}
-*/
 
 vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
 {
@@ -264,3 +245,47 @@ void PrintTo(const Inverse &inverse, std::ostream *os)
         (*os) << "direct";
 }
 
+double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
+{
+    double final_test_result = 0.0;
+    size_t sz1 = ob1.size();
+    size_t sz2 = ob2.size();
+
+    if(sz1 != sz2)
+    {
+        return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+    }
+    else
+    {
+        if(sz1==0 && sz2==0)
+            return 0;
+        cv::Mat cpu_result(sz, CV_8UC1);
+        cpu_result.setTo(0);
+
+        for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
+        {
+            cv::Mat cpu_result_roi(cpu_result, *r);
+            cpu_result_roi.setTo(1);
+            cpu_result.copyTo(cpu_result);
+        }
+        int cpu_area = cv::countNonZero(cpu_result > 0);
+
+        cv::Mat gpu_result(sz, CV_8UC1);
+        gpu_result.setTo(0);
+        for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
+        {
+            cv::Mat gpu_result_roi(gpu_result, *r2);
+            gpu_result_roi.setTo(1);
+            gpu_result.copyTo(gpu_result);
+        }
+
+        cv::Mat result_;
+        multiply(cpu_result, gpu_result, result_);
+        int result = cv::countNonZero(result_ > 0);
+        if(cpu_area!=0 && result!=0)
+            final_test_result = 1.0 - (double)result/(double)cpu_area;
+        else if(cpu_area==0 && result!=0)
+            final_test_result = -1;
+    }
+    return final_test_result;
+}
diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp
index 9eb48a0ef..ade7620b0 100644
--- a/modules/ocl/test/utility.hpp
+++ b/modules/ocl/test/utility.hpp
@@ -57,13 +57,12 @@ cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal =
 
 void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
 
-//! return true if device supports specified feature and gpu module was built with support the feature.
-//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
+// This function test if gpu_rst matches cpu_rst.
+// If the two vectors are not equal, it will return the difference in vector size
+// Else it will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
+// The smaller, the better matched
+double checkRectSimilarity(cv::Size sz, std::vector<cv::Rect>& ob1, std::vector<cv::Rect>& ob2);
 
-//! return all devices compatible with current gpu module build.
-//const std::vector<cv::ocl::DeviceInfo>& devices();
-//! return all devices compatible with current gpu module build which support specified feature.
-//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature);
 
 //! read image from testdata folder.
 cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR);
@@ -73,6 +72,9 @@ double checkNorm(const cv::Mat &m);
 double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
 double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
 
+//oclMat create
+cv::ocl::oclMat createMat_ocl(cv::Size size, int type, bool useRoi = false);
+cv::ocl::oclMat loadMat_ocl(const cv::Mat& m, bool useRoi = false);
 #define EXPECT_MAT_NORM(mat, eps) \
 { \
     EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \
diff --git a/modules/optim/CMakeLists.txt b/modules/optim/CMakeLists.txt
new file mode 100644
index 000000000..c36c24d9d
--- /dev/null
+++ b/modules/optim/CMakeLists.txt
@@ -0,0 +1,2 @@
+set(the_description "Generic optimization")
+ocv_define_module(optim opencv_core)
diff --git a/modules/optim/doc/downhill_simplex_method.rst b/modules/optim/doc/downhill_simplex_method.rst
new file mode 100644
index 000000000..94d084c23
--- /dev/null
+++ b/modules/optim/doc/downhill_simplex_method.rst
@@ -0,0 +1,161 @@
+Downhill Simplex Method
+=======================
+
+.. highlight:: cpp
+
+optim::DownhillSolver
+---------------------------------
+
+.. ocv:class:: optim::DownhillSolver
+
+This class is used to perform the non-linear non-constrained *minimization* of a function, given on an *n*-dimensional Euclidean space,
+using the **Nelder-Mead method**, also known as **downhill simplex method**. The basic idea about the method can be obtained from
+(`http://en.wikipedia.org/wiki/Nelder-Mead\_method <http://en.wikipedia.org/wiki/Nelder-Mead_method>`_). It should be noted, that
+this method, although deterministic, is rather a heuristic and therefore may converge to a local minima, not necessary a global one.
+It is iterative optimization technique, which at each step uses an information about the values of a function evaluated only at
+*n+1* points, arranged as a *simplex* in *n*-dimensional space (hence the second name of the method). At each step new point is
+chosen to evaluate function at, obtained value is compared with previous ones and based on this information simplex changes it's shape
+, slowly moving to the local minimum.
+
+Algorithm stops when the number of function evaluations done exceeds ``termcrit.maxCount``, when the function values at the
+vertices of simplex are within ``termcrit.epsilon`` range or simplex becomes so small that it
+can enclosed in a box with ``termcrit.epsilon`` sides, whatever comes first, for some defined by user
+positive integer ``termcrit.maxCount`` and positive non-integer ``termcrit.epsilon``.
+
+::
+
+    class CV_EXPORTS Solver : public Algorithm
+    {
+    public:
+        class CV_EXPORTS Function
+        {
+        public:
+           virtual ~Function() {}
+           //! ndim - dimensionality
+           virtual double calc(const double* x) const = 0;
+        };
+
+        virtual Ptr<Function> getFunction() const = 0;
+        virtual void setFunction(const Ptr<Function>& f) = 0;
+
+        virtual TermCriteria getTermCriteria() const = 0;
+        virtual void setTermCriteria(const TermCriteria& termcrit) = 0;
+
+        // x contain the initial point before the call and the minima position (if algorithm converged) after. x is assumed to be (something that
+        // after getMat() will return) row-vector or column-vector. *It's size  and should
+        // be consisted with previous dimensionality data given, if any (otherwise, it determines dimensionality)*
+        virtual double minimize(InputOutputArray x) = 0;
+    };
+
+    class CV_EXPORTS DownhillSolver : public Solver
+    {
+    public:
+        //! returns row-vector, even if the column-vector was given
+        virtual void getInitStep(OutputArray step) const=0;
+        //!This should be called at least once before the first call to minimize() and step is assumed to be (something that
+        //! after getMat() will return) row-vector or column-vector. *It's dimensionality determines the dimensionality of a problem.*
+        virtual void setInitStep(InputArray step)=0;
+    };
+
+It should be noted, that ``optim::DownhillSolver`` is a derivative of the abstract interface ``optim::Solver``, which in
+turn is derived from the ``Algorithm`` interface and is used to encapsulate the functionality, common to all non-linear optimization
+algorithms in the ``optim`` module.
+
+optim::DownhillSolver::getFunction
+--------------------------------------------
+
+Getter for the optimized function. The optimized function is represented by ``Solver::Function`` interface, which requires
+derivatives to implement the sole method ``calc(double*)`` to evaluate the function.
+
+.. ocv:function:: Ptr<Solver::Function> optim::DownhillSolver::getFunction()
+
+    :return: Smart-pointer to an object that implements ``Solver::Function`` interface - it represents the function that is being optimized. It can be empty, if no function was given so far.
+
+optim::DownhillSolver::setFunction
+-----------------------------------------------
+
+Setter for the optimized function. *It should be called at least once before the call to* ``DownhillSolver::minimize()``, as
+default value is not usable.
+
+.. ocv:function:: void optim::DownhillSolver::setFunction(const Ptr<Solver::Function>& f)
+
+    :param f: The new function to optimize.
+
+optim::DownhillSolver::getTermCriteria
+----------------------------------------------------
+
+Getter for the previously set terminal criteria for this algorithm.
+
+.. ocv:function:: TermCriteria optim::DownhillSolver::getTermCriteria()
+
+    :return: Deep copy of the terminal criteria used at the moment.
+
+optim::DownhillSolver::setTermCriteria
+------------------------------------------
+
+Set terminal criteria for downhill simplex method. Two things should be noted. First, this method *is not necessary* to be called
+before the first call to ``DownhillSolver::minimize()``, as the default value is sensible. Second, the method will raise an error
+if ``termcrit.type!=(TermCriteria::MAX_ITER+TermCriteria::EPS)``, ``termcrit.epsilon<=0`` or ``termcrit.maxCount<=0``. That is,
+both ``epsilon`` and ``maxCount`` should be set to positive values (non-integer and integer respectively) and they represent
+tolerance and maximal number of function evaluations that is allowed.
+
+Algorithm stops when the number of function evaluations done exceeds ``termcrit.maxCount``, when the function values at the
+vertices of simplex are within ``termcrit.epsilon`` range or simplex becomes so small that it
+can enclosed in a box with ``termcrit.epsilon`` sides, whatever comes first.
+
+.. ocv:function:: void optim::DownhillSolver::setTermCriteria(const TermCriteria& termcrit)
+
+    :param termcrit: Terminal criteria to be used, represented as ``TermCriteria`` structure (defined elsewhere in openCV). Mind you, that it should meet ``(termcrit.type==(TermCriteria::MAX_ITER+TermCriteria::EPS) && termcrit.epsilon>0 && termcrit.maxCount>0)``, otherwise the error will be raised.
+
+optim::DownhillSolver::getInitStep
+-----------------------------------
+
+Returns the initial step that will be used in downhill simplex algorithm. See the description
+of corresponding setter (follows next) for the meaning of this parameter.
+
+.. ocv:function:: void optim::getInitStep(OutputArray step)
+
+    :param step: Initial step that will be used in algorithm. Note, that although corresponding setter accepts column-vectors as well as row-vectors, this method will return a row-vector.
+
+optim::DownhillSolver::setInitStep
+----------------------------------
+
+Sets the initial step that will be used in downhill simplex algorithm. Step, together with initial point (givin in ``DownhillSolver::minimize``)
+are two *n*-dimensional vectors that are used to determine the shape of initial simplex. Roughly said, initial point determines the position
+of a simplex (it will become simplex's centroid), while step determines the spread (size in each dimension) of a simplex. To be more precise,
+if :math:`s,x_0\in\mathbb{R}^n` are the initial step and initial point respectively, the vertices of a simplex will be: :math:`v_0:=x_0-\frac{1}{2}
+s` and :math:`v_i:=x_0+s_i` for :math:`i=1,2,\dots,n` where :math:`s_i` denotes projections of the initial step of *n*-th coordinate (the result
+of projection is treated to be vector given by :math:`s_i:=e_i\cdot\left<e_i\cdot s\right>`, where :math:`e_i` form canonical basis)
+
+.. ocv:function:: void optim::setInitStep(InputArray step)
+
+    :param step: Initial step that will be used in algorithm. Roughly said, it determines the spread (size in each dimension) of an initial simplex.
+
+optim::DownhillSolver::minimize
+-----------------------------------
+
+The main method of the ``DownhillSolver``. It actually runs the algorithm and performs the minimization. The sole input parameter determines the
+centroid of the starting simplex (roughly, it tells where to start), all the others (terminal criteria, initial step, function to be minimized)
+are supposed to be set via the setters before the call to this method or the default values (not always sensible) will be used.
+
+.. ocv:function:: double optim::DownhillSolver::minimize(InputOutputArray x)
+
+    :param x: The initial point, that will become a centroid of an initial simplex. After the algorithm will terminate, it will be setted to the point where the algorithm stops, the point of possible minimum.
+
+    :return: The value of a function at the point found.
+
+optim::createDownhillSolver
+------------------------------------
+
+This function returns the reference to the ready-to-use ``DownhillSolver`` object. All the parameters are optional, so this procedure can be called
+even without parameters at all. In this case, the default values will be used. As default value for terminal criteria are the only sensible ones,
+``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()`` should be called upon the obtained object, if the respective parameters
+were not given to ``createDownhillSolver()``. Otherwise, the two ways (give parameters to ``createDownhillSolver()`` or miss the out and call the
+``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()``) are absolutely equivalent (and will drop the same errors in the same way,
+should invalid input be detected).
+
+.. ocv:function:: Ptr<optim::DownhillSolver> optim::createDownhillSolver(const Ptr<Solver::Function>& f,InputArray initStep, TermCriteria termcrit)
+
+    :param f: Pointer to the function that will be minimized, similarly to the one you submit via ``DownhillSolver::setFunction``.
+    :param step: Initial step, that will be used to construct the initial simplex, similarly to the one you submit via ``DownhillSolver::setInitStep``.
+    :param termcrit: Terminal criteria to the algorithm, similarly to the one you submit via ``DownhillSolver::setTermCriteria``.
diff --git a/modules/optim/doc/linear_programming.rst b/modules/optim/doc/linear_programming.rst
new file mode 100644
index 000000000..946df9e95
--- /dev/null
+++ b/modules/optim/doc/linear_programming.rst
@@ -0,0 +1,48 @@
+Linear Programming
+==================
+
+.. highlight:: cpp
+
+optim::solveLP
+--------------------
+Solve given (non-integer) linear programming problem using the Simplex Algorithm (Simplex Method).
+What we mean here by "linear programming problem" (or LP problem, for short) can be
+formulated as:
+
+.. math::
+    \mbox{Maximize } c\cdot x\\
+    \mbox{Subject to:}\\
+    Ax\leq b\\
+    x\geq 0
+
+Where :math:`c` is fixed *1*-by-*n* row-vector, :math:`A` is fixed *m*-by-*n* matrix, :math:`b` is fixed *m*-by-*1* column vector and
+:math:`x` is an arbitrary *n*-by-*1* column vector, which satisfies the constraints.
+
+Simplex algorithm is one of many algorithms that are designed to handle this sort of problems efficiently. Although it is not optimal in theoretical
+sense (there exist algorithms that can solve any problem written as above in polynomial type, while simplex method degenerates to exponential time
+for some special cases), it is well-studied, easy to implement and is shown to work well for real-life purposes.
+
+The particular implementation is taken almost verbatim from **Introduction to Algorithms, third edition**
+by T. H. Cormen, C. E. Leiserson, R. L. Rivest and Clifford Stein. In particular, the Bland's rule
+(`http://en.wikipedia.org/wiki/Bland%27s\_rule <http://en.wikipedia.org/wiki/Bland%27s_rule>`_) is used to prevent cycling.
+
+.. ocv:function:: int optim::solveLP(const Mat& Func, const Mat& Constr, Mat& z)
+
+    :param Func: This row-vector corresponds to :math:`c` in the LP problem formulation (see above). It should contain 32- or 64-bit floating point numbers. As a convenience, column-vector may be also submitted, in the latter case it is understood to correspond to :math:`c^T`.
+
+    :param Constr: *m*-by-*n\+1* matrix, whose rightmost column corresponds to :math:`b` in formulation above and the remaining to :math:`A`. It should containt 32- or 64-bit floating point numbers.
+
+    :param z: The solution will be returned here as a column-vector - it corresponds to :math:`c` in the formulation above. It will contain 64-bit floating point numbers.
+
+    :return: One of the return codes:
+
+::
+
+    //!the return codes for solveLP() function
+    enum
+    {
+        SOLVELP_UNBOUNDED    = -2, //problem is unbounded (target function can achieve arbitrary high values)
+        SOLVELP_UNFEASIBLE    = -1, //problem is unfeasible (there are no points that satisfy all the constraints imposed)
+        SOLVELP_SINGLE    = 0, //there is only one maximum for target function
+        SOLVELP_MULTI    = 1 //there are multiple maxima for target function - the arbitrary one is returned
+    };
diff --git a/modules/optim/doc/optim.rst b/modules/optim/doc/optim.rst
new file mode 100644
index 000000000..b3c7a740b
--- /dev/null
+++ b/modules/optim/doc/optim.rst
@@ -0,0 +1,12 @@
+**************************************
+optim. Generic numerical optimization
+**************************************
+
+.. highlight:: cpp
+
+.. toctree::
+    :maxdepth: 2
+
+    linear_programming
+    downhill_simplex_method
+    primal_dual_algorithm
diff --git a/modules/optim/doc/primal_dual_algorithm.rst b/modules/optim/doc/primal_dual_algorithm.rst
new file mode 100644
index 000000000..09d736f24
--- /dev/null
+++ b/modules/optim/doc/primal_dual_algorithm.rst
@@ -0,0 +1,48 @@
+Primal-Dual Algorithm
+=======================
+
+.. highlight:: cpp
+
+optim::denoise_TVL1
+---------------------------------
+
+Primal-dual algorithm is an algorithm for solving special types of variational
+problems (that is, finding a function to minimize some functional)
+. As the image denoising, in particular, may be seen as the variational
+problem, primal-dual algorithm then can be used to perform denoising and this
+is exactly what is implemented.
+
+It should be noted, that this implementation was taken from the July 2013 blog entry [Mordvintsev]_, which also contained
+(slightly more general) ready-to-use
+source code on Python. Subsequently, that code was rewritten on C++ with the usage of openCV by Vadim Pisarevsky
+at the end of July 2013 and finally it was slightly adapted by later authors.
+
+Although the thorough discussion and justification
+of the algorithm involved may be found in [ChambolleEtAl]_, it might make sense to skim over it here, following [Mordvintsev]_. To
+begin with, we consider the 1-byte gray-level images as the functions from the rectangular domain of pixels
+(it may be seen as set :math:`\left\{(x,y)\in\mathbb{N}\times\mathbb{N}\mid 1\leq x\leq n,\;1\leq y\leq m\right\}`
+for some :math:`m,\;n\in\mathbb{N}`) into :math:`\{0,1,\dots,255\}`. We shall denote the noised images as :math:`f_i` and with this
+view, given some image :math:`x` of the same size, we may measure how bad it is by the formula
+
+.. math::
+        \left\|\left\|\nabla x\right\|\right\| + \lambda\sum_i\left\|\left\|x-f_i\right\|\right\|
+
+:math:`\|\|\cdot\|\|` here denotes :math:`L_2`-norm and as you see, the first addend states that we want our image to be smooth
+(ideally, having zero gradient, thus being constant) and the second states that we want our result to be close to the observations we've got.
+If we treat :math:`x` as a function, this is exactly the functional what we seek to minimize and here the Primal-Dual algorithm comes
+into play.
+
+.. ocv:function:: void optim::denoise_TVL1(const std::vector<Mat>& observations,Mat& result, double lambda, int niters)
+
+    :param observations: This array should contain one or more noised versions of the image that is to be restored.
+
+    :param result: Here the denoised image will be stored. There is no need to do pre-allocation of storage space, as it will be automatically allocated, if necessary.
+
+    :param lambda: Corresponds to :math:`\lambda` in the formulas above. As it is enlarged, the smooth (blurred) images are treated more favorably than detailed (but maybe more noised) ones. Roughly speaking, as it becomes smaller, the result will be more blur but more sever outliers will be removed.
+
+    :param niters: Number of iterations that the algorithm will run. Of course, as more iterations as better, but it is hard to quantitatively refine this statement, so just use the default and increase it if the results are poor.
+
+
+.. [ChambolleEtAl] A. Chambolle, V. Caselles, M. Novaga, D. Cremers and T. Pock, An Introduction to Total Variation for Image Analysis, http://hal.archives-ouvertes.fr/docs/00/43/75/81/PDF/preprint.pdf (pdf)
+
+.. [Mordvintsev] Alexander Mordvintsev, ROF and TV-L1 denoising with Primal-Dual algorithm, http://znah.net/rof-and-tv-l1-denoising-with-primal-dual-algorithm.html (blog entry)
diff --git a/modules/optim/include/opencv2/optim.hpp b/modules/optim/include/opencv2/optim.hpp
new file mode 100644
index 000000000..715372b69
--- /dev/null
+++ b/modules/optim/include/opencv2/optim.hpp
@@ -0,0 +1,102 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_OPTIM_HPP__
+#define __OPENCV_OPTIM_HPP__
+
+#include "opencv2/core.hpp"
+
+namespace cv{namespace optim
+{
+class CV_EXPORTS Solver : public Algorithm
+{
+public:
+    class CV_EXPORTS Function
+    {
+    public:
+       virtual ~Function() {}
+       //! ndim - dimensionality
+       virtual double calc(const double* x) const = 0;
+    };
+
+    virtual Ptr<Function> getFunction() const = 0;
+    virtual void setFunction(const Ptr<Function>& f) = 0;
+
+    virtual TermCriteria getTermCriteria() const = 0;
+    virtual void setTermCriteria(const TermCriteria& termcrit) = 0;
+
+    // x contain the initial point before the call and the minima position (if algorithm converged) after. x is assumed to be (something that
+    // after getMat() will return) row-vector or column-vector. *It's size  and should
+    // be consisted with previous dimensionality data given, if any (otherwise, it determines dimensionality)*
+    virtual double minimize(InputOutputArray x) = 0;
+};
+
+//! downhill simplex class
+class CV_EXPORTS DownhillSolver : public Solver
+{
+public:
+    //! returns row-vector, even if the column-vector was given
+    virtual void getInitStep(OutputArray step) const=0;
+    //!This should be called at least once before the first call to minimize() and step is assumed to be (something that
+    //! after getMat() will return) row-vector or column-vector. *It's dimensionality determines the dimensionality of a problem.*
+    virtual void setInitStep(InputArray step)=0;
+};
+
+// both minRange & minError are specified by termcrit.epsilon; In addition, user may specify the number of iterations that the algorithm does.
+CV_EXPORTS_W Ptr<DownhillSolver> createDownhillSolver(const Ptr<Solver::Function>& f=Ptr<Solver::Function>(),
+        InputArray initStep=Mat_<double>(1,1,0.0),
+        TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
+
+//!the return codes for solveLP() function
+enum
+{
+    SOLVELP_UNBOUNDED    = -2, //problem is unbounded (target function can achieve arbitrary high values)
+    SOLVELP_UNFEASIBLE    = -1, //problem is unfeasible (there are no points that satisfy all the constraints imposed)
+    SOLVELP_SINGLE    = 0, //there is only one maximum for target function
+    SOLVELP_MULTI    = 1 //there are multiple maxima for target function - the arbitrary one is returned
+};
+
+CV_EXPORTS_W int solveLP(const Mat& Func, const Mat& Constr, Mat& z);
+CV_EXPORTS_W void denoise_TVL1(const std::vector<Mat>& observations,Mat& result, double lambda=1.0, int niters=30);
+}}// cv
+
+#endif
diff --git a/modules/ml/src/precomp.cpp b/modules/optim/include/opencv2/optim/optim.hpp
similarity index 89%
rename from modules/ml/src/precomp.cpp
rename to modules/optim/include/opencv2/optim/optim.hpp
index e540cc5e8..b5a9ebf79 100644
--- a/modules/ml/src/precomp.cpp
+++ b/modules/optim/include/opencv2/optim/optim.hpp
@@ -7,11 +7,12 @@
 //  copy or use the software.
 //
 //
-//                           License Agreement
+//                          License Agreement
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -40,6 +41,8 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
 
-/* End of file. */
+#include "opencv2/optim.hpp"
diff --git a/modules/optim/src/debug.hpp b/modules/optim/src/debug.hpp
new file mode 100644
index 000000000..fe5d00e87
--- /dev/null
+++ b/modules/optim/src/debug.hpp
@@ -0,0 +1,18 @@
+namespace cv{namespace optim{
+#ifdef ALEX_DEBUG
+#define dprintf(x) printf x
+static void print_matrix(const Mat& x){
+    printf("\ttype:%d vs %d,\tsize: %d-on-%d\n",x.type(),CV_64FC1,x.rows,x.cols);
+    for(int i=0;i<x.rows;i++){
+        printf("\t[");
+        for(int j=0;j<x.cols;j++){
+            printf("%g, ",x.at<double>(i,j));
+        }
+        printf("]\n");
+    }
+}
+#else
+#define dprintf(x)
+#define print_matrix(x)
+#endif
+}}
diff --git a/modules/optim/src/denoise_tvl1.cpp b/modules/optim/src/denoise_tvl1.cpp
new file mode 100644
index 000000000..b11ebc058
--- /dev/null
+++ b/modules/optim/src/denoise_tvl1.cpp
@@ -0,0 +1,113 @@
+#include "precomp.hpp"
+#undef ALEX_DEBUG
+#include "debug.hpp"
+#include <vector>
+#include <algorithm>
+
+#define ABSCLIP(val,threshold) MIN(MAX((val),-(threshold)),(threshold))
+
+namespace cv{namespace optim{
+
+    class AddFloatToCharScaled{
+        public:
+            AddFloatToCharScaled(double scale):_scale(scale){}
+            inline double operator()(double a,uchar b){
+                return a+_scale*((double)b);
+            }
+        private:
+            double _scale;
+    };
+
+    void denoise_TVL1(const std::vector<Mat>& observations,Mat& result, double lambda, int niters){
+
+        CV_Assert(observations.size()>0 && niters>0 && lambda>0);
+
+        const double L2 = 8.0, tau = 0.02, sigma = 1./(L2*tau), theta = 1.0;
+        double clambda = (double)lambda;
+        double s=0;
+        const int workdepth = CV_64F;
+
+        int i, x, y, rows=observations[0].rows, cols=observations[0].cols,count;
+        for(i=1;i<(int)observations.size();i++){
+            CV_Assert(observations[i].rows==rows && observations[i].cols==cols);
+        }
+
+        Mat X, P = Mat::zeros(rows, cols, CV_MAKETYPE(workdepth, 2));
+        observations[0].convertTo(X, workdepth, 1./255);
+        std::vector< Mat_<double> > Rs(observations.size());
+        for(count=0;count<(int)Rs.size();count++){
+            Rs[count]=Mat::zeros(rows,cols,workdepth);
+        }
+
+        for( i = 0; i < niters; i++ )
+        {
+            double currsigma = i == 0 ? 1 + sigma : sigma;
+
+            // P_ = P + sigma*nabla(X)
+            // P(x,y) = P_(x,y)/max(||P(x,y)||,1)
+            for( y = 0; y < rows; y++ )
+            {
+                const double* x_curr = X.ptr<double>(y);
+                const double* x_next = X.ptr<double>(std::min(y+1, rows-1));
+                Point2d* p_curr = P.ptr<Point2d>(y);
+                double dx, dy, m;
+                for( x = 0; x < cols-1; x++ )
+                {
+                    dx = (x_curr[x+1] - x_curr[x])*currsigma + p_curr[x].x;
+                    dy = (x_next[x] - x_curr[x])*currsigma + p_curr[x].y;
+                    m = 1.0/std::max(std::sqrt(dx*dx + dy*dy), 1.0);
+                    p_curr[x].x = dx*m;
+                    p_curr[x].y = dy*m;
+                }
+                dy = (x_next[x] - x_curr[x])*currsigma + p_curr[x].y;
+                m = 1.0/std::max(std::abs(dy), 1.0);
+                p_curr[x].x = 0.0;
+                p_curr[x].y = dy*m;
+            }
+
+
+            //Rs = clip(Rs + sigma*(X-imgs), -clambda, clambda)
+            for(count=0;count<(int)Rs.size();count++){
+                std::transform<MatIterator_<double>,MatConstIterator_<uchar>,MatIterator_<double>,AddFloatToCharScaled>(
+                        Rs[count].begin(),Rs[count].end(),observations[count].begin<uchar>(),
+                        Rs[count].begin(),AddFloatToCharScaled(-sigma/255.0));
+                Rs[count]+=sigma*X;
+                min(Rs[count],clambda,Rs[count]);
+                max(Rs[count],-clambda,Rs[count]);
+            }
+
+            for( y = 0; y < rows; y++ )
+            {
+                double* x_curr = X.ptr<double>(y);
+                const Point2d* p_curr = P.ptr<Point2d>(y);
+                const Point2d* p_prev = P.ptr<Point2d>(std::max(y - 1, 0));
+
+                // X1 = X + tau*(-nablaT(P))
+                x = 0;
+                s=0.0;
+                for(count=0;count<(int)Rs.size();count++){
+                    s=s+Rs[count](y,x);
+                }
+                double x_new = x_curr[x] + tau*(p_curr[x].y - p_prev[x].y)-tau*s;
+                    // X = X2 + theta*(X2 - X)
+                x_curr[x] = x_new + theta*(x_new - x_curr[x]);
+
+
+                for(x = 1; x < cols; x++ )
+                {
+                    s=0.0;
+                    for(count=0;count<(int)Rs.size();count++){
+                        s+=Rs[count](y,x);
+                    }
+                        // X1 = X + tau*(-nablaT(P))
+                    x_new = x_curr[x] + tau*(p_curr[x].x - p_curr[x-1].x + p_curr[x].y - p_prev[x].y)-tau*s;
+                        // X = X2 + theta*(X2 - X)
+                    x_curr[x] = x_new + theta*(x_new - x_curr[x]);
+                }
+            }
+        }
+
+        result.create(X.rows,X.cols,CV_8U);
+        X.convertTo(result, CV_8U, 255);
+    }
+}}
diff --git a/modules/optim/src/lpsolver.cpp b/modules/optim/src/lpsolver.cpp
new file mode 100644
index 000000000..a046ddae1
--- /dev/null
+++ b/modules/optim/src/lpsolver.cpp
@@ -0,0 +1,316 @@
+#include "precomp.hpp"
+#include <climits>
+#include <algorithm>
+#include <cstdarg>
+#include <debug.hpp>
+
+namespace cv{namespace optim{
+using std::vector;
+
+#ifdef ALEX_DEBUG
+static void print_simplex_state(const Mat& c,const Mat& b,double v,const std::vector<int> N,const std::vector<int> B){
+    printf("\tprint simplex state\n");
+
+    printf("v=%g\n",v);
+
+    printf("here c goes\n");
+    print_matrix(c);
+
+    printf("non-basic: ");
+    print(Mat(N));
+    printf("\n");
+
+    printf("here b goes\n");
+    print_matrix(b);
+    printf("basic: ");
+
+    print(Mat(B));
+    printf("\n");
+}
+#else
+#define print_simplex_state(c,b,v,N,B)
+#endif
+
+/**Due to technical considerations, the format of input b and c is somewhat special:
+ *both b and c should be one column bigger than corresponding b and c of linear problem and the leftmost column will be used internally
+ by this procedure - it should not be cleaned before the call to procedure and may contain mess after
+ it also initializes N and B and does not make any assumptions about their init values
+ * @return SOLVELP_UNFEASIBLE if problem is unfeasible, 0 if feasible.
+*/
+static int initialize_simplex(Mat_<double>& c, Mat_<double>& b,double& v,vector<int>& N,vector<int>& B,vector<unsigned int>& indexToRow);
+static inline void pivot(Mat_<double>& c,Mat_<double>& b,double& v,vector<int>& N,vector<int>& B,int leaving_index,
+        int entering_index,vector<unsigned int>& indexToRow);
+/**@return SOLVELP_UNBOUNDED means the problem is unbdd, SOLVELP_MULTI means multiple solutions, SOLVELP_SINGLE means one solution.
+ */
+static int inner_simplex(Mat_<double>& c, Mat_<double>& b,double& v,vector<int>& N,vector<int>& B,vector<unsigned int>& indexToRow);
+static void swap_columns(Mat_<double>& A,int col1,int col2);
+#define SWAP(type,a,b) {type tmp=(a);(a)=(b);(b)=tmp;}
+
+//return codes:-2 (no_sol - unbdd),-1(no_sol - unfsbl), 0(single_sol), 1(multiple_sol=>least_l2_norm)
+int solveLP(const Mat& Func, const Mat& Constr, Mat& z){
+    dprintf(("call to solveLP\n"));
+
+    //sanity check (size, type, no. of channels)
+    CV_Assert(Func.type()==CV_64FC1 || Func.type()==CV_32FC1);
+    CV_Assert(Constr.type()==CV_64FC1 || Constr.type()==CV_32FC1);
+    CV_Assert((Func.rows==1 && (Constr.cols-Func.cols==1))||
+            (Func.cols==1 && (Constr.cols-Func.rows==1)));
+
+    //copy arguments for we will shall modify them
+    Mat_<double> bigC=Mat_<double>(1,(Func.rows==1?Func.cols:Func.rows)+1),
+        bigB=Mat_<double>(Constr.rows,Constr.cols+1);
+    if(Func.rows==1){
+        Func.convertTo(bigC.colRange(1,bigC.cols),CV_64FC1);
+    }else{
+        Mat FuncT=Func.t();
+        FuncT.convertTo(bigC.colRange(1,bigC.cols),CV_64FC1);
+    }
+    Constr.convertTo(bigB.colRange(1,bigB.cols),CV_64FC1);
+    double v=0;
+    vector<int> N,B;
+    vector<unsigned int> indexToRow;
+
+    if(initialize_simplex(bigC,bigB,v,N,B,indexToRow)==SOLVELP_UNFEASIBLE){
+        return SOLVELP_UNFEASIBLE;
+    }
+    Mat_<double> c=bigC.colRange(1,bigC.cols),
+        b=bigB.colRange(1,bigB.cols);
+
+    int res=0;
+    if((res=inner_simplex(c,b,v,N,B,indexToRow))==SOLVELP_UNBOUNDED){
+        return SOLVELP_UNBOUNDED;
+    }
+
+    //return the optimal solution
+    z.create(c.cols,1,CV_64FC1);
+    MatIterator_<double> it=z.begin<double>();
+    for(int i=1;i<=c.cols;i++,it++){
+        if(indexToRow[i]<N.size()){
+            *it=0;
+        }else{
+            *it=b.at<double>(indexToRow[i]-N.size(),b.cols-1);
+        }
+    }
+
+    return res;
+}
+
+static int initialize_simplex(Mat_<double>& c, Mat_<double>& b,double& v,vector<int>& N,vector<int>& B,vector<unsigned int>& indexToRow){
+    N.resize(c.cols);
+    N[0]=0;
+    for (std::vector<int>::iterator it = N.begin()+1 ; it != N.end(); ++it){
+        *it=it[-1]+1;
+    }
+    B.resize(b.rows);
+    B[0]=N.size();
+    for (std::vector<int>::iterator it = B.begin()+1 ; it != B.end(); ++it){
+        *it=it[-1]+1;
+    }
+    indexToRow.resize(c.cols+b.rows);
+    indexToRow[0]=0;
+    for (std::vector<unsigned int>::iterator it = indexToRow.begin()+1 ; it != indexToRow.end(); ++it){
+        *it=it[-1]+1;
+    }
+    v=0;
+
+    int k=0;
+    {
+        double min=DBL_MAX;
+        for(int i=0;i<b.rows;i++){
+            if(b(i,b.cols-1)<min){
+                min=b(i,b.cols-1);
+                k=i;
+            }
+        }
+    }
+
+    if(b(k,b.cols-1)>=0){
+        N.erase(N.begin());
+        for (std::vector<unsigned int>::iterator it = indexToRow.begin()+1 ; it != indexToRow.end(); ++it){
+            --(*it);
+        }
+        return 0;
+    }
+
+    Mat_<double> old_c=c.clone();
+    c=0;
+    c(0,0)=-1;
+    for(int i=0;i<b.rows;i++){
+        b(i,0)=-1;
+    }
+
+    print_simplex_state(c,b,v,N,B);
+
+    dprintf(("\tWE MAKE PIVOT\n"));
+    pivot(c,b,v,N,B,k,0,indexToRow);
+
+    print_simplex_state(c,b,v,N,B);
+
+    inner_simplex(c,b,v,N,B,indexToRow);
+
+    dprintf(("\tAFTER INNER_SIMPLEX\n"));
+    print_simplex_state(c,b,v,N,B);
+
+    if(indexToRow[0]>=N.size()){
+        int iterator_offset=indexToRow[0]-N.size();
+        if(b(iterator_offset,b.cols-1)>0){
+            return SOLVELP_UNFEASIBLE;
+        }
+        pivot(c,b,v,N,B,iterator_offset,0,indexToRow);
+    }
+
+    vector<int>::iterator iterator;
+    {
+        int iterator_offset=indexToRow[0];
+        iterator=N.begin()+iterator_offset;
+        std::iter_swap(iterator,N.begin());
+        SWAP(int,indexToRow[*iterator],indexToRow[0]);
+        swap_columns(c,iterator_offset,0);
+        swap_columns(b,iterator_offset,0);
+    }
+
+    dprintf(("after swaps\n"));
+    print_simplex_state(c,b,v,N,B);
+
+    //start from 1, because we ignore x_0
+    c=0;
+    v=0;
+    for(int I=1;I<old_c.cols;I++){
+        if(indexToRow[I]<N.size()){
+            dprintf(("I=%d from nonbasic\n",I));
+            int iterator_offset=indexToRow[I];
+            c(0,iterator_offset)+=old_c(0,I);
+            print_matrix(c);
+        }else{
+            dprintf(("I=%d from basic\n",I));
+            int iterator_offset=indexToRow[I]-N.size();
+            c-=old_c(0,I)*b.row(iterator_offset).colRange(0,b.cols-1);
+            v+=old_c(0,I)*b(iterator_offset,b.cols-1);
+            print_matrix(c);
+        }
+    }
+
+    dprintf(("after restore\n"));
+    print_simplex_state(c,b,v,N,B);
+
+    N.erase(N.begin());
+    for (std::vector<unsigned int>::iterator it = indexToRow.begin()+1 ; it != indexToRow.end(); ++it){
+        --(*it);
+    }
+    return 0;
+}
+
+static int inner_simplex(Mat_<double>& c, Mat_<double>& b,double& v,vector<int>& N,vector<int>& B,vector<unsigned int>& indexToRow){
+    int count=0;
+    for(;;){
+        dprintf(("iteration #%d\n",count));
+        count++;
+
+        static MatIterator_<double> pos_ptr;
+        int e=-1,pos_ctr=0,min_var=INT_MAX;
+        bool all_nonzero=true;
+        for(pos_ptr=c.begin();pos_ptr!=c.end();pos_ptr++,pos_ctr++){
+            if(*pos_ptr==0){
+                all_nonzero=false;
+            }
+            if(*pos_ptr>0){
+                if(N[pos_ctr]<min_var){
+                    e=pos_ctr;
+                    min_var=N[pos_ctr];
+                }
+            }
+        }
+        if(e==-1){
+            dprintf(("hello from e==-1\n"));
+            print_matrix(c);
+            if(all_nonzero==true){
+                return SOLVELP_SINGLE;
+            }else{
+                return SOLVELP_MULTI;
+            }
+        }
+
+        int l=-1;
+        min_var=INT_MAX;
+        double min=DBL_MAX;
+        int row_it=0;
+        MatIterator_<double> min_row_ptr=b.begin();
+        for(MatIterator_<double> it=b.begin();it!=b.end();it+=b.cols,row_it++){
+            double myite=0;
+            //check constraints, select the tightest one, reinforcing Bland's rule
+            if((myite=it[e])>0){
+                double val=it[b.cols-1]/myite;
+                if(val<min || (val==min && B[row_it]<min_var)){
+                    min_var=B[row_it];
+                    min_row_ptr=it;
+                    min=val;
+                    l=row_it;
+                }
+            }
+        }
+        if(l==-1){
+            return SOLVELP_UNBOUNDED;
+        }
+        dprintf(("the tightest constraint is in row %d with %g\n",l,min));
+
+        pivot(c,b,v,N,B,l,e,indexToRow);
+
+        dprintf(("objective, v=%g\n",v));
+        print_matrix(c);
+        dprintf(("constraints\n"));
+        print_matrix(b);
+        dprintf(("non-basic: "));
+        print_matrix(Mat(N));
+        dprintf(("basic: "));
+        print_matrix(Mat(B));
+    }
+}
+
+static inline void pivot(Mat_<double>& c,Mat_<double>& b,double& v,vector<int>& N,vector<int>& B,
+        int leaving_index,int entering_index,vector<unsigned int>& indexToRow){
+    double Coef=b(leaving_index,entering_index);
+    for(int i=0;i<b.cols;i++){
+        if(i==entering_index){
+            b(leaving_index,i)=1/Coef;
+        }else{
+            b(leaving_index,i)/=Coef;
+        }
+    }
+
+    for(int i=0;i<b.rows;i++){
+        if(i!=leaving_index){
+            double coef=b(i,entering_index);
+            for(int j=0;j<b.cols;j++){
+                if(j==entering_index){
+                    b(i,j)=-coef*b(leaving_index,j);
+                }else{
+                    b(i,j)-=(coef*b(leaving_index,j));
+                }
+            }
+        }
+    }
+
+    //objective function
+    Coef=c(0,entering_index);
+    for(int i=0;i<(b.cols-1);i++){
+        if(i==entering_index){
+            c(0,i)=-Coef*b(leaving_index,i);
+        }else{
+            c(0,i)-=Coef*b(leaving_index,i);
+        }
+    }
+    dprintf(("v was %g\n",v));
+    v+=Coef*b(leaving_index,b.cols-1);
+
+    SWAP(int,N[entering_index],B[leaving_index]);
+    SWAP(int,indexToRow[N[entering_index]],indexToRow[B[leaving_index]]);
+}
+
+static inline void swap_columns(Mat_<double>& A,int col1,int col2){
+    for(int i=0;i<A.rows;i++){
+        double tmp=A(i,col1);
+        A(i,col1)=A(i,col2);
+        A(i,col2)=tmp;
+    }
+}
+}}
diff --git a/modules/nonfree/src/precomp.cpp b/modules/optim/src/precomp.hpp
similarity index 95%
rename from modules/nonfree/src/precomp.cpp
rename to modules/optim/src/precomp.hpp
index 730edbb63..7aab572f8 100644
--- a/modules/nonfree/src/precomp.cpp
+++ b/modules/optim/src/precomp.hpp
@@ -40,6 +40,9 @@
 //
 //M*/
 
-#include "precomp.hpp"
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
 
-/* End of file. */
+#include "opencv2/optim.hpp"
+
+#endif
diff --git a/modules/optim/src/simplex.cpp b/modules/optim/src/simplex.cpp
new file mode 100644
index 000000000..f45d0ce0b
--- /dev/null
+++ b/modules/optim/src/simplex.cpp
@@ -0,0 +1,273 @@
+#include "precomp.hpp"
+#include "debug.hpp"
+#include "opencv2/core/core_c.h"
+
+namespace cv{namespace optim{
+
+    class DownhillSolverImpl : public DownhillSolver
+    {
+    public:
+        void getInitStep(OutputArray step) const;
+        void setInitStep(InputArray step);
+        Ptr<Function> getFunction() const;
+        void setFunction(const Ptr<Function>& f);
+        TermCriteria getTermCriteria() const;
+        DownhillSolverImpl();
+        void setTermCriteria(const TermCriteria& termcrit);
+        double minimize(InputOutputArray x);
+    protected:
+        Ptr<Solver::Function> _Function;
+        TermCriteria _termcrit;
+        Mat _step;
+    private:
+        inline void createInitialSimplex(Mat_<double>& simplex,Mat& step);
+        inline double innerDownhillSimplex(cv::Mat_<double>& p,double MinRange,double MinError,int& nfunk,
+                const Ptr<Solver::Function>& f,int nmax);
+        inline double tryNewPoint(Mat_<double>& p,Mat_<double>& y,Mat_<double>& coord_sum,const Ptr<Solver::Function>& f,int ihi,
+                double fac,Mat_<double>& ptry);
+    };
+
+    double DownhillSolverImpl::tryNewPoint(
+        Mat_<double>& p,
+        Mat_<double>& y,
+        Mat_<double>&  coord_sum,
+        const Ptr<Solver::Function>& f,
+        int      ihi,
+        double   fac,
+        Mat_<double>& ptry
+        )
+    {
+        int ndim=p.cols;
+        int j;
+        double fac1,fac2,ytry;
+
+        fac1=(1.0-fac)/ndim;
+        fac2=fac1-fac;
+        for (j=0;j<ndim;j++)
+        {
+            ptry(j)=coord_sum(j)*fac1-p(ihi,j)*fac2;
+        }
+        ytry=f->calc((double*)ptry.data);
+        if (ytry < y(ihi))
+        {
+            y(ihi)=ytry;
+            for (j=0;j<ndim;j++)
+            {
+                coord_sum(j) += ptry(j)-p(ihi,j);
+                p(ihi,j)=ptry(j);
+            }
+        }
+
+        return ytry;
+    }
+
+    /*
+    Performs the actual minimization of Solver::Function f (after the initialization was done)
+
+    The matrix p[ndim+1][1..ndim] represents ndim+1 vertices that
+    form a simplex - each row is an ndim vector.
+    On output, nfunk gives the number of function evaluations taken.
+    */
+    double DownhillSolverImpl::innerDownhillSimplex(
+        cv::Mat_<double>&   p,
+        double     MinRange,
+        double     MinError,
+        int&       nfunk,
+        const Ptr<Solver::Function>& f,
+        int nmax
+        )
+    {
+        int ndim=p.cols;
+        double res;
+        int i,ihi,ilo,inhi,j,mpts=ndim+1;
+        double error, range,ysave,ytry;
+        Mat_<double> coord_sum(1,ndim,0.0),buf(1,ndim,0.0),y(1,ndim,0.0);
+
+        nfunk = 0;
+
+        for(i=0;i<ndim+1;++i)
+        {
+            y(i) = f->calc(p[i]);
+        }
+
+        nfunk = ndim+1;
+
+        reduce(p,coord_sum,0,CV_REDUCE_SUM);
+
+        for (;;)
+        {
+            ilo=0;
+            /*  find highest (worst), next-to-worst, and lowest
+                (best) points by going through all of them. */
+            ihi = y(0)>y(1) ? (inhi=1,0) : (inhi=0,1);
+            for (i=0;i<mpts;i++)
+            {
+                if (y(i) <= y(ilo))
+                    ilo=i;
+                if (y(i) > y(ihi))
+                {
+                    inhi=ihi;
+                    ihi=i;
+                }
+                else if (y(i) > y(inhi) && i != ihi)
+                    inhi=i;
+            }
+
+            /* check stop criterion */
+            error=fabs(y(ihi)-y(ilo));
+            range=0;
+            for(i=0;i<ndim;++i)
+            {
+                double min = p(0,i);
+                double max = p(0,i);
+                double d;
+                for(j=1;j<=ndim;++j)
+                {
+                    if( min > p(j,i) ) min = p(j,i);
+                    if( max < p(j,i) ) max = p(j,i);
+                }
+                d = fabs(max-min);
+                if(range < d) range = d;
+            }
+
+            if(range <= MinRange || error <= MinError)
+            { /* Put best point and value in first slot. */
+                std::swap(y(0),y(ilo));
+                for (i=0;i<ndim;i++)
+                {
+                    std::swap(p(0,i),p(ilo,i));
+                }
+                break;
+            }
+
+            if (nfunk >= nmax){
+                dprintf(("nmax exceeded\n"));
+                return y(ilo);
+            }
+            nfunk += 2;
+            /*Begin a new iteration. First, reflect the worst point about the centroid of others */
+            ytry = tryNewPoint(p,y,coord_sum,f,ihi,-1.0,buf);
+            if (ytry <= y(ilo))
+            { /*If that's better than the best point, go twice as far in that direction*/
+                ytry = tryNewPoint(p,y,coord_sum,f,ihi,2.0,buf);
+            }
+            else if (ytry >= y(inhi))
+            {   /* The new point is worse than the second-highest, but better
+                  than the worst so do not go so far in that direction */
+                ysave = y(ihi);
+                ytry = tryNewPoint(p,y,coord_sum,f,ihi,0.5,buf);
+                if (ytry >= ysave)
+                { /* Can't seem to improve things. Contract the simplex to good point
+               in hope to find a simplex landscape. */
+                    for (i=0;i<mpts;i++)
+                    {
+                        if (i != ilo)
+                        {
+                            for (j=0;j<ndim;j++)
+                            {
+                                p(i,j) = coord_sum(j) = 0.5*(p(i,j)+p(ilo,j));
+                            }
+                            y(i)=f->calc((double*)coord_sum.data);
+                        }
+                    }
+                    nfunk += ndim;
+                    reduce(p,coord_sum,0,CV_REDUCE_SUM);
+                }
+            } else --(nfunk); /* correct nfunk */
+            dprintf(("this is simplex on iteration %d\n",nfunk));
+            print_matrix(p);
+        } /* go to next iteration. */
+        res = y(0);
+
+        return res;
+    }
+
+    void DownhillSolverImpl::createInitialSimplex(Mat_<double>& simplex,Mat& step){
+        for(int i=1;i<=step.cols;++i)
+        {
+            simplex.row(0).copyTo(simplex.row(i));
+            simplex(i,i-1)+= 0.5*step.at<double>(0,i-1);
+        }
+        simplex.row(0) -= 0.5*step;
+
+        dprintf(("this is simplex\n"));
+        print_matrix(simplex);
+    }
+
+    double DownhillSolverImpl::minimize(InputOutputArray x){
+        dprintf(("hi from minimize\n"));
+        CV_Assert(_Function.empty()==false);
+        dprintf(("termcrit:\n\ttype: %d\n\tmaxCount: %d\n\tEPS: %g\n",_termcrit.type,_termcrit.maxCount,_termcrit.epsilon));
+        dprintf(("step\n"));
+        print_matrix(_step);
+
+        Mat x_mat=x.getMat();
+        CV_Assert(MIN(x_mat.rows,x_mat.cols)==1);
+        CV_Assert(MAX(x_mat.rows,x_mat.cols)==_step.cols);
+        CV_Assert(x_mat.type()==CV_64FC1);
+
+        Mat_<double> proxy_x;
+
+        if(x_mat.rows>1){
+            proxy_x=x_mat.t();
+        }else{
+            proxy_x=x_mat;
+        }
+
+        int count=0;
+        int ndim=_step.cols;
+        Mat_<double> simplex=Mat_<double>(ndim+1,ndim,0.0);
+
+        simplex.row(0).copyTo(proxy_x);
+        createInitialSimplex(simplex,_step);
+        double res = innerDownhillSimplex(
+                simplex,_termcrit.epsilon, _termcrit.epsilon, count,_Function,_termcrit.maxCount);
+        simplex.row(0).copyTo(proxy_x);
+
+        dprintf(("%d iterations done\n",count));
+
+        if(x_mat.rows>1){
+            Mat(x_mat.rows, 1, CV_64F, (double*)proxy_x.data).copyTo(x);
+        }
+        return res;
+    }
+    DownhillSolverImpl::DownhillSolverImpl(){
+        _Function=Ptr<Function>();
+        _step=Mat_<double>();
+    }
+    Ptr<Solver::Function> DownhillSolverImpl::getFunction()const{
+        return _Function;
+    }
+    void DownhillSolverImpl::setFunction(const Ptr<Function>& f){
+        _Function=f;
+    }
+    TermCriteria DownhillSolverImpl::getTermCriteria()const{
+        return _termcrit;
+    }
+    void DownhillSolverImpl::setTermCriteria(const TermCriteria& termcrit){
+        CV_Assert(termcrit.type==(TermCriteria::MAX_ITER+TermCriteria::EPS) && termcrit.epsilon>0 && termcrit.maxCount>0);
+        _termcrit=termcrit;
+    }
+    // both minRange & minError are specified by termcrit.epsilon; In addition, user may specify the number of iterations that the algorithm does.
+    Ptr<DownhillSolver> createDownhillSolver(const Ptr<Solver::Function>& f, InputArray initStep, TermCriteria termcrit){
+        DownhillSolver *DS=new DownhillSolverImpl();
+        DS->setFunction(f);
+        DS->setInitStep(initStep);
+        DS->setTermCriteria(termcrit);
+        return Ptr<DownhillSolver>(DS);
+    }
+    void DownhillSolverImpl::getInitStep(OutputArray step)const{
+        _step.copyTo(step);
+    }
+    void DownhillSolverImpl::setInitStep(InputArray step){
+        //set dimensionality and make a deep copy of step
+        Mat m=step.getMat();
+        dprintf(("m.cols=%d\nm.rows=%d\n",m.cols,m.rows));
+        CV_Assert(MIN(m.cols,m.rows)==1 && m.type()==CV_64FC1);
+        if(m.rows==1){
+            m.copyTo(_step);
+        }else{
+            transpose(m,_step);
+        }
+    }
+}}
diff --git a/modules/optim/test/test_denoise_tvl1.cpp b/modules/optim/test/test_denoise_tvl1.cpp
new file mode 100644
index 000000000..2721a7666
--- /dev/null
+++ b/modules/optim/test/test_denoise_tvl1.cpp
@@ -0,0 +1,80 @@
+#include "test_precomp.hpp"
+#include "opencv2/highgui.hpp"
+
+void make_noisy(const cv::Mat& img, cv::Mat& noisy, double sigma, double pepper_salt_ratio,cv::RNG& rng){
+    noisy.create(img.size(), img.type());
+    cv::Mat noise(img.size(), img.type()), mask(img.size(), CV_8U);
+    rng.fill(noise,cv::RNG::NORMAL,128.0,sigma);
+    cv::addWeighted(img, 1, noise, 1, -128, noisy);
+    cv::randn(noise, cv::Scalar::all(0), cv::Scalar::all(2));
+    noise *= 255;
+    cv::randu(mask, 0, cvRound(1./pepper_salt_ratio));
+    cv::Mat half = mask.colRange(0, img.cols/2);
+    half = cv::Scalar::all(1);
+    noise.setTo(128, mask);
+    cv::addWeighted(noisy, 1, noise, 1, -128, noisy);
+}
+void make_spotty(cv::Mat& img,cv::RNG& rng, int r=3,int n=1000){
+    for(int i=0;i<n;i++){
+        int x=rng(img.cols-r),y=rng(img.rows-r);
+        if(rng(2)==0){
+            img(cv::Range(y,y+r),cv::Range(x,x+r))=(uchar)0;
+        }else{
+            img(cv::Range(y,y+r),cv::Range(x,x+r))=(uchar)255;
+        }
+    }
+}
+
+bool validate_pixel(const cv::Mat& image,int x,int y,uchar val){
+    printf("test: image(%d,%d)=%d vs %d - %s\n",x,y,(int)image.at<uchar>(x,y),val,(val==image.at<uchar>(x,y))?"true":"false");
+    return (image.at<uchar>(x,y)==val);
+}
+
+TEST(Optim_denoise_tvl1, regression_basic){
+    cv::RNG rng(42);
+    cv::Mat img = cv::imread("lena.jpg", 0), noisy,res;
+    if(img.rows!=512 || img.cols!=512){
+        printf("\tplease, put lena.jpg from samples/c in the current folder\n");
+        printf("\tnow, the test will fail...\n");
+        ASSERT_TRUE(false);
+    }
+
+    const int obs_num=5;
+    std::vector<cv::Mat> images(obs_num,cv::Mat());
+    for(int i=0;i<(int)images.size();i++){
+        make_noisy(img,images[i], 20, 0.02,rng);
+        //make_spotty(images[i],rng);
+    }
+
+    //cv::imshow("test", images[0]);
+    cv::optim::denoise_TVL1(images, res);
+    //cv::imshow("denoised", res);
+    //cv::waitKey();
+
+#if 0
+    ASSERT_TRUE(validate_pixel(res,248,334,179));
+    ASSERT_TRUE(validate_pixel(res,489,333,172));
+    ASSERT_TRUE(validate_pixel(res,425,507,104));
+    ASSERT_TRUE(validate_pixel(res,489,486,105));
+    ASSERT_TRUE(validate_pixel(res,223,208,64));
+    ASSERT_TRUE(validate_pixel(res,418,3,78));
+    ASSERT_TRUE(validate_pixel(res,63,76,97));
+    ASSERT_TRUE(validate_pixel(res,29,134,126));
+    ASSERT_TRUE(validate_pixel(res,219,291,174));
+    ASSERT_TRUE(validate_pixel(res,384,124,76));
+#endif
+
+#if 1
+    ASSERT_TRUE(validate_pixel(res,248,334,194));
+    ASSERT_TRUE(validate_pixel(res,489,333,171));
+    ASSERT_TRUE(validate_pixel(res,425,507,103));
+    ASSERT_TRUE(validate_pixel(res,489,486,109));
+    ASSERT_TRUE(validate_pixel(res,223,208,72));
+    ASSERT_TRUE(validate_pixel(res,418,3,58));
+    ASSERT_TRUE(validate_pixel(res,63,76,93));
+    ASSERT_TRUE(validate_pixel(res,29,134,127));
+    ASSERT_TRUE(validate_pixel(res,219,291,180));
+    ASSERT_TRUE(validate_pixel(res,384,124,80));
+#endif
+
+}
diff --git a/modules/optim/test/test_downhill_simplex.cpp b/modules/optim/test/test_downhill_simplex.cpp
new file mode 100644
index 000000000..95b2c6e9e
--- /dev/null
+++ b/modules/optim/test/test_downhill_simplex.cpp
@@ -0,0 +1,63 @@
+#include "test_precomp.hpp"
+#include <cstdlib>
+#include <cmath>
+#include <algorithm>
+
+static void mytest(cv::Ptr<cv::optim::DownhillSolver> solver,cv::Ptr<cv::optim::Solver::Function> ptr_F,cv::Mat& x,cv::Mat& step,
+        cv::Mat& etalon_x,double etalon_res){
+    solver->setFunction(ptr_F);
+    int ndim=MAX(step.cols,step.rows);
+    solver->setInitStep(step);
+    cv::Mat settedStep;
+    solver->getInitStep(settedStep);
+    ASSERT_TRUE(settedStep.rows==1 && settedStep.cols==ndim);
+    ASSERT_TRUE(std::equal(step.begin<double>(),step.end<double>(),settedStep.begin<double>()));
+    std::cout<<"step setted:\n\t"<<step<<std::endl;
+    double res=solver->minimize(x);
+    std::cout<<"res:\n\t"<<res<<std::endl;
+    std::cout<<"x:\n\t"<<x<<std::endl;
+    std::cout<<"etalon_res:\n\t"<<etalon_res<<std::endl;
+    std::cout<<"etalon_x:\n\t"<<etalon_x<<std::endl;
+    double tol=solver->getTermCriteria().epsilon;
+    ASSERT_TRUE(std::abs(res-etalon_res)<tol);
+    /*for(cv::Mat_<double>::iterator it1=x.begin<double>(),it2=etalon_x.begin<double>();it1!=x.end<double>();it1++,it2++){
+        ASSERT_TRUE(std::abs((*it1)-(*it2))<tol);
+    }*/
+    std::cout<<"--------------------------\n";
+}
+
+class SphereF:public cv::optim::Solver::Function{
+public:
+    double calc(const double* x)const{
+        return x[0]*x[0]+x[1]*x[1];
+    }
+};
+class RosenbrockF:public cv::optim::Solver::Function{
+    double calc(const double* x)const{
+        return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
+    }
+};
+
+TEST(Optim_Downhill, regression_basic){
+    cv::Ptr<cv::optim::DownhillSolver> solver=cv::optim::createDownhillSolver();
+#if 1
+    {
+        cv::Ptr<cv::optim::Solver::Function> ptr_F(new SphereF());
+        cv::Mat x=(cv::Mat_<double>(1,2)<<1.0,1.0),
+            step=(cv::Mat_<double>(2,1)<<-0.5,-0.5),
+            etalon_x=(cv::Mat_<double>(1,2)<<-0.0,0.0);
+        double etalon_res=0.0;
+        mytest(solver,ptr_F,x,step,etalon_x,etalon_res);
+    }
+#endif
+#if 1
+    {
+        cv::Ptr<cv::optim::Solver::Function> ptr_F(new RosenbrockF());
+        cv::Mat x=(cv::Mat_<double>(2,1)<<0.0,0.0),
+            step=(cv::Mat_<double>(2,1)<<0.5,+0.5),
+            etalon_x=(cv::Mat_<double>(2,1)<<1.0,1.0);
+        double etalon_res=0.0;
+        mytest(solver,ptr_F,x,step,etalon_x,etalon_res);
+    }
+#endif
+}
diff --git a/modules/optim/test/test_lpsolver.cpp b/modules/optim/test/test_lpsolver.cpp
new file mode 100644
index 000000000..f39c7eb37
--- /dev/null
+++ b/modules/optim/test/test_lpsolver.cpp
@@ -0,0 +1,101 @@
+#include "test_precomp.hpp"
+#include <iostream>
+
+TEST(Optim_LpSolver, regression_basic){
+    cv::Mat A,B,z,etalon_z;
+
+#if 1
+    //cormen's example #1
+    A=(cv::Mat_<double>(3,1)<<3,1,2);
+    B=(cv::Mat_<double>(3,4)<<1,1,3,30,2,2,5,24,4,1,2,36);
+    std::cout<<"here A goes\n"<<A<<"\n";
+    cv::optim::solveLP(A,B,z);
+    std::cout<<"here z goes\n"<<z<<"\n";
+    etalon_z=(cv::Mat_<double>(3,1)<<8,4,0);
+    ASSERT_EQ(cv::countNonZero(z!=etalon_z),0);
+#endif
+
+#if 1
+    //cormen's example #2
+    A=(cv::Mat_<double>(1,2)<<18,12.5);
+    B=(cv::Mat_<double>(3,3)<<1,1,20,1,0,20,0,1,16);
+    std::cout<<"here A goes\n"<<A<<"\n";
+    cv::optim::solveLP(A,B,z);
+    std::cout<<"here z goes\n"<<z<<"\n";
+    etalon_z=(cv::Mat_<double>(2,1)<<20,0);
+    ASSERT_EQ(cv::countNonZero(z!=etalon_z),0);
+#endif
+
+#if 1
+    //cormen's example #3
+    A=(cv::Mat_<double>(1,2)<<5,-3);
+    B=(cv::Mat_<double>(2,3)<<1,-1,1,2,1,2);
+    std::cout<<"here A goes\n"<<A<<"\n";
+    cv::optim::solveLP(A,B,z);
+    std::cout<<"here z goes\n"<<z<<"\n";
+    etalon_z=(cv::Mat_<double>(2,1)<<1,0);
+    ASSERT_EQ(cv::countNonZero(z!=etalon_z),0);
+#endif
+}
+
+TEST(Optim_LpSolver, regression_init_unfeasible){
+    cv::Mat A,B,z,etalon_z;
+
+#if 1
+    //cormen's example #4 - unfeasible
+    A=(cv::Mat_<double>(1,3)<<-1,-1,-1);
+    B=(cv::Mat_<double>(2,4)<<-2,-7.5,-3,-10000,-20,-5,-10,-30000);
+    std::cout<<"here A goes\n"<<A<<"\n";
+    cv::optim::solveLP(A,B,z);
+    std::cout<<"here z goes\n"<<z<<"\n";
+    etalon_z=(cv::Mat_<double>(3,1)<<1250,1000,0);
+    ASSERT_EQ(cv::countNonZero(z!=etalon_z),0);
+#endif
+}
+
+TEST(Optim_LpSolver, regression_absolutely_unfeasible){
+    cv::Mat A,B,z,etalon_z;
+
+#if 1
+    //trivial absolutely unfeasible example
+    A=(cv::Mat_<double>(1,1)<<1);
+    B=(cv::Mat_<double>(2,2)<<1,-1);
+    std::cout<<"here A goes\n"<<A<<"\n";
+    int res=cv::optim::solveLP(A,B,z);
+    ASSERT_EQ(res,-1);
+#endif
+}
+
+TEST(Optim_LpSolver, regression_multiple_solutions){
+    cv::Mat A,B,z,etalon_z;
+
+#if 1
+    //trivial example with multiple solutions
+    A=(cv::Mat_<double>(2,1)<<1,1);
+    B=(cv::Mat_<double>(1,3)<<1,1,1);
+    std::cout<<"here A goes\n"<<A<<"\n";
+    int res=cv::optim::solveLP(A,B,z);
+    printf("res=%d\n",res);
+    printf("scalar %g\n",z.dot(A));
+    std::cout<<"here z goes\n"<<z<<"\n";
+    ASSERT_EQ(res,1);
+    ASSERT_EQ(z.dot(A),1);
+#endif
+}
+
+TEST(Optim_LpSolver, regression_cycling){
+    cv::Mat A,B,z,etalon_z;
+
+#if 1
+    //example with cycling from http://people.orie.cornell.edu/miketodd/or630/SimplexCyclingExample.pdf
+    A=(cv::Mat_<double>(4,1)<<10,-57,-9,-24);
+    B=(cv::Mat_<double>(3,5)<<0.5,-5.5,-2.5,9,0,0.5,-1.5,-0.5,1,0,1,0,0,0,1);
+    std::cout<<"here A goes\n"<<A<<"\n";
+    int res=cv::optim::solveLP(A,B,z);
+    printf("res=%d\n",res);
+    printf("scalar %g\n",z.dot(A));
+    std::cout<<"here z goes\n"<<z<<"\n";
+    ASSERT_EQ(z.dot(A),1);
+    //ASSERT_EQ(res,1);
+#endif
+}
diff --git a/modules/contrib/test/test_precomp.cpp b/modules/optim/test/test_main.cpp
similarity index 58%
rename from modules/contrib/test/test_precomp.cpp
rename to modules/optim/test/test_main.cpp
index 5956e13e3..6b2499344 100644
--- a/modules/contrib/test/test_precomp.cpp
+++ b/modules/optim/test/test_main.cpp
@@ -1 +1,3 @@
 #include "test_precomp.hpp"
+
+CV_TEST_MAIN("cv")
diff --git a/modules/optim/test/test_precomp.hpp b/modules/optim/test/test_precomp.hpp
new file mode 100644
index 000000000..9a86cab4b
--- /dev/null
+++ b/modules/optim/test/test_precomp.hpp
@@ -0,0 +1,15 @@
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/optim.hpp"
+
+#endif
diff --git a/modules/photo/doc/denoising.rst b/modules/photo/doc/denoising.rst
index 5a4d7448c..4c64b07c2 100644
--- a/modules/photo/doc/denoising.rst
+++ b/modules/photo/doc/denoising.rst
@@ -10,6 +10,8 @@ with several computational optimizations. Noise expected to be a gaussian white
 
 .. ocv:function:: void fastNlMeansDenoising( InputArray src, OutputArray dst, float h=3, int templateWindowSize=7, int searchWindowSize=21 )
 
+.. ocv:pyfunction:: cv2.fastNlMeansDenoising(src[, dst[, h[, templateWindowSize[, searchWindowSize]]]]) -> dst
+
     :param src: Input 8-bit 1-channel, 2-channel or 3-channel image.
 
     :param dst: Output image with the same size and type as  ``src`` .
@@ -30,6 +32,8 @@ Modification of ``fastNlMeansDenoising`` function for colored images
 
 .. ocv:function:: void fastNlMeansDenoisingColored( InputArray src, OutputArray dst, float h=3, float hColor=3, int templateWindowSize=7, int searchWindowSize=21 )
 
+.. ocv:pyfunction:: cv2.fastNlMeansDenoisingColored(src[, dst[, h[, hColor[, templateWindowSize[, searchWindowSize]]]]]) -> dst
+
     :param src: Input 8-bit 3-channel image.
 
     :param dst: Output image with the same size and type as  ``src`` .
@@ -51,6 +55,8 @@ For more details see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131
 
 .. ocv:function:: void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, float h=3, int templateWindowSize=7, int searchWindowSize=21 )
 
+.. ocv:pyfunction:: cv2.fastNlMeansDenoisingMulti(srcImgs, imgToDenoiseIndex, temporalWindowSize[, dst[, h[, templateWindowSize[, searchWindowSize]]]]) -> dst
+
     :param srcImgs: Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should have the same type and size.
 
     :param imgToDenoiseIndex: Target image to denoise index in ``srcImgs`` sequence
@@ -71,6 +77,8 @@ Modification of ``fastNlMeansDenoisingMulti`` function for colored images sequen
 
 .. ocv:function:: void fastNlMeansDenoisingColoredMulti( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, float h=3, float hColor=3, int templateWindowSize=7, int searchWindowSize=21 )
 
+.. ocv:pyfunction:: cv2.fastNlMeansDenoisingColoredMulti(srcImgs, imgToDenoiseIndex, temporalWindowSize[, dst[, h[, hColor[, templateWindowSize[, searchWindowSize]]]]]) -> dst
+
     :param srcImgs: Input 8-bit 3-channel images sequence. All images should have the same type and size.
 
     :param imgToDenoiseIndex: Target image to denoise index in ``srcImgs`` sequence
@@ -187,4 +195,3 @@ The function converts image to CIELAB colorspace and then separately denoise L a
 .. seealso::
 
     :ocv:func:`fastNlMeansDenoisingColored`
-
diff --git a/modules/photo/doc/inpainting.rst b/modules/photo/doc/inpainting.rst
index 9b6626613..625f70a26 100644
--- a/modules/photo/doc/inpainting.rst
+++ b/modules/photo/doc/inpainting.rst
@@ -23,10 +23,21 @@ Restores the selected region in an image using the region neighborhood.
 
     :param flags: Inpainting method that could be one of the following:
 
-            * **INPAINT_NS**     Navier-Stokes based method.
+            * **INPAINT_NS**     Navier-Stokes based method [Navier01]
 
             * **INPAINT_TELEA**     Method by Alexandru Telea  [Telea04]_.
 
 The function reconstructs the selected image area from the pixel near the area boundary. The function may be used to remove dust and scratches from a scanned photo, or to remove undesirable objects from still images or video. See
 http://en.wikipedia.org/wiki/Inpainting
 for more details.
+
+.. note::
+
+   * An example using the inpainting technique can be found at opencv_source_code/samples/cpp/inpaint.cpp
+
+   * (Python) An example using the inpainting technique can be found at opencv_source_code/samples/python2/inpaint.py
+
+
+.. [Telea04] Telea, Alexandru. "An image inpainting technique based on the fast marching method." Journal of graphics tools 9, no. 1 (2004): 23-34.
+
+.. [Navier01] Bertalmio, Marcelo, Andrea L. Bertozzi, and Guillermo Sapiro. "Navier-stokes, fluid dynamics, and image and video inpainting." In Computer Vision and Pattern Recognition, 2001. CVPR 2001. Proceedings of the 2001 IEEE Computer Society Conference on, vol. 1, pp. I-355. IEEE, 2001.
diff --git a/modules/photo/include/opencv2/photo/photo.hpp b/modules/photo/include/opencv2/photo/photo.hpp
index 41aa7ae40..8af5e9f0f 100644
--- a/modules/photo/include/opencv2/photo/photo.hpp
+++ b/modules/photo/include/opencv2/photo/photo.hpp
@@ -45,4 +45,4 @@
 #error this is a compatibility header which should not be used inside the OpenCV library
 #endif
 
-#include "opencv2/photo.hpp"
\ No newline at end of file
+#include "opencv2/photo.hpp"
diff --git a/modules/photo/perf/perf_main.cpp b/modules/photo/perf/perf_main.cpp
index f5863c197..fd3781212 100644
--- a/modules/photo/perf/perf_main.cpp
+++ b/modules/photo/perf/perf_main.cpp
@@ -1,3 +1,11 @@
 #include "perf_precomp.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
 
-CV_PERF_TEST_MAIN(photo)
+static const char * impls[] = {
+#ifdef HAVE_CUDA
+    "cuda",
+#endif
+    "plain"
+};
+
+CV_PERF_TEST_MAIN_WITH_IMPLS(photo, impls, perf::printCudaInfo())
diff --git a/modules/photo/perf/perf_precomp.cpp b/modules/photo/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/photo/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/photo/src/arrays.hpp b/modules/photo/src/arrays.hpp
index ae01e9af8..a33018e59 100644
--- a/modules/photo/src/arrays.hpp
+++ b/modules/photo/src/arrays.hpp
@@ -157,5 +157,3 @@ template <class T> struct Array4d {
 };
 
 #endif
-
-
diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp
index 834757898..166d7f892 100644
--- a/modules/photo/src/denoising.cpp
+++ b/modules/photo/src/denoising.cpp
@@ -238,5 +238,3 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
 
     cvtColor(dst_lab, dst, COLOR_Lab2LBGR);
 }
-
-
diff --git a/modules/photo/src/inpaint.cpp b/modules/photo/src/inpaint.cpp
index ec91e3c1b..f91db5f54 100644
--- a/modules/photo/src/inpaint.cpp
+++ b/modules/photo/src/inpaint.cpp
@@ -718,7 +718,7 @@ icvNSInpaintFMM(const CvMat *f, CvMat *t, CvMat *out, int range, CvPriorityQueue
    }
 
 namespace cv {
-template<> void cv::Ptr<IplConvKernel>::delete_obj()
+template<> void cv::DefaultDeleter<IplConvKernel>::operator ()(IplConvKernel* obj) const
 {
   cvReleaseStructuringElement(&obj);
 }
@@ -759,11 +759,11 @@ cvInpaint( const CvArr* _input_img, const CvArr* _inpaint_mask, CvArr* _output_i
     ecols = input_img->cols + 2;
     erows = input_img->rows + 2;
 
-    f = cvCreateMat(erows, ecols, CV_8UC1);
-    t = cvCreateMat(erows, ecols, CV_32FC1);
-    band = cvCreateMat(erows, ecols, CV_8UC1);
-    mask = cvCreateMat(erows, ecols, CV_8UC1);
-    el_cross = cvCreateStructuringElementEx(3,3,1,1,CV_SHAPE_CROSS,NULL);
+    f.reset(cvCreateMat(erows, ecols, CV_8UC1));
+    t.reset(cvCreateMat(erows, ecols, CV_32FC1));
+    band.reset(cvCreateMat(erows, ecols, CV_8UC1));
+    mask.reset(cvCreateMat(erows, ecols, CV_8UC1));
+    el_cross.reset(cvCreateStructuringElementEx(3,3,1,1,CV_SHAPE_CROSS,NULL));
 
     cvCopy( input_img, output_img );
     cvSet(mask,cvScalar(KNOWN,0,0,0));
@@ -772,7 +772,7 @@ cvInpaint( const CvArr* _input_img, const CvArr* _inpaint_mask, CvArr* _output_i
     cvSet(f,cvScalar(KNOWN,0,0,0));
     cvSet(t,cvScalar(1.0e6f,0,0,0));
     cvDilate(mask,band,el_cross,1);   // image with narrow band
-    Heap=new CvPriorityQueueFloat;
+    Heap=cv::makePtr<CvPriorityQueueFloat>();
     if (!Heap->Init(band))
         return;
     cvSub(band,mask,band,NULL);
@@ -785,12 +785,12 @@ cvInpaint( const CvArr* _input_img, const CvArr* _inpaint_mask, CvArr* _output_i
 
     if( flags == cv::INPAINT_TELEA )
     {
-        out = cvCreateMat(erows, ecols, CV_8UC1);
-        el_range = cvCreateStructuringElementEx(2*range+1,2*range+1,
-            range,range,CV_SHAPE_RECT,NULL);
+        out.reset(cvCreateMat(erows, ecols, CV_8UC1));
+        el_range.reset(cvCreateStructuringElementEx(2*range+1,2*range+1,
+            range,range,CV_SHAPE_RECT,NULL));
         cvDilate(mask,out,el_range,1);
         cvSub(out,mask,out,NULL);
-        Out=new CvPriorityQueueFloat;
+        Out=cv::makePtr<CvPriorityQueueFloat>();
         if (!Out->Init(out))
             return;
         if (!Out->Add(band))
diff --git a/modules/photo/src/precomp.cpp b/modules/photo/src/precomp.cpp
deleted file mode 100644
index 3e0ec42de..000000000
--- a/modules/photo/src/precomp.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-/* End of file. */
diff --git a/modules/photo/test/test_denoising_gpu.cpp b/modules/photo/test/test_denoising_gpu.cpp
index caf3b23f2..f8de826f2 100644
--- a/modules/photo/test/test_denoising_gpu.cpp
+++ b/modules/photo/test/test_denoising_gpu.cpp
@@ -46,6 +46,7 @@
 #include "opencv2/ts/gpu_test.hpp"
 
 #include "opencv2/opencv_modules.hpp"
+#include "cvconfig.h"
 
 #if defined (HAVE_CUDA) && defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUIMGPROC)
 
diff --git a/modules/photo/test/test_precomp.cpp b/modules/photo/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/photo/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt
index 0b4c59d63..d3bed1695 100644
--- a/modules/python/CMakeLists.txt
+++ b/modules/python/CMakeLists.txt
@@ -6,7 +6,7 @@ if(WIN32 AND CMAKE_BUILD_TYPE STREQUAL "Debug")
   ocv_module_disable(python)
 endif()
 
-if(ANDROID OR IOS OR NOT PYTHONLIBS_FOUND OR NOT PYTHON_NUMPY_INCLUDE_DIR)
+if(ANDROID OR IOS OR NOT PYTHONLIBS_FOUND OR NOT PYTHON_NUMPY_INCLUDE_DIRS)
   ocv_module_disable(python)
 endif()
 
@@ -15,7 +15,7 @@ ocv_add_module(python BINDINGS opencv_core opencv_flann opencv_imgproc opencv_vi
 
 ocv_module_include_directories(
     "${PYTHON_INCLUDE_PATH}"
-    "${PYTHON_NUMPY_INCLUDE_DIR}"
+    ${PYTHON_NUMPY_INCLUDE_DIRS}
     "${CMAKE_CURRENT_SOURCE_DIR}/src2"
     )
 
@@ -81,6 +81,10 @@ if(ENABLE_SOLUTION_FOLDERS)
   set_target_properties(${the_module} PROPERTIES FOLDER "bindings")
 endif()
 
+if(MSVC)
+    add_definitions(-DCVAPI_EXPORTS)
+endif()
+
 if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-function")
 endif()
diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp
index 8a231ef5a..9201b7257 100644
--- a/modules/python/src2/cv2.cpp
+++ b/modules/python/src2/cv2.cpp
@@ -1,6 +1,7 @@
 #include <Python.h>
 
 #define MODULESTR "cv2"
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 #include <numpy/ndarrayobject.h>
 
 #include "opencv2/core.hpp"
@@ -97,6 +98,7 @@ using namespace cv;
 typedef cv::softcascade::ChannelFeatureBuilder softcascade_ChannelFeatureBuilder;
 
 typedef std::vector<uchar> vector_uchar;
+typedef std::vector<char> vector_char;
 typedef std::vector<int> vector_int;
 typedef std::vector<float> vector_float;
 typedef std::vector<double> vector_double;
@@ -112,6 +114,8 @@ typedef std::vector<KeyPoint> vector_KeyPoint;
 typedef std::vector<Mat> vector_Mat;
 typedef std::vector<DMatch> vector_DMatch;
 typedef std::vector<String> vector_String;
+
+typedef std::vector<std::vector<char> > vector_vector_char;
 typedef std::vector<std::vector<Point> > vector_vector_Point;
 typedef std::vector<std::vector<Point2f> > vector_vector_Point2f;
 typedef std::vector<std::vector<Point3f> > vector_vector_Point3f;
@@ -143,6 +147,7 @@ typedef Ptr<MergeDebevec> Ptr_MergeDebevec;
 typedef Ptr<MergeMertens> Ptr_MergeMertens;
 
 typedef Ptr<cv::softcascade::ChannelFeatureBuilder> Ptr_ChannelFeatureBuilder;
+typedef Ptr<CLAHE> Ptr_CLAHE;
 
 typedef SimpleBlobDetector::Params SimpleBlobDetector_Params;
 
@@ -208,10 +213,10 @@ public:
         if(!o)
             CV_Error_(Error::StsError, ("The numpy array of typenum=%d, ndims=%d can not be created", typenum, dims));
         refcount = refcountFromPyObject(o);
-        npy_intp* _strides = PyArray_STRIDES(o);
+        npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o);
         for( i = 0; i < dims - (cn > 1); i++ )
             step[i] = (size_t)_strides[i];
-        datastart = data = (uchar*)PyArray_DATA(o);
+        datastart = data = (uchar*)PyArray_DATA((PyArrayObject*) o);
     }
 
     void deallocate(int* refcount, uchar*, uchar*)
@@ -286,8 +291,10 @@ static bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo info)
         return false;
     }
 
+    PyArrayObject* oarr = (PyArrayObject*) o;
+
     bool needcopy = false, needcast = false;
-    int typenum = PyArray_TYPE(o), new_typenum = typenum;
+    int typenum = PyArray_TYPE(oarr), new_typenum = typenum;
     int type = typenum == NPY_UBYTE ? CV_8U :
                typenum == NPY_BYTE ? CV_8S :
                typenum == NPY_USHORT ? CV_16U :
@@ -316,7 +323,7 @@ static bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo info)
     const int CV_MAX_DIM = 32;
 #endif
 
-    int ndims = PyArray_NDIM(o);
+    int ndims = PyArray_NDIM(oarr);
     if(ndims >= CV_MAX_DIM)
     {
         failmsg("%s dimensionality (=%d) is too high", info.name, ndims);
@@ -326,8 +333,8 @@ static bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo info)
     int size[CV_MAX_DIM+1];
     size_t step[CV_MAX_DIM+1];
     size_t elemsize = CV_ELEM_SIZE1(type);
-    const npy_intp* _sizes = PyArray_DIMS(o);
-    const npy_intp* _strides = PyArray_STRIDES(o);
+    const npy_intp* _sizes = PyArray_DIMS(oarr);
+    const npy_intp* _strides = PyArray_STRIDES(oarr);
     bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX;
 
     for( int i = ndims-1; i >= 0 && !needcopy; i-- )
@@ -351,11 +358,17 @@ static bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo info)
             failmsg("Layout of the output array %s is incompatible with cv::Mat (step[ndims-1] != elemsize or step[1] != elemsize*nchannels)", info.name);
             return false;
         }
-        if( needcast )
-            o = (PyObject*)PyArray_Cast((PyArrayObject*)o, new_typenum);
-        else
-            o = (PyObject*)PyArray_GETCONTIGUOUS((PyArrayObject*)o);
-        _strides = PyArray_STRIDES(o);
+
+        if( needcast ) {
+            o = PyArray_Cast(oarr, new_typenum);
+            oarr = (PyArrayObject*) o;
+        }
+        else {
+            oarr = PyArray_GETCONTIGUOUS(oarr);
+            o = (PyObject*) oarr;
+        }
+
+        _strides = PyArray_STRIDES(oarr);
     }
 
     for(int i = 0; i < ndims; i++)
@@ -383,7 +396,7 @@ static bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo info)
         return false;
     }
 
-    m = Mat(ndims, size, type, PyArray_DATA(o), step);
+    m = Mat(ndims, size, type, PyArray_DATA(oarr), step);
 
     if( m.data )
     {
@@ -509,13 +522,6 @@ PyObject* pyopencv_from(const cvflann_flann_distance_t& value)
     return PyInt_FromLong(int(value));
 }
 
-template<>
-bool pyopencv_to(PyObject*, cv::flann::SearchParams &, const char *)
-{
-    CV_Assert(!"not implemented");
-    return false;
-}
-
 template<>
 bool pyopencv_to(PyObject* obj, int& value, const char* name)
 {
@@ -693,6 +699,23 @@ bool pyopencv_to(PyObject* obj, Point2f& p, const char* name)
     return PyArg_ParseTuple(obj, "ff", &p.x, &p.y) > 0;
 }
 
+template<>
+bool pyopencv_to(PyObject* obj, Point2d& p, const char* name)
+{
+    (void)name;
+    if(!obj || obj == Py_None)
+        return true;
+    if(!!PyComplex_CheckExact(obj))
+    {
+        Py_complex c = PyComplex_AsCComplex(obj);
+        p.x = saturate_cast<double>(c.real);
+        p.y = saturate_cast<double>(c.imag);
+        return true;
+    }
+    return PyArg_ParseTuple(obj, "dd", &p.x, &p.y) > 0;
+}
+
+
 template<>
 PyObject* pyopencv_from(const Point& p)
 {
@@ -841,7 +864,7 @@ template<typename _Tp> struct pyopencvVecConverter
     }
 };
 
-template <typename _Tp>
+template<typename _Tp>
 bool pyopencv_to(PyObject* obj, std::vector<_Tp>& value, const ArgInfo info)
 {
     return pyopencvVecConverter<_Tp>::to(obj, value, info);
@@ -899,9 +922,9 @@ template<typename _Tp> static inline PyObject* pyopencv_from_generic_vec(const s
 
 template<typename _Tp> struct pyopencvVecConverter<std::vector<_Tp> >
 {
-    static bool to(PyObject* obj, std::vector<std::vector<_Tp> >& value, const char* name="<unknown>")
+    static bool to(PyObject* obj, std::vector<std::vector<_Tp> >& value, const ArgInfo info)
     {
-        return pyopencv_to_generic_vec(obj, value, name);
+        return pyopencv_to_generic_vec(obj, value, info);
     }
 
     static PyObject* from(const std::vector<std::vector<_Tp> >& value)
@@ -1054,10 +1077,16 @@ bool pyopencv_to(PyObject *o, cv::flann::IndexParams& p, const char *name)
     return ok;
 }
 
+template<>
+bool pyopencv_to(PyObject* obj, cv::flann::SearchParams & value, const char * name)
+{
+    return pyopencv_to<cv::flann::IndexParams>(obj, value, name);
+}
+
 template <typename T>
 bool pyopencv_to(PyObject *o, Ptr<T>& p, const char *name)
 {
-    p = new T();
+    p = makePtr<T>();
     return pyopencv_to(o, *p, name);
 }
 
diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py
index 816a386c0..c5df21a39 100755
--- a/modules/python/src2/gen2.py
+++ b/modules/python/src2/gen2.py
@@ -13,20 +13,24 @@ ignored_arg_types = ["RNG*"]
 
 gen_template_check_self = Template("""    if(!PyObject_TypeCheck(self, &pyopencv_${name}_Type))
         return failmsgp("Incorrect type of self (must be '${name}' or its derivative)");
-    $cname* _self_ = ${amp}((pyopencv_${name}_t*)self)->v;
+    $cname* _self_ = ${amp}((pyopencv_${name}_t*)self)->v${get};
 """)
 
 gen_template_check_self_algo = Template("""    if(!PyObject_TypeCheck(self, &pyopencv_${name}_Type))
         return failmsgp("Incorrect type of self (must be '${name}' or its derivative)");
-    $cname* _self_ = dynamic_cast<$cname*>(${amp}((pyopencv_${name}_t*)self)->v.obj);
+    $cname* _self_ = dynamic_cast<$cname*>(${amp}((pyopencv_${name}_t*)self)->v.get());
 """)
 
-gen_template_call_constructor = Template("""self = PyObject_NEW(pyopencv_${name}_t, &pyopencv_${name}_Type);
+gen_template_call_constructor_prelude = Template("""self = PyObject_NEW(pyopencv_${name}_t, &pyopencv_${name}_Type);
         new (&(self->v)) Ptr<$cname>(); // init Ptr with placement new
-        if(self) ERRWRAP2(self->v = new $cname""")
+        if(self) """)
 
-gen_template_simple_call_constructor = Template("""self = PyObject_NEW(pyopencv_${name}_t, &pyopencv_${name}_Type);
-        if(self) ERRWRAP2(self->v = $cname""")
+gen_template_call_constructor = Template("""self->v.reset(new ${cname}${args})""")
+
+gen_template_simple_call_constructor_prelude = Template("""self = PyObject_NEW(pyopencv_${name}_t, &pyopencv_${name}_Type);
+        if(self) """)
+
+gen_template_simple_call_constructor = Template("""self->v = ${cname}${args}""")
 
 gen_template_parse_args = Template("""const char* keywords[] = { $kw_list, NULL };
     if( PyArg_ParseTupleAndKeywords(args, kw, "$fmtspec", (char**)keywords, $parse_arglist)$code_cvt )""")
@@ -34,7 +38,7 @@ gen_template_parse_args = Template("""const char* keywords[] = { $kw_list, NULL
 gen_template_func_body = Template("""$code_decl
     $code_parse
     {
-        $code_fcall;
+        ${code_prelude}ERRWRAP2($code_fcall);
         $code_ret;
     }
 """)
@@ -124,7 +128,7 @@ template<> bool pyopencv_to(PyObject* src, Ptr<${cname}>& dst, const char* name)
         failmsg("Expected ${cname} for argument '%%s'", name);
         return false;
     }
-    dst = ((pyopencv_${name}_t*)src)->v;
+    dst = ((pyopencv_${name}_t*)src)->v.dynamicCast<${cname}>();
     return true;
 }
 
@@ -187,7 +191,7 @@ static PyObject* pyopencv_${name}_get_${member}(pyopencv_${name}_t* p, void *clo
 gen_template_get_prop_algo = Template("""
 static PyObject* pyopencv_${name}_get_${member}(pyopencv_${name}_t* p, void *closure)
 {
-    return pyopencv_from(dynamic_cast<$cname*>(p->v.obj)${access}${member});
+    return pyopencv_from(dynamic_cast<$cname*>(p->v.get())${access}${member});
 }
 """)
 
@@ -211,7 +215,7 @@ static int pyopencv_${name}_set_${member}(pyopencv_${name}_t* p, PyObject *value
         PyErr_SetString(PyExc_TypeError, "Cannot delete the ${member} attribute");
         return -1;
     }
-    return pyopencv_to(value, dynamic_cast<$cname*>(p->v.obj)${access}${member}) ? 0 : -1;
+    return pyopencv_to(value, dynamic_cast<$cname*>(p->v.get())${access}${member}) ? 0 : -1;
 }
 """)
 
@@ -559,39 +563,22 @@ class FuncInfo(object):
         if self.classname:
             selfinfo = all_classes[self.classname]
             if not self.isconstructor:
-                amp = ""
-                if selfinfo.issimple:
-                    amp = "&"
+                amp = "&" if selfinfo.issimple else ""
                 if selfinfo.isalgorithm:
                     code += gen_template_check_self_algo.substitute(name=selfinfo.name, cname=selfinfo.cname, amp=amp)
                 else:
-                    code += gen_template_check_self.substitute(name=selfinfo.name, cname=selfinfo.cname, amp=amp)
+                    get = "" if selfinfo.issimple else ".get()"
+                    code += gen_template_check_self.substitute(name=selfinfo.name, cname=selfinfo.cname, amp=amp, get=get)
                 fullname = selfinfo.wname + "." + fullname
 
         all_code_variants = []
         declno = -1
         for v in self.variants:
             code_decl = ""
-            code_fcall = ""
             code_ret = ""
             code_cvt_list = []
 
-            if self.isconstructor:
-                code_decl += "    pyopencv_%s_t* self = 0;\n" % selfinfo.name
-                templ = gen_template_call_constructor
-                if selfinfo.issimple:
-                    templ = gen_template_simple_call_constructor
-                code_fcall = templ.substitute(name=selfinfo.name, cname=selfinfo.cname)
-            else:
-                code_fcall = "ERRWRAP2( "
-                if v.rettype:
-                    code_decl += "    " + v.rettype + " retval;\n"
-                    code_fcall += "retval = "
-                if ismethod:
-                    code_fcall += "_self_->" + self.cname
-                else:
-                    code_fcall += self.cname
-            code_fcall += "("
+            code_args = "("
             all_cargs = []
             parse_arglist = []
 
@@ -605,9 +592,9 @@ class FuncInfo(object):
                     if not defval and a.tp.endswith("*"):
                         defval = 0
                     assert defval
-                    if not code_fcall.endswith("("):
-                        code_fcall += ", "
-                    code_fcall += defval
+                    if not code_args.endswith("("):
+                        code_args += ", "
+                    code_args += defval
                     all_cargs.append([[None, ""], ""])
                     continue
                 tp1 = tp = a.tp
@@ -649,11 +636,34 @@ class FuncInfo(object):
                 else:
                     code_decl += "    %s %s;\n" % (amapping[0], a.name)
 
-                if not code_fcall.endswith("("):
-                    code_fcall += ", "
-                code_fcall += amp + a.name
+                if not code_args.endswith("("):
+                    code_args += ", "
+                code_args += amp + a.name
 
-            code_fcall += "))"
+            code_args += ")"
+
+            if self.isconstructor:
+                code_decl += "    pyopencv_%s_t* self = 0;\n" % selfinfo.name
+                if selfinfo.issimple:
+                    templ_prelude = gen_template_simple_call_constructor_prelude
+                    templ = gen_template_simple_call_constructor
+                else:
+                    templ_prelude = gen_template_call_constructor_prelude
+                    templ = gen_template_call_constructor
+
+                code_prelude = templ_prelude.substitute(name=selfinfo.name, cname=selfinfo.cname)
+                code_fcall = templ.substitute(name=selfinfo.name, cname=selfinfo.cname, args=code_args)
+            else:
+                code_prelude = ""
+                code_fcall = ""
+                if v.rettype:
+                    code_decl += "    " + v.rettype + " retval;\n"
+                    code_fcall += "retval = "
+                if ismethod:
+                    code_fcall += "_self_->" + self.cname
+                else:
+                    code_fcall += self.cname
+                code_fcall += code_args
 
             if code_cvt_list:
                 code_cvt_list = [""] + code_cvt_list
@@ -706,7 +716,7 @@ class FuncInfo(object):
                     (fmtspec, ", ".join(["pyopencv_from(" + aname + ")" for aname, argno in v.py_outlist]))
 
             all_code_variants.append(gen_template_func_body.substitute(code_decl=code_decl,
-                code_parse=code_parse, code_fcall=code_fcall, code_ret=code_ret))
+                code_parse=code_parse, code_prelude=code_prelude, code_fcall=code_fcall, code_ret=code_ret))
 
         if len(all_code_variants)==1:
             # if the function/method has only 1 signature, then just put it
diff --git a/modules/python/test/calchist.py b/modules/python/test/calchist.py
index 287e22f91..dba6796b3 100755
--- a/modules/python/test/calchist.py
+++ b/modules/python/test/calchist.py
@@ -51,4 +51,3 @@ if __name__ == '__main__':
     cv.ShowImage("H-S Histogram", hs_histogram(src))
 
     cv.WaitKey(0)
-
diff --git a/modules/softcascade/doc/softcascade.rst b/modules/softcascade/doc/softcascade.rst
index a8e041732..4b3a44a36 100644
--- a/modules/softcascade/doc/softcascade.rst
+++ b/modules/softcascade/doc/softcascade.rst
@@ -9,4 +9,4 @@ softcascade. Soft Cascade object detection and training.
 
     softcascade_detector
     softcascade_training
-    softcascade_cuda
\ No newline at end of file
+    softcascade_cuda
diff --git a/modules/softcascade/doc/softcascade_training.rst b/modules/softcascade/doc/softcascade_training.rst
index fe829c54c..cc83b38e4 100644
--- a/modules/softcascade/doc/softcascade_training.rst
+++ b/modules/softcascade/doc/softcascade_training.rst
@@ -140,4 +140,4 @@ Compute feature on integral channel image.
 
     :param si an index of sample.
 
-    :param fs a channel matrix.
\ No newline at end of file
+    :param fs a channel matrix.
diff --git a/modules/softcascade/misc/roc_test.py b/modules/softcascade/misc/roc_test.py
index f91630aab..30361f297 100755
--- a/modules/softcascade/misc/roc_test.py
+++ b/modules/softcascade/misc/roc_test.py
@@ -100,4 +100,4 @@ if __name__ == "__main__":
         fppi, miss_rate = sft.computeROC(confidenses, tp, nannotated, nframes, ignored)
         sft.plotLogLog(fppi, miss_rate, plot_colors[idx])
 
-    sft.showPlot(args.output, args.labels)
\ No newline at end of file
+    sft.showPlot(args.output, args.labels)
diff --git a/modules/softcascade/misc/scale_caltech.py b/modules/softcascade/misc/scale_caltech.py
index 29d9707c8..07e7d06bb 100755
--- a/modules/softcascade/misc/scale_caltech.py
+++ b/modules/softcascade/misc/scale_caltech.py
@@ -139,4 +139,4 @@ if __name__ == "__main__":
         path, ext = os.path.splitext(each)
         path = path + ".seq"
         print path
-        extractPositive(open(each), path, opath, options.octave, options.min_possible)
\ No newline at end of file
+        extractPositive(open(each), path, opath, options.octave, options.min_possible)
diff --git a/modules/softcascade/misc/scale_inria.py b/modules/softcascade/misc/scale_inria.py
index b08a4bc7f..ebfc6f366 100755
--- a/modules/softcascade/misc/scale_inria.py
+++ b/modules/softcascade/misc/scale_inria.py
@@ -136,4 +136,4 @@ if __name__ == "__main__":
             cv2.imwrite(os.path.join(cneg_path, out_name), img)
             idx = idx + 1
             print "." ,
-            sys.stdout.flush()
\ No newline at end of file
+            sys.stdout.flush()
diff --git a/modules/softcascade/misc/sft.py b/modules/softcascade/misc/sft.py
index cefbb3700..6c82d71b6 100644
--- a/modules/softcascade/misc/sft.py
+++ b/modules/softcascade/misc/sft.py
@@ -278,4 +278,4 @@ def parse_caltech(f):
         for idx, (p, pv, oc) in enumerate(zip(*[pos, posv, occl])):
             annotations[start + idx].append((type, p, oc, pv))
 
-    return annotations
\ No newline at end of file
+    return annotations
diff --git a/modules/softcascade/perf/perf_cuda_softcascade.cpp b/modules/softcascade/perf/perf_cuda_softcascade.cpp
index 1c4061f65..09f1aad0b 100644
--- a/modules/softcascade/perf/perf_cuda_softcascade.cpp
+++ b/modules/softcascade/perf/perf_cuda_softcascade.cpp
@@ -233,4 +233,4 @@ RUN_GPU(SCascadeTest, detectStream)
 
 NO_CPU(SCascadeTest, detectStream)
 
-#undef SC_PERF_TEST_P
\ No newline at end of file
+#undef SC_PERF_TEST_P
diff --git a/modules/softcascade/perf/perf_main.cpp b/modules/softcascade/perf/perf_main.cpp
index f045c4b46..9a3869f5e 100644
--- a/modules/softcascade/perf/perf_main.cpp
+++ b/modules/softcascade/perf/perf_main.cpp
@@ -41,5 +41,13 @@
 //M*/
 
 #include "perf_precomp.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
 
-CV_PERF_TEST_MAIN(softcascade)
+static const char * impls[] = {
+#ifdef HAVE_CUDA
+    "cuda",
+#endif
+    "plain"
+};
+
+CV_PERF_TEST_MAIN_WITH_IMPLS(softcascade, impls, perf::printCudaInfo())
diff --git a/modules/softcascade/perf/perf_precomp.cpp b/modules/softcascade/perf/perf_precomp.cpp
deleted file mode 100644
index cc728b050..000000000
--- a/modules/softcascade/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2008-2013, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and / or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/softcascade/perf/perf_softcascade.cpp b/modules/softcascade/perf/perf_softcascade.cpp
index c8f9fab9e..8165bd981 100644
--- a/modules/softcascade/perf/perf_softcascade.cpp
+++ b/modules/softcascade/perf/perf_softcascade.cpp
@@ -44,4 +44,4 @@ PERF_TEST_P(detect, SoftCascadeDetector,
     extractRacts(objectBoxes, rects);
     std::sort(rects.begin(), rects.end(), perf::comparators::RectLess());
     SANITY_CHECK(rects);
-}
\ No newline at end of file
+}
diff --git a/modules/softcascade/src/cuda/icf-sc.cu b/modules/softcascade/src/cuda/icf-sc.cu
index 3003e6ccb..b119209db 100644
--- a/modules/softcascade/src/cuda/icf-sc.cu
+++ b/modules/softcascade/src/cuda/icf-sc.cu
@@ -137,10 +137,10 @@ typedef unsigned char uchar;
     template<bool isDefaultNum>
     __device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy)
     {
-        const float angle_quantum = CV_PI / 6.f;
+        const float angle_quantum = CV_PI_F / 6.f;
         float angle = atan2(dx, dy) + (angle_quantum / 2.f);
 
-        if (angle < 0) angle += CV_PI;
+        if (angle < 0) angle += CV_PI_F;
 
         const float angle_scaling = 1.f / angle_quantum;
         return static_cast<int>(angle * angle_scaling) % 6;
@@ -174,8 +174,8 @@ typedef unsigned char uchar;
         {
             int i = 3;
             float2 bin_vector_i;
-            bin_vector_i.x = ::cos(i * (CV_PI / 6.f));
-            bin_vector_i.y = ::sin(i * (CV_PI / 6.f));
+            bin_vector_i.x = ::cos(i * (CV_PI_F / 6.f));
+            bin_vector_i.y = ::sin(i * (CV_PI_F / 6.f));
 
             const float dot_product = fabs(dx * bin_vector_i.x + dy * bin_vector_i.y);
             if(dot_product > max_dot)
diff --git a/modules/softcascade/src/detector.cpp b/modules/softcascade/src/detector.cpp
index c44b96e55..ad74614f4 100644
--- a/modules/softcascade/src/detector.cpp
+++ b/modules/softcascade/src/detector.cpp
@@ -593,4 +593,4 @@ void cv::softcascade::Detector::detect(InputArray _image, InputArray _rois,  Out
         rectPtr[i] = (*it).bb();
         confPtr[i] = (*it).confidence;
     }
-}
\ No newline at end of file
+}
diff --git a/modules/softcascade/src/detector_cuda.cpp b/modules/softcascade/src/detector_cuda.cpp
index 6119620e2..ef19fad05 100644
--- a/modules/softcascade/src/detector_cuda.cpp
+++ b/modules/softcascade/src/detector_cuda.cpp
@@ -58,7 +58,7 @@ cv::softcascade::ChannelsProcessor::ChannelsProcessor() { throw_no_cuda(); }
  cv::softcascade::ChannelsProcessor::~ChannelsProcessor() { throw_no_cuda(); }
 
 cv::Ptr<cv::softcascade::ChannelsProcessor> cv::softcascade::ChannelsProcessor::create(const int, const int, const int)
-{ throw_no_cuda(); return cv::Ptr<cv::softcascade::ChannelsProcessor>(0); }
+{ throw_no_cuda(); return cv::Ptr<cv::softcascade::ChannelsProcessor>(); }
 
 #else
 
@@ -200,7 +200,7 @@ struct cv::softcascade::SCascade::Fields
             }
 
             fns = fns[SC_TREES];
-            if (fn.empty()) return false;
+            if (fn.empty()) return 0;
 
             // for each stage (~ decision tree with H = 2)
             FileNodeIterator st = fns.begin(), st_end = fns.end();
@@ -594,7 +594,7 @@ private:
 cv::Ptr<cv::softcascade::ChannelsProcessor> cv::softcascade::ChannelsProcessor::create(const int s, const int b, const int m)
 {
     CV_Assert((m && SEPARABLE));
-    return cv::Ptr<cv::softcascade::ChannelsProcessor>(new SeparablePreprocessor(s, b));
+    return makePtr<SeparablePreprocessor>(s, b);
 }
 
 cv::softcascade::ChannelsProcessor::ChannelsProcessor() { }
diff --git a/modules/softcascade/src/octave.cpp b/modules/softcascade/src/octave.cpp
index 935898d56..5c5aa2eec 100644
--- a/modules/softcascade/src/octave.cpp
+++ b/modules/softcascade/src/octave.cpp
@@ -321,7 +321,7 @@ void BoostedSoftCascadeOctave::traverse(const CvBoostTree* tree, cv::FileStorage
 
 
     fs << "}";
-    
+
     delete [] leafs;
 }
 
diff --git a/modules/softcascade/src/precomp.cpp b/modules/softcascade/src/precomp.cpp
deleted file mode 100644
index 62c236d08..000000000
--- a/modules/softcascade/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2008-2013, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and / or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/softcascade/src/softcascade_init.cpp b/modules/softcascade/src/softcascade_init.cpp
index b5bac41fa..6f3c8b6a5 100644
--- a/modules/softcascade/src/softcascade_init.cpp
+++ b/modules/softcascade/src/softcascade_init.cpp
@@ -58,9 +58,9 @@ CV_INIT_ALGORITHM(SCascade, "CascadeDetector.SCascade",
 
 bool initModule_softcascade(void)
 {
-    Ptr<Algorithm> sc = createSCascade_hidden();
-    Ptr<Algorithm> sc1 = createDetector_hidden();
+    Ptr<Algorithm> sc = createSCascade_ptr_hidden();
+    Ptr<Algorithm> sc1 = createDetector_ptr_hidden();
     return (sc1->info() != 0) && (sc->info() != 0);
 }
 
-} }
\ No newline at end of file
+} }
diff --git a/modules/softcascade/test/test_cuda_softcascade.cpp b/modules/softcascade/test/test_cuda_softcascade.cpp
index 4d97f9208..5973d25f2 100644
--- a/modules/softcascade/test/test_cuda_softcascade.cpp
+++ b/modules/softcascade/test/test_cuda_softcascade.cpp
@@ -313,4 +313,4 @@ INSTANTIATE_TEST_CASE_P(cuda_accelerated, SCascadeTestAll, testing::Combine( ALL
                     testing::Values(Fixture("cascades/inria_caltech-17.01.2013.xml", 7),
                                     Fixture("cascades/sc_cvpr_2012_to_opencv_new_format.xml", 1291))));
 
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/softcascade/test/test_main.cpp b/modules/softcascade/test/test_main.cpp
index ab0e8615b..d3999d483 100644
--- a/modules/softcascade/test/test_main.cpp
+++ b/modules/softcascade/test/test_main.cpp
@@ -42,4 +42,4 @@
 
 #include "test_precomp.hpp"
 
-CV_TEST_MAIN("cv")
\ No newline at end of file
+CV_TEST_MAIN("cv")
diff --git a/modules/softcascade/test/test_precomp.cpp b/modules/softcascade/test/test_precomp.cpp
deleted file mode 100644
index 278cdd03c..000000000
--- a/modules/softcascade/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2008-2013, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and / or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/softcascade/test/test_precomp.hpp b/modules/softcascade/test/test_precomp.hpp
index 80bff6536..4a3974478 100644
--- a/modules/softcascade/test/test_precomp.hpp
+++ b/modules/softcascade/test/test_precomp.hpp
@@ -51,10 +51,11 @@
 #ifndef __OPENCV_TEST_PRECOMP_HPP__
 #define __OPENCV_TEST_PRECOMP_HPP__
 
-# include "opencv2/ts.hpp"
-# include "opencv2/softcascade.hpp"
-# include "opencv2/imgproc.hpp"
-# include "opencv2/highgui.hpp"
-# include "utility.hpp"
+#include "opencv2/ts.hpp"
+#include "opencv2/softcascade.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+#include "cvconfig.h"
+#include "utility.hpp"
 
 #endif
diff --git a/modules/softcascade/test/test_softcascade.cpp b/modules/softcascade/test/test_softcascade.cpp
index 9004264fc..59d5c3c4a 100644
--- a/modules/softcascade/test/test_softcascade.cpp
+++ b/modules/softcascade/test/test_softcascade.cpp
@@ -138,4 +138,4 @@ TEST(SoftCascadeDetector, detectEmptyRoi)
     cascade.detect(colored, cv::Mat::zeros(colored.size(), CV_8UC1), objects);
 
     ASSERT_EQ(0, (int)objects.size());
-}
\ No newline at end of file
+}
diff --git a/modules/softcascade/test/test_training.cpp b/modules/softcascade/test/test_training.cpp
index 2bd5ec66f..5ade81681 100644
--- a/modules/softcascade/test/test_training.cpp
+++ b/modules/softcascade/test/test_training.cpp
@@ -157,4 +157,4 @@ TEST(SoftCascade, training)
     ASSERT_FALSE(fn.empty());
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/stitching/CMakeLists.txt b/modules/stitching/CMakeLists.txt
index fa9adfc8e..389285a56 100644
--- a/modules/stitching/CMakeLists.txt
+++ b/modules/stitching/CMakeLists.txt
@@ -1,4 +1,3 @@
 set(the_description "Images stitching")
 ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect
                   OPTIONAL opencv_gpu opencv_gpuarithm opencv_gpufilters opencv_gpufeatures2d opencv_nonfree)
-
diff --git a/modules/stitching/doc/autocalib.rst b/modules/stitching/doc/autocalib.rst
index a9c0c35fd..021e7b394 100644
--- a/modules/stitching/doc/autocalib.rst
+++ b/modules/stitching/doc/autocalib.rst
@@ -30,4 +30,3 @@ Estimates focal lengths for each given camera.
     :param pairwise_matches: Matches between all image pairs.
 
     :param focals: Estimated focal lengths for each camera.
-
diff --git a/modules/stitching/doc/exposure_compensation.rst b/modules/stitching/doc/exposure_compensation.rst
index d27701e25..eff98c01c 100644
--- a/modules/stitching/doc/exposure_compensation.rst
+++ b/modules/stitching/doc/exposure_compensation.rst
@@ -108,4 +108,3 @@ Exposure compensator which tries to remove exposure related artifacts by adjusti
     };
 
 .. seealso:: :ocv:class:`detail::ExposureCompensator`
-
diff --git a/modules/stitching/doc/high_level.rst b/modules/stitching/doc/high_level.rst
index dec3362d3..955f8f248 100644
--- a/modules/stitching/doc/high_level.rst
+++ b/modules/stitching/doc/high_level.rst
@@ -88,6 +88,11 @@ High level image stitcher. It's possible to use this class without being aware o
         /* hidden */
     };
 
+.. note::
+
+   * A basic example on image stitching can be found at opencv_source_code/samples/cpp/stitching.cpp
+   * A detailed example on image stitching can be found at opencv_source_code/samples/cpp/stitching_detailed.cpp
+
 Stitcher::createDefault
 -----------------------
 Creates a stitcher with the default parameters.
@@ -203,4 +208,3 @@ Spherical warper factory class. ::
     };
 
 .. seealso:: :ocv:class:`detail::SphericalWarper`
-
diff --git a/modules/stitching/doc/motion_estimation.rst b/modules/stitching/doc/motion_estimation.rst
index 6cda27522..5859f3e02 100644
--- a/modules/stitching/doc/motion_estimation.rst
+++ b/modules/stitching/doc/motion_estimation.rst
@@ -18,12 +18,11 @@ Rotation estimator base class. It takes features of all images, pairwise matches
     public:
         virtual ~Estimator() {}
 
-        void operator ()(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches,
-                         std::vector<CameraParams> &cameras)
-            { estimate(features, pairwise_matches, cameras); }
+        bool operator ()(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches,
+                         std::vector<CameraParams> &cameras);
 
     protected:
-        virtual void estimate(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches,
+        virtual bool estimate(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches,
                               std::vector<CameraParams> &cameras) = 0;
     };
 
@@ -32,7 +31,7 @@ detail::Estimator::operator()
 
 Estimates camera parameters.
 
-.. ocv:function:: detail::Estimator::operator ()(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches, std::vector<CameraParams> &cameras)
+.. ocv:function:: bool detail::Estimator::operator ()(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches, std::vector<CameraParams> &cameras)
 
     :param features: Features of images
 
@@ -40,12 +39,14 @@ Estimates camera parameters.
 
     :param cameras: Estimated camera parameters
 
+    :return: True in case of success, false otherwise
+
 detail::Estimator::estimate
 ---------------------------
 
 This method must implement camera parameters estimation logic in order to make the wrapper `detail::Estimator::operator()`_ work.
 
-.. ocv:function:: void detail::Estimator::estimate(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches, std::vector<CameraParams> &cameras)
+.. ocv:function:: bool detail::Estimator::estimate(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches, std::vector<CameraParams> &cameras)
 
     :param features: Features of images
 
@@ -53,6 +54,8 @@ This method must implement camera parameters estimation logic in order to make t
 
     :param cameras: Estimated camera parameters
 
+    :return: True in case of success, false otherwise
+
 detail::HomographyBasedEstimator
 --------------------------------
 .. ocv:class:: detail::HomographyBasedEstimator : public detail::Estimator
diff --git a/modules/stitching/doc/stitching.rst b/modules/stitching/doc/stitching.rst
index 983b310e7..f071a2350 100644
--- a/modules/stitching/doc/stitching.rst
+++ b/modules/stitching/doc/stitching.rst
@@ -15,5 +15,3 @@ stitching. Images stitching
     seam_estimation
     exposure_compensation
     blenders
-
-
diff --git a/modules/stitching/doc/warpers.rst b/modules/stitching/doc/warpers.rst
index b6826a01a..1025ffa0c 100644
--- a/modules/stitching/doc/warpers.rst
+++ b/modules/stitching/doc/warpers.rst
@@ -260,4 +260,4 @@ Construct an instance of the cylindrical warper class.
 
 .. ocv:function:: void detail::CylindricalWarper::CylindricalWarper(float scale)
 
-    :param scale: Projected image scale multiplier
\ No newline at end of file
+    :param scale: Projected image scale multiplier
diff --git a/modules/stitching/include/opencv2/stitching.hpp b/modules/stitching/include/opencv2/stitching.hpp
index 3ccd2878a..2c48f2f2a 100644
--- a/modules/stitching/include/opencv2/stitching.hpp
+++ b/modules/stitching/include/opencv2/stitching.hpp
@@ -59,7 +59,13 @@ class CV_EXPORTS Stitcher
 {
 public:
     enum { ORIG_RESOL = -1 };
-    enum Status { OK, ERR_NEED_MORE_IMGS };
+    enum Status
+    {
+        OK = 0,
+        ERR_NEED_MORE_IMGS = 1,
+        ERR_HOMOGRAPHY_EST_FAIL = 2,
+        ERR_CAMERA_PARAMS_ADJUST_FAIL = 3
+    };
 
     // Creates stitcher with default parameters
     static Stitcher createDefault(bool try_use_gpu = false);
@@ -138,7 +144,7 @@ private:
     Stitcher() {}
 
     Status matchImages();
-    void estimateCameraParams();
+    Status estimateCameraParams();
 
     double registr_resol_;
     double seam_est_resol_;
diff --git a/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp b/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp
index 581a0292d..c0e446c60 100644
--- a/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp
+++ b/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp
@@ -56,12 +56,14 @@ class CV_EXPORTS Estimator
 public:
     virtual ~Estimator() {}
 
-    void operator ()(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches,
+    bool operator ()(const std::vector<ImageFeatures> &features,
+                     const std::vector<MatchesInfo> &pairwise_matches,
                      std::vector<CameraParams> &cameras)
-        { estimate(features, pairwise_matches, cameras); }
+        { return estimate(features, pairwise_matches, cameras); }
 
 protected:
-    virtual void estimate(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches,
+    virtual bool estimate(const std::vector<ImageFeatures> &features,
+                          const std::vector<MatchesInfo> &pairwise_matches,
                           std::vector<CameraParams> &cameras) = 0;
 };
 
@@ -73,8 +75,9 @@ public:
         : is_focals_estimated_(is_focals_estimated) {}
 
 private:
-    void estimate(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches,
-                  std::vector<CameraParams> &cameras);
+    virtual bool estimate(const std::vector<ImageFeatures> &features,
+                          const std::vector<MatchesInfo> &pairwise_matches,
+                          std::vector<CameraParams> &cameras);
 
     bool is_focals_estimated_;
 };
@@ -107,7 +110,7 @@ protected:
     }
 
     // Runs bundle adjustment
-    virtual void estimate(const std::vector<ImageFeatures> &features,
+    virtual bool estimate(const std::vector<ImageFeatures> &features,
                           const std::vector<MatchesInfo> &pairwise_matches,
                           std::vector<CameraParams> &cameras);
 
@@ -193,11 +196,14 @@ void CV_EXPORTS waveCorrect(std::vector<Mat> &rmats, WaveCorrectKind kind);
 String CV_EXPORTS matchesGraphAsString(std::vector<String> &pathes, std::vector<MatchesInfo> &pairwise_matches,
                                             float conf_threshold);
 
-std::vector<int> CV_EXPORTS leaveBiggestComponent(std::vector<ImageFeatures> &features, std::vector<MatchesInfo> &pairwise_matches,
-                                                  float conf_threshold);
+std::vector<int> CV_EXPORTS leaveBiggestComponent(
+        std::vector<ImageFeatures> &features,
+        std::vector<MatchesInfo> &pairwise_matches,
+        float conf_threshold);
 
-void CV_EXPORTS findMaxSpanningTree(int num_images, const std::vector<MatchesInfo> &pairwise_matches,
-                                    Graph &span_tree, std::vector<int> &centers);
+void CV_EXPORTS findMaxSpanningTree(
+        int num_images, const std::vector<MatchesInfo> &pairwise_matches,
+        Graph &span_tree, std::vector<int> &centers);
 
 } // namespace detail
 } // namespace cv
diff --git a/modules/stitching/include/opencv2/stitching/warpers.hpp b/modules/stitching/include/opencv2/stitching/warpers.hpp
index 9d62d356b..37cf84b7c 100644
--- a/modules/stitching/include/opencv2/stitching/warpers.hpp
+++ b/modules/stitching/include/opencv2/stitching/warpers.hpp
@@ -58,33 +58,33 @@ public:
 class PlaneWarper : public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::PlaneWarper(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::PlaneWarper>(scale); }
 };
 
 
 class CylindricalWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::CylindricalWarper(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::CylindricalWarper>(scale); }
 };
 
 
 class SphericalWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::SphericalWarper(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::SphericalWarper>(scale); }
 };
 
 class FisheyeWarper : public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::FisheyeWarper(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::FisheyeWarper>(scale); }
 };
 
 class StereographicWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::StereographicWarper(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::StereographicWarper>(scale); }
 };
 
 class CompressedRectilinearWarper: public WarperCreator
@@ -95,7 +95,7 @@ public:
     {
         a = A; b = B;
     }
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::CompressedRectilinearWarper(scale, a, b); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::CompressedRectilinearWarper>(scale, a, b); }
 };
 
 class CompressedRectilinearPortraitWarper: public WarperCreator
@@ -106,7 +106,7 @@ public:
     {
         a = A; b = B;
     }
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::CompressedRectilinearPortraitWarper(scale, a, b); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::CompressedRectilinearPortraitWarper>(scale, a, b); }
 };
 
 class PaniniWarper: public WarperCreator
@@ -117,7 +117,7 @@ public:
     {
         a = A; b = B;
     }
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::PaniniWarper(scale, a, b); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::PaniniWarper>(scale, a, b); }
 };
 
 class PaniniPortraitWarper: public WarperCreator
@@ -128,19 +128,19 @@ public:
     {
         a = A; b = B;
     }
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::PaniniPortraitWarper(scale, a, b); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::PaniniPortraitWarper>(scale, a, b); }
 };
 
 class MercatorWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::MercatorWarper(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::MercatorWarper>(scale); }
 };
 
 class TransverseMercatorWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::TransverseMercatorWarper(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::TransverseMercatorWarper>(scale); }
 };
 
 
@@ -149,21 +149,21 @@ public:
 class PlaneWarperGpu: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::PlaneWarperGpu(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::PlaneWarperGpu>(scale); }
 };
 
 
 class CylindricalWarperGpu: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::CylindricalWarperGpu(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::CylindricalWarperGpu>(scale); }
 };
 
 
 class SphericalWarperGpu: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return new detail::SphericalWarperGpu(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::SphericalWarperGpu>(scale); }
 };
 #endif
 
diff --git a/modules/stitching/perf/perf_precomp.cpp b/modules/stitching/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/stitching/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/stitching/perf/perf_stich.cpp b/modules/stitching/perf/perf_stich.cpp
index e42dea968..8cf89651f 100644
--- a/modules/stitching/perf/perf_stich.cpp
+++ b/modules/stitching/perf/perf_stich.cpp
@@ -34,12 +34,12 @@ PERF_TEST_P(stitch, a123, TEST_DETECTORS)
     imgs.push_back( imread( getDataPath("stitching/a3.png") ) );
 
     Ptr<detail::FeaturesFinder> featuresFinder = GetParam() == "orb"
-            ? (detail::FeaturesFinder*)new detail::OrbFeaturesFinder()
-            : (detail::FeaturesFinder*)new detail::SurfFeaturesFinder();
+            ? Ptr<detail::FeaturesFinder>(new detail::OrbFeaturesFinder())
+            : Ptr<detail::FeaturesFinder>(new detail::SurfFeaturesFinder());
 
     Ptr<detail::FeaturesMatcher> featuresMatcher = GetParam() == "orb"
-            ? new detail::BestOf2NearestMatcher(false, ORB_MATCH_CONFIDENCE)
-            : new detail::BestOf2NearestMatcher(false, SURF_MATCH_CONFIDENCE);
+            ? makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE)
+            : makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE);
 
     declare.time(30 * 20).iterations(20);
 
@@ -48,7 +48,7 @@ PERF_TEST_P(stitch, a123, TEST_DETECTORS)
         Stitcher stitcher = Stitcher::createDefault();
         stitcher.setFeaturesFinder(featuresFinder);
         stitcher.setFeaturesMatcher(featuresMatcher);
-        stitcher.setWarper(new SphericalWarper());
+        stitcher.setWarper(makePtr<SphericalWarper>());
         stitcher.setRegistrationResol(WORK_MEGAPIX);
 
         startTimer();
@@ -72,12 +72,12 @@ PERF_TEST_P(stitch, b12, TEST_DETECTORS)
     imgs.push_back( imread( getDataPath("stitching/b2.png") ) );
 
     Ptr<detail::FeaturesFinder> featuresFinder = GetParam() == "orb"
-            ? (detail::FeaturesFinder*)new detail::OrbFeaturesFinder()
-            : (detail::FeaturesFinder*)new detail::SurfFeaturesFinder();
+            ? Ptr<detail::FeaturesFinder>(new detail::OrbFeaturesFinder())
+            : Ptr<detail::FeaturesFinder>(new detail::SurfFeaturesFinder());
 
     Ptr<detail::FeaturesMatcher> featuresMatcher = GetParam() == "orb"
-            ? new detail::BestOf2NearestMatcher(false, ORB_MATCH_CONFIDENCE)
-            : new detail::BestOf2NearestMatcher(false, SURF_MATCH_CONFIDENCE);
+            ? makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE)
+            : makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE);
 
     declare.time(30 * 20).iterations(20);
 
@@ -86,7 +86,7 @@ PERF_TEST_P(stitch, b12, TEST_DETECTORS)
         Stitcher stitcher = Stitcher::createDefault();
         stitcher.setFeaturesFinder(featuresFinder);
         stitcher.setFeaturesMatcher(featuresMatcher);
-        stitcher.setWarper(new SphericalWarper());
+        stitcher.setWarper(makePtr<SphericalWarper>());
         stitcher.setRegistrationResol(WORK_MEGAPIX);
 
         startTimer();
@@ -114,13 +114,13 @@ PERF_TEST_P( match, bestOf2Nearest, TEST_DETECTORS)
     Ptr<detail::FeaturesMatcher> matcher;
     if (GetParam() == "surf")
     {
-        finder = new detail::SurfFeaturesFinder();
-        matcher = new detail::BestOf2NearestMatcher(false, SURF_MATCH_CONFIDENCE);
+        finder = makePtr<detail::SurfFeaturesFinder>();
+        matcher = makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE);
     }
     else if (GetParam() == "orb")
     {
-        finder = new detail::OrbFeaturesFinder();
-        matcher = new detail::BestOf2NearestMatcher(false, ORB_MATCH_CONFIDENCE);
+        finder = makePtr<detail::OrbFeaturesFinder>();
+        matcher = makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE);
     }
     else
     {
@@ -169,13 +169,13 @@ PERF_TEST_P( matchVector, bestOf2NearestVectorFeatures, testing::Combine(
     int featuresVectorSize = get<1>(GetParam());
     if (detectorName == "surf")
     {
-        finder = new detail::SurfFeaturesFinder();
-        matcher = new detail::BestOf2NearestMatcher(false, SURF_MATCH_CONFIDENCE);
+        finder = makePtr<detail::SurfFeaturesFinder>();
+        matcher = makePtr<detail::BestOf2NearestMatcher>(false, SURF_MATCH_CONFIDENCE);
     }
     else if (detectorName == "orb")
     {
-        finder = new detail::OrbFeaturesFinder();
-        matcher = new detail::BestOf2NearestMatcher(false, ORB_MATCH_CONFIDENCE);
+        finder = makePtr<detail::OrbFeaturesFinder>();
+        matcher = makePtr<detail::BestOf2NearestMatcher>(false, ORB_MATCH_CONFIDENCE);
     }
     else
     {
diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp
index 0043b7348..254c6a149 100644
--- a/modules/stitching/src/blenders.cpp
+++ b/modules/stitching/src/blenders.cpp
@@ -50,13 +50,13 @@ static const float WEIGHT_EPS = 1e-5f;
 Ptr<Blender> Blender::createDefault(int type, bool try_gpu)
 {
     if (type == NO)
-        return new Blender();
+        return makePtr<Blender>();
     if (type == FEATHER)
-        return new FeatherBlender();
+        return makePtr<FeatherBlender>();
     if (type == MULTI_BAND)
-        return new MultiBandBlender(try_gpu);
+        return makePtr<MultiBandBlender>(try_gpu);
     CV_Error(Error::StsBadArg, "unsupported blending method");
-    return NULL;
+    return Ptr<Blender>();
 }
 
 
diff --git a/modules/stitching/src/exposure_compensate.cpp b/modules/stitching/src/exposure_compensate.cpp
index 0a22daeb7..78ce6d371 100644
--- a/modules/stitching/src/exposure_compensate.cpp
+++ b/modules/stitching/src/exposure_compensate.cpp
@@ -48,13 +48,13 @@ namespace detail {
 Ptr<ExposureCompensator> ExposureCompensator::createDefault(int type)
 {
     if (type == NO)
-        return new NoExposureCompensator();
+        return makePtr<NoExposureCompensator>();
     if (type == GAIN)
-        return new GainCompensator();
+        return makePtr<GainCompensator>();
     if (type == GAIN_BLOCKS)
-        return new BlocksGainCompensator();
+        return makePtr<BlocksGainCompensator>();
     CV_Error(Error::StsBadArg, "unsupported exposure compensation method");
-    return NULL;
+    return Ptr<ExposureCompensator>();
 }
 
 
diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp
index e8b7bf24f..0ad0c1f1d 100644
--- a/modules/stitching/src/matchers.cpp
+++ b/modules/stitching/src/matchers.cpp
@@ -155,8 +155,8 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
 
     matches_info.matches.clear();
 
-    Ptr<flann::IndexParams> indexParams = new flann::KDTreeIndexParams();
-    Ptr<flann::SearchParams> searchParams = new flann::SearchParams();
+    Ptr<flann::IndexParams> indexParams = makePtr<flann::KDTreeIndexParams>();
+    Ptr<flann::SearchParams> searchParams = makePtr<flann::SearchParams>();
 
     if (features2.descriptors.depth() == CV_8U)
     {
@@ -314,7 +314,7 @@ SurfFeaturesFinder::SurfFeaturesFinder(double hess_thresh, int num_octaves, int
     if (num_octaves_descr == num_octaves && num_layers_descr == num_layers)
     {
         surf = Algorithm::create<Feature2D>("Feature2D.SURF");
-        if( surf.empty() )
+        if( !surf )
             CV_Error( Error::StsNotImplemented, "OpenCV was built without SURF support" );
         surf->set("hessianThreshold", hess_thresh);
         surf->set("nOctaves", num_octaves);
@@ -325,7 +325,7 @@ SurfFeaturesFinder::SurfFeaturesFinder(double hess_thresh, int num_octaves, int
         detector_ = Algorithm::create<FeatureDetector>("Feature2D.SURF");
         extractor_ = Algorithm::create<DescriptorExtractor>("Feature2D.SURF");
 
-        if( detector_.empty() || extractor_.empty() )
+        if( !detector_ || !extractor_ )
             CV_Error( Error::StsNotImplemented, "OpenCV was built without SURF support" );
 
         detector_->set("hessianThreshold", hess_thresh);
@@ -349,7 +349,7 @@ void SurfFeaturesFinder::find(const Mat &image, ImageFeatures &features)
     {
         gray_image = image;
     }
-    if (surf.empty())
+    if (!surf)
     {
         detector_->detect(gray_image, features.keypoints);
         extractor_->compute(gray_image, features.keypoints, features.descriptors);
@@ -365,7 +365,7 @@ void SurfFeaturesFinder::find(const Mat &image, ImageFeatures &features)
 OrbFeaturesFinder::OrbFeaturesFinder(Size _grid_size, int n_features, float scaleFactor, int nlevels)
 {
     grid_size = _grid_size;
-    orb = new ORB(n_features * (99 + grid_size.area())/100/grid_size.area(), scaleFactor, nlevels);
+    orb = makePtr<ORB>(n_features * (99 + grid_size.area())/100/grid_size.area(), scaleFactor, nlevels);
 }
 
 void OrbFeaturesFinder::find(const Mat &image, ImageFeatures &features)
@@ -534,12 +534,12 @@ BestOf2NearestMatcher::BestOf2NearestMatcher(bool try_use_gpu, float match_conf,
 #ifdef HAVE_OPENCV_GPUFEATURES2D
     if (try_use_gpu && getCudaEnabledDeviceCount() > 0)
     {
-        impl_ = new GpuMatcher(match_conf);
+        impl_ = makePtr<GpuMatcher>(match_conf);
     }
     else
 #endif
     {
-        impl_ = new CpuMatcher(match_conf);
+        impl_ = makePtr<CpuMatcher>(match_conf);
     }
 
     is_thread_safe_ = impl_->isThreadSafe();
diff --git a/modules/stitching/src/motion_estimators.cpp b/modules/stitching/src/motion_estimators.cpp
index 1bb3df7bb..d6fd2275d 100644
--- a/modules/stitching/src/motion_estimators.cpp
+++ b/modules/stitching/src/motion_estimators.cpp
@@ -42,6 +42,7 @@
 
 #include "precomp.hpp"
 #include "opencv2/calib3d/calib3d_c.h"
+#include "opencv2/core/cvdef.h"
 
 using namespace cv;
 using namespace cv::detail;
@@ -101,8 +102,10 @@ void calcDeriv(const Mat &err1, const Mat &err2, double h, Mat res)
 namespace cv {
 namespace detail {
 
-void HomographyBasedEstimator::estimate(const std::vector<ImageFeatures> &features, const std::vector<MatchesInfo> &pairwise_matches,
-                                        std::vector<CameraParams> &cameras)
+bool HomographyBasedEstimator::estimate(
+        const std::vector<ImageFeatures> &features,
+        const std::vector<MatchesInfo> &pairwise_matches,
+        std::vector<CameraParams> &cameras)
 {
     LOGLN("Estimating rotations...");
 #if ENABLE_LOG
@@ -164,12 +167,13 @@ void HomographyBasedEstimator::estimate(const std::vector<ImageFeatures> &featur
     }
 
     LOGLN("Estimating rotations, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
+    return true;
 }
 
 
 //////////////////////////////////////////////////////////////////////////////
 
-void BundleAdjusterBase::estimate(const std::vector<ImageFeatures> &features,
+bool BundleAdjusterBase::estimate(const std::vector<ImageFeatures> &features,
                                   const std::vector<MatchesInfo> &pairwise_matches,
                                   std::vector<CameraParams> &cameras)
 {
@@ -245,6 +249,19 @@ void BundleAdjusterBase::estimate(const std::vector<ImageFeatures> &features,
     LOGLN_CHAT("Bundle adjustment, final RMS error: " << std::sqrt(err.dot(err) / total_num_matches_));
     LOGLN_CHAT("Bundle adjustment, iterations done: " << iter);
 
+    // Check if all camera parameters are valid
+    bool ok = true;
+    for (int i = 0; i < cam_params_.rows; ++i)
+    {
+        if (cvIsNaN(cam_params_.at<double>(i,0)))
+        {
+            ok = false;
+            break;
+        }
+    }
+    if (!ok)
+        return false;
+
     obtainRefinedCameraParams(cameras);
 
     // Normalize motion to center image
@@ -256,6 +273,7 @@ void BundleAdjusterBase::estimate(const std::vector<ImageFeatures> &features,
         cameras[i].R = R_inv * cameras[i].R;
 
     LOGLN_CHAT("Bundle adjustment, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
+    return true;
 }
 
 
@@ -851,4 +869,3 @@ void findMaxSpanningTree(int num_images, const std::vector<MatchesInfo> &pairwis
 
 } // namespace detail
 } // namespace cv
-
diff --git a/modules/stitching/src/seam_finders.cpp b/modules/stitching/src/seam_finders.cpp
index b3bebb27a..c453b5feb 100644
--- a/modules/stitching/src/seam_finders.cpp
+++ b/modules/stitching/src/seam_finders.cpp
@@ -1644,4 +1644,3 @@ void GraphCutSeamFinderGpu::setGraphWeightsColorGrad(
 
 } // namespace detail
 } // namespace cv
-
diff --git a/modules/stitching/src/stitcher.cpp b/modules/stitching/src/stitcher.cpp
index cef9fe4e9..00e02b07c 100644
--- a/modules/stitching/src/stitcher.cpp
+++ b/modules/stitching/src/stitcher.cpp
@@ -53,34 +53,34 @@ Stitcher Stitcher::createDefault(bool try_use_gpu)
     stitcher.setPanoConfidenceThresh(1);
     stitcher.setWaveCorrection(true);
     stitcher.setWaveCorrectKind(detail::WAVE_CORRECT_HORIZ);
-    stitcher.setFeaturesMatcher(new detail::BestOf2NearestMatcher(try_use_gpu));
-    stitcher.setBundleAdjuster(new detail::BundleAdjusterRay());
+    stitcher.setFeaturesMatcher(makePtr<detail::BestOf2NearestMatcher>(try_use_gpu));
+    stitcher.setBundleAdjuster(makePtr<detail::BundleAdjusterRay>());
 
 #ifdef HAVE_OPENCV_GPU
     if (try_use_gpu && gpu::getCudaEnabledDeviceCount() > 0)
     {
 #ifdef HAVE_OPENCV_NONFREE
-        stitcher.setFeaturesFinder(new detail::SurfFeaturesFinderGpu());
+        stitcher.setFeaturesFinder(makePtr<detail::SurfFeaturesFinderGpu>());
 #else
-        stitcher.setFeaturesFinder(new detail::OrbFeaturesFinder());
+        stitcher.setFeaturesFinder(makePtr<detail::OrbFeaturesFinder>());
 #endif
-        stitcher.setWarper(new SphericalWarperGpu());
-        stitcher.setSeamFinder(new detail::GraphCutSeamFinderGpu());
+        stitcher.setWarper(makePtr<SphericalWarperGpu>());
+        stitcher.setSeamFinder(makePtr<detail::GraphCutSeamFinderGpu>());
     }
     else
 #endif
     {
 #ifdef HAVE_OPENCV_NONFREE
-        stitcher.setFeaturesFinder(new detail::SurfFeaturesFinder());
+        stitcher.setFeaturesFinder(makePtr<detail::SurfFeaturesFinder>());
 #else
-        stitcher.setFeaturesFinder(new detail::OrbFeaturesFinder());
+        stitcher.setFeaturesFinder(makePtr<detail::OrbFeaturesFinder>());
 #endif
-        stitcher.setWarper(new SphericalWarper());
-        stitcher.setSeamFinder(new detail::GraphCutSeamFinder(detail::GraphCutSeamFinderBase::COST_COLOR));
+        stitcher.setWarper(makePtr<SphericalWarper>());
+        stitcher.setSeamFinder(makePtr<detail::GraphCutSeamFinder>(detail::GraphCutSeamFinderBase::COST_COLOR));
     }
 
-    stitcher.setExposureCompensator(new detail::BlocksGainCompensator());
-    stitcher.setBlender(new detail::MultiBandBlender(try_use_gpu));
+    stitcher.setExposureCompensator(makePtr<detail::BlocksGainCompensator>());
+    stitcher.setBlender(makePtr<detail::MultiBandBlender>(try_use_gpu));
 
     return stitcher;
 }
@@ -102,7 +102,8 @@ Stitcher::Status Stitcher::estimateTransform(InputArray images, const std::vecto
     if ((status = matchImages()) != OK)
         return status;
 
-    estimateCameraParams();
+    if ((status = estimateCameraParams()) != OK)
+        return status;
 
     return OK;
 }
@@ -442,10 +443,11 @@ Stitcher::Status Stitcher::matchImages()
 }
 
 
-void Stitcher::estimateCameraParams()
+Stitcher::Status Stitcher::estimateCameraParams()
 {
     detail::HomographyBasedEstimator estimator;
-    estimator(features_, pairwise_matches_, cameras_);
+    if (!estimator(features_, pairwise_matches_, cameras_))
+        return ERR_HOMOGRAPHY_EST_FAIL;
 
     for (size_t i = 0; i < cameras_.size(); ++i)
     {
@@ -456,7 +458,8 @@ void Stitcher::estimateCameraParams()
     }
 
     bundle_adjuster_->setConfThresh(conf_thresh_);
-    (*bundle_adjuster_)(features_, pairwise_matches_, cameras_);
+    if (!(*bundle_adjuster_)(features_, pairwise_matches_, cameras_))
+        return ERR_CAMERA_PARAMS_ADJUST_FAIL;
 
     // Find median focal length and use it as final image scale
     std::vector<double> focals;
@@ -481,6 +484,8 @@ void Stitcher::estimateCameraParams()
         for (size_t i = 0; i < cameras_.size(); ++i)
             cameras_[i].R = rmats[i];
     }
+
+    return OK;
 }
 
 } // namespace cv
diff --git a/modules/stitching/test/test_matchers.cpp b/modules/stitching/test/test_matchers.cpp
index baa02ec8d..c7f068b7f 100644
--- a/modules/stitching/test/test_matchers.cpp
+++ b/modules/stitching/test/test_matchers.cpp
@@ -49,7 +49,7 @@ using namespace std;
 
 TEST(SurfFeaturesFinder, CanFindInROIs)
 {
-    Ptr<detail::FeaturesFinder> finder = new detail::SurfFeaturesFinder();
+    Ptr<detail::FeaturesFinder> finder = makePtr<detail::SurfFeaturesFinder>();
     Mat img  = imread(string(cvtest::TS::ptr()->get_data_path()) + "cv/shared/lena.png");
 
     vector<Rect> rois;
@@ -75,4 +75,4 @@ TEST(SurfFeaturesFinder, CanFindInROIs)
     ASSERT_EQ(bad_count, 0);
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/stitching/test/test_precomp.cpp b/modules/stitching/test/test_precomp.cpp
deleted file mode 100644
index 14a070e81..000000000
--- a/modules/stitching/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
\ No newline at end of file
diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt
index 065d8ced1..45150617c 100644
--- a/modules/superres/CMakeLists.txt
+++ b/modules/superres/CMakeLists.txt
@@ -5,5 +5,5 @@ endif()
 set(the_description "Super Resolution")
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef)
 ocv_define_module(superres opencv_imgproc opencv_video
-                  OPTIONAL opencv_highgui
+                  OPTIONAL opencv_highgui opencv_ocl
                   opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc opencv_gpuoptflow opencv_gpucodec)
diff --git a/modules/superres/include/opencv2/superres.hpp b/modules/superres/include/opencv2/superres.hpp
index b85d0c1bd..ac12c42be 100644
--- a/modules/superres/include/opencv2/superres.hpp
+++ b/modules/superres/include/opencv2/superres.hpp
@@ -92,6 +92,7 @@ namespace cv
         // Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow.
         CV_EXPORTS Ptr<SuperResolution> createSuperResolution_BTVL1();
         CV_EXPORTS Ptr<SuperResolution> createSuperResolution_BTVL1_GPU();
+        CV_EXPORTS Ptr<SuperResolution> createSuperResolution_BTVL1_OCL();
     }
 }
 
diff --git a/modules/superres/include/opencv2/superres/optical_flow.hpp b/modules/superres/include/opencv2/superres/optical_flow.hpp
index 2717380a7..c51f7b217 100644
--- a/modules/superres/include/opencv2/superres/optical_flow.hpp
+++ b/modules/superres/include/opencv2/superres/optical_flow.hpp
@@ -58,15 +58,18 @@ namespace cv
 
         CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Farneback();
         CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Farneback_GPU();
+        CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Farneback_OCL();
 
         CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Simple();
 
         CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_DualTVL1();
         CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_DualTVL1_GPU();
+        CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_DualTVL1_OCL();
 
         CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Brox_GPU();
 
         CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_PyrLK_GPU();
+        CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_PyrLK_OCL();
     }
 }
 
diff --git a/modules/superres/perf/perf_main.cpp b/modules/superres/perf/perf_main.cpp
index adc69e6e8..0a8ab5dea 100644
--- a/modules/superres/perf/perf_main.cpp
+++ b/modules/superres/perf/perf_main.cpp
@@ -44,4 +44,11 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(superres, printCudaInfo())
+static const char * impls[] = {
+#ifdef HAVE_CUDA
+    "cuda",
+#endif
+    "plain"
+};
+
+CV_PERF_TEST_MAIN_WITH_IMPLS(superres, impls, printCudaInfo())
diff --git a/modules/superres/perf/perf_precomp.cpp b/modules/superres/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/superres/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/superres/perf/perf_superres.cpp b/modules/superres/perf/perf_superres.cpp
index 8651b5532..9d83c949d 100644
--- a/modules/superres/perf/perf_superres.cpp
+++ b/modules/superres/perf/perf_superres.cpp
@@ -153,14 +153,14 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1,
         superRes->set("temporalAreaRadius", temporalAreaRadius);
         superRes->set("opticalFlow", opticalFlow);
 
-        superRes->setInput(new OneFrameSource_GPU(GpuMat(frame)));
+        superRes->setInput(makePtr<OneFrameSource_GPU>(GpuMat(frame)));
 
         GpuMat dst;
         superRes->nextFrame(dst);
 
         TEST_CYCLE_N(10) superRes->nextFrame(dst);
 
-        GPU_SANITY_CHECK(dst);
+        GPU_SANITY_CHECK(dst, 2);
     }
     else
     {
@@ -171,7 +171,7 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1,
         superRes->set("temporalAreaRadius", temporalAreaRadius);
         superRes->set("opticalFlow", opticalFlow);
 
-        superRes->setInput(new OneFrameSource_CPU(frame));
+        superRes->setInput(makePtr<OneFrameSource_CPU>(frame));
 
         Mat dst;
         superRes->nextFrame(dst);
diff --git a/modules/superres/perf/perf_superres_ocl.cpp b/modules/superres/perf/perf_superres_ocl.cpp
new file mode 100644
index 000000000..ce8f5934e
--- /dev/null
+++ b/modules/superres/perf/perf_superres_ocl.cpp
@@ -0,0 +1,146 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+#include "opencv2/ocl.hpp"
+using namespace std;
+using namespace testing;
+using namespace perf;
+using namespace cv;
+using namespace cv::superres;
+
+namespace
+{
+    class OneFrameSource_OCL : public FrameSource
+    {
+    public:
+        explicit OneFrameSource_OCL(const ocl::oclMat& frame) : frame_(frame) {}
+
+        void nextFrame(OutputArray frame)
+        {
+            ocl::getOclMatRef(frame) = frame_;
+        }
+        void reset()
+        {
+        }
+
+    private:
+        ocl::oclMat frame_;
+    };
+
+
+    class ZeroOpticalFlowOCL : public DenseOpticalFlowExt
+    {
+    public:
+        void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2)
+        {
+            ocl::oclMat& frame0_ = ocl::getOclMatRef(frame0);
+            ocl::oclMat& flow1_ = ocl::getOclMatRef(flow1);
+            ocl::oclMat& flow2_ = ocl::getOclMatRef(flow2);
+
+            cv::Size size = frame0_.size();
+
+            if(!flow2.needed())
+            {
+                flow1_.create(size, CV_32FC2);
+                flow1_.setTo(Scalar::all(0));
+            }
+            else
+            {
+                flow1_.create(size, CV_32FC1);
+                flow2_.create(size, CV_32FC1);
+
+                flow1_.setTo(Scalar::all(0));
+                flow2_.setTo(Scalar::all(0));
+            }
+        }
+
+        void collectGarbage()
+        {
+        }
+    };
+}
+
+PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL,
+    Combine(Values(szSmall64, szSmall128),
+    Values(MatType(CV_8UC1), MatType(CV_8UC3))))
+{
+    std::vector<cv::ocl::Info>info;
+    cv::ocl::getDevice(info);
+
+    declare.time(5 * 60);
+
+    const Size size = std::tr1::get<0>(GetParam());
+    const int type = std::tr1::get<1>(GetParam());
+
+    Mat frame(size, type);
+    declare.in(frame, WARMUP_RNG);
+
+    ocl::oclMat frame_ocl;
+    frame_ocl.upload(frame);
+
+
+    const int scale = 2;
+    const int iterations = 50;
+    const int temporalAreaRadius = 1;
+    Ptr<DenseOpticalFlowExt> opticalFlowOcl(new ZeroOpticalFlowOCL);
+
+    Ptr<SuperResolution> superRes_ocl = createSuperResolution_BTVL1_OCL();
+
+    superRes_ocl->set("scale", scale);
+    superRes_ocl->set("iterations", iterations);
+    superRes_ocl->set("temporalAreaRadius", temporalAreaRadius);
+    superRes_ocl->set("opticalFlow", opticalFlowOcl);
+
+    superRes_ocl->setInput(makePtr<OneFrameSource_OCL>(frame_ocl));
+
+    ocl::oclMat dst_ocl;
+    superRes_ocl->nextFrame(dst_ocl);
+
+    TEST_CYCLE_N(10) superRes_ocl->nextFrame(dst_ocl);
+    frame_ocl.release();
+    CPU_SANITY_CHECK(dst_ocl);
+}
+#endif
diff --git a/modules/superres/src/btv_l1.cpp b/modules/superres/src/btv_l1.cpp
index 178e434ad..e0ee7db63 100644
--- a/modules/superres/src/btv_l1.cpp
+++ b/modules/superres/src/btv_l1.cpp
@@ -337,7 +337,7 @@ namespace
 
         // update blur filter and btv weights
 
-        if (filter_.empty() || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)
+        if (!filter_ || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)
         {
             filter_ = createGaussianFilter(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_);
             curBlurKernelSize_ = blurKernelSize_;
@@ -614,5 +614,5 @@ namespace
 
 Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1()
 {
-    return new BTVL1;
+    return makePtr<BTVL1>();
 }
diff --git a/modules/superres/src/btv_l1_gpu.cpp b/modules/superres/src/btv_l1_gpu.cpp
index 7b2ad7370..f4268b01b 100644
--- a/modules/superres/src/btv_l1_gpu.cpp
+++ b/modules/superres/src/btv_l1_gpu.cpp
@@ -578,7 +578,7 @@ namespace
 
 Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1_GPU()
 {
-    return new BTVL1_GPU;
+    return makePtr<BTVL1_GPU>();
 }
 
 #endif // HAVE_CUDA
diff --git a/modules/superres/src/btv_l1_ocl.cpp b/modules/superres/src/btv_l1_ocl.cpp
new file mode 100644
index 000000000..9d94e61ce
--- /dev/null
+++ b/modules/superres/src/btv_l1_ocl.cpp
@@ -0,0 +1,748 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//		Jin Ma, jin@multicorewareinc.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution.
+// Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow.
+
+#include "precomp.hpp"
+
+#if !defined(HAVE_OPENCL) || !defined(HAVE_OPENCV_OCL)
+
+cv::Ptr<cv::superres::SuperResolution> cv::superres::createSuperResolution_BTVL1_OCL()
+{
+    CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform");
+    return Ptr<SuperResolution>();
+}
+
+#else
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+using namespace cv::superres;
+using namespace cv::superres::detail;
+
+namespace cv
+{
+    namespace ocl
+    {
+        extern const char* superres_btvl1;
+
+        float* btvWeights_ = NULL;
+        size_t btvWeights_size = 0;
+    }
+}
+
+namespace btv_l1_device_ocl
+{
+    void buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY,
+        const oclMat& backwardMotionX, const oclMat& bacwardMotionY,
+        oclMat& forwardMapX, oclMat& forwardMapY,
+        oclMat& backwardMapX, oclMat& backwardMapY);
+
+    void upscale(const oclMat& src, oclMat& dst, int scale);
+
+    float diffSign(float a, float b);
+
+    Point3f diffSign(Point3f a, Point3f b);
+
+    void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst);
+
+    void calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize);
+}
+
+void btv_l1_device_ocl::buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY,
+    const oclMat& backwardMotionX, const oclMat& backwardMotionY,
+    oclMat& forwardMapX, oclMat& forwardMapY,
+    oclMat& backwardMapX, oclMat& backwardMapY)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {forwardMapX.cols, forwardMapX.rows, 1};
+
+    int forwardMotionX_step = (int)(forwardMotionX.step/forwardMotionX.elemSize());
+    int forwardMotionY_step = (int)(forwardMotionY.step/forwardMotionY.elemSize());
+    int backwardMotionX_step = (int)(backwardMotionX.step/backwardMotionX.elemSize());
+    int backwardMotionY_step = (int)(backwardMotionY.step/backwardMotionY.elemSize());
+    int forwardMapX_step = (int)(forwardMapX.step/forwardMapX.elemSize());
+    int forwardMapY_step = (int)(forwardMapY.step/forwardMapY.elemSize());
+    int backwardMapX_step = (int)(backwardMapX.step/backwardMapX.elemSize());
+    int backwardMapY_step = (int)(backwardMapY.step/backwardMapY.elemSize());
+
+    String kernel_name = "buildMotionMapsKernel";
+    vector< pair<size_t, const void*> > args;
+
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionX.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionY.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionX.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionY.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapX.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapY.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapX.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapY.data));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX.rows));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY.cols));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionX_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionY_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapX_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapY_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapX_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapY_step));
+
+    openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1);
+}
+
+void btv_l1_device_ocl::upscale(const oclMat& src, oclMat& dst, int scale)
+{
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {src.cols, src.rows, 1};
+
+    int src_step = (int)(src.step/src.elemSize());
+    int dst_step = (int)(dst.step/dst.elemSize());
+
+    String kernel_name = "upscaleKernel";
+    vector< pair<size_t, const void*> > args;
+
+    int cn = src.oclchannels();
+
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&src.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&scale));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&cn));
+
+    openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1);
+
+}
+
+float btv_l1_device_ocl::diffSign(float a, float b)
+{
+    return a > b ? 1.0f : a < b ? -1.0f : 0.0f;
+}
+
+Point3f btv_l1_device_ocl::diffSign(Point3f a, Point3f b)
+{
+    return Point3f(
+        a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f,
+        a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f,
+        a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f
+        );
+}
+
+void btv_l1_device_ocl::diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst)
+{
+    Context* clCxt = Context::getContext();
+
+    oclMat src1_ = src1.reshape(1);
+    oclMat src2_ = src2.reshape(1);
+    oclMat dst_ = dst.reshape(1);
+
+    int src1_step = (int)(src1_.step/src1_.elemSize());
+    int src2_step = (int)(src2_.step/src2_.elemSize());
+    int dst_step = (int)(dst_.step/dst_.elemSize());
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {src1_.cols, src1_.rows, 1};
+
+    String kernel_name = "diffSignKernel";
+    vector< pair<size_t, const void*> > args;
+
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&src1_.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&src2_.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data));
+
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.rows));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.cols));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src1_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src2_step));
+
+    openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1);
+}
+
+void btv_l1_device_ocl::calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize)
+{
+    Context* clCxt = Context::getContext();
+
+    oclMat src_ = src.reshape(1);
+    oclMat dst_ = dst.reshape(1);
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {src.cols, src.rows, 1};
+
+    int src_step = (int)(src_.step/src_.elemSize());
+    int dst_step = (int)(dst_.step/dst_.elemSize());
+
+    String kernel_name = "calcBtvRegularizationKernel";
+    vector< pair<size_t, const void*> > args;
+
+    int cn = src.oclchannels();
+
+    cl_mem c_btvRegWeights;
+    size_t count = btvWeights_size * sizeof(float);
+    c_btvRegWeights = openCLCreateBuffer(clCxt, CL_MEM_READ_ONLY, count);
+    int cl_safe_check = clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL);
+    CV_Assert(cl_safe_check == CL_SUCCESS);
+
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&ksize));
+    args.push_back(make_pair(sizeof(cl_int), (void*)&cn));
+    args.push_back(make_pair(sizeof(cl_mem), (void*)&c_btvRegWeights));
+
+    openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1);
+    cl_safe_check = clReleaseMemObject(c_btvRegWeights);
+    CV_Assert(cl_safe_check == CL_SUCCESS);
+}
+
+namespace
+{
+    void calcRelativeMotions(const vector<pair<oclMat, oclMat> >& forwardMotions, const vector<pair<oclMat, oclMat> >& backwardMotions,
+        vector<pair<oclMat, oclMat> >& relForwardMotions, vector<pair<oclMat, oclMat> >& relBackwardMotions,
+        int baseIdx, Size size)
+    {
+        const int count = static_cast<int>(forwardMotions.size());
+
+        relForwardMotions.resize(count);
+        relForwardMotions[baseIdx].first.create(size, CV_32FC1);
+        relForwardMotions[baseIdx].first.setTo(Scalar::all(0));
+        relForwardMotions[baseIdx].second.create(size, CV_32FC1);
+        relForwardMotions[baseIdx].second.setTo(Scalar::all(0));
+
+        relBackwardMotions.resize(count);
+        relBackwardMotions[baseIdx].first.create(size, CV_32FC1);
+        relBackwardMotions[baseIdx].first.setTo(Scalar::all(0));
+        relBackwardMotions[baseIdx].second.create(size, CV_32FC1);
+        relBackwardMotions[baseIdx].second.setTo(Scalar::all(0));
+
+        for (int i = baseIdx - 1; i >= 0; --i)
+        {
+            ocl::add(relForwardMotions[i + 1].first, forwardMotions[i].first, relForwardMotions[i].first);
+            ocl::add(relForwardMotions[i + 1].second, forwardMotions[i].second, relForwardMotions[i].second);
+
+            ocl::add(relBackwardMotions[i + 1].first, backwardMotions[i + 1].first, relBackwardMotions[i].first);
+            ocl::add(relBackwardMotions[i + 1].second, backwardMotions[i + 1].second, relBackwardMotions[i].second);
+        }
+
+        for (int i = baseIdx + 1; i < count; ++i)
+        {
+            ocl::add(relForwardMotions[i - 1].first, backwardMotions[i].first, relForwardMotions[i].first);
+            ocl::add(relForwardMotions[i - 1].second, backwardMotions[i].second, relForwardMotions[i].second);
+
+            ocl::add(relBackwardMotions[i - 1].first, forwardMotions[i - 1].first, relBackwardMotions[i].first);
+            ocl::add(relBackwardMotions[i - 1].second, forwardMotions[i - 1].second, relBackwardMotions[i].second);
+        }
+    }
+
+    void upscaleMotions(const vector<pair<oclMat, oclMat> >& lowResMotions, vector<pair<oclMat, oclMat> >& highResMotions, int scale)
+    {
+        highResMotions.resize(lowResMotions.size());
+
+        for (size_t i = 0; i < lowResMotions.size(); ++i)
+        {
+            ocl::resize(lowResMotions[i].first, highResMotions[i].first, Size(), scale, scale, INTER_LINEAR);
+            ocl::resize(lowResMotions[i].second, highResMotions[i].second, Size(), scale, scale, INTER_LINEAR);
+
+            ocl::multiply(scale, highResMotions[i].first, highResMotions[i].first);
+            ocl::multiply(scale, highResMotions[i].second, highResMotions[i].second);
+        }
+    }
+
+    void buildMotionMaps(const pair<oclMat, oclMat>& forwardMotion, const pair<oclMat, oclMat>& backwardMotion,
+        pair<oclMat, oclMat>& forwardMap, pair<oclMat, oclMat>& backwardMap)
+    {
+        forwardMap.first.create(forwardMotion.first.size(), CV_32FC1);
+        forwardMap.second.create(forwardMotion.first.size(), CV_32FC1);
+
+        backwardMap.first.create(forwardMotion.first.size(), CV_32FC1);
+        backwardMap.second.create(forwardMotion.first.size(), CV_32FC1);
+
+        btv_l1_device_ocl::buildMotionMaps(forwardMotion.first, forwardMotion.second,
+            backwardMotion.first, backwardMotion.second,
+            forwardMap.first, forwardMap.second,
+            backwardMap.first, backwardMap.second);
+    }
+
+    void upscale(const oclMat& src, oclMat& dst, int scale)
+    {
+        CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
+
+        dst.create(src.rows * scale, src.cols * scale, src.type());
+        dst.setTo(Scalar::all(0));
+
+        btv_l1_device_ocl::upscale(src, dst, scale);
+    }
+
+    void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst)
+    {
+        dst.create(src1.size(), src1.type());
+
+        btv_l1_device_ocl::diffSign(src1, src2, dst);
+    }
+
+    void calcBtvWeights(int btvKernelSize, double alpha, vector<float>& btvWeights)
+    {
+        const size_t size = btvKernelSize * btvKernelSize;
+
+        btvWeights.resize(size);
+
+        const int ksize = (btvKernelSize - 1) / 2;
+        const float alpha_f = static_cast<float>(alpha);
+
+        for (int m = 0, ind = 0; m <= ksize; ++m)
+        {
+            for (int l = ksize; l + m >= 0; --l, ++ind)
+                btvWeights[ind] = pow(alpha_f, std::abs(m) + std::abs(l));
+        }
+
+        btvWeights_ = &btvWeights[0];
+        btvWeights_size = size;
+    }
+
+    void calcBtvRegularization(const oclMat& src, oclMat& dst, int btvKernelSize)
+    {
+        dst.create(src.size(), src.type());
+        dst.setTo(Scalar::all(0));
+
+        const int ksize = (btvKernelSize - 1) / 2;
+
+        btv_l1_device_ocl::calcBtvRegularization(src, dst, ksize);
+    }
+
+    class BTVL1_OCL_Base
+    {
+    public:
+        BTVL1_OCL_Base();
+
+        void process(const vector<oclMat>& src, oclMat& dst,
+            const vector<pair<oclMat, oclMat> >& forwardMotions, const vector<pair<oclMat, oclMat> >& backwardMotions,
+            int baseIdx);
+
+        void collectGarbage();
+
+    protected:
+        int scale_;
+        int iterations_;
+        double lambda_;
+        double tau_;
+        double alpha_;
+        int btvKernelSize_;
+        int blurKernelSize_;
+        double blurSigma_;
+        Ptr<DenseOpticalFlowExt> opticalFlow_;
+
+    private:
+        vector<Ptr<cv::ocl::FilterEngine_GPU> > filters_;
+        int curBlurKernelSize_;
+        double curBlurSigma_;
+        int curSrcType_;
+
+        vector<float> btvWeights_;
+        int curBtvKernelSize_;
+        double curAlpha_;
+
+        vector<pair<oclMat, oclMat> > lowResForwardMotions_;
+        vector<pair<oclMat, oclMat> > lowResBackwardMotions_;
+
+        vector<pair<oclMat, oclMat> > highResForwardMotions_;
+        vector<pair<oclMat, oclMat> > highResBackwardMotions_;
+
+        vector<pair<oclMat, oclMat> > forwardMaps_;
+        vector<pair<oclMat, oclMat> > backwardMaps_;
+
+        oclMat highRes_;
+
+        vector<oclMat> diffTerms_;
+        vector<oclMat> a_, b_, c_;
+        oclMat regTerm_;
+    };
+
+    BTVL1_OCL_Base::BTVL1_OCL_Base()
+    {
+        scale_ = 4;
+        iterations_ = 180;
+        lambda_ = 0.03;
+        tau_ = 1.3;
+        alpha_ = 0.7;
+        btvKernelSize_ = 7;
+        blurKernelSize_ = 5;
+        blurSigma_ = 0.0;
+        opticalFlow_ = createOptFlow_DualTVL1_OCL();
+
+        curBlurKernelSize_ = -1;
+        curBlurSigma_ = -1.0;
+        curSrcType_ = -1;
+
+        curBtvKernelSize_ = -1;
+        curAlpha_ = -1.0;
+    }
+
+    void BTVL1_OCL_Base::process(const vector<oclMat>& src, oclMat& dst,
+        const vector<pair<oclMat, oclMat> >& forwardMotions, const vector<pair<oclMat, oclMat> >& backwardMotions,
+        int baseIdx)
+    {
+        CV_Assert( scale_ > 1 );
+        CV_Assert( iterations_ > 0 );
+        CV_Assert( tau_ > 0.0 );
+        CV_Assert( alpha_ > 0.0 );
+        CV_Assert( btvKernelSize_ > 0 && btvKernelSize_ <= 16 );
+        CV_Assert( blurKernelSize_ > 0 );
+        CV_Assert( blurSigma_ >= 0.0 );
+
+        // update blur filter and btv weights
+
+        if (filters_.size() != src.size() || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)
+        {
+            filters_.resize(src.size());
+            for (size_t i = 0; i < src.size(); ++i)
+                filters_[i] = cv::ocl::createGaussianFilter_GPU(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_);
+            curBlurKernelSize_ = blurKernelSize_;
+            curBlurSigma_ = blurSigma_;
+            curSrcType_ = src[0].type();
+        }
+
+        if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_)
+        {
+            calcBtvWeights(btvKernelSize_, alpha_, btvWeights_);
+            curBtvKernelSize_ = btvKernelSize_;
+            curAlpha_ = alpha_;
+        }
+
+        // calc motions between input frames
+
+        calcRelativeMotions(forwardMotions, backwardMotions,
+            lowResForwardMotions_, lowResBackwardMotions_,
+            baseIdx, src[0].size());
+
+        upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_);
+        upscaleMotions(lowResBackwardMotions_, highResBackwardMotions_, scale_);
+
+        forwardMaps_.resize(highResForwardMotions_.size());
+        backwardMaps_.resize(highResForwardMotions_.size());
+        for (size_t i = 0; i < highResForwardMotions_.size(); ++i)
+        {
+            buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]);
+        }
+        // initial estimation
+
+        const Size lowResSize = src[0].size();
+        const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_);
+
+        ocl::resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_LINEAR);
+
+        // iterations
+
+        diffTerms_.resize(src.size());
+        a_.resize(src.size());
+        b_.resize(src.size());
+        c_.resize(src.size());
+
+        for (int i = 0; i < iterations_; ++i)
+        {
+            for (size_t k = 0; k < src.size(); ++k)
+            {
+                diffTerms_[k].create(highRes_.size(), highRes_.type());
+                a_[k].create(highRes_.size(), highRes_.type());
+                b_[k].create(highRes_.size(), highRes_.type());
+                c_[k].create(lowResSize, highRes_.type());
+
+                // a = M * Ih
+                ocl::remap(highRes_, a_[k], backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar());
+                // b = HM * Ih
+                filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1));
+                // c = DHF * Ih
+                ocl::resize(b_[k], c_[k], lowResSize, 0, 0, INTER_NEAREST);
+
+                diffSign(src[k], c_[k], c_[k]);
+
+                // a = Dt * diff
+                upscale(c_[k], a_[k], scale_);
+                // b = HtDt * diff
+                filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1));
+                // diffTerm = MtHtDt * diff
+                ocl::remap(b_[k], diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar());
+            }
+
+            if (lambda_ > 0)
+            {
+                calcBtvRegularization(highRes_, regTerm_, btvKernelSize_);
+                ocl::addWeighted(highRes_, 1.0, regTerm_, -tau_ * lambda_, 0.0, highRes_);
+            }
+
+            for (size_t k = 0; k < src.size(); ++k)
+            {
+                ocl::addWeighted(highRes_, 1.0, diffTerms_[k], tau_, 0.0, highRes_);
+            }
+        }
+
+        Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_);
+        highRes_(inner).copyTo(dst);
+    }
+
+    void BTVL1_OCL_Base::collectGarbage()
+    {
+        filters_.clear();
+
+        lowResForwardMotions_.clear();
+        lowResBackwardMotions_.clear();
+
+        highResForwardMotions_.clear();
+        highResBackwardMotions_.clear();
+
+        forwardMaps_.clear();
+        backwardMaps_.clear();
+
+        highRes_.release();
+
+        diffTerms_.clear();
+        a_.clear();
+        b_.clear();
+        c_.clear();
+        regTerm_.release();
+    }
+
+    ////////////////////////////////////////////////////////////
+
+    class BTVL1_OCL : public SuperResolution, private BTVL1_OCL_Base
+    {
+    public:
+        AlgorithmInfo* info() const;
+
+        BTVL1_OCL();
+
+        void collectGarbage();
+
+    protected:
+        void initImpl(Ptr<FrameSource>& frameSource);
+        void processImpl(Ptr<FrameSource>& frameSource, OutputArray output);
+
+    private:
+        int temporalAreaRadius_;
+
+        void readNextFrame(Ptr<FrameSource>& frameSource);
+        void processFrame(int idx);
+
+        oclMat curFrame_;
+        oclMat prevFrame_;
+
+        vector<oclMat> frames_;
+        vector<pair<oclMat, oclMat> > forwardMotions_;
+        vector<pair<oclMat, oclMat> > backwardMotions_;
+        vector<oclMat> outputs_;
+
+        int storePos_;
+        int procPos_;
+        int outPos_;
+
+        vector<oclMat> srcFrames_;
+        vector<pair<oclMat, oclMat> > srcForwardMotions_;
+        vector<pair<oclMat, oclMat> > srcBackwardMotions_;
+        oclMat finalOutput_;
+    };
+
+    CV_INIT_ALGORITHM(BTVL1_OCL, "SuperResolution.BTVL1_OCL",
+    obj.info()->addParam(obj, "scale", obj.scale_, false, 0, 0, "Scale factor.");
+    obj.info()->addParam(obj, "iterations", obj.iterations_, false, 0, 0, "Iteration count.");
+    obj.info()->addParam(obj, "tau", obj.tau_, false, 0, 0, "Asymptotic value of steepest descent method.");
+    obj.info()->addParam(obj, "lambda", obj.lambda_, false, 0, 0, "Weight parameter to balance data term and smoothness term.");
+    obj.info()->addParam(obj, "alpha", obj.alpha_, false, 0, 0, "Parameter of spacial distribution in Bilateral-TV.");
+    obj.info()->addParam(obj, "btvKernelSize", obj.btvKernelSize_, false, 0, 0, "Kernel size of Bilateral-TV filter.");
+    obj.info()->addParam(obj, "blurKernelSize", obj.blurKernelSize_, false, 0, 0, "Gaussian blur kernel size.");
+    obj.info()->addParam(obj, "blurSigma", obj.blurSigma_, false, 0, 0, "Gaussian blur sigma.");
+    obj.info()->addParam(obj, "temporalAreaRadius", obj.temporalAreaRadius_, false, 0, 0, "Radius of the temporal search area.");
+    obj.info()->addParam<DenseOpticalFlowExt>(obj, "opticalFlow", obj.opticalFlow_, false, 0, 0, "Dense optical flow algorithm."));
+
+    BTVL1_OCL::BTVL1_OCL()
+    {
+        temporalAreaRadius_ = 4;
+    }
+
+    void BTVL1_OCL::collectGarbage()
+    {
+        curFrame_.release();
+        prevFrame_.release();
+
+        frames_.clear();
+        forwardMotions_.clear();
+        backwardMotions_.clear();
+        outputs_.clear();
+
+        srcFrames_.clear();
+        srcForwardMotions_.clear();
+        srcBackwardMotions_.clear();
+        finalOutput_.release();
+
+        SuperResolution::collectGarbage();
+        BTVL1_OCL_Base::collectGarbage();
+    }
+
+    void BTVL1_OCL::initImpl(Ptr<FrameSource>& frameSource)
+    {
+        const int cacheSize = 2 * temporalAreaRadius_ + 1;
+
+        frames_.resize(cacheSize);
+        forwardMotions_.resize(cacheSize);
+        backwardMotions_.resize(cacheSize);
+        outputs_.resize(cacheSize);
+
+        storePos_ = -1;
+
+        for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t)
+            readNextFrame(frameSource);
+
+        for (int i = 0; i <= temporalAreaRadius_; ++i)
+            processFrame(i);
+
+        procPos_ = temporalAreaRadius_;
+        outPos_ = -1;
+    }
+
+    void BTVL1_OCL::processImpl(Ptr<FrameSource>& frameSource, OutputArray _output)
+    {
+        if (outPos_ >= storePos_)
+        {
+            if(_output.kind() == _InputArray::OCL_MAT)
+            {
+                getOclMatRef(_output).release();
+            }
+            else
+            {
+                _output.release();
+            }
+            return;
+        }
+
+        readNextFrame(frameSource);
+
+        if (procPos_ < storePos_)
+        {
+            ++procPos_;
+            processFrame(procPos_);
+        }
+
+        ++outPos_;
+        const oclMat& curOutput = at(outPos_, outputs_);
+
+        if (_output.kind() == _InputArray::OCL_MAT)
+            curOutput.convertTo(getOclMatRef(_output), CV_8U);
+        else
+        {
+            curOutput.convertTo(finalOutput_, CV_8U);
+            arrCopy(finalOutput_, _output);
+        }
+    }
+
+    void BTVL1_OCL::readNextFrame(Ptr<FrameSource>& frameSource)
+    {
+        curFrame_.release();
+        frameSource->nextFrame(curFrame_);
+
+        if (curFrame_.empty())
+            return;
+
+        ++storePos_;
+        curFrame_.convertTo(at(storePos_, frames_), CV_32F);
+
+        if (storePos_ > 0)
+        {
+            pair<oclMat, oclMat>& forwardMotion = at(storePos_ - 1, forwardMotions_);
+            pair<oclMat, oclMat>& backwardMotion = at(storePos_, backwardMotions_);
+
+            opticalFlow_->calc(prevFrame_, curFrame_, forwardMotion.first, forwardMotion.second);
+            opticalFlow_->calc(curFrame_, prevFrame_, backwardMotion.first, backwardMotion.second);
+        }
+
+        curFrame_.copyTo(prevFrame_);
+    }
+
+    void BTVL1_OCL::processFrame(int idx)
+    {
+        const int startIdx = max(idx - temporalAreaRadius_, 0);
+        const int procIdx = idx;
+        const int endIdx = min(startIdx + 2 * temporalAreaRadius_, storePos_);
+
+        const int count = endIdx - startIdx + 1;
+
+        srcFrames_.resize(count);
+        srcForwardMotions_.resize(count);
+        srcBackwardMotions_.resize(count);
+
+        int baseIdx = -1;
+
+        for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k)
+        {
+            if (i == procIdx)
+                baseIdx = k;
+
+            srcFrames_[k] = at(i, frames_);
+
+            if (i < endIdx)
+                srcForwardMotions_[k] = at(i, forwardMotions_);
+            if (i > startIdx)
+                srcBackwardMotions_[k] = at(i, backwardMotions_);
+        }
+
+        process(srcFrames_, at(idx, outputs_), srcForwardMotions_, srcBackwardMotions_, baseIdx);
+    }
+}
+
+Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1_OCL()
+{
+    return makePtr<BTVL1_OCL>();
+}
+#endif
diff --git a/modules/superres/src/frame_source.cpp b/modules/superres/src/frame_source.cpp
index 7da817cfa..14576d5dd 100644
--- a/modules/superres/src/frame_source.cpp
+++ b/modules/superres/src/frame_source.cpp
@@ -74,7 +74,7 @@ namespace
 
 Ptr<FrameSource> cv::superres::createFrameSource_Empty()
 {
-    return new EmptyFrameSource;
+    return makePtr<EmptyFrameSource>();
 }
 
 //////////////////////////////////////////////////////
@@ -118,11 +118,23 @@ namespace
         {
             vc_ >> _frame.getMatRef();
         }
-        else
+        else if(_frame.kind() == _InputArray::GPU_MAT)
         {
             vc_ >> frame_;
             arrCopy(frame_, _frame);
         }
+        else if(_frame.kind() == _InputArray::OCL_MAT)
+        {
+            vc_ >> frame_;
+            if(!frame_.empty())
+            {
+                arrCopy(frame_, _frame);
+            }
+        }
+        else
+        {
+            //should never get here
+        }
     }
 
     class VideoFrameSource : public CaptureFrameSource
@@ -174,12 +186,12 @@ namespace
 
 Ptr<FrameSource> cv::superres::createFrameSource_Video(const String& fileName)
 {
-    return new VideoFrameSource(fileName);
+    return makePtr<VideoFrameSource>(fileName);
 }
 
 Ptr<FrameSource> cv::superres::createFrameSource_Camera(int deviceId)
 {
-    return new CameraFrameSource(deviceId);
+    return makePtr<CameraFrameSource>(deviceId);
 }
 
 #endif // HAVE_OPENCV_HIGHGUI
@@ -245,7 +257,7 @@ namespace
 
 Ptr<FrameSource> cv::superres::createFrameSource_Video_GPU(const String& fileName)
 {
-    return new VideoFrameSource(fileName);
+    return makePtr<VideoFrameSource>(fileName);
 }
 
 #endif // HAVE_OPENCV_GPUCODEC
diff --git a/modules/superres/src/input_array_utility.cpp b/modules/superres/src/input_array_utility.cpp
index e74905089..8a3cbca55 100644
--- a/modules/superres/src/input_array_utility.cpp
+++ b/modules/superres/src/input_array_utility.cpp
@@ -108,30 +108,59 @@ namespace
     {
         src.getGpuMat().copyTo(dst.getGpuMatRef());
     }
+#ifdef HAVE_OPENCV_OCL
+    void ocl2mat(InputArray src, OutputArray dst)
+    {
+        dst.getMatRef() = (Mat)ocl::getOclMatRef(src);
+    }
+    void mat2ocl(InputArray src, OutputArray dst)
+    {
+        Mat m = src.getMat();
+        ocl::getOclMatRef(dst) = (ocl::oclMat)m;
+    }
+    void ocl2ocl(InputArray src, OutputArray dst)
+    {
+        ocl::getOclMatRef(src).copyTo(ocl::getOclMatRef(dst));
+    }
+#else
+    void ocl2mat(InputArray, OutputArray)
+    {
+        CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform");;
+    }
+    void mat2ocl(InputArray, OutputArray)
+    {
+        CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform");;
+    }
+    void ocl2ocl(InputArray, OutputArray)
+    {
+        CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform");
+    }
+#endif
 }
 
 void cv::superres::arrCopy(InputArray src, OutputArray dst)
 {
     typedef void (*func_t)(InputArray src, OutputArray dst);
-    static const func_t funcs[10][10] =
+    static const func_t funcs[11][11] =
     {
         {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
-        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
-        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
-        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
-        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
-        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
-        {0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0 /*buf2arr*/, buf2arr},
-        {0, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/},
-        {0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, 0 /*arr2tex*/, gpu2gpu}
+        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
+        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
+        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
+        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
+        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
+        {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
+        {0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0 /*buf2arr*/, buf2arr, 0      },
+        {0, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0},
+        {0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, 0 /*arr2tex*/, gpu2gpu, 0      },
+        {0, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, 0,       0,             0,       ocl2ocl}
     };
 
     const int src_kind = src.kind() >> _InputArray::KIND_SHIFT;
     const int dst_kind = dst.kind() >> _InputArray::KIND_SHIFT;
 
-    CV_DbgAssert( src_kind >= 0 && src_kind < 10 );
-    CV_DbgAssert( dst_kind >= 0 && dst_kind < 10 );
+    CV_DbgAssert( src_kind >= 0 && src_kind < 11 );
+    CV_DbgAssert( dst_kind >= 0 && dst_kind < 11 );
 
     const func_t func = funcs[src_kind][dst_kind];
     CV_DbgAssert( func != 0 );
@@ -169,11 +198,10 @@ namespace
             break;
 
         default:
-            cvtColor(src, dst, code, cn);
+            cv::cvtColor(src, dst, code, cn);
             break;
         }
     }
-
     void convertToDepth(InputArray src, OutputArray dst, int depth)
     {
         CV_Assert( src.depth() <= CV_64F );
@@ -254,3 +282,70 @@ GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, Gp
     convertToDepth(buf0, buf1, depth);
     return buf1;
 }
+#ifdef HAVE_OPENCV_OCL
+namespace
+{
+    // TODO(pengx17): remove these overloaded functions until IntputArray fully supports oclMat
+    void convertToCn(const ocl::oclMat& src, ocl::oclMat& dst, int cn)
+    {
+        CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
+        CV_Assert( cn == 1 || cn == 3 || cn == 4 );
+
+        static const int codes[5][5] =
+        {
+            {-1, -1, -1, -1, -1},
+            {-1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA},
+            {-1, -1, -1, -1, -1},
+            {-1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA},
+            {-1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1},
+        };
+
+        const int code = codes[src.channels()][cn];
+        CV_DbgAssert( code >= 0 );
+
+        ocl::cvtColor(src, dst, code, cn);
+    }
+    void convertToDepth(const ocl::oclMat& src, ocl::oclMat& dst, int depth)
+    {
+        CV_Assert( src.depth() <= CV_64F );
+        CV_Assert( depth == CV_8U || depth == CV_32F );
+
+        static const double maxVals[] =
+        {
+            std::numeric_limits<uchar>::max(),
+            std::numeric_limits<schar>::max(),
+            std::numeric_limits<ushort>::max(),
+            std::numeric_limits<short>::max(),
+            std::numeric_limits<int>::max(),
+            1.0,
+            1.0,
+        };
+        const double scale = maxVals[depth] / maxVals[src.depth()];
+        src.convertTo(dst, depth, scale);
+    }
+}
+ocl::oclMat cv::superres::convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1)
+{
+    if (src.type() == type)
+        return src;
+
+    const int depth = CV_MAT_DEPTH(type);
+    const int cn = CV_MAT_CN(type);
+
+    if (src.depth() == depth)
+    {
+        convertToCn(src, buf0, cn);
+        return buf0;
+    }
+
+    if (src.channels() == cn)
+    {
+        convertToDepth(src, buf1, depth);
+        return buf1;
+    }
+
+    convertToCn(src, buf0, cn);
+    convertToDepth(buf0, buf1, depth);
+    return buf1;
+}
+#endif
diff --git a/modules/superres/src/input_array_utility.hpp b/modules/superres/src/input_array_utility.hpp
index 488399fc0..0c86143fc 100644
--- a/modules/superres/src/input_array_utility.hpp
+++ b/modules/superres/src/input_array_utility.hpp
@@ -45,6 +45,9 @@
 
 #include "opencv2/core.hpp"
 #include "opencv2/core/gpu.hpp"
+#ifdef HAVE_OPENCV_OCL
+#include "opencv2/ocl.hpp"
+#endif
 
 namespace cv
 {
@@ -57,6 +60,10 @@ namespace cv
 
         CV_EXPORTS Mat convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1);
         CV_EXPORTS gpu::GpuMat convertToType(const gpu::GpuMat& src, int type, gpu::GpuMat& buf0, gpu::GpuMat& buf1);
+
+#ifdef HAVE_OPENCV_OCL
+        CV_EXPORTS ocl::oclMat convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1);
+#endif
     }
 }
 
diff --git a/modules/superres/src/opencl/superres_btvl1.cl b/modules/superres/src/opencl/superres_btvl1.cl
new file mode 100644
index 000000000..472062323
--- /dev/null
+++ b/modules/superres/src/opencl/superres_btvl1.cl
@@ -0,0 +1,261 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+__kernel void buildMotionMapsKernel(__global float* forwardMotionX,
+    __global float* forwardMotionY,
+    __global float* backwardMotionX,
+    __global float* backwardMotionY,
+    __global float* forwardMapX,
+    __global float* forwardMapY,
+    __global float* backwardMapX,
+    __global float* backwardMapY,
+    int forwardMotionX_row,
+    int forwardMotionX_col,
+    int forwardMotionX_step,
+    int forwardMotionY_step,
+    int backwardMotionX_step,
+    int backwardMotionY_step,
+    int forwardMapX_step,
+    int forwardMapY_step,
+    int backwardMapX_step,
+    int backwardMapY_step
+    )
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < forwardMotionX_col && y < forwardMotionX_row)
+    {
+        float fx = forwardMotionX[y * forwardMotionX_step + x];
+        float fy = forwardMotionY[y * forwardMotionY_step + x];
+
+        float bx = backwardMotionX[y * backwardMotionX_step + x];
+        float by = backwardMotionY[y * backwardMotionY_step + x];
+
+        forwardMapX[y * forwardMapX_step + x] = x + bx;
+        forwardMapY[y * forwardMapY_step + x] = y + by;
+
+        backwardMapX[y * backwardMapX_step + x] = x + fx;
+        backwardMapY[y * backwardMapY_step + x] = y + fy;
+    }
+}
+
+__kernel void upscaleKernel(__global float* src,
+    __global float* dst,
+    int src_step,
+    int dst_step,
+    int src_row,
+    int src_col,
+    int scale,
+    int channels
+    )
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < src_col && y < src_row)
+    {
+        if(channels == 1)
+        {
+            dst[y * scale * dst_step + x * scale] = src[y * src_step + x];
+        }else if(channels == 3)
+        {
+            dst[y * channels * scale * dst_step + 3 * x * scale + 0] = src[y * channels * src_step + 3 * x + 0];
+            dst[y * channels * scale * dst_step + 3 * x * scale + 1] = src[y * channels * src_step + 3 * x + 1];
+            dst[y * channels * scale * dst_step + 3 * x * scale + 2] = src[y * channels * src_step + 3 * x + 2];
+        }else
+        {
+            dst[y * channels * scale * dst_step + 4 * x * scale + 0] = src[y * channels * src_step + 4 * x + 0];
+            dst[y * channels * scale * dst_step + 4 * x * scale + 1] = src[y * channels * src_step + 4 * x + 1];
+            dst[y * channels * scale * dst_step + 4 * x * scale + 2] = src[y * channels * src_step + 4 * x + 2];
+            dst[y * channels * scale * dst_step + 4 * x * scale + 3] = src[y * channels * src_step + 4 * x + 3];
+        }
+    }
+}
+
+
+float diffSign(float a, float b)
+{
+    return a > b ? 1.0f : a < b ? -1.0f : 0.0f;
+}
+
+float3 diffSign3(float3 a, float3 b)
+{
+    float3 pos;
+    pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f;
+    pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f;
+    pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f;
+    return pos;
+}
+
+float4 diffSign4(float4 a, float4 b)
+{
+    float4 pos;
+    pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f;
+    pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f;
+    pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f;
+    pos.w = 0.0f;
+    return pos;
+}
+
+__kernel void diffSignKernel(__global float* src1,
+    __global float* src2,
+    __global float* dst,
+    int src1_row,
+    int src1_col,
+    int dst_step,
+    int src1_step,
+    int src2_step)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < src1_col && y < src1_row)
+    {
+        dst[y * dst_step + x] = diffSign(src1[y * src1_step + x], src2[y * src2_step + x]);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+}
+
+__kernel void calcBtvRegularizationKernel(__global float* src,
+    __global float* dst,
+    int src_step,
+    int dst_step,
+    int src_row,
+    int src_col,
+    int ksize,
+    int channels,
+    __global float* c_btvRegWeights
+    )
+{
+    int x = get_global_id(0) + ksize;
+    int y = get_global_id(1) + ksize;
+
+    if ((y < src_row - ksize) && (x < src_col - ksize))
+    {
+        if(channels == 1)
+        {
+            const float srcVal = src[y * src_step + x];
+            float dstVal = 0.0f;
+
+            for (int m = 0, count = 0; m <= ksize; ++m)
+            {
+                for (int l = ksize; l + m >= 0; --l, ++count)
+                    dstVal = dstVal + c_btvRegWeights[count] * (diffSign(srcVal, src[(y + m) * src_step + (x + l)]) - diffSign(src[(y - m) * src_step + (x - l)], srcVal));
+            }
+            dst[y * dst_step + x] = dstVal;
+        }else if(channels == 3)
+        {
+            float3 srcVal;
+            srcVal.x = src[y * src_step + 3 * x + 0];
+            srcVal.y = src[y * src_step + 3 * x + 1];
+            srcVal.z = src[y * src_step + 3 * x + 2];
+
+            float3 dstVal;
+            dstVal.x = 0.0f;
+            dstVal.y = 0.0f;
+            dstVal.z = 0.0f;
+
+            for (int m = 0, count = 0; m <= ksize; ++m)
+            {
+                for (int l = ksize; l + m >= 0; --l, ++count)
+                {
+                    float3 src1;
+                    src1.x = src[(y + m) * src_step + 3 * (x + l) + 0];
+                    src1.y = src[(y + m) * src_step + 3 * (x + l) + 1];
+                    src1.z = src[(y + m) * src_step + 3 * (x + l) + 2];
+
+                    float3 src2;
+                    src2.x = src[(y - m) * src_step + 3 * (x - l) + 0];
+                    src2.y = src[(y - m) * src_step + 3 * (x - l) + 1];
+                    src2.z = src[(y - m) * src_step + 3 * (x - l) + 2];
+
+                    dstVal = dstVal + c_btvRegWeights[count] * (diffSign3(srcVal, src1) - diffSign3(src2, srcVal));
+                }
+            }
+            dst[y * dst_step + 3 * x + 0] = dstVal.x;
+            dst[y * dst_step + 3 * x + 1] = dstVal.y;
+            dst[y * dst_step + 3 * x + 2] = dstVal.z;
+        }else
+        {
+            float4 srcVal;
+            srcVal.x = src[y * src_step + 4 * x + 0];//r type =float
+            srcVal.y = src[y * src_step + 4 * x + 1];//g
+            srcVal.z = src[y * src_step + 4 * x + 2];//b
+            srcVal.w = src[y * src_step + 4 * x + 3];//a
+
+            float4 dstVal;
+            dstVal.x = 0.0f;
+            dstVal.y = 0.0f;
+            dstVal.z = 0.0f;
+            dstVal.w = 0.0f;
+
+            for (int m = 0, count = 0; m <= ksize; ++m)
+            {
+                for (int l = ksize; l + m >= 0; --l, ++count)
+                {
+                    float4 src1;
+                    src1.x = src[(y + m) * src_step + 4 * (x + l) + 0];
+                    src1.y = src[(y + m) * src_step + 4 * (x + l) + 1];
+                    src1.z = src[(y + m) * src_step + 4 * (x + l) + 2];
+                    src1.w = src[(y + m) * src_step + 4 * (x + l) + 3];
+
+                    float4 src2;
+                    src2.x = src[(y - m) * src_step + 4 * (x - l) + 0];
+                    src2.y = src[(y - m) * src_step + 4 * (x - l) + 1];
+                    src2.z = src[(y - m) * src_step + 4 * (x - l) + 2];
+                    src2.w = src[(y - m) * src_step + 4 * (x - l) + 3];
+
+                    dstVal = dstVal + c_btvRegWeights[count] * (diffSign4(srcVal, src1) - diffSign4(src2, srcVal));
+
+                }
+            }
+            dst[y * dst_step + 4 * x + 0] = dstVal.x;
+            dst[y * dst_step + 4 * x + 1] = dstVal.y;
+            dst[y * dst_step + 4 * x + 2] = dstVal.z;
+            dst[y * dst_step + 4 * x + 3] = dstVal.w;
+        }
+    }
+}
diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp
index 242b91ee6..85c74115a 100644
--- a/modules/superres/src/optical_flow.cpp
+++ b/modules/superres/src/optical_flow.cpp
@@ -169,7 +169,7 @@ namespace
 
 Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Farneback()
 {
-    return new Farneback;
+    return makePtr<Farneback>();
 }
 
 ///////////////////////////////////////////////////////////////////
@@ -258,7 +258,7 @@ namespace
 
 Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Simple()
 {
-    return new Simple;
+    return makePtr<Simple>();
 }
 
 ///////////////////////////////////////////////////////////////////
@@ -337,7 +337,7 @@ namespace
 
 Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1()
 {
-    return new DualTVL1;
+    return makePtr<DualTVL1>();
 }
 
 ///////////////////////////////////////////////////////////////////
@@ -503,7 +503,7 @@ namespace
 
 Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Brox_GPU()
 {
-    return new Brox_GPU;
+    return makePtr<Brox_GPU>();
 }
 
 ///////////////////////////////////////////////////////////////////
@@ -562,7 +562,7 @@ namespace
 
 Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_PyrLK_GPU()
 {
-    return new PyrLK_GPU;
+    return makePtr<PyrLK_GPU>();
 }
 
 ///////////////////////////////////////////////////////////////////
@@ -636,7 +636,7 @@ namespace
 
 Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Farneback_GPU()
 {
-    return new Farneback_GPU;
+    return makePtr<Farneback_GPU>();
 }
 
 ///////////////////////////////////////////////////////////////////
@@ -714,7 +714,273 @@ namespace
 
 Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1_GPU()
 {
-    return new DualTVL1_GPU;
+    return makePtr<DualTVL1_GPU>();
 }
 
 #endif // HAVE_OPENCV_GPUOPTFLOW
+#ifdef HAVE_OPENCV_OCL
+
+namespace
+{
+    class oclOpticalFlow : public DenseOpticalFlowExt
+    {
+    public:
+        explicit oclOpticalFlow(int work_type);
+
+        void calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2);
+        void collectGarbage();
+
+    protected:
+        virtual void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) = 0;
+
+    private:
+        int work_type_;
+        cv::ocl::oclMat buf_[6];
+        cv::ocl::oclMat u_, v_, flow_;
+    };
+
+    oclOpticalFlow::oclOpticalFlow(int work_type) : work_type_(work_type)
+    {
+    }
+
+    void oclOpticalFlow::calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2)
+    {
+        ocl::oclMat& _frame0 = ocl::getOclMatRef(frame0);
+        ocl::oclMat& _frame1 = ocl::getOclMatRef(frame1);
+        ocl::oclMat& _flow1  = ocl::getOclMatRef(flow1);
+        ocl::oclMat& _flow2  = ocl::getOclMatRef(flow2);
+
+        CV_Assert( _frame1.type() == _frame0.type() );
+        CV_Assert( _frame1.size() == _frame0.size() );
+
+        cv::ocl::oclMat input0_ = convertToType(_frame0, work_type_, buf_[2], buf_[3]);
+        cv::ocl::oclMat input1_ = convertToType(_frame1, work_type_, buf_[4], buf_[5]);
+
+        impl(input0_, input1_, u_, v_);//go to tvl1 algorithm
+
+        u_.copyTo(_flow1);
+        v_.copyTo(_flow2);
+    }
+
+    void oclOpticalFlow::collectGarbage()
+    {
+        for (int i = 0; i < 6; ++i)
+            buf_[i].release();
+        u_.release();
+        v_.release();
+        flow_.release();
+    }
+}
+///////////////////////////////////////////////////////////////////
+// PyrLK_OCL
+
+namespace
+{
+    class PyrLK_OCL : public oclOpticalFlow
+    {
+    public:
+        AlgorithmInfo* info() const;
+
+        PyrLK_OCL();
+
+        void collectGarbage();
+
+    protected:
+        void impl(const ocl::oclMat& input0, const ocl::oclMat& input1, ocl::oclMat& dst1, ocl::oclMat& dst2);
+
+    private:
+        int winSize_;
+        int maxLevel_;
+        int iterations_;
+
+        ocl::PyrLKOpticalFlow alg_;
+    };
+
+    CV_INIT_ALGORITHM(PyrLK_OCL, "DenseOpticalFlowExt.PyrLK_OCL",
+        obj.info()->addParam(obj, "winSize", obj.winSize_);
+    obj.info()->addParam(obj, "maxLevel", obj.maxLevel_);
+    obj.info()->addParam(obj, "iterations", obj.iterations_));
+
+    PyrLK_OCL::PyrLK_OCL() : oclOpticalFlow(CV_8UC1)
+    {
+        winSize_ = alg_.winSize.width;
+        maxLevel_ = alg_.maxLevel;
+        iterations_ = alg_.iters;
+    }
+
+    void PyrLK_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2)
+    {
+        alg_.winSize.width = winSize_;
+        alg_.winSize.height = winSize_;
+        alg_.maxLevel = maxLevel_;
+        alg_.iters = iterations_;
+
+        alg_.dense(input0, input1, dst1, dst2);
+    }
+
+    void PyrLK_OCL::collectGarbage()
+    {
+        alg_.releaseMemory();
+        oclOpticalFlow::collectGarbage();
+    }
+}
+
+Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_PyrLK_OCL()
+{
+    return makePtr<PyrLK_OCL>();
+}
+
+///////////////////////////////////////////////////////////////////
+// DualTVL1_OCL
+
+namespace
+{
+    class DualTVL1_OCL : public oclOpticalFlow
+    {
+    public:
+        AlgorithmInfo* info() const;
+
+        DualTVL1_OCL();
+
+        void collectGarbage();
+
+    protected:
+        void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2);
+
+    private:
+        double tau_;
+        double lambda_;
+        double theta_;
+        int nscales_;
+        int warps_;
+        double epsilon_;
+        int iterations_;
+        bool useInitialFlow_;
+
+        ocl::OpticalFlowDual_TVL1_OCL alg_;
+    };
+
+    CV_INIT_ALGORITHM(DualTVL1_OCL, "DenseOpticalFlowExt.DualTVL1_OCL",
+    obj.info()->addParam(obj, "tau", obj.tau_);
+    obj.info()->addParam(obj, "lambda", obj.lambda_);
+    obj.info()->addParam(obj, "theta", obj.theta_);
+    obj.info()->addParam(obj, "nscales", obj.nscales_);
+    obj.info()->addParam(obj, "warps", obj.warps_);
+    obj.info()->addParam(obj, "epsilon", obj.epsilon_);
+    obj.info()->addParam(obj, "iterations", obj.iterations_);
+    obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow_));
+
+    DualTVL1_OCL::DualTVL1_OCL() : oclOpticalFlow(CV_8UC1)
+    {
+        tau_ = alg_.tau;
+        lambda_ = alg_.lambda;
+        theta_ = alg_.theta;
+        nscales_ = alg_.nscales;
+        warps_ = alg_.warps;
+        epsilon_ = alg_.epsilon;
+        iterations_ = alg_.iterations;
+        useInitialFlow_ = alg_.useInitialFlow;
+    }
+
+    void DualTVL1_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2)
+    {
+        alg_.tau = tau_;
+        alg_.lambda = lambda_;
+        alg_.theta = theta_;
+        alg_.nscales = nscales_;
+        alg_.warps = warps_;
+        alg_.epsilon = epsilon_;
+        alg_.iterations = iterations_;
+        alg_.useInitialFlow = useInitialFlow_;
+
+        alg_(input0, input1, dst1, dst2);
+
+    }
+
+    void DualTVL1_OCL::collectGarbage()
+    {
+        alg_.collectGarbage();
+        oclOpticalFlow::collectGarbage();
+    }
+}
+
+Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1_OCL()
+{
+    return makePtr<DualTVL1_OCL>();
+}
+
+///////////////////////////////////////////////////////////////////
+// FarneBack
+
+namespace
+{
+    class FarneBack_OCL : public oclOpticalFlow
+    {
+    public:
+        AlgorithmInfo* info() const;
+
+        FarneBack_OCL();
+
+        void collectGarbage();
+
+    protected:
+        void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2);
+
+    private:
+        double pyrScale_;
+        int numLevels_;
+        int winSize_;
+        int numIters_;
+        int polyN_;
+        double polySigma_;
+        int flags_;
+
+        ocl::FarnebackOpticalFlow alg_;
+    };
+
+    CV_INIT_ALGORITHM(FarneBack_OCL, "DenseOpticalFlowExt.FarneBack_OCL",
+        obj.info()->addParam(obj, "pyrScale", obj.pyrScale_);
+    obj.info()->addParam(obj, "numLevels", obj.numLevels_);
+    obj.info()->addParam(obj, "winSize", obj.winSize_);
+    obj.info()->addParam(obj, "numIters", obj.numIters_);
+    obj.info()->addParam(obj, "polyN", obj.polyN_);
+    obj.info()->addParam(obj, "polySigma", obj.polySigma_);
+    obj.info()->addParam(obj, "flags", obj.flags_));
+
+    FarneBack_OCL::FarneBack_OCL() : oclOpticalFlow(CV_8UC1)
+    {
+        pyrScale_ = alg_.pyrScale;
+        numLevels_ = alg_.numLevels;
+        winSize_ = alg_.winSize;
+        numIters_ = alg_.numIters;
+        polyN_ = alg_.polyN;
+        polySigma_ = alg_.polySigma;
+        flags_ = alg_.flags;
+    }
+
+    void FarneBack_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2)
+    {
+        alg_.pyrScale = pyrScale_;
+        alg_.numLevels = numLevels_;
+        alg_.winSize = winSize_;
+        alg_.numIters = numIters_;
+        alg_.polyN = polyN_;
+        alg_.polySigma = polySigma_;
+        alg_.flags = flags_;
+
+        alg_(input0, input1, dst1, dst2);
+    }
+
+    void FarneBack_OCL::collectGarbage()
+    {
+        alg_.releaseMemory();
+        oclOpticalFlow::collectGarbage();
+    }
+}
+
+Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Farneback_OCL()
+{
+    return makePtr<FarneBack_OCL>();
+}
+
+#endif
diff --git a/modules/superres/src/precomp.cpp b/modules/superres/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/superres/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/superres/src/precomp.hpp b/modules/superres/src/precomp.hpp
index f3c993e42..822d48879 100644
--- a/modules/superres/src/precomp.hpp
+++ b/modules/superres/src/precomp.hpp
@@ -81,6 +81,10 @@
 #  include "opencv2/gpucodec.hpp"
 #endif
 
+#ifdef HAVE_OPENCV_OCL
+    #include "opencv2/ocl/private/util.hpp"
+#endif
+
 #ifdef HAVE_OPENCV_HIGHGUI
     #include "opencv2/highgui.hpp"
 #endif
diff --git a/modules/superres/test/test_precomp.cpp b/modules/superres/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/superres/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/superres/test/test_precomp.hpp b/modules/superres/test/test_precomp.hpp
index 4ef73030b..47334e324 100644
--- a/modules/superres/test/test_precomp.hpp
+++ b/modules/superres/test/test_precomp.hpp
@@ -56,6 +56,7 @@
 #include "opencv2/imgproc.hpp"
 #include "opencv2/ts.hpp"
 #include "opencv2/superres.hpp"
+#include "cvconfig.h"
 #include "input_array_utility.hpp"
 
 #endif
diff --git a/modules/superres/test/test_superres.cpp b/modules/superres/test/test_superres.cpp
index 07abd04fa..d33e18273 100644
--- a/modules/superres/test/test_superres.cpp
+++ b/modules/superres/test/test_superres.cpp
@@ -59,7 +59,7 @@ private:
 AllignedFrameSource::AllignedFrameSource(const cv::Ptr<cv::superres::FrameSource>& base, int scale) :
     base_(base), scale_(scale)
 {
-    CV_Assert( !base_.empty() );
+    CV_Assert( base_ );
 }
 
 void AllignedFrameSource::nextFrame(cv::OutputArray frame)
@@ -101,7 +101,7 @@ private:
 DegradeFrameSource::DegradeFrameSource(const cv::Ptr<cv::superres::FrameSource>& base, int scale) :
     base_(base), iscale_(1.0 / scale)
 {
-    CV_Assert( !base_.empty() );
+    CV_Assert( base_ );
 }
 
 void addGaussNoise(cv::Mat& image, double sigma)
@@ -229,7 +229,8 @@ void SuperResolution::RunTest(cv::Ptr<cv::superres::SuperResolution> superRes)
     superRes->set("temporalAreaRadius", temporalAreaRadius);
 
     cv::Ptr<cv::superres::FrameSource> goldSource(new AllignedFrameSource(cv::superres::createFrameSource_Video(inputVideoName), scale));
-    cv::Ptr<cv::superres::FrameSource> lowResSource(new DegradeFrameSource(new AllignedFrameSource(cv::superres::createFrameSource_Video(inputVideoName), scale), scale));
+    cv::Ptr<cv::superres::FrameSource> lowResSource(new DegradeFrameSource(
+        cv::makePtr<AllignedFrameSource>(cv::superres::createFrameSource_Video(inputVideoName), scale), scale));
 
     // skip first frame
     cv::Mat frame;
@@ -276,3 +277,14 @@ TEST_F(SuperResolution, BTVL1_GPU)
 }
 
 #endif
+
+#if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL)
+
+TEST_F(SuperResolution, BTVL1_OCL)
+{
+    std::vector<cv::ocl::Info> infos;
+    cv::ocl::getDevice(infos);
+    RunTest(cv::superres::createSuperResolution_BTVL1_OCL());
+}
+
+#endif
diff --git a/modules/ts/CMakeLists.txt b/modules/ts/CMakeLists.txt
index 1b0bba398..0f9c3fe77 100644
--- a/modules/ts/CMakeLists.txt
+++ b/modules/ts/CMakeLists.txt
@@ -4,10 +4,7 @@ if(IOS)
   ocv_module_disable(ts)
 endif()
 
-if(MINGW)
-  set(OPENCV_MODULE_TYPE STATIC)
-endif()
-
+set(OPENCV_MODULE_TYPE STATIC)
 set(OPENCV_MODULE_IS_PART_OF_WORLD FALSE)
 
 if(HAVE_CUDA)
@@ -22,10 +19,4 @@ ocv_glob_module_sources()
 ocv_module_include_directories()
 ocv_create_module()
 
-if(BUILD_SHARED_LIBS AND NOT MINGW)
-  add_definitions(-DGTEST_CREATE_SHARED_LIBRARY=1)
-else()
-  add_definitions(-DGTEST_CREATE_SHARED_LIBRARY=0)
-endif()
-
 ocv_add_precompiled_headers(${the_module})
diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp
index f4ff82777..5d9881014 100644
--- a/modules/ts/include/opencv2/ts.hpp
+++ b/modules/ts/include/opencv2/ts.hpp
@@ -4,12 +4,8 @@
 #include "opencv2/core/cvdef.h"
 #include <stdarg.h> // for va_list
 
-#include "cvconfig.h"
-
-#ifndef GTEST_CREATE_SHARED_LIBRARY
-#ifdef BUILD_SHARED_LIBS
-#define GTEST_LINKED_AS_SHARED_LIBRARY 1
-#endif
+#ifdef HAVE_WINRT
+    #pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
 #endif
 
 #ifdef _MSC_VER
@@ -551,6 +547,13 @@ int main(int argc, char **argv) \
     return RUN_ALL_TESTS(); \
 }
 
+// This usually only makes sense in perf tests with several implementations,
+// some of which are not available.
+#define CV_TEST_FAIL_NO_IMPL() do { \
+    ::testing::Test::RecordProperty("custom_status", "noimpl"); \
+    FAIL() << "No equivalent implementation."; \
+} while (0)
+
 #endif
 
 #include "opencv2/ts/ts_perf.hpp"
diff --git a/modules/ts/include/opencv2/ts/gpu_perf.hpp b/modules/ts/include/opencv2/ts/gpu_perf.hpp
index cff6f8dc6..8f1ba8abc 100644
--- a/modules/ts/include/opencv2/ts/gpu_perf.hpp
+++ b/modules/ts/include/opencv2/ts/gpu_perf.hpp
@@ -47,6 +47,7 @@
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/ts/ts_perf.hpp"
+#include "cvconfig.h"
 
 namespace perf
 {
@@ -105,6 +106,22 @@ namespace perf
     CV_EXPORTS void printCudaInfo();
 
     CV_EXPORTS void sortKeyPoints(std::vector<cv::KeyPoint>& keypoints, cv::InputOutputArray _descriptors = cv::noArray());
+
+#ifdef HAVE_CUDA
+    #define CV_PERF_TEST_CUDA_MAIN(modulename) \
+        int main(int argc, char **argv)\
+        {\
+            const char * impls[] = { "cuda", "plain" };\
+            CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, perf::printCudaInfo())\
+        }
+#else
+    #define CV_PERF_TEST_CUDA_MAIN(modulename) \
+        int main(int argc, char **argv)\
+        {\
+            const char * plain_only[] = { "plain" };\
+            CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only)\
+        }
+#endif
 }
 
 #endif // __OPENCV_GPU_PERF_UTILITY_HPP__
diff --git a/modules/ts/include/opencv2/ts/gpu_test.hpp b/modules/ts/include/opencv2/ts/gpu_test.hpp
index dab5fd3b9..2ff2ca0bf 100644
--- a/modules/ts/include/opencv2/ts/gpu_test.hpp
+++ b/modules/ts/include/opencv2/ts/gpu_test.hpp
@@ -44,6 +44,7 @@
 #define __OPENCV_GPU_TEST_UTILITY_HPP__
 
 #include <stdexcept>
+#include "cvconfig.h"
 #include "opencv2/core.hpp"
 #include "opencv2/core/gpu.hpp"
 #include "opencv2/highgui.hpp"
diff --git a/modules/ts/include/opencv2/ts/ts_gtest.h b/modules/ts/include/opencv2/ts/ts_gtest.h
index 3ccf48584..60f30b8f2 100644
--- a/modules/ts/include/opencv2/ts/ts_gtest.h
+++ b/modules/ts/include/opencv2/ts/ts_gtest.h
@@ -17566,6 +17566,9 @@ GTEST_DECLARE_string_(color);
 // the tests to run. If the filter is not given all tests are executed.
 GTEST_DECLARE_string_(filter);
 
+// OpenCV extension: same as filter, but for the parameters string.
+GTEST_DECLARE_string_(param_filter);
+
 // This flag causes the Google Test to list tests. None of the tests listed
 // are actually run if the flag is provided.
 GTEST_DECLARE_bool_(list_tests);
diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp
index 2faba1263..74ea47e13 100644
--- a/modules/ts/include/opencv2/ts/ts_perf.hpp
+++ b/modules/ts/include/opencv2/ts/ts_perf.hpp
@@ -1,14 +1,6 @@
 #ifndef __OPENCV_TS_PERF_HPP__
 #define __OPENCV_TS_PERF_HPP__
 
-#include "cvconfig.h"
-
-#ifndef GTEST_CREATE_SHARED_LIBRARY
-#  ifdef BUILD_SHARED_LIBS
-#    define GTEST_LINKED_AS_SHARED_LIBRARY 1
-#  endif
-#endif
-
 #include "opencv2/core.hpp"
 #include "ts_gtest.h"
 
@@ -218,8 +210,7 @@ public:
   static bool targetDevice();
 };
 
-# define PERF_RUN_GPU()  ::perf::GpuPerf::targetDevice()
-
+#define PERF_RUN_GPU()  ::perf::GpuPerf::targetDevice()
 
 /*****************************************************************************************\
 *                            Container for performance metrics                            *
@@ -261,7 +252,11 @@ public:
     TestBase();
 
     static void Init(int argc, const char* const argv[]);
+    static void Init(const std::vector<std::string> & availableImpls,
+                     int argc, const char* const argv[]);
+    static void RecordRunParameters();
     static std::string getDataPath(const std::string& relativePath);
+    static std::string getSelectedImpl();
 
 protected:
     virtual void PerfTestBody() = 0;
@@ -474,16 +469,37 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
     INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
     void fixture##_##name::PerfTestBody()
 
+#if defined(_MSC_VER) && (_MSC_VER <= 1400)
+#define CV_PERF_TEST_MAIN_INTERNALS_ARGS(...)	\
+    while (++argc >= (--argc,-1)) {__VA_ARGS__; break;} /*this ugly construction is needed for VS 2005*/
+#else
+#define CV_PERF_TEST_MAIN_INTERNALS_ARGS(...)	\
+    __VA_ARGS__;
+#endif
 
-#define CV_PERF_TEST_MAIN(testsuitname, ...) \
-int main(int argc, char **argv)\
-{\
-    while (++argc >= (--argc,-1)) {__VA_ARGS__; break;} /*this ugly construction is needed for VS 2005*/\
-    ::perf::Regression::Init(#testsuitname);\
-    ::perf::TestBase::Init(argc, argv);\
+#define CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, ...)	\
+    CV_PERF_TEST_MAIN_INTERNALS_ARGS(__VA_ARGS__) \
+    ::perf::Regression::Init(#modulename);\
+    ::perf::TestBase::Init(std::vector<std::string>(impls, impls + sizeof impls / sizeof *impls),\
+                           argc, argv);\
     ::testing::InitGoogleTest(&argc, argv);\
     cvtest::printVersionInfo();\
-    return RUN_ALL_TESTS();\
+    ::testing::Test::RecordProperty("cv_module_name", #modulename);\
+    ::perf::TestBase::RecordRunParameters();\
+    return RUN_ALL_TESTS();
+
+// impls must be an array, not a pointer; "plain" should always be one of the implementations
+#define CV_PERF_TEST_MAIN_WITH_IMPLS(modulename, impls, ...) \
+int main(int argc, char **argv)\
+{\
+    CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, __VA_ARGS__)\
+}
+
+#define CV_PERF_TEST_MAIN(modulename, ...) \
+int main(int argc, char **argv)\
+{\
+    const char * plain_only[] = { "plain" };\
+    CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only, __VA_ARGS__)\
 }
 
 #define TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); stopTimer())
diff --git a/modules/ts/misc/perf_tests_timing.py b/modules/ts/misc/perf_tests_timing.py
index d082f9c26..dd44f1a63 100644
--- a/modules/ts/misc/perf_tests_timing.py
+++ b/modules/ts/misc/perf_tests_timing.py
@@ -156,4 +156,4 @@ if __name__ == "__main__":
             if not options.failedOnly:
                 print '\nOverall time: %.2f min\n' % overall_time
             tbl.consolePrintTable(sys.stdout)
-            print 2 * '\n'
\ No newline at end of file
+            print 2 * '\n'
diff --git a/modules/ts/misc/report.py b/modules/ts/misc/report.py
index 6da89e512..045fee8d1 100755
--- a/modules/ts/misc/report.py
+++ b/modules/ts/misc/report.py
@@ -100,4 +100,4 @@ if __name__ == "__main__":
             tbl.htmlPrintTable(sys.stdout)
             htmlPrintFooter(sys.stdout)
     else:
-        tbl.consolePrintTable(sys.stdout)
\ No newline at end of file
+        tbl.consolePrintTable(sys.stdout)
diff --git a/modules/ts/misc/testlog_parser.py b/modules/ts/misc/testlog_parser.py
index 7ae6aa598..4ab0a3ef2 100755
--- a/modules/ts/misc/testlog_parser.py
+++ b/modules/ts/misc/testlog_parser.py
@@ -1,6 +1,9 @@
 #!/usr/bin/env python
 
-import sys, re, os.path
+import collections
+import re
+import os.path
+import sys
 from xml.dom.minidom import parse
 
 class TestInfo(object):
@@ -10,10 +13,17 @@ class TestInfo(object):
         self.name = xmlnode.getAttribute("name")
         self.value_param = xmlnode.getAttribute("value_param")
         self.type_param = xmlnode.getAttribute("type_param")
-        if xmlnode.getElementsByTagName("failure"):
+
+        custom_status = xmlnode.getAttribute("custom_status")
+        failures = xmlnode.getElementsByTagName("failure")
+
+        if len(custom_status) > 0:
+            self.status = custom_status
+        elif len(failures) > 0:
             self.status = "failed"
         else:
             self.status = xmlnode.getAttribute("status")
+
         if self.name.startswith("DISABLED_"):
             self.status = "disabled"
             self.fixture = self.fixture.replace("DISABLED_", "")
@@ -100,34 +110,39 @@ class TestInfo(object):
     def dump(self, units="ms"):
         print "%s ->\t\033[1;31m%s\033[0m = \t%.2f%s" % (str(self), self.status, self.get("gmean", units), units)
 
-    def shortName(self):
+
+    def getName(self):
         pos = self.name.find("/")
         if pos > 0:
-            name = self.name[:pos]
-        else:
-            name = self.name
-        if self.fixture.endswith(name):
-            fixture = self.fixture[:-len(name)]
+            return self.name[:pos]
+        return self.name
+
+
+    def getFixture(self):
+        if self.fixture.endswith(self.getName()):
+            fixture = self.fixture[:-len(self.getName())]
         else:
             fixture = self.fixture
         if fixture.endswith("_"):
             fixture = fixture[:-1]
+        return fixture
+
+
+    def param(self):
+        return '::'.join(filter(None, [self.type_param, self.value_param]))
+
+    def shortName(self):
+        name = self.getName()
+        fixture = self.getFixture()
         return '::'.join(filter(None, [name, fixture]))
 
+
     def __str__(self):
-        pos = self.name.find("/")
-        if pos > 0:
-            name = self.name[:pos]
-        else:
-            name = self.name
-        if self.fixture.endswith(name):
-            fixture = self.fixture[:-len(name)]
-        else:
-            fixture = self.fixture
-        if fixture.endswith("_"):
-            fixture = fixture[:-1]
+        name = self.getName()
+        fixture = self.getFixture()
         return '::'.join(filter(None, [name, fixture, self.type_param, self.value_param]))
 
+
     def __cmp__(self, other):
         r = cmp(self.fixture, other.fixture);
         if r != 0:
@@ -154,12 +169,31 @@ class TestInfo(object):
                 return 1
         return 0
 
+# This is a Sequence for compatibility with old scripts,
+# which treat parseLogFile's return value as a list.
+class TestRunInfo(collections.Sequence):
+    def __init__(self, properties, tests):
+        self.properties = properties
+        self.tests = tests
+
+    def __len__(self):
+        return len(self.tests)
+
+    def __getitem__(self, key):
+        return self.tests[key]
+
 def parseLogFile(filename):
-    tests = []
     log = parse(filename)
-    for case in log.getElementsByTagName("testcase"):
-        tests.append(TestInfo(case))
-    return tests
+
+    properties = {
+        attr_name[3:]: attr_value
+        for (attr_name, attr_value) in log.documentElement.attributes.items()
+        if attr_name.startswith('cv_')
+    }
+
+    tests = map(TestInfo, log.getElementsByTagName("testcase"))
+
+    return TestRunInfo(properties, tests)
 
 
 if __name__ == "__main__":
@@ -168,8 +202,18 @@ if __name__ == "__main__":
         exit(0)
 
     for arg in sys.argv[1:]:
-        print "Tests found in", arg
-        tests = parseLogFile(arg)
-        for t in sorted(tests):
+        print "Processing {}...".format(arg)
+
+        run = parseLogFile(arg)
+
+        print "Properties:"
+
+        for (prop_name, prop_value) in run.properties.items():
+          print "\t{} = {}".format(prop_name, prop_value)
+
+        print "Tests:"
+
+        for t in sorted(run.tests):
             t.dump()
+
         print
diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py
new file mode 100755
index 000000000..e71a7f66c
--- /dev/null
+++ b/modules/ts/misc/xls-report.py
@@ -0,0 +1,371 @@
+#!/usr/bin/env python
+
+"""
+    This script can generate XLS reports from OpenCV tests' XML output files.
+
+    To use it, first, create a directory for each machine you ran tests on.
+    Each such directory will become a sheet in the report. Put each XML file
+    into the corresponding directory.
+
+    Then, create your configuration file(s). You can have a global configuration
+    file (specified with the -c option), and per-sheet configuration files, which
+    must be called sheet.conf and placed in the directory corresponding to the sheet.
+    The settings in the per-sheet configuration file will override those in the
+    global configuration file, if both are present.
+
+    A configuration file must consist of a Python dictionary. The following keys
+    will be recognized:
+
+    * 'comparisons': [{'from': string, 'to': string}]
+        List of configurations to compare performance between. For each item,
+        the sheet will have a column showing speedup from configuration named
+        'from' to configuration named "to".
+
+    * 'configuration_matchers': [{'properties': {string: object}, 'name': string}]
+        Instructions for matching test run property sets to configuration names.
+
+        For each found XML file:
+
+        1) All attributes of the root element starting with the prefix 'cv_' are
+           placed in a dictionary, with the cv_ prefix stripped and the cv_module_name
+           element deleted.
+
+        2) The first matcher for which the XML's file property set contains the same
+           keys with equal values as its 'properties' dictionary is searched for.
+           A missing property can be matched by using None as the value.
+
+           Corollary 1: you should place more specific matchers before less specific
+           ones.
+
+           Corollary 2: an empty 'properties' dictionary matches every property set.
+
+        3) If a matching matcher is found, its 'name' string is presumed to be the name
+           of the configuration the XML file corresponds to. A warning is printed if
+           two different property sets match to the same configuration name.
+
+        4) If a such a matcher isn't found, if --include-unmatched was specified, the
+           configuration name is assumed to be the relative path from the sheet's
+           directory to the XML file's containing directory. If the XML file isinstance
+           directly inside the sheet's directory, the configuration name is instead
+           a dump of all its properties. If --include-unmatched wasn't specified,
+           the XML file is ignored and a warning is printed.
+
+    * 'configurations': [string]
+        List of names for compile-time and runtime configurations of OpenCV.
+        Each item will correspond to a column of the sheet.
+
+    * 'module_colors': {string: string}
+        Mapping from module name to color name. In the sheet, cells containing module
+        names from this mapping will be colored with the corresponding color. You can
+        find the list of available colors here:
+        <http://www.simplistix.co.uk/presentations/python-excel.pdf>.
+
+    * 'sheet_name': string
+        Name for the sheet. If this parameter is missing, the name of sheet's directory
+        will be used.
+
+    * 'sheet_properties': [(string, string)]
+        List of arbitrary (key, value) pairs that somehow describe the sheet. Will be
+        dumped into the first row of the sheet in string form.
+
+    Note that all keys are optional, although to get useful results, you'll want to
+    specify at least 'configurations' and 'configuration_matchers'.
+
+    Finally, run the script. Use the --help option for usage information.
+"""
+
+from __future__ import division
+
+import ast
+import errno
+import fnmatch
+import logging
+import numbers
+import os, os.path
+import re
+
+from argparse import ArgumentParser
+from glob import glob
+from itertools import ifilter
+
+import xlwt
+
+from testlog_parser import parseLogFile
+
+re_image_size = re.compile(r'^ \d+ x \d+$', re.VERBOSE)
+re_data_type = re.compile(r'^ (?: 8 | 16 | 32 | 64 ) [USF] C [1234] $', re.VERBOSE)
+
+time_style = xlwt.easyxf(num_format_str='#0.00')
+no_time_style = xlwt.easyxf('pattern: pattern solid, fore_color gray25')
+
+speedup_style = time_style
+good_speedup_style = xlwt.easyxf('font: color green', num_format_str='#0.00')
+bad_speedup_style = xlwt.easyxf('font: color red', num_format_str='#0.00')
+no_speedup_style = no_time_style
+error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange')
+header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top, wrap True')
+subheader_style = xlwt.easyxf('alignment: horizontal centre, vertical top')
+
+class Collector(object):
+    def __init__(self, config_match_func, include_unmatched):
+        self.__config_cache = {}
+        self.config_match_func = config_match_func
+        self.include_unmatched = include_unmatched
+        self.tests = {}
+        self.extra_configurations = set()
+
+    # Format a sorted sequence of pairs as if it was a dictionary.
+    # We can't just use a dictionary instead, since we want to preserve the sorted order of the keys.
+    @staticmethod
+    def __format_config_cache_key(pairs, multiline=False):
+        return (
+          ('{\n' if multiline else '{') +
+          (',\n' if multiline else ', ').join(
+             ('  ' if multiline else '') + repr(k) + ': ' + repr(v) for (k, v) in pairs) +
+          ('\n}\n' if multiline else '}')
+        )
+
+    def collect_from(self, xml_path, default_configuration):
+        run = parseLogFile(xml_path)
+
+        module = run.properties['module_name']
+
+        properties = run.properties.copy()
+        del properties['module_name']
+
+        props_key = tuple(sorted(properties.iteritems())) # dicts can't be keys
+
+        if props_key in self.__config_cache:
+            configuration = self.__config_cache[props_key]
+        else:
+            configuration = self.config_match_func(properties)
+
+            if configuration is None:
+                if self.include_unmatched:
+                    if default_configuration is not None:
+                        configuration = default_configuration
+                    else:
+                        configuration = Collector.__format_config_cache_key(props_key, multiline=True)
+
+                    self.extra_configurations.add(configuration)
+                else:
+                    logging.warning('failed to match properties to a configuration: %s',
+                        Collector.__format_config_cache_key(props_key))
+
+            else:
+                same_config_props = [it[0] for it in self.__config_cache.iteritems() if it[1] == configuration]
+                if len(same_config_props) > 0:
+                    logging.warning('property set %s matches the same configuration %r as property set %s',
+                        Collector.__format_config_cache_key(props_key),
+                        configuration,
+                        Collector.__format_config_cache_key(same_config_props[0]))
+
+            self.__config_cache[props_key] = configuration
+
+        if configuration is None: return
+
+        module_tests = self.tests.setdefault(module, {})
+
+        for test in run.tests:
+            test_results = module_tests.setdefault((test.shortName(), test.param()), {})
+            new_result = test.get("gmean") if test.status == 'run' else test.status
+            test_results[configuration] = min(
+              test_results.get(configuration), new_result,
+              key=lambda r: (1, r) if isinstance(r, numbers.Number) else
+                            (2,) if r is not None else
+                            (3,)
+            ) # prefer lower result; prefer numbers to errors and errors to nothing
+
+def make_match_func(matchers):
+    def match_func(properties):
+        for matcher in matchers:
+            if all(properties.get(name) == value
+                   for (name, value) in matcher['properties'].iteritems()):
+                return matcher['name']
+
+        return None
+
+    return match_func
+
+def main():
+    arg_parser = ArgumentParser(description='Build an XLS performance report.')
+    arg_parser.add_argument('sheet_dirs', nargs='+', metavar='DIR', help='directory containing perf test logs')
+    arg_parser.add_argument('-o', '--output', metavar='XLS', default='report.xls', help='name of output file')
+    arg_parser.add_argument('-c', '--config', metavar='CONF', help='global configuration file')
+    arg_parser.add_argument('--include-unmatched', action='store_true',
+        help='include results from XML files that were not recognized by configuration matchers')
+    arg_parser.add_argument('--show-times-per-pixel', action='store_true',
+        help='for tests that have an image size parameter, show per-pixel time, as well as total time')
+
+    args = arg_parser.parse_args()
+
+    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)
+
+    if args.config is not None:
+        with open(args.config) as global_conf_file:
+            global_conf = ast.literal_eval(global_conf_file.read())
+    else:
+        global_conf = {}
+
+    wb = xlwt.Workbook()
+
+    for sheet_path in args.sheet_dirs:
+        try:
+            with open(os.path.join(sheet_path, 'sheet.conf')) as sheet_conf_file:
+                sheet_conf = ast.literal_eval(sheet_conf_file.read())
+        except IOError as ioe:
+            if ioe.errno != errno.ENOENT: raise
+            sheet_conf = {}
+            logging.debug('no sheet.conf for %s', sheet_path)
+
+        sheet_conf = dict(global_conf.items() + sheet_conf.items())
+
+        config_names = sheet_conf.get('configurations', [])
+        config_matchers = sheet_conf.get('configuration_matchers', [])
+
+        collector = Collector(make_match_func(config_matchers), args.include_unmatched)
+
+        for root, _, filenames in os.walk(sheet_path):
+            logging.info('looking in %s', root)
+            for filename in fnmatch.filter(filenames, '*.xml'):
+                if os.path.normpath(sheet_path) == os.path.normpath(root):
+                  default_conf = None
+                else:
+                  default_conf = os.path.relpath(root, sheet_path)
+                collector.collect_from(os.path.join(root, filename), default_conf)
+
+        config_names.extend(sorted(collector.extra_configurations - set(config_names)))
+
+        sheet = wb.add_sheet(sheet_conf.get('sheet_name', os.path.basename(os.path.abspath(sheet_path))))
+
+        sheet_properties = sheet_conf.get('sheet_properties', [])
+
+        sheet.write(0, 0, 'Properties:')
+
+        sheet.write(0, 1,
+          'N/A' if len(sheet_properties) == 0 else
+          ' '.join(str(k) + '=' + repr(v) for (k, v) in sheet_properties))
+
+        sheet.row(2).height = 800
+        sheet.panes_frozen = True
+        sheet.remove_splits = True
+
+        sheet_comparisons = sheet_conf.get('comparisons', [])
+
+        row = 2
+
+        col = 0
+
+        for (w, caption) in [
+                (2500, 'Module'),
+                (10000, 'Test'),
+                (2000, 'Image\nwidth'),
+                (2000, 'Image\nheight'),
+                (2000, 'Data\ntype'),
+                (7500, 'Other parameters')]:
+            sheet.col(col).width = w
+            if args.show_times_per_pixel:
+                sheet.write_merge(row, row + 1, col, col, caption, header_style)
+            else:
+                sheet.write(row, col, caption, header_style)
+            col += 1
+
+        for config_name in config_names:
+            if args.show_times_per_pixel:
+                sheet.col(col).width = 3000
+                sheet.col(col + 1).width = 3000
+                sheet.write_merge(row, row, col, col + 1, config_name, header_style)
+                sheet.write(row + 1, col, 'total, ms', subheader_style)
+                sheet.write(row + 1, col + 1, 'per pixel, ns', subheader_style)
+                col += 2
+            else:
+                sheet.col(col).width = 4000
+                sheet.write(row, col, config_name, header_style)
+                col += 1
+
+        col += 1 # blank column between configurations and comparisons
+
+        for comp in sheet_comparisons:
+            sheet.col(col).width = 4000
+            caption = comp['to'] + '\nvs\n' + comp['from']
+            if args.show_times_per_pixel:
+                sheet.write_merge(row, row + 1, col, col, caption, header_style)
+            else:
+                sheet.write(row, col, caption, header_style)
+            col += 1
+
+        row += 2 if args.show_times_per_pixel else 1
+
+        sheet.horz_split_pos = row
+        sheet.horz_split_first_visible = row
+
+        module_colors = sheet_conf.get('module_colors', {})
+        module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color))
+                         for module, color in module_colors.iteritems()}
+
+        for module, tests in sorted(collector.tests.iteritems()):
+            for ((test, param), configs) in sorted(tests.iteritems()):
+                sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style))
+                sheet.write(row, 1, test)
+
+                param_list = param[1:-1].split(', ') if param.startswith('(') and param.endswith(')') else [param]
+
+                image_size = next(ifilter(re_image_size.match, param_list), None)
+                if image_size is not None:
+                    (image_width, image_height) = map(int, image_size.split('x', 1))
+                    sheet.write(row, 2, image_width)
+                    sheet.write(row, 3, image_height)
+                    del param_list[param_list.index(image_size)]
+
+                data_type = next(ifilter(re_data_type.match, param_list), None)
+                if data_type is not None:
+                    sheet.write(row, 4, data_type)
+                    del param_list[param_list.index(data_type)]
+
+                sheet.row(row).write(5, ' | '.join(param_list))
+
+                col = 6
+
+                for c in config_names:
+                    if c in configs:
+                        sheet.write(row, col, configs[c], time_style)
+                    else:
+                        sheet.write(row, col, None, no_time_style)
+                    col += 1
+                    if args.show_times_per_pixel:
+                        sheet.write(row, col,
+                          xlwt.Formula('{0} * 1000000 / ({1} * {2})'.format(
+                              xlwt.Utils.rowcol_to_cell(row, col - 1),
+                              xlwt.Utils.rowcol_to_cell(row, 2),
+                              xlwt.Utils.rowcol_to_cell(row, 3)
+                          )),
+                          time_style
+                        )
+                        col += 1
+
+                col += 1 # blank column
+
+                for comp in sheet_comparisons:
+                    cmp_from = configs.get(comp["from"])
+                    cmp_to = configs.get(comp["to"])
+
+                    if isinstance(cmp_from, numbers.Number) and isinstance(cmp_to, numbers.Number):
+                        try:
+                            speedup = cmp_from / cmp_to
+                            sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else
+                                                           bad_speedup_style  if speedup < 0.9 else
+                                                           speedup_style)
+                        except ArithmeticError as e:
+                            sheet.write(row, col, None, error_speedup_style)
+                    else:
+                        sheet.write(row, col, None, no_speedup_style)
+
+                    col += 1
+
+                row += 1
+                if row % 1000 == 0: sheet.flush_row_data()
+
+    wb.save(args.output)
+
+if __name__ == '__main__':
+    main()
diff --git a/modules/ts/src/gpu_perf.cpp b/modules/ts/src/gpu_perf.cpp
index dca181468..2bca535c4 100644
--- a/modules/ts/src/gpu_perf.cpp
+++ b/modules/ts/src/gpu_perf.cpp
@@ -288,7 +288,7 @@ namespace perf
 
             printf("[----------]\n"), fflush(stdout);
             printf("[ DEVICE   ] \t# %d %s.\n", i, info.name()), fflush(stdout);
-            printf("[          ] \tCompute capability: %d.%d\n", (int)info.major(), (int)info.minor()), fflush(stdout);
+            printf("[          ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout);
             printf("[          ] \tMulti Processor Count:  %d\n", info.multiProcessorCount()), fflush(stdout);
             printf("[          ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout);
             printf("[          ] \tFree  memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory()  / 1024.0) / 1024.0)), fflush(stdout);
diff --git a/modules/ts/src/precomp.cpp b/modules/ts/src/precomp.cpp
deleted file mode 100644
index c149df18f..000000000
--- a/modules/ts/src/precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "precomp.hpp"
diff --git a/modules/ts/src/precomp.hpp b/modules/ts/src/precomp.hpp
index 3fef1804a..fbb13ec4c 100644
--- a/modules/ts/src/precomp.hpp
+++ b/modules/ts/src/precomp.hpp
@@ -1,6 +1,7 @@
-#include "opencv2/ts.hpp"
 #include "opencv2/core/utility.hpp"
 #include "opencv2/core/private.hpp"
+#include "opencv2/ts.hpp"
+#include "cvconfig.h"
 
 #ifdef GTEST_LINKED_AS_SHARED_LIBRARY
 #error ts module should not have GTEST_LINKED_AS_SHARED_LIBRARY defined
diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp
index 0f3751e52..35482f312 100644
--- a/modules/ts/src/ts_func.cpp
+++ b/modules/ts/src/ts_func.cpp
@@ -2,6 +2,10 @@
 #include <float.h>
 #include <limits.h>
 
+#ifdef HAVE_TEGRA_OPTIMIZATION
+#include "tegra.hpp"
+#endif
+
 using namespace cv;
 
 namespace cvtest
@@ -2939,28 +2943,75 @@ MatComparator::operator()(const char* expr1, const char* expr2,
 
 void printVersionInfo(bool useStdOut)
 {
-    ::testing::Test::RecordProperty("CV_VERSION", CV_VERSION);
+    ::testing::Test::RecordProperty("cv_version", CV_VERSION);
     if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl;
 
     std::string buildInfo( cv::getBuildInformation() );
 
     size_t pos1 = buildInfo.find("Version control");
-    size_t pos2 = buildInfo.find("\n", pos1);\
+    size_t pos2 = buildInfo.find('\n', pos1);
     if(pos1 != std::string::npos && pos2 != std::string::npos)
     {
-        std::string ver( buildInfo.substr(pos1, pos2-pos1) );
-        ::testing::Test::RecordProperty("Version_control", ver);
-        if(useStdOut) std::cout << ver << std::endl;
+        size_t value_start = buildInfo.rfind(' ', pos2) + 1;
+        std::string ver( buildInfo.substr(value_start, pos2 - value_start) );
+        ::testing::Test::RecordProperty("cv_vcs_version", ver);
+        if (useStdOut) std::cout << "OpenCV VCS version: " << ver << std::endl;
     }
 
     pos1 = buildInfo.find("inner version");
-    pos2 = buildInfo.find("\n", pos1);\
+    pos2 = buildInfo.find('\n', pos1);
     if(pos1 != std::string::npos && pos2 != std::string::npos)
     {
-        std::string ver( buildInfo.substr(pos1, pos2-pos1) );
-        ::testing::Test::RecordProperty("inner_version", ver);
-        if(useStdOut) std::cout << ver << std::endl;
+        size_t value_start = buildInfo.rfind(' ', pos2) + 1;
+        std::string ver( buildInfo.substr(value_start, pos2 - value_start) );
+        ::testing::Test::RecordProperty("cv_inner_vcs_version", ver);
+        if(useStdOut) std::cout << "Inner VCS version: " << ver << std::endl;
     }
+
+    const char* parallel_framework = currentParallelFramework();
+
+    if (parallel_framework) {
+        ::testing::Test::RecordProperty("cv_parallel_framework", parallel_framework);
+        if (useStdOut) std::cout << "Parallel framework: " << parallel_framework << std::endl;
+    }
+
+    std::string cpu_features;
+
+#if CV_SSE
+    if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse";
+#endif
+#if CV_SSE2
+    if (checkHardwareSupport(CV_CPU_SSE2)) cpu_features += " sse2";
+#endif
+#if CV_SSE3
+    if (checkHardwareSupport(CV_CPU_SSE3)) cpu_features += " sse3";
+#endif
+#if CV_SSSE3
+    if (checkHardwareSupport(CV_CPU_SSSE3)) cpu_features += " ssse3";
+#endif
+#if CV_SSE4_1
+    if (checkHardwareSupport(CV_CPU_SSE4_1)) cpu_features += " sse4.1";
+#endif
+#if CV_SSE4_2
+    if (checkHardwareSupport(CV_CPU_SSE4_2)) cpu_features += " sse4.2";
+#endif
+#if CV_AVX
+    if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx";
+#endif
+#if CV_NEON
+    cpu_features += " neon"; // NEON is currently not checked at runtime
+#endif
+
+    cpu_features.erase(0, 1); // erase initial space
+
+    ::testing::Test::RecordProperty("cv_cpu_features", cpu_features);
+    if (useStdOut) std::cout << "CPU features: " << cpu_features << std::endl;
+
+#ifdef HAVE_TEGRA_OPTIMIZATION
+    const char * tegra_optimization = tegra::isDeviceSupported() ? "enabled" : "disabled";
+    ::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization);
+    if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl;
+#endif
 }
 
 }
diff --git a/modules/ts/src/ts_gtest.cpp b/modules/ts/src/ts_gtest.cpp
index 7c388cbd4..48870913c 100644
--- a/modules/ts/src/ts_gtest.cpp
+++ b/modules/ts/src/ts_gtest.cpp
@@ -497,6 +497,7 @@ const char kBreakOnFailureFlag[] = "break_on_failure";
 const char kCatchExceptionsFlag[] = "catch_exceptions";
 const char kColorFlag[] = "color";
 const char kFilterFlag[] = "filter";
+const char kParamFilterFlag[] = "param_filter";
 const char kListTestsFlag[] = "list_tests";
 const char kOutputFlag[] = "output";
 const char kPrintTimeFlag[] = "print_time";
@@ -575,6 +576,7 @@ class GTestFlagSaver {
     death_test_style_ = GTEST_FLAG(death_test_style);
     death_test_use_fork_ = GTEST_FLAG(death_test_use_fork);
     filter_ = GTEST_FLAG(filter);
+    param_filter_ = GTEST_FLAG(param_filter);
     internal_run_death_test_ = GTEST_FLAG(internal_run_death_test);
     list_tests_ = GTEST_FLAG(list_tests);
     output_ = GTEST_FLAG(output);
@@ -596,6 +598,7 @@ class GTestFlagSaver {
     GTEST_FLAG(death_test_style) = death_test_style_;
     GTEST_FLAG(death_test_use_fork) = death_test_use_fork_;
     GTEST_FLAG(filter) = filter_;
+    GTEST_FLAG(param_filter) = param_filter_;
     GTEST_FLAG(internal_run_death_test) = internal_run_death_test_;
     GTEST_FLAG(list_tests) = list_tests_;
     GTEST_FLAG(output) = output_;
@@ -617,6 +620,7 @@ class GTestFlagSaver {
   std::string death_test_style_;
   bool death_test_use_fork_;
   std::string filter_;
+  std::string param_filter_;
   std::string internal_run_death_test_;
   bool list_tests_;
   std::string output_;
@@ -1699,6 +1703,12 @@ GTEST_DEFINE_string_(
     "exclude).  A test is run if it matches one of the positive "
     "patterns and does not match any of the negative patterns.");
 
+GTEST_DEFINE_string_(
+    param_filter,
+    internal::StringFromGTestEnv("param_filter", kUniversalFilter),
+    "Same syntax and semantics as for param, but these patterns "
+    "have to match the test's parameters.");
+
 GTEST_DEFINE_bool_(list_tests, false,
                    "List all tests without running them.");
 
@@ -4188,6 +4198,14 @@ void PrettyUnitTestResultPrinter::OnTestIterationStart(
                   "Note: %s filter = %s\n", GTEST_NAME_, filter);
   }
 
+  const char* const param_filter = GTEST_FLAG(param_filter).c_str();
+
+  // Ditto.
+  if (!String::CStringEquals(param_filter, kUniversalFilter)) {
+    ColoredPrintf(COLOR_YELLOW,
+                  "Note: %s parameter filter = %s\n", GTEST_NAME_, param_filter);
+  }
+
   if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
     const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
     ColoredPrintf(COLOR_YELLOW,
@@ -5873,9 +5891,15 @@ int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
                                                    kDisableTestFilter);
       test_info->is_disabled_ = is_disabled;
 
+      const std::string value_param(test_info->value_param() == NULL ?
+                                    "" : test_info->value_param());
+
       const bool matches_filter =
           internal::UnitTestOptions::FilterMatchesTest(test_case_name,
-                                                       test_name);
+                                                       test_name) &&
+          internal::UnitTestOptions::MatchesFilter(value_param,
+                                                   GTEST_FLAG(param_filter).c_str());
+
       test_info->matches_filter_ = matches_filter;
 
       const bool is_runnable =
@@ -6223,6 +6247,12 @@ static const char kColorEncodedHelpMessage[] =
 "      Run only the tests whose name matches one of the positive patterns but\n"
 "      none of the negative patterns. '?' matches any single character; '*'\n"
 "      matches any substring; ':' separates two patterns.\n"
+"  @G--" GTEST_FLAG_PREFIX_ "param_filter=@YPOSITIVE_PATTERNS"
+    "[@G-@YNEGATIVE_PATTERNS]@D\n"
+"      Like @G--" GTEST_FLAG_PREFIX_
+                      "filter@D, but applies to the test's parameter. If a\n"
+"      test is not parameterized, its parameter is considered to be the\n"
+"      empty string.\n"
 "  @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n"
 "      Run all disabled tests too.\n"
 "\n"
@@ -6300,6 +6330,7 @@ void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
         ParseBoolFlag(arg, kDeathTestUseFork,
                       &GTEST_FLAG(death_test_use_fork)) ||
         ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
+        ParseStringFlag(arg, kParamFilterFlag, &GTEST_FLAG(param_filter)) ||
         ParseStringFlag(arg, kInternalRunDeathTestFlag,
                         &GTEST_FLAG(internal_run_death_test)) ||
         ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp
index cd5a239d5..e3ae8735d 100644
--- a/modules/ts/src/ts_perf.cpp
+++ b/modules/ts/src/ts_perf.cpp
@@ -14,30 +14,10 @@ int64 TestBase::timeLimitDefault = 0;
 unsigned int TestBase::iterationsLimitDefault = (unsigned int)(-1);
 int64 TestBase::_timeadjustment = 0;
 
-const std::string command_line_keys =
-    "{   perf_max_outliers   |8        |percent of allowed outliers}"
-    "{   perf_min_samples    |10       |minimal required numer of samples}"
-    "{   perf_force_samples  |100      |force set maximum number of samples for all tests}"
-    "{   perf_seed           |809564   |seed for random numbers generator}"
-    "{   perf_threads        |-1       |the number of worker threads, if parallel execution is enabled}"
-    "{   perf_write_sanity   |         |create new records for sanity checks}"
-    "{   perf_verify_sanity  |         |fail tests having no regression data for sanity checks}"
-#ifdef ANDROID
-    "{   perf_time_limit     |6.0      |default time limit for a single test (in seconds)}"
-    "{   perf_affinity_mask  |0        |set affinity mask for the main thread}"
-    "{   perf_log_power_checkpoints  | |additional xml logging for power measurement}"
-#else
-    "{   perf_time_limit     |3.0      |default time limit for a single test (in seconds)}"
-#endif
-    "{   perf_max_deviation  |1.0      |}"
-    "{   help h              |         |print help info}"
-#ifdef HAVE_CUDA
-    "{   perf_run_cpu        |false    |run GPU performance tests for analogical CPU functions}"
-    "{   perf_cuda_device    |0        |run GPU test suite onto specific CUDA capable device}"
-    "{   perf_cuda_info_only |false    |print an information about system and an available CUDA devices and then exit.}"
-#endif
-;
+// Item [0] will be considered the default implementation.
+static std::vector<std::string> available_impls;
 
+static std::string  param_impl;
 static double       param_max_outliers;
 static double       param_max_deviation;
 static unsigned int param_min_samples;
@@ -48,7 +28,6 @@ static int          param_threads;
 static bool         param_write_sanity;
 static bool         param_verify_sanity;
 #ifdef HAVE_CUDA
-static bool         param_run_cpu;
 static int          param_cuda_device;
 #endif
 
@@ -573,12 +552,6 @@ Regression& Regression::operator() (const std::string& name, cv::InputArray arra
 
     std::string nodename = getCurrentTestNodeName();
 
-#ifdef HAVE_CUDA
-    static const std::string prefix = (param_run_cpu)? "CPU_" : "GPU_";
-    if(suiteName == "gpu")
-        nodename = prefix + nodename;
-#endif
-
     cv::FileNode n = rootIn[nodename];
     if(n.isNone())
     {
@@ -642,6 +615,43 @@ performance_metrics::performance_metrics()
 
 void TestBase::Init(int argc, const char* const argv[])
 {
+    std::vector<std::string> plain_only;
+    plain_only.push_back("plain");
+    TestBase::Init(plain_only, argc, argv);
+}
+
+void TestBase::Init(const std::vector<std::string> & availableImpls,
+                 int argc, const char* const argv[])
+{
+    available_impls = availableImpls;
+
+    const std::string command_line_keys =
+        "{   perf_max_outliers           |8        |percent of allowed outliers}"
+        "{   perf_min_samples            |10       |minimal required numer of samples}"
+        "{   perf_force_samples          |100      |force set maximum number of samples for all tests}"
+        "{   perf_seed                   |809564   |seed for random numbers generator}"
+        "{   perf_threads                |-1       |the number of worker threads, if parallel execution is enabled}"
+        "{   perf_write_sanity           |false    |create new records for sanity checks}"
+        "{   perf_verify_sanity          |false    |fail tests having no regression data for sanity checks}"
+        "{   perf_impl                   |" + available_impls[0] +
+                                                  "|the implementation variant of functions under test}"
+        "{   perf_list_impls             |false    |list available implementation variants and exit}"
+        "{   perf_run_cpu                |false    |deprecated, equivalent to --perf_impl=plain}"
+#ifdef ANDROID
+        "{   perf_time_limit             |6.0      |default time limit for a single test (in seconds)}"
+        "{   perf_affinity_mask          |0        |set affinity mask for the main thread}"
+        "{   perf_log_power_checkpoints  |         |additional xml logging for power measurement}"
+#else
+        "{   perf_time_limit             |3.0      |default time limit for a single test (in seconds)}"
+#endif
+        "{   perf_max_deviation          |1.0      |}"
+        "{   help h                      |false    |print help info}"
+#ifdef HAVE_CUDA
+        "{   perf_cuda_device            |0        |run GPU test suite onto specific CUDA capable device}"
+        "{   perf_cuda_info_only         |false    |print an information about system and an available CUDA devices and then exit.}"
+#endif
+    ;
+
     cv::CommandLineParser args(argc, argv, command_line_keys);
     if (args.has("help"))
     {
@@ -651,6 +661,7 @@ void TestBase::Init(int argc, const char* const argv[])
 
     ::testing::AddGlobalTestEnvironment(new PerfEnvironment);
 
+    param_impl          = args.has("perf_run_cpu") ? "plain" : args.get<std::string>("perf_impl");
     param_max_outliers  = std::min(100., std::max(0., args.get<double>("perf_max_outliers")));
     param_min_samples   = std::max(1u, args.get<unsigned int>("perf_min_samples"));
     param_max_deviation = std::max(0., args.get<double>("perf_max_deviation"));
@@ -665,19 +676,41 @@ void TestBase::Init(int argc, const char* const argv[])
     log_power_checkpoints = args.has("perf_log_power_checkpoints");
 #endif
 
+    bool param_list_impls = args.has("perf_list_impls");
+
+    if (param_list_impls)
+    {
+        fputs("Available implementation variants:", stdout);
+        for (size_t i = 0; i < available_impls.size(); ++i) {
+            putchar(' ');
+            fputs(available_impls[i].c_str(), stdout);
+        }
+        putchar('\n');
+        exit(0);
+    }
+
+    if (std::find(available_impls.begin(), available_impls.end(), param_impl) == available_impls.end())
+    {
+        printf("No such implementation: %s\n", param_impl.c_str());
+        exit(1);
+    }
+
 #ifdef HAVE_CUDA
 
     bool printOnly        = args.has("perf_cuda_info_only");
 
     if (printOnly)
         exit(0);
+#endif
+
+    if (available_impls.size() > 1)
+        printf("[----------]\n[   INFO   ] \tImplementation variant: %s.\n[----------]\n", param_impl.c_str()), fflush(stdout);
+
+#ifdef HAVE_CUDA
 
-    param_run_cpu         = args.has("perf_run_cpu");
     param_cuda_device      = std::max(0, std::min(cv::gpu::getCudaEnabledDeviceCount(), args.get<int>("perf_cuda_device")));
 
-    if (param_run_cpu)
-        printf("[----------]\n[ GPU INFO ] \tRun test suite on CPU.\n[----------]\n"), fflush(stdout);
-    else
+    if (param_impl == "cuda")
     {
         cv::gpu::DeviceInfo info(param_cuda_device);
         if (!info.isCompatible())
@@ -703,6 +736,26 @@ void TestBase::Init(int argc, const char* const argv[])
     _timeadjustment = _calibrate();
 }
 
+void TestBase::RecordRunParameters()
+{
+    ::testing::Test::RecordProperty("cv_implementation", param_impl);
+    ::testing::Test::RecordProperty("cv_num_threads", param_threads);
+
+#ifdef HAVE_CUDA
+    if (param_impl == "cuda")
+    {
+        cv::gpu::DeviceInfo info(param_cuda_device);
+        ::testing::Test::RecordProperty("cv_cuda_gpu", info.name());
+    }
+#endif
+}
+
+std::string TestBase::getSelectedImpl()
+{
+    return param_impl;
+}
+
+
 int64 TestBase::_calibrate()
 {
     class _helper : public ::perf::TestBase
@@ -1322,11 +1375,7 @@ void perf::sort(std::vector<cv::KeyPoint>& pts, cv::InputOutputArray descriptors
 \*****************************************************************************************/
 bool perf::GpuPerf::targetDevice()
 {
-#ifdef HAVE_CUDA
-    return !param_run_cpu;
-#else
-    return false;
-#endif
+    return param_impl == "cuda";
 }
 
 /*****************************************************************************************\
@@ -1365,4 +1414,3 @@ void PrintTo(const Size& sz, ::std::ostream* os)
 }
 
 }  // namespace cv
-
diff --git a/modules/video/doc/motion_analysis_and_object_tracking.rst b/modules/video/doc/motion_analysis_and_object_tracking.rst
index aef1ba4e9..b392c6854 100644
--- a/modules/video/doc/motion_analysis_and_object_tracking.rst
+++ b/modules/video/doc/motion_analysis_and_object_tracking.rst
@@ -41,6 +41,13 @@ Calculates an optical flow for a sparse feature set using the iterative Lucas-Ka
 
 The function implements a sparse iterative version of the Lucas-Kanade optical flow in pyramids. See [Bouguet00]_. The function is parallelized with the TBB library.
 
+.. note::
+
+   * An example using the Lucas-Kanade optical flow algorithm can be found at opencv_source_code/samples/cpp/lkdemo.cpp
+
+   * (Python) An example using the Lucas-Kanade optical flow algorithm can be found at opencv_source_code/samples/python2/lk_track.py
+   * (Python) An example using the Lucas-Kanade tracker for homography matching can be found at opencv_source_code/samples/python2/lk_homography.py
+
 buildOpticalFlowPyramid
 -----------------------
 Constructs the image pyramid which can be passed to :ocv:func:`calcOpticalFlowPyrLK`.
@@ -108,6 +115,11 @@ The function finds an optical flow for each ``prev`` pixel using the [Farneback2
 
     \texttt{prev} (y,x)  \sim \texttt{next} ( y + \texttt{flow} (y,x)[1],  x + \texttt{flow} (y,x)[0])
 
+.. note::
+
+   * An example using the optical flow algorithm described by Gunnar Farneback can be found at opencv_source_code/samples/cpp/fback.cpp
+
+   * (Python) An example using the optical flow algorithm described by Gunnar Farneback can be found at opencv_source_code/samples/python2/opt_flow.py
 
 estimateRigidTransform
 --------------------------
@@ -158,6 +170,8 @@ Finds the geometric transform (warp) between two images in terms of the ECC crit
 
 .. ocv:function:: double findTransformECC( InputArray templateImage, InputArray inputImage, InputOutputArray warpMatrix, int motionType=MOTION_AFFINE, TermCriteria criteria=TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 50, 0.001))
 
+.. ocv:pyfunction:: cv2.findTransformECC(templateImage, inputImage, warpMatrix[, motionType[, criteria]]) -> retval, warpMatrix
+
     :param templateImage: single-channel template image; ``CV_8U`` or ``CV_32F`` array.
 
     :param inputImage: single-channel input image which should be warped with the final ``warpMatrix`` in order to provide an image similar to ``templateImage``, same type as ``temlateImage``.
@@ -270,7 +284,9 @@ In fact,
 :ocv:func:`fastAtan2` and
 :ocv:func:`phase` are used so that the computed angle is measured in degrees and covers the full range 0..360. Also, the ``mask`` is filled to indicate pixels where the computed angle is valid.
 
+.. note::
 
+   * (Python) An example on how to perform a motion template technique can be found at opencv_source_code/samples/python2/motempl.py
 
 calcGlobalOrientation
 -------------------------
@@ -352,7 +368,9 @@ First, it finds an object center using
 
 See the OpenCV sample ``camshiftdemo.c`` that tracks colored objects.
 
+.. note::
 
+   * (Python) A sample explaining the camshift tracking algorithm can be found at opencv_source_code/samples/python2/camshift.py
 
 meanShift
 ---------
@@ -380,7 +398,9 @@ The function implements the iterative object search algorithm. It takes the inpu
 :ocv:func:`contourArea` ), and rendering the  remaining contours with
 :ocv:func:`drawContours` .
 
+.. note::
 
+   * A mean-shift tracking sample can be found at opencv_source_code/samples/cpp/camshiftdemo.cpp
 
 KalmanFilter
 ------------
@@ -391,7 +411,9 @@ KalmanFilter
 The class implements a standard Kalman filter
 http://en.wikipedia.org/wiki/Kalman_filter, [Welch95]_. However, you can modify ``transitionMatrix``, ``controlMatrix``, and ``measurementMatrix`` to get an extended Kalman filter functionality. See the OpenCV sample ``kalman.cpp`` .
 
+.. note::
 
+   * An example using the standard Kalman filter can be found at opencv_source_code/samples/cpp/kalman.cpp
 
 
 KalmanFilter::KalmanFilter
@@ -678,6 +700,166 @@ Sets the shadow threshold
 .. ocv:function:: void BackgroundSubtractorMOG2::setShadowThreshold(double threshold)
 
 
+BackgroundSubtractorGMG
+------------------------
+Background Subtractor module based on the algorithm given in [Gold2012]_.
+
+.. ocv:class:: BackgroundSubtractorGMG : public BackgroundSubtractor
+
+
+createBackgroundSubtractorGMG
+-----------------------------------
+Creates a GMG Background Subtractor
+
+.. ocv:function:: Ptr<BackgroundSubtractorGMG> createBackgroundSubtractorGMG(int initializationFrames=120, double decisionThreshold=0.8)
+
+.. ocv:pyfunction:: cv2.createBackgroundSubtractorGMG([, initializationFrames[, decisionThreshold]]) -> retval
+
+    :param initializationFrames: number of frames used to initialize the background models.
+
+    :param decisionThreshold: Threshold value, above which it is marked foreground, else background.
+
+
+BackgroundSubtractorGMG::getNumFrames
+---------------------------------------
+Returns the number of frames used to initialize background model.
+
+.. ocv:function:: int BackgroundSubtractorGMG::getNumFrames() const
+
+
+BackgroundSubtractorGMG::setNumFrames
+---------------------------------------
+Sets the number of frames used to initialize background model.
+
+.. ocv:function:: void BackgroundSubtractorGMG::setNumFrames(int nframes)
+
+
+BackgroundSubtractorGMG::getDefaultLearningRate
+--------------------------------------------------
+Returns the learning rate of the algorithm. It lies between 0.0 and 1.0. It determines how quickly features are "forgotten" from histograms.
+
+.. ocv:function:: double BackgroundSubtractorGMG::getDefaultLearningRate() const
+
+
+BackgroundSubtractorGMG::setDefaultLearningRate
+--------------------------------------------------
+Sets the learning rate of the algorithm.
+
+.. ocv:function:: void BackgroundSubtractorGMG::setDefaultLearningRate(double lr)
+
+
+BackgroundSubtractorGMG::getDecisionThreshold
+--------------------------------------------------
+Returns the value of decision threshold. Decision value is the value above which pixel is determined to be FG.
+
+.. ocv:function:: double BackgroundSubtractorGMG::getDecisionThreshold() const
+
+
+BackgroundSubtractorGMG::setDecisionThreshold
+--------------------------------------------------
+Sets the value of decision threshold.
+
+.. ocv:function:: void BackgroundSubtractorGMG::setDecisionThreshold(double thresh)
+
+
+BackgroundSubtractorGMG::getMaxFeatures
+--------------------------------------------------
+Returns total number of distinct colors to maintain in histogram.
+
+.. ocv:function:: int BackgroundSubtractorGMG::getMaxFeatures() const
+
+
+BackgroundSubtractorGMG::setMaxFeatures
+--------------------------------------------------
+Sets total number of distinct colors to maintain in histogram.
+
+.. ocv:function:: void BackgroundSubtractorGMG::setMaxFeatures(int maxFeatures)
+
+
+BackgroundSubtractorGMG::getQuantizationLevels
+--------------------------------------------------
+Returns the parameter used for quantization of color-space. It is the number of discrete levels in each channel to be used in histograms.
+
+.. ocv:function:: int BackgroundSubtractorGMG::getQuantizationLevels() const
+
+
+BackgroundSubtractorGMG::setQuantizationLevels
+--------------------------------------------------
+Sets the parameter used for quantization of color-space
+
+.. ocv:function:: void BackgroundSubtractorGMG::setQuantizationLevels(int nlevels)
+
+
+BackgroundSubtractorGMG::getSmoothingRadius
+--------------------------------------------------
+Returns the kernel radius used for morphological operations
+
+.. ocv:function:: int BackgroundSubtractorGMG::getSmoothingRadius() const
+
+
+BackgroundSubtractorGMG::setSmoothingRadius
+--------------------------------------------------
+Sets the kernel radius used for morphological operations
+
+.. ocv:function:: void BackgroundSubtractorGMG::setSmoothingRadius(int radius)
+
+
+BackgroundSubtractorGMG::getUpdateBackgroundModel
+--------------------------------------------------
+Returns the status of background model update
+
+.. ocv:function:: bool BackgroundSubtractorGMG::getUpdateBackgroundModel() const
+
+
+BackgroundSubtractorGMG::setUpdateBackgroundModel
+--------------------------------------------------
+Sets the status of background model update
+
+.. ocv:function:: void BackgroundSubtractorGMG::setUpdateBackgroundModel(bool update)
+
+
+BackgroundSubtractorGMG::getMinVal
+--------------------------------------------------
+Returns the minimum value taken on by pixels in image sequence. Usually 0.
+
+.. ocv:function:: double BackgroundSubtractorGMG::getMinVal() const
+
+
+BackgroundSubtractorGMG::setMinVal
+--------------------------------------------------
+Sets the minimum value taken on by pixels in image sequence.
+
+.. ocv:function:: void BackgroundSubtractorGMG::setMinVal(double val)
+
+
+BackgroundSubtractorGMG::getMaxVal
+--------------------------------------------------
+Returns the maximum value taken on by pixels in image sequence. e.g. 1.0 or 255.
+
+.. ocv:function:: double BackgroundSubtractorGMG::getMaxVal() const
+
+
+BackgroundSubtractorGMG::setMaxVal
+--------------------------------------------------
+Sets the maximum value taken on by pixels in image sequence.
+
+.. ocv:function:: void BackgroundSubtractorGMG::setMaxVal(double val)
+
+
+BackgroundSubtractorGMG::getBackgroundPrior
+--------------------------------------------------
+Returns the prior probability that each individual pixel is a background pixel.
+
+.. ocv:function:: double BackgroundSubtractorGMG::getBackgroundPrior() const
+
+
+BackgroundSubtractorGMG::setBackgroundPrior
+--------------------------------------------------
+Sets the prior probability that each individual pixel is a background pixel.
+
+.. ocv:function:: void BackgroundSubtractorGMG::setBackgroundPrior(double bgprior)
+
+
 calcOpticalFlowSF
 -----------------
 Calculate an optical flow using "SimpleFlow" algorithm.
@@ -690,7 +872,7 @@ Calculate an optical flow using "SimpleFlow" algorithm.
 
     :param next: Second 8-bit 3-channel image of the same size as ``prev``
 
-    :param flow: computed flow image that has the same size as ``prev`` and type ``CV_32FC2`` 
+    :param flow: computed flow image that has the same size as ``prev`` and type ``CV_32FC2``
 
     :param layers: Number of layers
 
@@ -720,7 +902,9 @@ Calculate an optical flow using "SimpleFlow" algorithm.
 
 See [Tao2012]_. And site of project - http://graphics.berkeley.edu/papers/Tao-SAN-2012-05/.
 
+.. note::
 
+   * An example using the simpleFlow algorithm can be found at opencv_source_code/samples/cpp/simpleflow_demo.cpp
 
 createOptFlow_DualTVL1
 ----------------------
@@ -812,6 +996,8 @@ Releases all inner buffers.
 
 .. [Zach2007] C. Zach, T. Pock and H. Bischof. "A Duality Based Approach for Realtime TV-L1 Optical Flow", In Proceedings of Pattern Recognition (DAGM), Heidelberg, Germany, pp. 214-223, 2007
 
-.. [Zivkovic2004] Z. Zivkovic. Improved adaptive Gausian mixture model for background subtraction*, International Conference Pattern Recognition, UK, August, 2004, http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf. The code is very fast and performs also shadow detection. Number of Gausssian components is adapted per pixel.
+.. [Zivkovic2004] Z. Zivkovic. "Improved adaptive Gausian mixture model for background subtraction", International Conference Pattern Recognition, UK, August, 2004, http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf. The code is very fast and performs also shadow detection. Number of Gausssian components is adapted per pixel.
 
 .. [Zivkovic2006] Z.Zivkovic, F. van der Heijden. "Efficient Adaptive Density Estimation per Image Pixel for the Task of Background Subtraction", Pattern Recognition Letters, vol. 27, no. 7, pages 773-780, 2006.
+
+.. [Gold2012] Andrew B. Godbehere, Akihiro Matsukawa, Ken Goldberg, "Visual Tracking of Human Visitors under Variable-Lighting Conditions for a Responsive Audio Art Installation", American Control Conference, Montreal, June 2012.
diff --git a/modules/video/include/opencv2/video/video.hpp b/modules/video/include/opencv2/video/video.hpp
index 8837bff1b..8267b85d5 100644
--- a/modules/video/include/opencv2/video/video.hpp
+++ b/modules/video/include/opencv2/video/video.hpp
@@ -45,4 +45,4 @@
 #error this is a compatibility header which should not be used inside the OpenCV library
 #endif
 
-#include "opencv2/video.hpp"
\ No newline at end of file
+#include "opencv2/video.hpp"
diff --git a/modules/video/perf/perf_precomp.cpp b/modules/video/perf/perf_precomp.cpp
deleted file mode 100644
index 8552ac3d4..000000000
--- a/modules/video/perf/perf_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "perf_precomp.hpp"
diff --git a/modules/video/src/bgfg_gaussmix.cpp b/modules/video/src/bgfg_gaussmix.cpp
index ebdd19bd0..b5ad039e2 100644
--- a/modules/video/src/bgfg_gaussmix.cpp
+++ b/modules/video/src/bgfg_gaussmix.cpp
@@ -464,10 +464,9 @@ void BackgroundSubtractorMOGImpl::apply(InputArray _image, OutputArray _fgmask,
 Ptr<BackgroundSubtractorMOG> createBackgroundSubtractorMOG(int history, int nmixtures,
                                   double backgroundRatio, double noiseSigma)
 {
-    return new BackgroundSubtractorMOGImpl(history, nmixtures, backgroundRatio, noiseSigma);
+    return makePtr<BackgroundSubtractorMOGImpl>(history, nmixtures, backgroundRatio, noiseSigma);
 }
 
 }
 
 /* End of file. */
-
diff --git a/modules/video/src/bgfg_gaussmix2.cpp b/modules/video/src/bgfg_gaussmix2.cpp
index 2c99c01b0..485e34d26 100644
--- a/modules/video/src/bgfg_gaussmix2.cpp
+++ b/modules/video/src/bgfg_gaussmix2.cpp
@@ -769,7 +769,7 @@ void BackgroundSubtractorMOG2Impl::getBackgroundImage(OutputArray backgroundImag
 Ptr<BackgroundSubtractorMOG2> createBackgroundSubtractorMOG2(int _history, double _varThreshold,
                                                              bool _bShadowDetection)
 {
-    return new BackgroundSubtractorMOG2Impl(_history, (float)_varThreshold, _bShadowDetection);
+    return makePtr<BackgroundSubtractorMOG2Impl>(_history, (float)_varThreshold, _bShadowDetection);
 }
 
 }
diff --git a/modules/video/src/bgfg_gmg.cpp b/modules/video/src/bgfg_gmg.cpp
index e3e423211..f5b7881a0 100644
--- a/modules/video/src/bgfg_gmg.cpp
+++ b/modules/video/src/bgfg_gmg.cpp
@@ -485,7 +485,7 @@ void BackgroundSubtractorGMGImpl::release()
 
 Ptr<BackgroundSubtractorGMG> createBackgroundSubtractorGMG(int initializationFrames, double decisionThreshold)
 {
-    Ptr<BackgroundSubtractorGMG> bgfg = new BackgroundSubtractorGMGImpl;
+    Ptr<BackgroundSubtractorGMG> bgfg = makePtr<BackgroundSubtractorGMGImpl>();
     bgfg->setNumFrames(initializationFrames);
     bgfg->setDecisionThreshold(decisionThreshold);
 
diff --git a/modules/video/src/kalman.cpp b/modules/video/src/kalman.cpp
index 3cb34c0d2..793404a39 100644
--- a/modules/video/src/kalman.cpp
+++ b/modules/video/src/kalman.cpp
@@ -127,5 +127,3 @@ const Mat& KalmanFilter::correct(const Mat& measurement)
 }
 
 };
-
-
diff --git a/modules/video/src/lkpyramid.hpp b/modules/video/src/lkpyramid.hpp
index 4aff37ef8..1b9d50bc6 100644
--- a/modules/video/src/lkpyramid.hpp
+++ b/modules/video/src/lkpyramid.hpp
@@ -34,4 +34,3 @@ namespace detail
 
 }// namespace detail
 }// namespace cv
-
diff --git a/modules/video/src/precomp.cpp b/modules/video/src/precomp.cpp
deleted file mode 100644
index 3e0ec42de..000000000
--- a/modules/video/src/precomp.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-/* End of file. */
diff --git a/modules/video/src/simpleflow.cpp b/modules/video/src/simpleflow.cpp
index f987b42ee..765a34270 100644
--- a/modules/video/src/simpleflow.cpp
+++ b/modules/video/src/simpleflow.cpp
@@ -319,7 +319,7 @@ static Mat upscaleOpticalFlow(int new_rows,
 static Mat calcIrregularityMat(const Mat& flow, int radius) {
   const int rows = flow.rows;
   const int cols = flow.cols;
-  Mat irregularity(rows, cols, CV_32F);
+  Mat irregularity = Mat::zeros(rows, cols, CV_32F);
   for (int r = 0; r < rows; ++r) {
     const int start_row = std::max(0, r - radius);
     const int end_row = std::min(rows - 1, r + radius);
@@ -441,7 +441,7 @@ static void extrapolateFlow(Mat& flow,
                             const Mat& speed_up) {
   const int rows = flow.rows;
   const int cols = flow.cols;
-  Mat done(rows, cols, CV_8U);
+  Mat done = Mat::zeros(rows, cols, CV_8U);
   for (int r = 0; r < rows; ++r) {
     for (int c = 0; c < cols; ++c) {
       if (!done.at<uchar>(r, c) && speed_up.at<uchar>(r, c) > 1) {
@@ -540,8 +540,8 @@ CV_EXPORTS_W void calcOpticalFlowSF(InputArray _from,
   Mat mask = Mat::ones(curr_from.size(), CV_8U);
   Mat mask_inv = Mat::ones(curr_from.size(), CV_8U);
 
-  Mat flow(curr_from.size(), CV_32FC2);
-  Mat flow_inv(curr_to.size(), CV_32FC2);
+  Mat flow = Mat::zeros(curr_from.size(), CV_32FC2);
+  Mat flow_inv = Mat::zeros(curr_to.size(), CV_32FC2);
 
   Mat confidence;
   Mat confidence_inv;
@@ -685,4 +685,3 @@ CV_EXPORTS_W void calcOpticalFlowSF(InputArray from,
 }
 
 }
-
diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp
index cdf42f91e..8d5993275 100644
--- a/modules/video/src/tvl1flow.cpp
+++ b/modules/video/src/tvl1flow.cpp
@@ -953,5 +953,5 @@ CV_INIT_ALGORITHM(OpticalFlowDual_TVL1, "DenseOpticalFlow.DualTVL1",
 
 Ptr<DenseOpticalFlow> cv::createOptFlow_DualTVL1()
 {
-    return new OpticalFlowDual_TVL1;
+    return makePtr<OpticalFlowDual_TVL1>();
 }
diff --git a/modules/video/test/test_backgroundsubtractor_gbh.cpp b/modules/video/test/test_backgroundsubtractor_gbh.cpp
index a7261516a..99d53e3d3 100644
--- a/modules/video/test/test_backgroundsubtractor_gbh.cpp
+++ b/modules/video/test/test_backgroundsubtractor_gbh.cpp
@@ -40,7 +40,7 @@ void CV_BackgroundSubtractorTest::run(int)
     Ptr<BackgroundSubtractorGMG> fgbg = createBackgroundSubtractorGMG();
     Mat fgmask;
 
-    if (fgbg.empty())
+    if (!fgbg)
         CV_Error(Error::StsError,"Failed to create Algorithm\n");
 
     /**
diff --git a/modules/video/test/test_precomp.cpp b/modules/video/test/test_precomp.cpp
deleted file mode 100644
index 5956e13e3..000000000
--- a/modules/video/test/test_precomp.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "test_precomp.hpp"
diff --git a/modules/videostab/CMakeLists.txt b/modules/videostab/CMakeLists.txt
index e9e62815f..4df203c7d 100644
--- a/modules/videostab/CMakeLists.txt
+++ b/modules/videostab/CMakeLists.txt
@@ -1,4 +1,3 @@
 set(the_description "Video stabilization")
 ocv_define_module(videostab opencv_imgproc opencv_features2d opencv_video opencv_photo opencv_calib3d
                   OPTIONAL opencv_gpu opencv_gpuwarping opencv_gpuoptflow opencv_highgui)
-
diff --git a/modules/videostab/include/opencv2/videostab/global_motion.hpp b/modules/videostab/include/opencv2/videostab/global_motion.hpp
index 58b831b37..8ccc067a3 100644
--- a/modules/videostab/include/opencv2/videostab/global_motion.hpp
+++ b/modules/videostab/include/opencv2/videostab/global_motion.hpp
@@ -217,7 +217,7 @@ public:
 
 private:
     Ptr<MotionEstimatorBase> motionEstimator_;
-    gpu::GoodFeaturesToTrackDetector_GPU detector_;
+    Ptr<gpu::CornersDetector> detector_;
     SparsePyrLkOptFlowEstimatorGpu optFlowEstimator_;
     Ptr<IOutlierRejector> outlierRejector_;
 
diff --git a/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp b/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
index 8588acbc5..01163b27c 100644
--- a/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
+++ b/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
@@ -133,4 +133,3 @@ private:
 } // namespace cv
 
 #endif
-
diff --git a/modules/videostab/src/clp.hpp b/modules/videostab/src/clp.hpp
index 223016fe7..41b875b3c 100644
--- a/modules/videostab/src/clp.hpp
+++ b/modules/videostab/src/clp.hpp
@@ -44,14 +44,6 @@
 #define __OPENCV_VIDEOSTAB_CLP_HPP__
 
 #ifdef HAVE_CLP
-#  undef PACKAGE
-#  undef PACKAGE_BUGREPORT
-#  undef PACKAGE_NAME
-#  undef PACKAGE_STRING
-#  undef PACKAGE_TARNAME
-#  undef PACKAGE_VERSION
-#  undef VERSION
-
 #  define COIN_BIG_INDEX 0
 #  define DEBUG_COIN 0
 #  define PRESOLVE_DEBUG 0
diff --git a/modules/videostab/src/frame_source.cpp b/modules/videostab/src/frame_source.cpp
index 003220211..7ca4b73de 100644
--- a/modules/videostab/src/frame_source.cpp
+++ b/modules/videostab/src/frame_source.cpp
@@ -111,10 +111,10 @@ VideoFileSource::VideoFileSource(const String &path, bool volatileFrame)
 void VideoFileSource::reset() { impl->reset(); }
 Mat VideoFileSource::nextFrame() { return impl->nextFrame(); }
 
-int VideoFileSource::width() { return ((VideoFileSourceImpl*)impl.obj)->width(); }
-int VideoFileSource::height() { return ((VideoFileSourceImpl*)impl.obj)->height(); }
-int VideoFileSource::count() { return ((VideoFileSourceImpl*)impl.obj)->count(); }
-double VideoFileSource::fps() { return ((VideoFileSourceImpl*)impl.obj)->fps(); }
+int VideoFileSource::width() { return ((VideoFileSourceImpl*)impl.get())->width(); }
+int VideoFileSource::height() { return ((VideoFileSourceImpl*)impl.get())->height(); }
+int VideoFileSource::count() { return ((VideoFileSourceImpl*)impl.get())->count(); }
+double VideoFileSource::fps() { return ((VideoFileSourceImpl*)impl.get())->fps(); }
 
 } // namespace videostab
 } // namespace cv
diff --git a/modules/videostab/src/global_motion.cpp b/modules/videostab/src/global_motion.cpp
index d6c291ca7..c9f15a61c 100644
--- a/modules/videostab/src/global_motion.cpp
+++ b/modules/videostab/src/global_motion.cpp
@@ -671,9 +671,9 @@ Mat ToFileMotionWriter::estimate(const Mat &frame0, const Mat &frame1, bool *ok)
 KeypointBasedMotionEstimator::KeypointBasedMotionEstimator(Ptr<MotionEstimatorBase> estimator)
     : ImageMotionEstimatorBase(estimator->motionModel()), motionEstimator_(estimator)
 {
-    setDetector(new GoodFeaturesToTrackDetector());
-    setOpticalFlowEstimator(new SparsePyrLkOptFlowEstimator());
-    setOutlierRejector(new NullOutlierRejector());
+    setDetector(makePtr<GoodFeaturesToTrackDetector>());
+    setOpticalFlowEstimator(makePtr<SparsePyrLkOptFlowEstimator>());
+    setOutlierRejector(makePtr<NullOutlierRejector>());
 }
 
 
@@ -708,7 +708,7 @@ Mat KeypointBasedMotionEstimator::estimate(const Mat &frame0, const Mat &frame1,
 
     // perform outlier rejection
 
-    IOutlierRejector *outlRejector = static_cast<IOutlierRejector*>(outlierRejector_);
+    IOutlierRejector *outlRejector = outlierRejector_.get();
     if (!dynamic_cast<NullOutlierRejector*>(outlRejector))
     {
         pointsPrev_.swap(pointsPrevGood_);
@@ -742,8 +742,10 @@ Mat KeypointBasedMotionEstimator::estimate(const Mat &frame0, const Mat &frame1,
 KeypointBasedMotionEstimatorGpu::KeypointBasedMotionEstimatorGpu(Ptr<MotionEstimatorBase> estimator)
     : ImageMotionEstimatorBase(estimator->motionModel()), motionEstimator_(estimator)
 {
+    detector_ = gpu::createGoodFeaturesToTrackDetector(CV_8UC1);
+
     CV_Assert(gpu::getCudaEnabledDeviceCount() > 0);
-    setOutlierRejector(new NullOutlierRejector());
+    setOutlierRejector(makePtr<NullOutlierRejector>());
 }
 
 
@@ -769,7 +771,7 @@ Mat KeypointBasedMotionEstimatorGpu::estimate(const gpu::GpuMat &frame0, const g
     }
 
     // find keypoints
-    detector_(grayFrame0, pointsPrev_);
+    detector_->detect(grayFrame0, pointsPrev_);
 
     // find correspondences
     optFlowEstimator_.run(frame0, frame1, pointsPrev_, points_, status_);
@@ -782,7 +784,7 @@ Mat KeypointBasedMotionEstimatorGpu::estimate(const gpu::GpuMat &frame0, const g
 
     // perform outlier rejection
 
-    IOutlierRejector *rejector = static_cast<IOutlierRejector*>(outlierRejector_);
+    IOutlierRejector *rejector = outlierRejector_.get();
     if (!dynamic_cast<NullOutlierRejector*>(rejector))
     {
         outlierRejector_->process(frame0.size(), hostPointsPrev_, hostPoints_, rejectionStatus_);
@@ -832,5 +834,3 @@ Mat getMotion(int from, int to, const std::vector<Mat> &motions)
 
 } // namespace videostab
 } // namespace cv
-
-
diff --git a/modules/videostab/src/inpainting.cpp b/modules/videostab/src/inpainting.cpp
index b3092048f..7f5368cff 100644
--- a/modules/videostab/src/inpainting.cpp
+++ b/modules/videostab/src/inpainting.cpp
@@ -324,7 +324,7 @@ public:
 MotionInpainter::MotionInpainter()
 {
 #ifdef HAVE_OPENCV_GPUOPTFLOW
-    setOptFlowEstimator(new DensePyrLkOptFlowEstimatorGpu());
+    setOptFlowEstimator(makePtr<DensePyrLkOptFlowEstimatorGpu>());
 #else
     CV_Error(Error::StsNotImplemented, "Current implementation of MotionInpainter requires GPU");
 #endif
diff --git a/modules/videostab/src/motion_stabilizing.cpp b/modules/videostab/src/motion_stabilizing.cpp
index c1f3442e7..65bbd73bd 100644
--- a/modules/videostab/src/motion_stabilizing.cpp
+++ b/modules/videostab/src/motion_stabilizing.cpp
@@ -532,9 +532,9 @@ void LpMotionStabilizer::stabilize(
     model.scaling(1);
 
     ClpPresolve presolveInfo;
-    Ptr<ClpSimplex> presolvedModel = presolveInfo.presolvedModel(model);
+    Ptr<ClpSimplex> presolvedModel(presolveInfo.presolvedModel(model));
 
-    if (!presolvedModel.empty())
+    if (presolvedModel)
     {
         presolvedModel->dual();
         presolveInfo.postsolve(true);
diff --git a/modules/videostab/src/precomp.cpp b/modules/videostab/src/precomp.cpp
deleted file mode 100644
index 111385282..000000000
--- a/modules/videostab/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/videostab/src/precomp.hpp b/modules/videostab/src/precomp.hpp
index 691107412..aa6026dee 100644
--- a/modules/videostab/src/precomp.hpp
+++ b/modules/videostab/src/precomp.hpp
@@ -65,4 +65,3 @@ inline float intensity(const cv::Point3_<uchar> &bgr)
 }
 
 #endif
-
diff --git a/modules/videostab/src/stabilizer.cpp b/modules/videostab/src/stabilizer.cpp
index 50ac05c6f..f9c09ba36 100644
--- a/modules/videostab/src/stabilizer.cpp
+++ b/modules/videostab/src/stabilizer.cpp
@@ -54,11 +54,11 @@ namespace videostab
 
 StabilizerBase::StabilizerBase()
 {
-    setLog(new LogToStdout());
-    setFrameSource(new NullFrameSource());
-    setMotionEstimator(new KeypointBasedMotionEstimator(new MotionEstimatorRansacL2()));
-    setDeblurer(new NullDeblurer());
-    setInpainter(new NullInpainter());
+    setLog(makePtr<LogToStdout>());
+    setFrameSource(makePtr<NullFrameSource>());
+    setMotionEstimator(makePtr<KeypointBasedMotionEstimator>(makePtr<MotionEstimatorRansacL2>()));
+    setDeblurer(makePtr<NullDeblurer>());
+    setInpainter(makePtr<NullInpainter>());
     setRadius(15);
     setTrimRatio(0);
     setCorrectionForInclusion(false);
@@ -156,7 +156,7 @@ bool StabilizerBase::doOneIteration()
 
 void StabilizerBase::setUp(const Mat &firstFrame)
 {
-    InpainterBase *inpaint = static_cast<InpainterBase*>(inpainter_);
+    InpainterBase *inpaint = inpainter_.get();
     doInpainting_ = dynamic_cast<NullInpainter*>(inpaint) == 0;
     if (doInpainting_)
     {
@@ -167,7 +167,7 @@ void StabilizerBase::setUp(const Mat &firstFrame)
         inpainter_->setStabilizationMotions(stabilizationMotions_);
     }
 
-    DeblurerBase *deblurer = static_cast<DeblurerBase*>(deblurer_);
+    DeblurerBase *deblurer = deblurer_.get();
     doDeblurring_ = dynamic_cast<NullDeblurer*>(deblurer) == 0;
     if (doDeblurring_)
     {
@@ -252,7 +252,7 @@ void StabilizerBase::logProcessingTime()
 
 OnePassStabilizer::OnePassStabilizer()
 {
-    setMotionFilter(new GaussianMotionFilter());
+    setMotionFilter(makePtr<GaussianMotionFilter>());
     reset();
 }
 
@@ -308,8 +308,8 @@ Mat OnePassStabilizer::postProcessFrame(const Mat &frame)
 
 TwoPassStabilizer::TwoPassStabilizer()
 {
-    setMotionStabilizer(new GaussianMotionFilter());
-    setWobbleSuppressor(new NullWobbleSuppressor());
+    setMotionStabilizer(makePtr<GaussianMotionFilter>());
+    setWobbleSuppressor(makePtr<NullWobbleSuppressor>());
     setEstimateTrimRatio(false);
     reset();
 }
@@ -371,7 +371,7 @@ void TwoPassStabilizer::runPrePassIfNecessary()
     {
         // check if we must do wobble suppression
 
-        WobbleSuppressorBase *wobble = static_cast<WobbleSuppressorBase*>(wobbleSuppressor_);
+        WobbleSuppressorBase *wobble = wobbleSuppressor_.get();
         doWobbleSuppression_ = dynamic_cast<NullWobbleSuppressor*>(wobble) == 0;
 
         // estimate motions
@@ -469,7 +469,7 @@ void TwoPassStabilizer::setUp(const Mat &firstFrame)
     for (int i = -radius_; i <= 0; ++i)
         at(i, frames_) = firstFrame;
 
-    WobbleSuppressorBase *wobble = static_cast<WobbleSuppressorBase*>(wobbleSuppressor_);
+    WobbleSuppressorBase *wobble = wobbleSuppressor_.get();
     doWobbleSuppression_ = dynamic_cast<NullWobbleSuppressor*>(wobble) == 0;
     if (doWobbleSuppression_)
     {
diff --git a/modules/videostab/src/wobble_suppression.cpp b/modules/videostab/src/wobble_suppression.cpp
index 049bb81b3..0b652b9db 100644
--- a/modules/videostab/src/wobble_suppression.cpp
+++ b/modules/videostab/src/wobble_suppression.cpp
@@ -60,7 +60,7 @@ namespace videostab
 
 WobbleSuppressorBase::WobbleSuppressorBase() : motions_(0), stabilizationMotions_(0)
 {
-    setMotionEstimator(new KeypointBasedMotionEstimator(new MotionEstimatorRansacL2(MM_HOMOGRAPHY)));
+    setMotionEstimator(makePtr<KeypointBasedMotionEstimator>(makePtr<MotionEstimatorRansacL2>(MM_HOMOGRAPHY)));
 }
 
 
@@ -160,4 +160,3 @@ void MoreAccurateMotionWobbleSuppressorGpu::suppress(int idx, const Mat &frame,
 
 } // namespace videostab
 } // namespace cv
-
diff --git a/modules/world/src/precomp.cpp b/modules/world/src/precomp.cpp
deleted file mode 100644
index e69de29bb..000000000
diff --git a/platforms/android/package/CMakeLists.txt b/platforms/android/package/CMakeLists.txt
index 0bfb3fe89..1382a078c 100644
--- a/platforms/android/package/CMakeLists.txt
+++ b/platforms/android/package/CMakeLists.txt
@@ -91,4 +91,4 @@ add_custom_command(
 
 install(FILES "${APK_NAME}" DESTINATION "apk/" COMPONENT main)
 add_custom_target(android_package ALL SOURCES "${APK_NAME}" )
-add_dependencies(android_package opencv_java)
\ No newline at end of file
+add_dependencies(android_package opencv_java)
diff --git a/platforms/android/refman.rst b/platforms/android/refman.rst
index 12d7ea6ec..b830bbe09 100644
--- a/platforms/android/refman.rst
+++ b/platforms/android/refman.rst
@@ -6,4 +6,4 @@ OpenCV4Android Reference
    :maxdepth: 2
 
    service/doc/index.rst
-   java.rst
\ No newline at end of file
+   java.rst
diff --git a/platforms/android/service/all.py b/platforms/android/service/all.py
index 0993b89ac..18ab4570b 100755
--- a/platforms/android/service/all.py
+++ b/platforms/android/service/all.py
@@ -40,4 +40,4 @@ if (__name__ ==  "__main__"):
         os.system(PushCommand)
         print("Testing on device \"%s\"" % Name)
         TestCommand = "%s \"%s\" \"%s\" 2>&1" % (os.path.join(HomeDir, "test_native.py"), Arch, Name)
-        os.system(TestCommand)
\ No newline at end of file
+        os.system(TestCommand)
diff --git a/platforms/android/service/device.conf b/platforms/android/service/device.conf
index 2953b5451..f8c6b5a86 100644
--- a/platforms/android/service/device.conf
+++ b/platforms/android/service/device.conf
@@ -1,3 +1,3 @@
 armeabi;15c000000581404;
 x86;0123456789ABCDEF;
-mips;Novo7 Basic;
\ No newline at end of file
+mips;Novo7 Basic;
diff --git a/platforms/android/service/doc/BaseLoaderCallback.rst b/platforms/android/service/doc/BaseLoaderCallback.rst
index 71915c449..f8c30a800 100644
--- a/platforms/android/service/doc/BaseLoaderCallback.rst
+++ b/platforms/android/service/doc/BaseLoaderCallback.rst
@@ -48,7 +48,7 @@ See the "15-puzzle" OpenCV sample for details.
         super.onResume();
 
         Log.i(TAG, "Trying to load OpenCV library");
-        if (!OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_4, this, mOpenCVCallBack))
+        if (!OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_6, this, mOpenCVCallBack))
         {
             Log.e(TAG, "Cannot connect to OpenCV Manager");
         }
diff --git a/platforms/android/service/doc/Intro.rst b/platforms/android/service/doc/Intro.rst
index d0e9d7347..4b00b8e4a 100644
--- a/platforms/android/service/doc/Intro.rst
+++ b/platforms/android/service/doc/Intro.rst
@@ -41,4 +41,4 @@ Subsequent launches of OpenCV apps\:
 Architecture of OpenCV Manager
 ------------------------------
 
-.. image:: img/Structure.png
\ No newline at end of file
+.. image:: img/Structure.png
diff --git a/platforms/android/service/doc/JavaHelper.rst b/platforms/android/service/doc/JavaHelper.rst
index e90b016e5..9262a7cf7 100644
--- a/platforms/android/service/doc/JavaHelper.rst
+++ b/platforms/android/service/doc/JavaHelper.rst
@@ -55,3 +55,7 @@ OpenCV version constants
 .. data:: OPENCV_VERSION_2_4_5
 
     OpenCV Library version 2.4.5
+
+.. data:: OPENCV_VERSION_2_4_6
+
+    OpenCV Library version 2.4.6
diff --git a/platforms/android/service/doc/LoaderCallbackInterface.rst b/platforms/android/service/doc/LoaderCallbackInterface.rst
index 440b6b673..63838a127 100644
--- a/platforms/android/service/doc/LoaderCallbackInterface.rst
+++ b/platforms/android/service/doc/LoaderCallbackInterface.rst
@@ -46,4 +46,4 @@ Initialization status constants
 
 .. data:: INIT_FAILED
 
-    OpenCV library initialization failed
\ No newline at end of file
+    OpenCV library initialization failed
diff --git a/platforms/android/service/doc/UseCases.rst b/platforms/android/service/doc/UseCases.rst
index bbc7da02c..50ac050e9 100644
--- a/platforms/android/service/doc/UseCases.rst
+++ b/platforms/android/service/doc/UseCases.rst
@@ -29,4 +29,4 @@ Regular application start
 
 OpenCV Manager and OpenCV library has been already installed.
 
-.. image:: img/LibInstalled.png
\ No newline at end of file
+.. image:: img/LibInstalled.png
diff --git a/platforms/android/service/doc/build_uml.py b/platforms/android/service/doc/build_uml.py
index df9eb7bcd..94b61d6ae 100755
--- a/platforms/android/service/doc/build_uml.py
+++ b/platforms/android/service/doc/build_uml.py
@@ -20,4 +20,4 @@ if (not os.path.exists(TARGET_PATH)):
 
 for filename in os.listdir("."):
     if ("dia" == filename[-3:]):
-        os.system("%s --export %s %s" % (DiaPath, os.path.join(TARGET_PATH, filename[0:len(filename)-4] + ".png"), filename))
\ No newline at end of file
+        os.system("%s --export %s %s" % (DiaPath, os.path.join(TARGET_PATH, filename[0:len(filename)-4] + ".png"), filename))
diff --git a/platforms/android/service/engine/AndroidManifest.xml b/platforms/android/service/engine/AndroidManifest.xml
index 954955678..dc992b3a6 100644
--- a/platforms/android/service/engine/AndroidManifest.xml
+++ b/platforms/android/service/engine/AndroidManifest.xml
@@ -1,8 +1,8 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
     package="org.opencv.engine"
-    android:versionCode="27@ANDROID_PLATFORM_VERSION_CODE@"
-    android:versionName="2.7" >
+    android:versionCode="210@ANDROID_PLATFORM_VERSION_CODE@"
+    android:versionName="2.10" >
 
     <uses-sdk android:minSdkVersion="@ANDROID_NATIVE_API_LEVEL@" />
     <uses-feature android:name="android.hardware.touchscreen" android:required="false"/>
diff --git a/platforms/android/service/engine/CMakeLists.txt b/platforms/android/service/engine/CMakeLists.txt
index 852a028ca..b1cac9383 100644
--- a/platforms/android/service/engine/CMakeLists.txt
+++ b/platforms/android/service/engine/CMakeLists.txt
@@ -26,19 +26,32 @@ endif()
 
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/platforms/android/service/engine/.build/${ANDROID_MANIFEST_FILE}"  @ONLY)
 
-link_directories("${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib" "${ANDROID_SOURCE_TREE}/out/target/product/${ANDROID_PRODUCT}/system/lib" "${ANDROID_SOURCE_TREE}/bin/${ANDROID_ARCH_NAME}")
+link_directories(
+  "${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib"
+  "${ANDROID_SOURCE_TREE}/out/target/product/${ANDROID_PRODUCT}/system/lib"
+  "${ANDROID_SOURCE_TREE}/bin/${ANDROID_ARCH_NAME}")
+
+file(GLOB engine_files "jni/BinderComponent/*.cpp" "jni/BinderComponent/*.h" "jni/include/*.h")
+set(engine_libs "z" "binder" "log" "utils")
+
+if (TEGRA_DETECTOR)
+  if (ANDROID_NATIVE_API_LEVEL GREATER 8)
+    add_definitions(-DUSE_TEGRA_HW_DETECTOR)
+    list(APPEND engine_libs ${TEGRA_DETECTOR} GLESv2 EGL)
+  else()
+    message(FATAL_ERROR "Tegra detector required native api level 9 or above")
+  endif()
+endif()
 
 # -D__SUPPORT_ARMEABI_FEATURES key is also available
 add_definitions(-DPLATFORM_ANDROID -D__SUPPORT_ARMEABI_V7A_FEATURES -D__SUPPORT_TEGRA3 -D__SUPPORT_MIPS)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -fno-exceptions")
-
 set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-allow-shlib-undefined")
-file(GLOB engine_files "jni/BinderComponent/*.cpp" "jni/BinderComponent/*.h" "jni/include/*.h")
 
-include_directories(jni/BinderComponent jni/include)
+include_directories("jni/BinderComponent" "jni/include")
 include_directories(SYSTEM "${ANDROID_SOURCE_TREE}/frameworks/base/include" "${ANDROID_SOURCE_TREE}/system/core/include")
 add_library(${engine} SHARED ${engine_files})
-target_link_libraries(${engine} z binder log utils)
+target_link_libraries(${engine} ${engine_libs})
 
 set_target_properties(${engine} PROPERTIES
     OUTPUT_NAME ${engine}
@@ -51,7 +64,15 @@ add_custom_command(TARGET ${engine} POST_BUILD COMMAND ${CMAKE_STRIP} --strip-un
 file(GLOB engine_jni_files "jni/JNIWrapper/*.cpp" "jni/JNIWrapper/*.h" "jni/include/*.h")
 list(APPEND engine_jni_files jni/NativeService/CommonPackageManager.cpp jni/NativeService/PackageInfo.cpp)
 
-include_directories(jni/include jni/JNIWrapper jni/NativeService jni/BinderComponent "${ANDROID_SOURCE_TREE}/frameworks/base/include" "${ANDROID_SOURCE_TREE}/system/core/include" "${ANDROID_SOURCE_TREE}/frameworks/base/core/jni")
+include_directories(
+  jni/include jni/JNIWrapper
+  jni/NativeService
+  jni/BinderComponent
+  "${ANDROID_SOURCE_TREE}/frameworks/base/include"
+  "${ANDROID_SOURCE_TREE}/system/core/include"
+  "${ANDROID_SOURCE_TREE}/frameworks/base/core/jni"
+  )
+
 add_library(${engine}_jni SHARED ${engine_jni_files})
 target_link_libraries(${engine}_jni z binder log utils android_runtime ${engine})
 
diff --git a/platforms/android/service/engine/jni/Android.mk b/platforms/android/service/engine/jni/Android.mk
index 71d2c464c..a5c188110 100644
--- a/platforms/android/service/engine/jni/Android.mk
+++ b/platforms/android/service/engine/jni/Android.mk
@@ -83,4 +83,4 @@ include $(BUILD_SHARED_LIBRARY)
 #        Native test application
 #---------------------------------------------------------------------
 
-#include $(LOCAL_PATH)/Tests/Tests.mk
\ No newline at end of file
+#include $(LOCAL_PATH)/Tests/Tests.mk
diff --git a/platforms/android/service/engine/jni/Application.mk b/platforms/android/service/engine/jni/Application.mk
index 5647110fa..e530dc191 100644
--- a/platforms/android/service/engine/jni/Application.mk
+++ b/platforms/android/service/engine/jni/Application.mk
@@ -3,4 +3,4 @@ APP_PLATFORM := android-8
 APP_STL := stlport_static
 APP_CPPFLAGS := -fno-rtti -fno-exceptions
 NDK_TOOLCHAIN_VERSION=4.4.3
-#APP_OPTIM := debug
\ No newline at end of file
+#APP_OPTIM := debug
diff --git a/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp
index 997b8d6ce..fccb329e9 100644
--- a/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp
+++ b/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp
@@ -69,4 +69,4 @@ status_t BnOpenCVEngine::onTransact(uint32_t code, const Parcel& data, android::
     }
 
     return android::NO_ERROR;
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h b/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h
index bdb88ae1a..cda2b03e9 100644
--- a/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h
+++ b/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h
@@ -18,4 +18,4 @@ public:
 
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp
index b1a88ffb6..a9cbc11a2 100644
--- a/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp
+++ b/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp
@@ -68,4 +68,4 @@ bool BpOpenCVEngine::InstallVersion(String16 version)
     return static_cast<bool>(reply.readInt32());
 }
 
-IMPLEMENT_META_INTERFACE(OpenCVEngine, OPECV_ENGINE_CLASSNAME)
\ No newline at end of file
+IMPLEMENT_META_INTERFACE(OpenCVEngine, OPECV_ENGINE_CLASSNAME)
diff --git a/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h b/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h
index 01df00eaf..cb2af532d 100644
--- a/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h
+++ b/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h
@@ -17,4 +17,4 @@ public:
     virtual bool InstallVersion(android::String16 version);
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp
index eab49ac5f..d0d8514b7 100644
--- a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp
+++ b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp
@@ -13,7 +13,7 @@ int GetCpuID()
     map<string, string> cpu_info = GetCpuInfo();
     map<string, string>::const_iterator it;
 
-    #if defined(__i386__)
+#if defined(__i386__)
     LOGD("Using X86 HW detector");
     result |= ARCH_X86;
     it = cpu_info.find("flags");
@@ -161,8 +161,11 @@ int GetProcessorCount()
 
 int DetectKnownPlatforms()
 {
+#if defined(__arm__) && defined(USE_TEGRA_HW_DETECTOR)
     int tegra_status = DetectTegra();
-
+#else
+    int tegra_status = NOT_TEGRA;
+#endif
     // All Tegra platforms since Tegra3
     if (2 < tegra_status)
     {
@@ -172,4 +175,4 @@ int DetectKnownPlatforms()
     {
         return PLATFORM_UNKNOWN;
     }
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h
index 135684418..1dda8bd14 100644
--- a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h
+++ b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h
@@ -34,4 +34,4 @@ int GetProcessorCount();
 std::string GetPlatformName();
 int GetCpuID();
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp
index 274e36a4b..b0b2b5137 100644
--- a/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp
+++ b/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp
@@ -15,7 +15,7 @@ using namespace android;
 
 const int OpenCVEngine::Platform = DetectKnownPlatforms();
 const int OpenCVEngine::CpuID = GetCpuID();
-const int OpenCVEngine::KnownVersions[] = {2040000, 2040100, 2040200, 2040300, 2040301, 2040302, 2040400, 2040500};
+const int OpenCVEngine::KnownVersions[] = {2040000, 2040100, 2040200, 2040300, 2040301, 2040302, 2040400, 2040500, 2040600};
 
 bool OpenCVEngine::ValidateVersion(int version)
 {
diff --git a/platforms/android/service/engine/jni/BinderComponent/ProcReader.h b/platforms/android/service/engine/jni/BinderComponent/ProcReader.h
index 333e7c293..f703ccf9c 100644
--- a/platforms/android/service/engine/jni/BinderComponent/ProcReader.h
+++ b/platforms/android/service/engine/jni/BinderComponent/ProcReader.h
@@ -26,4 +26,4 @@
 // public part
 std::map<std::string, std::string> GetCpuInfo();
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/jni/BinderComponent/StringUtils.cpp b/platforms/android/service/engine/jni/BinderComponent/StringUtils.cpp
index 3809f5aa6..2e6b35a7b 100644
--- a/platforms/android/service/engine/jni/BinderComponent/StringUtils.cpp
+++ b/platforms/android/service/engine/jni/BinderComponent/StringUtils.cpp
@@ -89,4 +89,4 @@ vector<string> SplitStringVector(const string& src, const char seporator)
     }
 
     return result;
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/jni/BinderComponent/StringUtils.h b/platforms/android/service/engine/jni/BinderComponent/StringUtils.h
index d1fe36ce7..e36bfcc7c 100644
--- a/platforms/android/service/engine/jni/BinderComponent/StringUtils.h
+++ b/platforms/android/service/engine/jni/BinderComponent/StringUtils.h
@@ -10,4 +10,4 @@ std::set<std::string> SplitString(const std::string& src, const char seporator);
 bool ParseString(const std::string& src, std::string& key, std::string& value);
 std::vector<std::string> SplitStringVector(const std::string& src, const char seporator);
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp
deleted file mode 100644
index 3933efe49..000000000
--- a/platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "TegraDetector.h"
-#include <zlib.h>
-#include <string.h>
-
-#define KERNEL_CONFIG "/proc/config.gz"
-#define KERNEL_CONFIG_MAX_LINE_WIDTH 512
-#define KERNEL_CONFIG_TEGRA_MAGIC "CONFIG_ARCH_TEGRA=y"
-#define KERNEL_CONFIG_TEGRA2_MAGIC "CONFIG_ARCH_TEGRA_2x_SOC=y"
-#define KERNEL_CONFIG_TEGRA3_MAGIC "CONFIG_ARCH_TEGRA_3x_SOC=y"
-#define KERNEL_CONFIG_TEGRA4_MAGIC "CONFIG_ARCH_TEGRA_11x_SOC=y"
-#define MAX_DATA_LEN    4096
-
-int DetectTegra()
-{
-    int result = TEGRA_NOT_TEGRA;
-    gzFile kernelConfig = gzopen(KERNEL_CONFIG, "r");
-    if (kernelConfig != 0)
-    {
-        char tmpbuf[KERNEL_CONFIG_MAX_LINE_WIDTH];
-        const char *tegra_config = KERNEL_CONFIG_TEGRA_MAGIC;
-        const char *tegra2_config = KERNEL_CONFIG_TEGRA2_MAGIC;
-        const char *tegra3_config = KERNEL_CONFIG_TEGRA3_MAGIC;
-        const char *tegra4_config = KERNEL_CONFIG_TEGRA4_MAGIC;
-        int len = strlen(tegra_config);
-        int len2 = strlen(tegra2_config);
-        int len3 = strlen(tegra3_config);
-        int len4 = strlen(tegra4_config);
-        while (0 != gzgets(kernelConfig, tmpbuf, KERNEL_CONFIG_MAX_LINE_WIDTH))
-        {
-            if (0 == strncmp(tmpbuf, tegra_config, len))
-            {
-                result = 1;
-            }
-
-            if (0 == strncmp(tmpbuf, tegra2_config, len2))
-            {
-                result = 2;
-                break;
-            }
-
-            if (0 == strncmp(tmpbuf, tegra3_config, len3))
-            {
-                result = 3;
-                break;
-            }
-
-            if (0 == strncmp(tmpbuf, tegra4_config, len4))
-            {
-                result = 4;
-                break;
-            }
-        }
-        gzclose(kernelConfig);
-    }
-    else
-    {
-        result = TEGRA_DETECTOR_ERROR;
-    }
-
-    return result;
-}
\ No newline at end of file
diff --git a/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h
index 8fbdb607a..4ca930b75 100644
--- a/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h
+++ b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h
@@ -2,8 +2,13 @@
 #define __TEGRA_DETECTOR_H__
 
 #define TEGRA_DETECTOR_ERROR -2
-#define TEGRA_NOT_TEGRA -1
+#define NOT_TEGRA -1
+#define TEGRA2  2
+#define TEGRA3  3
+#define TEGRA4i 4
+#define TEGRA4  5
+#define TEGRA5  6
 
 int DetectTegra();
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp b/platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp
index be30d877b..3e490a316 100644
--- a/platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp
+++ b/platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp
@@ -22,4 +22,4 @@ JNIEXPORT jint JNICALL Java_org_opencv_engine_HardwareDetector_GetProcessorCount
 JNIEXPORT jint JNICALL Java_org_opencv_engine_HardwareDetector_DetectKnownPlatforms(JNIEnv* , jclass)
 {
     return DetectKnownPlatforms();
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/jni/NativeClient/ClientMain.cpp b/platforms/android/service/engine/jni/NativeClient/ClientMain.cpp
index 4e9bba296..e9075dc9a 100644
--- a/platforms/android/service/engine/jni/NativeClient/ClientMain.cpp
+++ b/platforms/android/service/engine/jni/NativeClient/ClientMain.cpp
@@ -39,4 +39,4 @@ int main(int argc, char *argv[])
     printf("OpenCVEngine version %d started", EngineVersion);
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/jni/NativeService/NativePackageManager.cpp b/platforms/android/service/engine/jni/NativeService/NativePackageManager.cpp
index aba13de1e..87d0ea0c1 100644
--- a/platforms/android/service/engine/jni/NativeService/NativePackageManager.cpp
+++ b/platforms/android/service/engine/jni/NativeService/NativePackageManager.cpp
@@ -16,4 +16,4 @@ vector<PackageInfo> NativePackageManager::GetInstalledPackages()
 
 NativePackageManager::~NativePackageManager()
 {
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/jni/NativeService/NativePackageManager.h b/platforms/android/service/engine/jni/NativeService/NativePackageManager.h
index c74a08359..329047f0b 100644
--- a/platforms/android/service/engine/jni/NativeService/NativePackageManager.h
+++ b/platforms/android/service/engine/jni/NativeService/NativePackageManager.h
@@ -13,4 +13,4 @@ protected:
     virtual std::vector<PackageInfo> GetInstalledPackages();
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/jni/NativeService/ServiceMain.cpp b/platforms/android/service/engine/jni/NativeService/ServiceMain.cpp
index 66ea99598..20cafa6c4 100644
--- a/platforms/android/service/engine/jni/NativeService/ServiceMain.cpp
+++ b/platforms/android/service/engine/jni/NativeService/ServiceMain.cpp
@@ -29,4 +29,4 @@ int main(int argc, char *argv[])
     delete PackageManager;
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/jni/Tests/PackageManagerStub.cpp b/platforms/android/service/engine/jni/Tests/PackageManagerStub.cpp
index 20be63a9b..e15465862 100644
--- a/platforms/android/service/engine/jni/Tests/PackageManagerStub.cpp
+++ b/platforms/android/service/engine/jni/Tests/PackageManagerStub.cpp
@@ -15,4 +15,4 @@ vector<PackageInfo> PackageManagerStub::GetInstalledPackages()
 
 PackageManagerStub::~PackageManagerStub()
 {
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/jni/Tests/PackageManagerStub.h b/platforms/android/service/engine/jni/Tests/PackageManagerStub.h
index a37e6b055..8d1d3a80b 100644
--- a/platforms/android/service/engine/jni/Tests/PackageManagerStub.h
+++ b/platforms/android/service/engine/jni/Tests/PackageManagerStub.h
@@ -14,4 +14,4 @@ protected:
     virtual std::vector<PackageInfo> GetInstalledPackages();
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/jni/Tests/TestMain.cpp b/platforms/android/service/engine/jni/Tests/TestMain.cpp
index 4c994ef44..96d7200cc 100644
--- a/platforms/android/service/engine/jni/Tests/TestMain.cpp
+++ b/platforms/android/service/engine/jni/Tests/TestMain.cpp
@@ -4,4 +4,4 @@ int main(int argc, char **argv)
 {
     ::testing::InitGoogleTest(&argc, argv);
     return RUN_ALL_TESTS();
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/jni/Tests/Tests.mk b/platforms/android/service/engine/jni/Tests/Tests.mk
index 4cfe1c16a..5b46bde52 100644
--- a/platforms/android/service/engine/jni/Tests/Tests.mk
+++ b/platforms/android/service/engine/jni/Tests/Tests.mk
@@ -44,4 +44,4 @@ LOCAL_LDLIBS += -lz -lbinder -llog -lutils
 
 LOCAL_SHARED_LIBRARIES += libOpenCVEngine
 
-include $(BUILD_EXECUTABLE)
\ No newline at end of file
+include $(BUILD_EXECUTABLE)
diff --git a/platforms/android/service/engine/jni/include/EngineCommon.h b/platforms/android/service/engine/jni/include/EngineCommon.h
index 2bee139ea..a03f02c68 100644
--- a/platforms/android/service/engine/jni/include/EngineCommon.h
+++ b/platforms/android/service/engine/jni/include/EngineCommon.h
@@ -19,4 +19,4 @@
 
 typedef const char* (*InfoFunctionType)();
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/jni/include/IOpenCVEngine.h b/platforms/android/service/engine/jni/include/IOpenCVEngine.h
index 4dbb251c5..735d3746b 100644
--- a/platforms/android/service/engine/jni/include/IOpenCVEngine.h
+++ b/platforms/android/service/engine/jni/include/IOpenCVEngine.h
@@ -29,4 +29,4 @@ public:
     virtual bool InstallVersion(android::String16 version) = 0;
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java b/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java
index 7fc7e1ae8..dc82ec30c 100644
--- a/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java
+++ b/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java
@@ -30,11 +30,12 @@ public class HardwareDetector
     // GPU Acceleration options
     public static final int FEATURES_HAS_GPU = 0x010000;
 
-    public static final int PLATFORM_TEGRA  = 1;
-    public static final int PLATFORM_TEGRA2 = 2;
-    public static final int PLATFORM_TEGRA3 = 3;
-    public static final int PLATFORM_TEGRA4 = 4;
-
+    public static final int PLATFORM_TEGRA   = 1;
+    public static final int PLATFORM_TEGRA2  = 2;
+    public static final int PLATFORM_TEGRA3  = 3;
+    public static final int PLATFORM_TEGRA4i = 4;
+    public static final int PLATFORM_TEGRA4  = 5;
+    public static final int PLATFORM_TEGRA5  = 6;
 
     public static final int PLATFORM_UNKNOWN = 0;
 
diff --git a/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl b/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl
index 91de3940c..a6cf193e3 100644
--- a/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl
+++ b/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl
@@ -30,4 +30,4 @@ interface OpenCVEngineInterface
     * @return Returns OpenCV libraries names seporated by symbol ";" in loading order
     */
     String getLibraryList(String version);
-}
\ No newline at end of file
+}
diff --git a/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java
index 3c1aac994..e22f7b529 100644
--- a/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java
+++ b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java
@@ -107,6 +107,10 @@ public class ManagerActivity extends Activity
             {
                 HardwarePlatformView.setText("Tegra 3");
             }
+            else if (HardwareDetector.PLATFORM_TEGRA4i == Platfrom)
+            {
+                HardwarePlatformView.setText("Tegra 4i");
+            }
             else
             {
                 HardwarePlatformView.setText("Tegra 4");
diff --git a/platforms/android/service/push_native.py b/platforms/android/service/push_native.py
index 408fefc28..cc14a2fa1 100755
--- a/platforms/android/service/push_native.py
+++ b/platforms/android/service/push_native.py
@@ -24,4 +24,4 @@ if (__name__ ==  "__main__"):
     os.system("adb %s push ./engine/libs/%s/OpenCVEngineNativeService %s" % (DEVICE_STR, DEVICE_ARCH, TARGET_DEVICE_PATH))
     os.system("adb %s push ./engine/libs/%s/OpenCVEngineTest %s" % (DEVICE_STR, DEVICE_ARCH, TARGET_DEVICE_PATH))
     os.system("adb %s push ./engine/libs/%s/OpenCVEngineTestApp %s" % (DEVICE_STR, DEVICE_ARCH, TARGET_DEVICE_PATH))
-    os.system("adb %s push ./engine/libs/%s/libOpenCVEngine_jni.so %s" % (DEVICE_STR, DEVICE_ARCH, TARGET_DEVICE_PATH))
\ No newline at end of file
+    os.system("adb %s push ./engine/libs/%s/libOpenCVEngine_jni.so %s" % (DEVICE_STR, DEVICE_ARCH, TARGET_DEVICE_PATH))
diff --git a/platforms/android/service/readme.txt b/platforms/android/service/readme.txt
index f4e65eb36..6255df5e9 100644
--- a/platforms/android/service/readme.txt
+++ b/platforms/android/service/readme.txt
@@ -14,20 +14,20 @@ manually using adb tool:
 
 .. code-block:: sh
 
-    adb install OpenCV-2.4.5-android-sdk/apk/OpenCV_2.4.5_Manager_2.7_<platform>.apk
+    adb install OpenCV-2.4.6-android-sdk/apk/OpenCV_2.4.6_Manager_2.9_<platform>.apk
 
 Use the table below to determine proper OpenCV Manager package for your device:
 
 +------------------------------+--------------+---------------------------------------------------+
 | Hardware Platform            | Android ver. | Package name                                      |
 +==============================+==============+===================================================+
-| armeabi-v7a (ARMv7-A + NEON) |    >= 2.3    | OpenCV_2.4.5_Manager_2.7_armv7a-neon.apk          |
+| armeabi-v7a (ARMv7-A + NEON) |    >= 2.3    | OpenCV_2.4.6_Manager_2.9_armv7a-neon.apk          |
 +------------------------------+--------------+---------------------------------------------------+
-| armeabi-v7a (ARMv7-A + NEON) |     = 2.2    | OpenCV_2.4.5_Manager_2.7_armv7a-neon-android8.apk |
+| armeabi-v7a (ARMv7-A + NEON) |     = 2.2    | OpenCV_2.4.6_Manager_2.9_armv7a-neon-android8.apk |
 +------------------------------+--------------+---------------------------------------------------+
-| armeabi (ARMv5, ARMv6)       |    >= 2.3    | OpenCV_2.4.5_Manager_2.7_armeabi.apk              |
+| armeabi (ARMv5, ARMv6)       |    >= 2.3    | OpenCV_2.4.6_Manager_2.9_armeabi.apk              |
 +------------------------------+--------------+---------------------------------------------------+
-| Intel x86                    |    >= 2.3    | OpenCV_2.4.5_Manager_2.7_x86.apk                  |
+| Intel x86                    |    >= 2.3    | OpenCV_2.4.6_Manager_2.9_x86.apk                  |
 +------------------------------+--------------+---------------------------------------------------+
-| MIPS                         |    >= 2.3    | OpenCV_2.4.5_Manager_2.7_mips.apk                 |
+| MIPS                         |    >= 2.3    | OpenCV_2.4.6_Manager_2.9_mips.apk                 |
 +------------------------------+--------------+---------------------------------------------------+
diff --git a/ios/Info.plist.in b/platforms/ios/Info.plist.in
similarity index 80%
rename from ios/Info.plist.in
rename to platforms/ios/Info.plist.in
index 89ef38625..b2a3baf52 100644
--- a/ios/Info.plist.in
+++ b/platforms/ios/Info.plist.in
@@ -5,11 +5,11 @@
     <key>CFBundleName</key>
     <string>OpenCV</string>
     <key>CFBundleIdentifier</key>
-    <string>com.itseez.opencv</string>
+    <string>org.opencv</string>
     <key>CFBundleVersion</key>
-    <string>${VERSION}</string>
+    <string>${OPENCV_LIBVERSION}</string>
     <key>CFBundleShortVersionString</key>
-    <string>${VERSION}</string>
+    <string>${OPENCV_LIBVERSION}</string>
     <key>CFBundleSignature</key>
     <string>????</string>
     <key>CFBundlePackageType</key>
diff --git a/ios/build_framework.py b/platforms/ios/build_framework.py
similarity index 84%
rename from ios/build_framework.py
rename to platforms/ios/build_framework.py
index ceef4b71d..cb3788f7d 100755
--- a/ios/build_framework.py
+++ b/platforms/ios/build_framework.py
@@ -38,7 +38,7 @@ def build_opencv(srcroot, buildroot, target, arch):
     # for some reason, if you do not specify CMAKE_BUILD_TYPE, it puts libs to "RELEASE" rather than "Release"
     cmakeargs = ("-GXcode " +
                 "-DCMAKE_BUILD_TYPE=Release " +
-                "-DCMAKE_TOOLCHAIN_FILE=%s/ios/cmake/Toolchains/Toolchain-%s_Xcode.cmake " +
+                "-DCMAKE_TOOLCHAIN_FILE=%s/platforms/ios/cmake/Toolchains/Toolchain-%s_Xcode.cmake " +
                 "-DBUILD_opencv_world=ON " +
                 "-DCMAKE_INSTALL_PREFIX=install") % (srcroot, target)
     # if cmake cache exists, just rerun cmake to update OpenCV.xproj if necessary
@@ -71,19 +71,11 @@ def put_framework_together(srcroot, dstroot):
     os.makedirs(framework_dir)
     os.chdir(framework_dir)
 
-    # determine OpenCV version (without subminor part)
-    tdir0 = "../build/" + targetlist[0]
-    cfg = open(tdir0 + "/cvconfig.h", "rt")
-    for l in cfg.readlines():
-        if l.startswith("#define  VERSION"):
-            opencv_version = l[l.find("\"")+1:l.rfind(".")]
-            break
-    cfg.close()
-
     # form the directory tree
     dstdir = "Versions/A"
     os.makedirs(dstdir + "/Resources")
 
+    tdir0 = "../build/" + targetlist[0]
     # copy headers
     shutil.copytree(tdir0 + "/install/include/opencv2", dstdir + "/Headers")
 
@@ -91,16 +83,8 @@ def put_framework_together(srcroot, dstroot):
     wlist = " ".join(["../build/" + t + "/lib/Release/libopencv_world.a" for t in targetlist])
     os.system("lipo -create " + wlist + " -o " + dstdir + "/opencv2")
 
-    # form Info.plist
-    srcfile = open(srcroot + "/ios/Info.plist.in", "rt")
-    dstfile = open(dstdir + "/Resources/Info.plist", "wt")
-    for l in srcfile.readlines():
-        dstfile.write(l.replace("${VERSION}", opencv_version))
-    srcfile.close()
-    dstfile.close()
-
-    # copy cascades
-    # TODO ...
+    # copy Info.plist
+    shutil.copyfile(tdir0 + "/ios/Info.plist", dstdir + "/Resources/Info.plist")
 
     # make symbolic links
     os.symlink("A", "Versions/Current")
@@ -125,4 +109,4 @@ if __name__ == "__main__":
         print "Usage:\n\t./build_framework.py <outputdir>\n\n"
         sys.exit(0)
 
-    build_framework(os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), "..")), os.path.abspath(sys.argv[1]))
\ No newline at end of file
+    build_framework(os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), "../..")), os.path.abspath(sys.argv[1]))
diff --git a/ios/cmake/Modules/Platform/iOS.cmake b/platforms/ios/cmake/Modules/Platform/iOS.cmake
similarity index 99%
rename from ios/cmake/Modules/Platform/iOS.cmake
rename to platforms/ios/cmake/Modules/Platform/iOS.cmake
index 4d196e65d..e021aca55 100644
--- a/ios/cmake/Modules/Platform/iOS.cmake
+++ b/platforms/ios/cmake/Modules/Platform/iOS.cmake
@@ -160,4 +160,4 @@ list (APPEND CMAKE_SYSTEM_PREFIX_PATH
 
     # Project install destination.
     "${CMAKE_INSTALL_PREFIX}"
-)
\ No newline at end of file
+)
diff --git a/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake b/platforms/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake
similarity index 80%
rename from ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake
rename to platforms/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake
index 67343253b..14555cd6a 100644
--- a/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake
+++ b/platforms/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake
@@ -4,12 +4,12 @@ set (IPHONEOS TRUE)
 # Standard settings
 set (CMAKE_SYSTEM_NAME iOS)
 # Include extra modules for the iOS platform files
-set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/ios/cmake/Modules")
+set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/platforms/ios/cmake/Modules")
 
-# Force the compilers to gcc for iOS
+# Force the compilers to clang for iOS
 include (CMakeForceCompiler)
-#CMAKE_FORCE_C_COMPILER (gcc gcc)
-#CMAKE_FORCE_CXX_COMPILER (g++ g++)
+#CMAKE_FORCE_C_COMPILER (clang GNU)
+#CMAKE_FORCE_CXX_COMPILER (clang++ GNU)
 
 set (CMAKE_C_SIZEOF_DATA_PTR 4)
 set (CMAKE_C_HAS_ISYSROOT 1)
@@ -28,4 +28,4 @@ SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
 SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
 SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
 
-message (STATUS "iPhoneOS toolchain loaded")
\ No newline at end of file
+message (STATUS "iPhoneOS toolchain loaded")
diff --git a/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake b/platforms/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake
similarity index 80%
rename from ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake
rename to platforms/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake
index 7ef8113ed..937079d18 100644
--- a/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake
+++ b/platforms/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake
@@ -4,12 +4,12 @@ set (IPHONESIMULATOR TRUE)
 # Standard settings
 set (CMAKE_SYSTEM_NAME iOS)
 # Include extra modules for the iOS platform files
-set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/ios/cmake/Modules")
+set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/platforms/ios/cmake/Modules")
 
-# Force the compilers to gcc for iOS
+# Force the compilers to clang for iOS
 include (CMakeForceCompiler)
-#CMAKE_FORCE_C_COMPILER (gcc gcc)
-#CMAKE_FORCE_CXX_COMPILER (g++ g++)
+#CMAKE_FORCE_C_COMPILER (clang GNU)
+#CMAKE_FORCE_CXX_COMPILER (clang++ GNU)
 
 set (CMAKE_C_SIZEOF_DATA_PTR 4)
 set (CMAKE_C_HAS_ISYSROOT 1)
@@ -28,4 +28,4 @@ SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
 SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
 SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
 
-message (STATUS "iPhoneSimulator toolchain loaded")
\ No newline at end of file
+message (STATUS "iPhoneSimulator toolchain loaded")
diff --git a/platforms/ios/readme.txt b/platforms/ios/readme.txt
new file mode 100644
index 000000000..0c39e7213
--- /dev/null
+++ b/platforms/ios/readme.txt
@@ -0,0 +1,7 @@
+Building OpenCV from Source, using CMake and Command Line
+=========================================================
+
+cd ~/<my_working_directory>
+python opencv/platforms/ios/build_framework.py ios
+
+If everything's fine, a few minutes later you will get ~/<my_working_directory>/ios/opencv2.framework. You can add this framework to your Xcode projects.
diff --git a/platforms/readme.txt b/platforms/readme.txt
index 7e1c4555c..b039d3dc8 100644
--- a/platforms/readme.txt
+++ b/platforms/readme.txt
@@ -1 +1,3 @@
-This folder contains toolchains and additional files that are needed for cross compitation.
\ No newline at end of file
+This folder contains toolchains and additional files that are needed for cross compilation.
+For more information see introduction tutorials for target platform in documentation:
+http://docs.opencv.org/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.html#table-of-content-introduction
diff --git a/platforms/scripts/ABI_compat_generator.py b/platforms/scripts/ABI_compat_generator.py
index fdabf0061..d7cc3728a 100755
--- a/platforms/scripts/ABI_compat_generator.py
+++ b/platforms/scripts/ABI_compat_generator.py
@@ -6,7 +6,7 @@ import os
 
 
 architecture = 'armeabi'
-excludedHeaders = set(['hdf5.h', 'cap_ios.h', 'eigen.hpp', 'cxeigen.hpp']) #TOREMOVE
+excludedHeaders = set(['hdf5.h', 'cap_ios.h', 'ios.h', 'eigen.hpp', 'cxeigen.hpp']) #TOREMOVE
 systemIncludes = ['sources/cxx-stl/gnu-libstdc++/4.6/include', \
     '/opt/android-ndk-r8c/platforms/android-8/arch-arm', # TODO: check if this one could be passed as command line arg
     'sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include']
diff --git a/platforms/scripts/camera_build.conf b/platforms/scripts/camera_build.conf
index cd172b4fd..fb79cacc2 100644
--- a/platforms/scripts/camera_build.conf
+++ b/platforms/scripts/camera_build.conf
@@ -21,3 +21,7 @@ native_camera_r4.2.0; armeabi-v7a; 14; $ANDROID_STUB_ROOT/4.2.0
 native_camera_r4.2.0; armeabi;     14; $ANDROID_STUB_ROOT/4.2.0
 native_camera_r4.2.0; x86;         14; $ANDROID_STUB_ROOT/4.2.0
 native_camera_r4.2.0; mips;        14; $ANDROID_STUB_ROOT/4.2.0
+native_camera_r4.3.0; armeabi;     14; $ANDROID_STUB_ROOT/4.3.0
+native_camera_r4.3.0; armeabi-v7a; 14; $ANDROID_STUB_ROOT/4.3.0
+native_camera_r4.3.0; x86;         14; $ANDROID_STUB_ROOT/4.3.0
+native_camera_r4.3.0; mips;        14; $ANDROID_STUB_ROOT/4.3.0
diff --git a/platforms/scripts/cmake_winrt.cmd b/platforms/scripts/cmake_winrt.cmd
index aafed7d09..392781ceb 100644
--- a/platforms/scripts/cmake_winrt.cmd
+++ b/platforms/scripts/cmake_winrt.cmd
@@ -1,6 +1,9 @@
-mkdir build
-cd build
+mkdir build_winrt_arm
+cd build_winrt_arm
 
-rem call "C:\Program Files\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat"
+set msvc_path=C:\Program Files\Microsoft Visual Studio 11.0
 
-cmake.exe -GNinja -DCMAKE_BUILD_TYPE=Release -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\..\winrt\arm.winrt.toolchain.cmake ..\..\..
+call "%msvc_path%\Common7\Tools\VsDevCmd.bat"
+call "%msvc_path%\VC\bin\x86_arm\vcvarsx86_arm.bat"
+
+cmake.exe -GNinja -DCMAKE_BUILD_TYPE=Release -DENABLE_WINRT_MODE=ON -DWITH_FFMPEG=OFF -DWITH_MSMF=OFF -DWITH_DSHOW=OFF -DWITH_VFW=OFF -DWITH_OPENEXR=OFF -DWITH_CUDA=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DBUILD_opencv_java=OFF -DCMAKE_TOOLCHAIN_FILE=..\winrt\arm.winrt.toolchain.cmake  %* ..\..
diff --git a/platforms/winrt/arm.winrt.toolchain.cmake b/platforms/winrt/arm.winrt.toolchain.cmake
index b34056cd5..a5158d01f 100644
--- a/platforms/winrt/arm.winrt.toolchain.cmake
+++ b/platforms/winrt/arm.winrt.toolchain.cmake
@@ -3,4 +3,4 @@ set(CMAKE_SYSTEM_PROCESSOR "arm-v7a")
 
 set(CMAKE_FIND_ROOT_PATH "${CMAKE_SOURCE_DIR}/platforms/winrt")
 set(CMAKE_REQUIRED_DEFINITIONS -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE)
-add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE)
\ No newline at end of file
+add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE)
diff --git a/samples/MacOSX/FaceTracker/FaceTracker-Info.plist b/samples/MacOSX/FaceTracker/FaceTracker-Info.plist
index 8388dcd4f..45d8bce13 100644
--- a/samples/MacOSX/FaceTracker/FaceTracker-Info.plist
+++ b/samples/MacOSX/FaceTracker/FaceTracker-Info.plist
@@ -2,19 +2,19 @@
 <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
 <dict>
-	<key>CFBundleDevelopmentRegion</key>
-	<string>English</string>
-	<key>CFBundleExecutable</key>
-	<string>${EXECUTABLE_NAME}</string>
-	<key>CFBundleIdentifier</key>
-	<string>de.rwth-aachen.ient.FaceTracker</string>
-	<key>CFBundleInfoDictionaryVersion</key>
-	<string>6.0</string>
-	<key>CFBundlePackageType</key>
-	<string>APPL</string>
-	<key>CFBundleSignature</key>
-	<string>????</string>
-	<key>CFBundleVersion</key>
-	<string>1.0</string>
+    <key>CFBundleDevelopmentRegion</key>
+    <string>English</string>
+    <key>CFBundleExecutable</key>
+    <string>${EXECUTABLE_NAME}</string>
+    <key>CFBundleIdentifier</key>
+    <string>de.rwth-aachen.ient.FaceTracker</string>
+    <key>CFBundleInfoDictionaryVersion</key>
+    <string>6.0</string>
+    <key>CFBundlePackageType</key>
+    <string>APPL</string>
+    <key>CFBundleSignature</key>
+    <string>????</string>
+    <key>CFBundleVersion</key>
+    <string>1.0</string>
 </dict>
 </plist>
diff --git a/samples/android/.gitignore b/samples/android/.gitignore
index 2d406cbbc..a21956836 100644
--- a/samples/android/.gitignore
+++ b/samples/android/.gitignore
@@ -4,4 +4,4 @@ build.xml
 local.properties
 proguard-project.txt
 project.properties
-default.properties
\ No newline at end of file
+default.properties
diff --git a/samples/android/CMakeLists.txt b/samples/android/CMakeLists.txt
index c0b626d9b..0dc4a3cd6 100644
--- a/samples/android/CMakeLists.txt
+++ b/samples/android/CMakeLists.txt
@@ -9,6 +9,7 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS -Wmissing-declarations)
 add_subdirectory(15-puzzle)
 add_subdirectory(face-detection)
 add_subdirectory(image-manipulations)
+add_subdirectory(camera-calibration)
 add_subdirectory(color-blob-detection)
 add_subdirectory(tutorial-1-camerapreview)
 add_subdirectory(tutorial-2-mixedprocessing)
diff --git a/samples/android/camera-calibration/.classpath b/samples/android/camera-calibration/.classpath
new file mode 100644
index 000000000..46c3d4696
--- /dev/null
+++ b/samples/android/camera-calibration/.classpath
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.LIBRARIES"/>
+	<classpathentry kind="con" path="com.android.ide.eclipse.adt.ANDROID_FRAMEWORK"/>
+	<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.DEPENDENCIES"/>
+	<classpathentry kind="src" path="src"/>
+	<classpathentry kind="src" path="gen"/>
+	<classpathentry kind="output" path="bin/classes"/>
+</classpath>
diff --git a/samples/android/camera-calibration/.project b/samples/android/camera-calibration/.project
new file mode 100644
index 000000000..eae413e1a
--- /dev/null
+++ b/samples/android/camera-calibration/.project
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>OpenCV Sample - camera-calibration</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>com.android.ide.eclipse.adt.PreCompilerBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>com.android.ide.eclipse.adt.ApkBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>com.android.ide.eclipse.adt.AndroidNature</nature>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>
diff --git a/samples/android/camera-calibration/.settings/org.eclipse.jdt.core.prefs b/samples/android/camera-calibration/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 000000000..48ab4c6b1
--- /dev/null
+++ b/samples/android/camera-calibration/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,4 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.source=1.6
diff --git a/samples/android/camera-calibration/AndroidManifest.xml b/samples/android/camera-calibration/AndroidManifest.xml
new file mode 100644
index 000000000..619c919ee
--- /dev/null
+++ b/samples/android/camera-calibration/AndroidManifest.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+          package="org.opencv.samples.cameracalibration"
+          android:versionCode="1"
+          android:versionName="1.0">
+
+    <application
+        android:label="@string/app_name"
+        android:icon="@drawable/icon"
+        android:theme="@android:style/Theme.NoTitleBar.Fullscreen">
+
+        <activity android:name="CameraCalibrationActivity"
+                  android:label="@string/app_name"
+                  android:screenOrientation="landscape"
+                  android:configChanges="keyboardHidden|orientation">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+    </application>
+
+    <supports-screens android:resizeable="true"
+                      android:smallScreens="true"
+                      android:normalScreens="true"
+                      android:largeScreens="true"
+                      android:anyDensity="true" />
+
+    <uses-sdk android:minSdkVersion="8" />
+
+    <uses-permission android:name="android.permission.CAMERA"/>
+
+    <uses-feature android:name="android.hardware.camera" android:required="false"/>
+    <uses-feature android:name="android.hardware.camera.autofocus" android:required="false"/>
+    <uses-feature android:name="android.hardware.camera.front" android:required="false"/>
+    <uses-feature android:name="android.hardware.camera.front.autofocus" android:required="false"/>
+
+</manifest>
diff --git a/samples/android/camera-calibration/CMakeLists.txt b/samples/android/camera-calibration/CMakeLists.txt
new file mode 100644
index 000000000..83b11b364
--- /dev/null
+++ b/samples/android/camera-calibration/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(sample example-camera-calibration)
+
+add_android_project(${sample} "${CMAKE_CURRENT_SOURCE_DIR}" LIBRARY_DEPS ${OpenCV_BINARY_DIR} SDK_TARGET 11 ${ANDROID_SDK_TARGET})
+if(TARGET ${sample})
+  add_dependencies(opencv_android_examples ${sample})
+endif()
diff --git a/samples/android/camera-calibration/res/drawable/icon.png b/samples/android/camera-calibration/res/drawable/icon.png
new file mode 100644
index 000000000..79ad94845
Binary files /dev/null and b/samples/android/camera-calibration/res/drawable/icon.png differ
diff --git a/samples/android/camera-calibration/res/layout/camera_calibration_surface_view.xml b/samples/android/camera-calibration/res/layout/camera_calibration_surface_view.xml
new file mode 100644
index 000000000..0feccde3a
--- /dev/null
+++ b/samples/android/camera-calibration/res/layout/camera_calibration_surface_view.xml
@@ -0,0 +1,12 @@
+<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
+    xmlns:tools="http://schemas.android.com/tools"
+    xmlns:opencv="http://schemas.android.com/apk/res-auto"
+    android:layout_width="match_parent"
+    android:layout_height="match_parent" >
+
+    <org.opencv.android.JavaCameraView
+        android:layout_width="fill_parent"
+        android:layout_height="fill_parent"
+        android:id="@+id/camera_calibration_java_surface_view" />
+
+</LinearLayout>
diff --git a/samples/android/camera-calibration/res/menu/calibration.xml b/samples/android/camera-calibration/res/menu/calibration.xml
new file mode 100644
index 000000000..9c90f127c
--- /dev/null
+++ b/samples/android/camera-calibration/res/menu/calibration.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="utf-8"?>
+<menu xmlns:android="http://schemas.android.com/apk/res/android" >
+    <group android:checkableBehavior="single">
+        <item android:id="@+id/calibrate"
+              android:title="@string/action_calibrate"
+              android:showAsAction="ifRoom|withText" />
+        <item android:id="@+id/preview_mode"
+              android:title="@string/preview_mode">
+              <menu>
+                  <group android:checkableBehavior="single">
+                      <item android:id="@+id/calibration"
+                            android:title="@string/calibration"
+                            android:checked="true" />
+                      <item android:id="@+id/undistortion"
+                            android:title="@string/undistortion" />
+                      <item android:id="@+id/comparison"
+                            android:title="@string/comparison" />
+                  </group>
+              </menu>
+        </item>
+    </group>
+</menu>
diff --git a/samples/android/camera-calibration/res/values/strings.xml b/samples/android/camera-calibration/res/values/strings.xml
new file mode 100644
index 000000000..e1ce932a1
--- /dev/null
+++ b/samples/android/camera-calibration/res/values/strings.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+
+    <string name="app_name">OCV Camera Calibration</string>
+    <string name="action_calibrate">Calibrate</string>
+    <string name="calibration">Calibration</string>
+    <string name="undistortion">Undistortion</string>
+    <string name="comparison">Comparison</string>
+    <string name="preview_mode">Preview mode</string>
+    <string name="calibration_successful">Successfully calibrated!\nAvg. re-projection error:</string>
+    <string name="calibration_unsuccessful">Unsuccessful calibration.\nTry again</string>
+    <string name="more_samples">Please, capture more samples</string>
+    <string name="calibrating">Calibrating...</string>
+    <string name="please_wait">Please, wait</string>
+    <string name="original">Original</string>
+    <string name="undistorted">Undistorted</string>
+
+</resources>
diff --git a/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CalibrationResult.java b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CalibrationResult.java
new file mode 100644
index 000000000..4b03d5967
--- /dev/null
+++ b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CalibrationResult.java
@@ -0,0 +1,69 @@
+package org.opencv.samples.cameracalibration;
+
+import org.opencv.core.Mat;
+
+import android.app.Activity;
+import android.content.Context;
+import android.content.SharedPreferences;
+import android.util.Log;
+
+public abstract class CalibrationResult {
+    private static final String TAG = "OCVSample::CalibrationResult";
+
+    private static final int CAMERA_MATRIX_ROWS = 3;
+    private static final int CAMERA_MATRIX_COLS = 3;
+    private static final int DISTORTION_COEFFICIENTS_SIZE = 5;
+
+    public static void save(Activity activity, Mat cameraMatrix, Mat distortionCoefficients) {
+        SharedPreferences sharedPref = activity.getPreferences(Context.MODE_PRIVATE);
+        SharedPreferences.Editor editor = sharedPref.edit();
+
+        double[] cameraMatrixArray = new double[CAMERA_MATRIX_ROWS * CAMERA_MATRIX_COLS];
+        cameraMatrix.get(0,  0, cameraMatrixArray);
+        for (int i = 0; i < CAMERA_MATRIX_ROWS; i++) {
+            for (int j = 0; j < CAMERA_MATRIX_COLS; j++) {
+                Integer id = i * CAMERA_MATRIX_ROWS + j;
+                editor.putFloat(id.toString(), (float)cameraMatrixArray[id]);
+            }
+        }
+
+        double[] distortionCoefficientsArray = new double[DISTORTION_COEFFICIENTS_SIZE];
+        distortionCoefficients.get(0, 0, distortionCoefficientsArray);
+        int shift = CAMERA_MATRIX_ROWS * CAMERA_MATRIX_COLS;
+        for (Integer i = shift; i < DISTORTION_COEFFICIENTS_SIZE + shift; i++) {
+            editor.putFloat(i.toString(), (float)distortionCoefficientsArray[i-shift]);
+        }
+
+        editor.commit();
+        Log.i(TAG, "Saved camera matrix: " + cameraMatrix.dump());
+        Log.i(TAG, "Saved distortion coefficients: " + distortionCoefficients.dump());
+    }
+
+    public static boolean tryLoad(Activity activity, Mat cameraMatrix, Mat distortionCoefficients) {
+        SharedPreferences sharedPref = activity.getPreferences(Context.MODE_PRIVATE);
+        if (sharedPref.getFloat("0", -1) == -1) {
+            Log.i(TAG, "No previous calibration results found");
+            return false;
+        }
+
+        double[] cameraMatrixArray = new double[CAMERA_MATRIX_ROWS * CAMERA_MATRIX_COLS];
+        for (int i = 0; i < CAMERA_MATRIX_ROWS; i++) {
+            for (int j = 0; j < CAMERA_MATRIX_COLS; j++) {
+                Integer id = i * CAMERA_MATRIX_ROWS + j;
+                cameraMatrixArray[id] = sharedPref.getFloat(id.toString(), -1);
+            }
+        }
+        cameraMatrix.put(0, 0, cameraMatrixArray);
+        Log.i(TAG, "Loaded camera matrix: " + cameraMatrix.dump());
+
+        double[] distortionCoefficientsArray = new double[DISTORTION_COEFFICIENTS_SIZE];
+        int shift = CAMERA_MATRIX_ROWS * CAMERA_MATRIX_COLS;
+        for (Integer i = shift; i < DISTORTION_COEFFICIENTS_SIZE + shift; i++) {
+            distortionCoefficientsArray[i - shift] = sharedPref.getFloat(i.toString(), -1);
+        }
+        distortionCoefficients.put(0, 0, distortionCoefficientsArray);
+        Log.i(TAG, "Loaded distortion coefficients: " + distortionCoefficients.dump());
+
+        return true;
+    }
+}
diff --git a/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrationActivity.java b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrationActivity.java
new file mode 100644
index 000000000..33c9bbbf4
--- /dev/null
+++ b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrationActivity.java
@@ -0,0 +1,216 @@
+// This sample is based on "Camera calibration With OpenCV" tutorial:
+// http://docs.opencv.org/doc/tutorials/calib3d/camera_calibration/camera_calibration.html
+//
+// It uses standard OpenCV asymmetric circles grid pattern 11x4:
+// https://github.com/Itseez/opencv/blob/2.4/doc/acircles_pattern.png.
+// The results are the camera matrix and 5 distortion coefficients.
+//
+// Tap on highlighted pattern to capture pattern corners for calibration.
+// Move pattern along the whole screen and capture data.
+//
+// When you've captured necessary amount of pattern corners (usually ~20 are enough),
+// press "Calibrate" button for performing camera calibration.
+
+package org.opencv.samples.cameracalibration;
+
+import org.opencv.android.BaseLoaderCallback;
+import org.opencv.android.CameraBridgeViewBase;
+import org.opencv.android.CameraBridgeViewBase.CvCameraViewFrame;
+import org.opencv.android.CameraBridgeViewBase.CvCameraViewListener2;
+import org.opencv.android.LoaderCallbackInterface;
+import org.opencv.android.OpenCVLoader;
+import org.opencv.core.Mat;
+
+import android.app.Activity;
+import android.app.ProgressDialog;
+import android.content.res.Resources;
+import android.os.AsyncTask;
+import android.os.Bundle;
+import android.util.Log;
+import android.view.Menu;
+import android.view.MenuItem;
+import android.view.MotionEvent;
+import android.view.SurfaceView;
+import android.view.View;
+import android.view.View.OnTouchListener;
+import android.view.WindowManager;
+import android.widget.Toast;
+
+public class CameraCalibrationActivity extends Activity implements CvCameraViewListener2, OnTouchListener {
+    private static final String TAG = "OCVSample::Activity";
+
+    private CameraBridgeViewBase mOpenCvCameraView;
+    private CameraCalibrator mCalibrator;
+    private OnCameraFrameRender mOnCameraFrameRender;
+    private int mWidth;
+    private int mHeight;
+
+    private BaseLoaderCallback mLoaderCallback = new BaseLoaderCallback(this) {
+        @Override
+        public void onManagerConnected(int status) {
+            switch (status) {
+            case LoaderCallbackInterface.SUCCESS:
+            {
+                Log.i(TAG, "OpenCV loaded successfully");
+                mOpenCvCameraView.enableView();
+                mOpenCvCameraView.setOnTouchListener(CameraCalibrationActivity.this);
+            } break;
+            default:
+            {
+                super.onManagerConnected(status);
+            } break;
+            }
+        }
+    };
+
+    public CameraCalibrationActivity() {
+        Log.i(TAG, "Instantiated new " + this.getClass());
+    }
+
+    @Override
+    public void onCreate(Bundle savedInstanceState) {
+        Log.i(TAG, "called onCreate");
+        super.onCreate(savedInstanceState);
+        getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON);
+
+        setContentView(R.layout.camera_calibration_surface_view);
+
+        mOpenCvCameraView = (CameraBridgeViewBase) findViewById(R.id.camera_calibration_java_surface_view);
+        mOpenCvCameraView.setVisibility(SurfaceView.VISIBLE);
+        mOpenCvCameraView.setCvCameraViewListener(this);
+    }
+
+    @Override
+    public void onPause()
+    {
+        super.onPause();
+        if (mOpenCvCameraView != null)
+            mOpenCvCameraView.disableView();
+    }
+
+    @Override
+    public void onResume()
+    {
+        super.onResume();
+        OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_2, this, mLoaderCallback);
+    }
+
+    public void onDestroy() {
+        super.onDestroy();
+        if (mOpenCvCameraView != null)
+            mOpenCvCameraView.disableView();
+    }
+
+    @Override
+    public boolean onCreateOptionsMenu(Menu menu) {
+        super.onCreateOptionsMenu(menu);
+        getMenuInflater().inflate(R.menu.calibration, menu);
+
+        return true;
+    }
+
+    @Override
+    public boolean onPrepareOptionsMenu (Menu menu) {
+        super.onPrepareOptionsMenu(menu);
+        menu.findItem(R.id.preview_mode).setEnabled(true);
+        if (!mCalibrator.isCalibrated())
+            menu.findItem(R.id.preview_mode).setEnabled(false);
+
+        return true;
+    }
+
+    @Override
+    public boolean onOptionsItemSelected(MenuItem item) {
+        switch (item.getItemId()) {
+        case R.id.calibration:
+            mOnCameraFrameRender =
+                new OnCameraFrameRender(new CalibrationFrameRender(mCalibrator));
+            item.setChecked(true);
+            return true;
+        case R.id.undistortion:
+            mOnCameraFrameRender =
+                new OnCameraFrameRender(new UndistortionFrameRender(mCalibrator));
+            item.setChecked(true);
+            return true;
+        case R.id.comparison:
+            mOnCameraFrameRender =
+                new OnCameraFrameRender(new ComparisonFrameRender(mCalibrator, mWidth, mHeight, getResources()));
+            item.setChecked(true);
+            return true;
+        case R.id.calibrate:
+            final Resources res = getResources();
+            if (mCalibrator.getCornersBufferSize() < 2) {
+                (Toast.makeText(this, res.getString(R.string.more_samples), Toast.LENGTH_SHORT)).show();
+                return true;
+            }
+
+            mOnCameraFrameRender = new OnCameraFrameRender(new PreviewFrameRender());
+            new AsyncTask<Void, Void, Void>() {
+                private ProgressDialog calibrationProgress;
+
+                @Override
+                protected void onPreExecute() {
+                    calibrationProgress = new ProgressDialog(CameraCalibrationActivity.this);
+                    calibrationProgress.setTitle(res.getString(R.string.calibrating));
+                    calibrationProgress.setMessage(res.getString(R.string.please_wait));
+                    calibrationProgress.setCancelable(false);
+                    calibrationProgress.setIndeterminate(true);
+                    calibrationProgress.show();
+                }
+
+                @Override
+                protected Void doInBackground(Void... arg0) {
+                    mCalibrator.calibrate();
+                    return null;
+                }
+
+                @Override
+                protected void onPostExecute(Void result) {
+                    calibrationProgress.dismiss();
+                    mCalibrator.clearCorners();
+                    mOnCameraFrameRender = new OnCameraFrameRender(new CalibrationFrameRender(mCalibrator));
+                    String resultMessage = (mCalibrator.isCalibrated()) ?
+                            res.getString(R.string.calibration_successful)  + " " + mCalibrator.getAvgReprojectionError() :
+                            res.getString(R.string.calibration_unsuccessful);
+                    (Toast.makeText(CameraCalibrationActivity.this, resultMessage, Toast.LENGTH_SHORT)).show();
+
+                    if (mCalibrator.isCalibrated()) {
+                        CalibrationResult.save(CameraCalibrationActivity.this,
+                                mCalibrator.getCameraMatrix(), mCalibrator.getDistortionCoefficients());
+                    }
+                }
+            }.execute();
+            return true;
+        default:
+            return super.onOptionsItemSelected(item);
+        }
+    }
+
+    public void onCameraViewStarted(int width, int height) {
+        if (mWidth != width || mHeight != height) {
+            mWidth = width;
+            mHeight = height;
+            mCalibrator = new CameraCalibrator(mWidth, mHeight);
+            if (CalibrationResult.tryLoad(this, mCalibrator.getCameraMatrix(), mCalibrator.getDistortionCoefficients())) {
+                mCalibrator.setCalibrated();
+            }
+
+            mOnCameraFrameRender = new OnCameraFrameRender(new CalibrationFrameRender(mCalibrator));
+        }
+    }
+
+    public void onCameraViewStopped() {
+    }
+
+    public Mat onCameraFrame(CvCameraViewFrame inputFrame) {
+        return mOnCameraFrameRender.render(inputFrame);
+    }
+
+    @Override
+    public boolean onTouch(View v, MotionEvent event) {
+        Log.d(TAG, "onTouch invoked");
+
+        mCalibrator.addCorners();
+        return false;
+    }
+}
diff --git a/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrator.java b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrator.java
new file mode 100644
index 000000000..2f9df6a3c
--- /dev/null
+++ b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrator.java
@@ -0,0 +1,169 @@
+package org.opencv.samples.cameracalibration;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.opencv.calib3d.Calib3d;
+import org.opencv.core.Core;
+import org.opencv.core.CvType;
+import org.opencv.core.Mat;
+import org.opencv.core.MatOfDouble;
+import org.opencv.core.MatOfPoint2f;
+import org.opencv.core.MatOfPoint3f;
+import org.opencv.core.Point;
+import org.opencv.core.Scalar;
+import org.opencv.core.Size;
+
+import android.util.Log;
+
+public class CameraCalibrator {
+    private static final String TAG = "OCVSample::CameraCalibrator";
+
+    private final Size mPatternSize = new Size(4, 11);
+    private final int mCornersSize = (int)(mPatternSize.width * mPatternSize.height);
+    private boolean mPatternWasFound = false;
+    private MatOfPoint2f mCorners = new MatOfPoint2f();
+    private List<Mat> mCornersBuffer = new ArrayList<Mat>();
+    private boolean mIsCalibrated = false;
+
+    private Mat mCameraMatrix = new Mat();
+    private Mat mDistortionCoefficients = new Mat();
+    private int mFlags;
+    private double mRms;
+    private double mSquareSize = 0.0181;
+    private Size mImageSize;
+
+    public CameraCalibrator(int width, int height) {
+        mImageSize = new Size(width, height);
+        mFlags = Calib3d.CALIB_FIX_PRINCIPAL_POINT +
+                 Calib3d.CALIB_ZERO_TANGENT_DIST +
+                 Calib3d.CALIB_FIX_ASPECT_RATIO +
+                 Calib3d.CALIB_FIX_K4 +
+                 Calib3d.CALIB_FIX_K5;
+        Mat.eye(3, 3, CvType.CV_64FC1).copyTo(mCameraMatrix);
+        mCameraMatrix.put(0, 0, 1.0);
+        Mat.zeros(5, 1, CvType.CV_64FC1).copyTo(mDistortionCoefficients);
+        Log.i(TAG, "Instantiated new " + this.getClass());
+    }
+
+    public void processFrame(Mat grayFrame, Mat rgbaFrame) {
+        findPattern(grayFrame);
+        renderFrame(rgbaFrame);
+    }
+
+    public void calibrate() {
+        ArrayList<Mat> rvecs = new ArrayList<Mat>();
+        ArrayList<Mat> tvecs = new ArrayList<Mat>();
+        Mat reprojectionErrors = new Mat();
+        ArrayList<Mat> objectPoints = new ArrayList<Mat>();
+        objectPoints.add(Mat.zeros(mCornersSize, 1, CvType.CV_32FC3));
+        calcBoardCornerPositions(objectPoints.get(0));
+        for (int i = 1; i < mCornersBuffer.size(); i++) {
+            objectPoints.add(objectPoints.get(0));
+        }
+
+        Calib3d.calibrateCamera(objectPoints, mCornersBuffer, mImageSize,
+                mCameraMatrix, mDistortionCoefficients, rvecs, tvecs, mFlags);
+
+        mIsCalibrated = Core.checkRange(mCameraMatrix)
+                && Core.checkRange(mDistortionCoefficients);
+
+        mRms = computeReprojectionErrors(objectPoints, rvecs, tvecs, reprojectionErrors);
+        Log.i(TAG, String.format("Average re-projection error: %f", mRms));
+        Log.i(TAG, "Camera matrix: " + mCameraMatrix.dump());
+        Log.i(TAG, "Distortion coefficients: " + mDistortionCoefficients.dump());
+    }
+
+    public void clearCorners() {
+        mCornersBuffer.clear();
+    }
+
+    private void calcBoardCornerPositions(Mat corners) {
+        final int cn = 3;
+        float positions[] = new float[mCornersSize * cn];
+
+        for (int i = 0; i < mPatternSize.height; i++) {
+            for (int j = 0; j < mPatternSize.width * cn; j += cn) {
+                positions[(int) (i * mPatternSize.width * cn + j + 0)] =
+                        (2 * (j / cn) + i % 2) * (float) mSquareSize;
+                positions[(int) (i * mPatternSize.width * cn + j + 1)] =
+                        i * (float) mSquareSize;
+                positions[(int) (i * mPatternSize.width * cn + j + 2)] = 0;
+            }
+        }
+        corners.create(mCornersSize, 1, CvType.CV_32FC3);
+        corners.put(0, 0, positions);
+    }
+
+    private double computeReprojectionErrors(List<Mat> objectPoints,
+            List<Mat> rvecs, List<Mat> tvecs, Mat perViewErrors) {
+        MatOfPoint2f cornersProjected = new MatOfPoint2f();
+        double totalError = 0;
+        double error;
+        float viewErrors[] = new float[objectPoints.size()];
+
+        MatOfDouble distortionCoefficients = new MatOfDouble(mDistortionCoefficients);
+        int totalPoints = 0;
+        for (int i = 0; i < objectPoints.size(); i++) {
+            MatOfPoint3f points = new MatOfPoint3f(objectPoints.get(i));
+            Calib3d.projectPoints(points, rvecs.get(i), tvecs.get(i),
+                    mCameraMatrix, distortionCoefficients, cornersProjected);
+            error = Core.norm(mCornersBuffer.get(i), cornersProjected, Core.NORM_L2);
+
+            int n = objectPoints.get(i).rows();
+            viewErrors[i] = (float) Math.sqrt(error * error / n);
+            totalError  += error * error;
+            totalPoints += n;
+        }
+        perViewErrors.create(objectPoints.size(), 1, CvType.CV_32FC1);
+        perViewErrors.put(0, 0, viewErrors);
+
+        return Math.sqrt(totalError / totalPoints);
+    }
+
+    private void findPattern(Mat grayFrame) {
+        mPatternWasFound = Calib3d.findCirclesGrid(grayFrame, mPatternSize,
+                mCorners, Calib3d.CALIB_CB_ASYMMETRIC_GRID);
+    }
+
+    public void addCorners() {
+        if (mPatternWasFound) {
+            mCornersBuffer.add(mCorners.clone());
+        }
+    }
+
+    private void drawPoints(Mat rgbaFrame) {
+        Calib3d.drawChessboardCorners(rgbaFrame, mPatternSize, mCorners, mPatternWasFound);
+    }
+
+    private void renderFrame(Mat rgbaFrame) {
+        drawPoints(rgbaFrame);
+
+        Core.putText(rgbaFrame, "Captured: " + mCornersBuffer.size(), new Point(rgbaFrame.cols() / 3 * 2, rgbaFrame.rows() * 0.1),
+                Core.FONT_HERSHEY_SIMPLEX, 1.0, new Scalar(255, 255, 0));
+    }
+
+    public Mat getCameraMatrix() {
+        return mCameraMatrix;
+    }
+
+    public Mat getDistortionCoefficients() {
+        return mDistortionCoefficients;
+    }
+
+    public int getCornersBufferSize() {
+        return mCornersBuffer.size();
+    }
+
+    public double getAvgReprojectionError() {
+        return mRms;
+    }
+
+    public boolean isCalibrated() {
+        return mIsCalibrated;
+    }
+
+    public void setCalibrated() {
+        mIsCalibrated = true;
+    }
+}
diff --git a/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/OnCameraFrameRender.java b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/OnCameraFrameRender.java
new file mode 100644
index 000000000..3f155c2bf
--- /dev/null
+++ b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/OnCameraFrameRender.java
@@ -0,0 +1,102 @@
+package org.opencv.samples.cameracalibration;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.opencv.android.CameraBridgeViewBase.CvCameraViewFrame;
+import org.opencv.core.Core;
+import org.opencv.core.Mat;
+import org.opencv.core.MatOfPoint;
+import org.opencv.core.Point;
+import org.opencv.core.Range;
+import org.opencv.core.Scalar;
+import org.opencv.imgproc.Imgproc;
+
+import android.content.res.Resources;
+
+abstract class FrameRender {
+    protected CameraCalibrator mCalibrator;
+
+    public abstract Mat render(CvCameraViewFrame inputFrame);
+}
+
+class PreviewFrameRender extends FrameRender {
+    @Override
+    public Mat render(CvCameraViewFrame inputFrame) {
+        return inputFrame.rgba();
+    }
+}
+
+class CalibrationFrameRender extends FrameRender {
+    public CalibrationFrameRender(CameraCalibrator calibrator) {
+        mCalibrator = calibrator;
+    }
+
+    @Override
+    public Mat render(CvCameraViewFrame inputFrame) {
+        Mat rgbaFrame = inputFrame.rgba();
+        Mat grayFrame = inputFrame.gray();
+        mCalibrator.processFrame(grayFrame, rgbaFrame);
+
+        return rgbaFrame;
+    }
+}
+
+class UndistortionFrameRender extends FrameRender {
+    public UndistortionFrameRender(CameraCalibrator calibrator) {
+        mCalibrator = calibrator;
+    }
+
+    @Override
+    public Mat render(CvCameraViewFrame inputFrame) {
+        Mat renderedFrame = new Mat(inputFrame.rgba().size(), inputFrame.rgba().type());
+        Imgproc.undistort(inputFrame.rgba(), renderedFrame,
+                mCalibrator.getCameraMatrix(), mCalibrator.getDistortionCoefficients());
+
+        return renderedFrame;
+    }
+}
+
+class ComparisonFrameRender extends FrameRender {
+    private int mWidth;
+    private int mHeight;
+    private Resources mResources;
+    public ComparisonFrameRender(CameraCalibrator calibrator, int width, int height, Resources resources) {
+        mCalibrator = calibrator;
+        mWidth = width;
+        mHeight = height;
+        mResources = resources;
+    }
+
+    @Override
+    public Mat render(CvCameraViewFrame inputFrame) {
+        Mat undistortedFrame = new Mat(inputFrame.rgba().size(), inputFrame.rgba().type());
+        Imgproc.undistort(inputFrame.rgba(), undistortedFrame,
+                mCalibrator.getCameraMatrix(), mCalibrator.getDistortionCoefficients());
+
+        Mat comparisonFrame = inputFrame.rgba();
+        undistortedFrame.colRange(new Range(0, mWidth / 2)).copyTo(comparisonFrame.colRange(new Range(mWidth / 2, mWidth)));
+        List<MatOfPoint> border = new ArrayList<MatOfPoint>();
+        final int shift = (int)(mWidth * 0.005);
+        border.add(new MatOfPoint(new Point(mWidth / 2 - shift, 0), new Point(mWidth / 2 + shift, 0),
+                new Point(mWidth / 2 + shift, mHeight), new Point(mWidth / 2 - shift, mHeight)));
+        Core.fillPoly(comparisonFrame, border, new Scalar(255, 255, 255));
+
+        Core.putText(comparisonFrame, mResources.getString(R.string.original), new Point(mWidth * 0.1, mHeight * 0.1),
+                Core.FONT_HERSHEY_SIMPLEX, 1.0, new Scalar(255, 255, 0));
+        Core.putText(comparisonFrame, mResources.getString(R.string.undistorted), new Point(mWidth * 0.6, mHeight * 0.1),
+                Core.FONT_HERSHEY_SIMPLEX, 1.0, new Scalar(255, 255, 0));
+
+        return comparisonFrame;
+    }
+}
+
+class OnCameraFrameRender {
+    private FrameRender mFrameRender;
+    public OnCameraFrameRender(FrameRender frameRender) {
+        mFrameRender = frameRender;
+    }
+    public Mat render(CvCameraViewFrame inputFrame) {
+        return mFrameRender.render(inputFrame);
+    }
+}
diff --git a/samples/android/color-blob-detection/CMakeLists.txt b/samples/android/color-blob-detection/CMakeLists.txt
index 400db29ce..42051bc3a 100644
--- a/samples/android/color-blob-detection/CMakeLists.txt
+++ b/samples/android/color-blob-detection/CMakeLists.txt
@@ -4,4 +4,3 @@ add_android_project(${sample} "${CMAKE_CURRENT_SOURCE_DIR}" LIBRARY_DEPS ${OpenC
 if(TARGET ${sample})
   add_dependencies(opencv_android_examples ${sample})
 endif()
-
diff --git a/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetectionActivity.java b/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetectionActivity.java
index 0799df26d..276b03aeb 100644
--- a/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetectionActivity.java
+++ b/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetectionActivity.java
@@ -187,4 +187,4 @@ public class ColorBlobDetectionActivity extends Activity implements OnTouchListe
 
         return new Scalar(pointMatRgba.get(0, 0));
     }
-}
\ No newline at end of file
+}
diff --git a/samples/android/face-detection/CMakeLists.txt b/samples/android/face-detection/CMakeLists.txt
index 70658a799..0f89b2a36 100644
--- a/samples/android/face-detection/CMakeLists.txt
+++ b/samples/android/face-detection/CMakeLists.txt
@@ -10,4 +10,3 @@ add_android_project(${sample} "${CMAKE_CURRENT_SOURCE_DIR}" LIBRARY_DEPS ${OpenC
 if(TARGET ${sample})
   add_dependencies(opencv_android_examples ${sample})
 endif()
-
diff --git a/samples/android/face-detection/jni/Android.mk b/samples/android/face-detection/jni/Android.mk
index e6db280c1..6b6642602 100644
--- a/samples/android/face-detection/jni/Android.mk
+++ b/samples/android/face-detection/jni/Android.mk
@@ -13,4 +13,4 @@ LOCAL_LDLIBS     += -llog -ldl
 
 LOCAL_MODULE     := detection_based_tracker
 
-include $(BUILD_SHARED_LIBRARY)
\ No newline at end of file
+include $(BUILD_SHARED_LIBRARY)
diff --git a/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp b/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp
index e0e53e370..33a8334c6 100644
--- a/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp
+++ b/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp
@@ -26,7 +26,7 @@ public:
             Detector(detector)
     {
         LOGD("CascadeDetectorAdapter::Detect::Detect");
-        CV_Assert(!detector.empty());
+        CV_Assert(detector);
     }
 
     void detect(const cv::Mat &Image, std::vector<cv::Rect> &objects)
@@ -57,11 +57,11 @@ struct DetectorAgregator
             mainDetector(_mainDetector),
             trackingDetector(_trackingDetector)
     {
-        CV_Assert(!_mainDetector.empty());
-        CV_Assert(!_trackingDetector.empty());
+        CV_Assert(_mainDetector);
+        CV_Assert(_trackingDetector);
 
         DetectionBasedTracker::Parameters DetectorParams;
-        tracker = new DetectionBasedTracker(mainDetector.ptr<DetectionBasedTracker::IDetector>(), trackingDetector.ptr<DetectionBasedTracker::IDetector>(), DetectorParams);
+        tracker = makePtr<DetectionBasedTracker>(mainDetector, trackingDetector, DetectorParams);
     }
 };
 
@@ -77,8 +77,10 @@ JNIEXPORT jlong JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker
 
     try
     {
-        cv::Ptr<CascadeDetectorAdapter> mainDetector = new CascadeDetectorAdapter(new CascadeClassifier(stdFileName));
-        cv::Ptr<CascadeDetectorAdapter> trackingDetector = new CascadeDetectorAdapter(new CascadeClassifier(stdFileName));
+        cv::Ptr<CascadeDetectorAdapter> mainDetector = makePtr<CascadeDetectorAdapter>(
+            makePtr<CascadeClassifier>(stdFileName));
+        cv::Ptr<CascadeDetectorAdapter> trackingDetector = makePtr<CascadeDetectorAdapter>(
+            makePtr<CascadeClassifier>(stdFileName));
         result = (jlong)new DetectorAgregator(mainDetector, trackingDetector);
         if (faceSize > 0)
         {
diff --git a/samples/android/hello-android/main.cpp b/samples/android/hello-android/main.cpp
index e595e066d..cd02e1742 100644
--- a/samples/android/hello-android/main.cpp
+++ b/samples/android/hello-android/main.cpp
@@ -24,4 +24,3 @@ int main(int argc, char* argv[])
 #endif
   return 0;
 }
-
diff --git a/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java b/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java
index 88ade8430..38f1d5959 100644
--- a/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java
+++ b/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java
@@ -48,17 +48,12 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
     private CameraBridgeViewBase mOpenCvCameraView;
 
     private Size                 mSize0;
-    private Size                 mSizeRgba;
-    private Size                 mSizeRgbaInner;
 
-    private Mat                  mRgba;
-    private Mat                  mGray;
     private Mat                  mIntermediateMat;
-    private Mat                  mHist;
     private Mat                  mMat0;
     private MatOfInt             mChannels[];
     private MatOfInt             mHistSize;
-    private int                  mHistSizeNum;
+    private int                  mHistSizeNum = 25;
     private MatOfFloat           mRanges;
     private Scalar               mColorsRGB[];
     private Scalar               mColorsHue[];
@@ -66,10 +61,6 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
     private Point                mP1;
     private Point                mP2;
     private float                mBuff[];
-    private Mat                  mRgbaInnerWindow;
-    private Mat                  mGrayInnerWindow;
-    private Mat                  mZoomWindow;
-    private Mat                  mZoomCorner;
     private Mat                  mSepiaKernel;
 
     public static int           viewMode = VIEW_MODE_RGBA;
@@ -166,13 +157,9 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
     }
 
     public void onCameraViewStarted(int width, int height) {
-        mGray = new Mat();
-        mRgba = new Mat();
         mIntermediateMat = new Mat();
         mSize0 = new Size();
-        mHist = new Mat();
         mChannels = new MatOfInt[] { new MatOfInt(0), new MatOfInt(1), new MatOfInt(2) };
-        mHistSizeNum = 25;
         mBuff = new float[mHistSizeNum];
         mHistSize = new MatOfInt(mHistSizeNum);
         mRanges = new MatOfFloat(0f, 256f);
@@ -197,14 +184,22 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
         mSepiaKernel.put(3, 0, /* A */0.000f, 0.000f, 0.000f, 1f);
     }
 
-    private void CreateAuxiliaryMats() {
-        if (mRgba.empty())
-            return;
+    public void onCameraViewStopped() {
+        // Explicitly deallocate Mats
+        if (mIntermediateMat != null)
+            mIntermediateMat.release();
 
-        mSizeRgba = mRgba.size();
+        mIntermediateMat = null;
+    }
 
-        int rows = (int) mSizeRgba.height;
-        int cols = (int) mSizeRgba.width;
+    public Mat onCameraFrame(CvCameraViewFrame inputFrame) {
+        Mat rgba = inputFrame.rgba();
+        Size sizeRgba = rgba.size();
+
+        Mat rgbaInnerWindow;
+
+        int rows = (int) sizeRgba.height;
+        int cols = (int) sizeRgba.width;
 
         int left = cols / 8;
         int top = rows / 8;
@@ -212,151 +207,107 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
         int width = cols * 3 / 4;
         int height = rows * 3 / 4;
 
-        if (mRgbaInnerWindow == null)
-            mRgbaInnerWindow = mRgba.submat(top, top + height, left, left + width);
-        mSizeRgbaInner = mRgbaInnerWindow.size();
-
-        if (mGrayInnerWindow == null && !mGray.empty())
-            mGrayInnerWindow = mGray.submat(top, top + height, left, left + width);
-
-        if (mZoomCorner == null)
-            mZoomCorner = mRgba.submat(0, rows / 2 - rows / 10, 0, cols / 2 - cols / 10);
-
-        if (mZoomWindow == null)
-            mZoomWindow = mRgba.submat(rows / 2 - 9 * rows / 100, rows / 2 + 9 * rows / 100, cols / 2 - 9 * cols / 100, cols / 2 + 9 * cols / 100);
-    }
-
-    public void onCameraViewStopped() {
-        // Explicitly deallocate Mats
-        if (mZoomWindow != null)
-            mZoomWindow.release();
-        if (mZoomCorner != null)
-            mZoomCorner.release();
-        if (mGrayInnerWindow != null)
-            mGrayInnerWindow.release();
-        if (mRgbaInnerWindow != null)
-            mRgbaInnerWindow.release();
-        if (mRgba != null)
-            mRgba.release();
-        if (mGray != null)
-            mGray.release();
-        if (mIntermediateMat != null)
-            mIntermediateMat.release();
-
-        mRgba = null;
-        mGray = null;
-        mIntermediateMat = null;
-        mRgbaInnerWindow = null;
-        mGrayInnerWindow = null;
-        mZoomCorner = null;
-        mZoomWindow = null;
-    }
-
-    public Mat onCameraFrame(CvCameraViewFrame inputFrame) {
-        mRgba = inputFrame.rgba();
-
         switch (ImageManipulationsActivity.viewMode) {
         case ImageManipulationsActivity.VIEW_MODE_RGBA:
             break;
 
         case ImageManipulationsActivity.VIEW_MODE_HIST:
-            if ((mSizeRgba == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
-                CreateAuxiliaryMats();
-            int thikness = (int) (mSizeRgba.width / (mHistSizeNum + 10) / 5);
+            Mat hist = new Mat();
+            int thikness = (int) (sizeRgba.width / (mHistSizeNum + 10) / 5);
             if(thikness > 5) thikness = 5;
-            int offset = (int) ((mSizeRgba.width - (5*mHistSizeNum + 4*10)*thikness)/2);
+            int offset = (int) ((sizeRgba.width - (5*mHistSizeNum + 4*10)*thikness)/2);
             // RGB
             for(int c=0; c<3; c++) {
-                Imgproc.calcHist(Arrays.asList(mRgba), mChannels[c], mMat0, mHist, mHistSize, mRanges);
-                Core.normalize(mHist, mHist, mSizeRgba.height/2, 0, Core.NORM_INF);
-                mHist.get(0, 0, mBuff);
+                Imgproc.calcHist(Arrays.asList(rgba), mChannels[c], mMat0, hist, mHistSize, mRanges);
+                Core.normalize(hist, hist, sizeRgba.height/2, 0, Core.NORM_INF);
+                hist.get(0, 0, mBuff);
                 for(int h=0; h<mHistSizeNum; h++) {
                     mP1.x = mP2.x = offset + (c * (mHistSizeNum + 10) + h) * thikness;
-                    mP1.y = mSizeRgba.height-1;
+                    mP1.y = sizeRgba.height-1;
                     mP2.y = mP1.y - 2 - (int)mBuff[h];
-                    Core.line(mRgba, mP1, mP2, mColorsRGB[c], thikness);
+                    Core.line(rgba, mP1, mP2, mColorsRGB[c], thikness);
                 }
             }
             // Value and Hue
-            Imgproc.cvtColor(mRgba, mIntermediateMat, Imgproc.COLOR_RGB2HSV_FULL);
+            Imgproc.cvtColor(rgba, mIntermediateMat, Imgproc.COLOR_RGB2HSV_FULL);
             // Value
-            Imgproc.calcHist(Arrays.asList(mIntermediateMat), mChannels[2], mMat0, mHist, mHistSize, mRanges);
-            Core.normalize(mHist, mHist, mSizeRgba.height/2, 0, Core.NORM_INF);
-            mHist.get(0, 0, mBuff);
+            Imgproc.calcHist(Arrays.asList(mIntermediateMat), mChannels[2], mMat0, hist, mHistSize, mRanges);
+            Core.normalize(hist, hist, sizeRgba.height/2, 0, Core.NORM_INF);
+            hist.get(0, 0, mBuff);
             for(int h=0; h<mHistSizeNum; h++) {
                 mP1.x = mP2.x = offset + (3 * (mHistSizeNum + 10) + h) * thikness;
-                mP1.y = mSizeRgba.height-1;
+                mP1.y = sizeRgba.height-1;
                 mP2.y = mP1.y - 2 - (int)mBuff[h];
-                Core.line(mRgba, mP1, mP2, mWhilte, thikness);
+                Core.line(rgba, mP1, mP2, mWhilte, thikness);
             }
             // Hue
-            Imgproc.calcHist(Arrays.asList(mIntermediateMat), mChannels[0], mMat0, mHist, mHistSize, mRanges);
-            Core.normalize(mHist, mHist, mSizeRgba.height/2, 0, Core.NORM_INF);
-            mHist.get(0, 0, mBuff);
+            Imgproc.calcHist(Arrays.asList(mIntermediateMat), mChannels[0], mMat0, hist, mHistSize, mRanges);
+            Core.normalize(hist, hist, sizeRgba.height/2, 0, Core.NORM_INF);
+            hist.get(0, 0, mBuff);
             for(int h=0; h<mHistSizeNum; h++) {
                 mP1.x = mP2.x = offset + (4 * (mHistSizeNum + 10) + h) * thikness;
-                mP1.y = mSizeRgba.height-1;
+                mP1.y = sizeRgba.height-1;
                 mP2.y = mP1.y - 2 - (int)mBuff[h];
-                Core.line(mRgba, mP1, mP2, mColorsHue[h], thikness);
+                Core.line(rgba, mP1, mP2, mColorsHue[h], thikness);
             }
             break;
 
         case ImageManipulationsActivity.VIEW_MODE_CANNY:
-             if ((mRgbaInnerWindow == null) || (mGrayInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
-                CreateAuxiliaryMats();
-            Imgproc.Canny(mRgbaInnerWindow, mIntermediateMat, 80, 90);
-            Imgproc.cvtColor(mIntermediateMat, mRgbaInnerWindow, Imgproc.COLOR_GRAY2BGRA, 4);
+            rgbaInnerWindow = rgba.submat(top, top + height, left, left + width);
+            Imgproc.Canny(rgbaInnerWindow, mIntermediateMat, 80, 90);
+            Imgproc.cvtColor(mIntermediateMat, rgbaInnerWindow, Imgproc.COLOR_GRAY2BGRA, 4);
+            rgbaInnerWindow.release();
             break;
 
         case ImageManipulationsActivity.VIEW_MODE_SOBEL:
-            mGray = inputFrame.gray();
-
-            if ((mRgbaInnerWindow == null) || (mGrayInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
-                CreateAuxiliaryMats();
-
-            Imgproc.Sobel(mGrayInnerWindow, mIntermediateMat, CvType.CV_8U, 1, 1);
+            Mat gray = inputFrame.gray();
+            Mat grayInnerWindow = gray.submat(top, top + height, left, left + width);
+            rgbaInnerWindow = rgba.submat(top, top + height, left, left + width);
+            Imgproc.Sobel(grayInnerWindow, mIntermediateMat, CvType.CV_8U, 1, 1);
             Core.convertScaleAbs(mIntermediateMat, mIntermediateMat, 10, 0);
-            Imgproc.cvtColor(mIntermediateMat, mRgbaInnerWindow, Imgproc.COLOR_GRAY2BGRA, 4);
+            Imgproc.cvtColor(mIntermediateMat, rgbaInnerWindow, Imgproc.COLOR_GRAY2BGRA, 4);
+            grayInnerWindow.release();
+            rgbaInnerWindow.release();
             break;
 
         case ImageManipulationsActivity.VIEW_MODE_SEPIA:
-            if ((mRgbaInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
-                CreateAuxiliaryMats();
-            Core.transform(mRgbaInnerWindow, mRgbaInnerWindow, mSepiaKernel);
+            rgbaInnerWindow = rgba.submat(top, top + height, left, left + width);
+            Core.transform(rgbaInnerWindow, rgbaInnerWindow, mSepiaKernel);
+            rgbaInnerWindow.release();
             break;
 
         case ImageManipulationsActivity.VIEW_MODE_ZOOM:
-            if ((mZoomCorner == null) || (mZoomWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
-                CreateAuxiliaryMats();
-            Imgproc.resize(mZoomWindow, mZoomCorner, mZoomCorner.size());
-
+            Mat zoomCorner = rgba.submat(0, rows / 2 - rows / 10, 0, cols / 2 - cols / 10);
+            Mat mZoomWindow = rgba.submat(rows / 2 - 9 * rows / 100, rows / 2 + 9 * rows / 100, cols / 2 - 9 * cols / 100, cols / 2 + 9 * cols / 100);
+            Imgproc.resize(mZoomWindow, zoomCorner, zoomCorner.size());
             Size wsize = mZoomWindow.size();
             Core.rectangle(mZoomWindow, new Point(1, 1), new Point(wsize.width - 2, wsize.height - 2), new Scalar(255, 0, 0, 255), 2);
+            zoomCorner.release();
+            mZoomWindow.release();
             break;
 
         case ImageManipulationsActivity.VIEW_MODE_PIXELIZE:
-            if ((mRgbaInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
-                CreateAuxiliaryMats();
-            Imgproc.resize(mRgbaInnerWindow, mIntermediateMat, mSize0, 0.1, 0.1, Imgproc.INTER_NEAREST);
-            Imgproc.resize(mIntermediateMat, mRgbaInnerWindow, mSizeRgbaInner, 0., 0., Imgproc.INTER_NEAREST);
+            rgbaInnerWindow = rgba.submat(top, top + height, left, left + width);
+            Imgproc.resize(rgbaInnerWindow, mIntermediateMat, mSize0, 0.1, 0.1, Imgproc.INTER_NEAREST);
+            Imgproc.resize(mIntermediateMat, rgbaInnerWindow, rgbaInnerWindow.size(), 0., 0., Imgproc.INTER_NEAREST);
+            rgbaInnerWindow.release();
             break;
 
         case ImageManipulationsActivity.VIEW_MODE_POSTERIZE:
-            if ((mRgbaInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
-                CreateAuxiliaryMats();
             /*
-            Imgproc.cvtColor(mRgbaInnerWindow, mIntermediateMat, Imgproc.COLOR_RGBA2RGB);
+            Imgproc.cvtColor(rgbaInnerWindow, mIntermediateMat, Imgproc.COLOR_RGBA2RGB);
             Imgproc.pyrMeanShiftFiltering(mIntermediateMat, mIntermediateMat, 5, 50);
-            Imgproc.cvtColor(mIntermediateMat, mRgbaInnerWindow, Imgproc.COLOR_RGB2RGBA);
+            Imgproc.cvtColor(mIntermediateMat, rgbaInnerWindow, Imgproc.COLOR_RGB2RGBA);
             */
-
-            Imgproc.Canny(mRgbaInnerWindow, mIntermediateMat, 80, 90);
-            mRgbaInnerWindow.setTo(new Scalar(0, 0, 0, 255), mIntermediateMat);
-            Core.convertScaleAbs(mRgbaInnerWindow, mIntermediateMat, 1./16, 0);
-            Core.convertScaleAbs(mIntermediateMat, mRgbaInnerWindow, 16, 0);
+            rgbaInnerWindow = rgba.submat(top, top + height, left, left + width);
+            Imgproc.Canny(rgbaInnerWindow, mIntermediateMat, 80, 90);
+            rgbaInnerWindow.setTo(new Scalar(0, 0, 0, 255), mIntermediateMat);
+            Core.convertScaleAbs(rgbaInnerWindow, mIntermediateMat, 1./16, 0);
+            Core.convertScaleAbs(mIntermediateMat, rgbaInnerWindow, 16, 0);
+            rgbaInnerWindow.release();
             break;
         }
 
-        return mRgba;
+        return rgba;
     }
 }
diff --git a/samples/android/native-activity/.cproject b/samples/android/native-activity/.cproject
index 44aadfe9a..83ca04b55 100644
--- a/samples/android/native-activity/.cproject
+++ b/samples/android/native-activity/.cproject
@@ -20,7 +20,7 @@
 					<folderInfo id="0.882924228." name="/" resourcePath="">
 						<toolChain id="org.eclipse.cdt.build.core.prefbase.toolchain.1667980868" name="No ToolChain" resourceTypeBasedDiscovery="false" superClass="org.eclipse.cdt.build.core.prefbase.toolchain">
 							<targetPlatform id="org.eclipse.cdt.build.core.prefbase.toolchain.1667980868.2108168132" name=""/>
-							<builder autoBuildTarget="" command="&quot;${NDKROOT}/ndk-build.cmd&quot;" enableAutoBuild="true" enableCleanBuild="false" id="org.eclipse.cdt.build.core.settings.default.builder.328915772" incrementalBuildTarget="" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="org.eclipse.cdt.build.core.settings.default.builder"/>
+							<builder autoBuildTarget="" command="${NDKROOT}/ndk-build.cmd" enableAutoBuild="true" enableCleanBuild="false" id="org.eclipse.cdt.build.core.settings.default.builder.328915772" incrementalBuildTarget="" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="org.eclipse.cdt.build.core.settings.default.builder"/>
 							<tool id="org.eclipse.cdt.build.core.settings.holder.libs.630148311" name="holder for library settings" superClass="org.eclipse.cdt.build.core.settings.holder.libs"/>
 							<tool id="org.eclipse.cdt.build.core.settings.holder.525090327" name="Assembly" superClass="org.eclipse.cdt.build.core.settings.holder">
 								<inputType id="org.eclipse.cdt.build.core.settings.holder.inType.1491216279" languageId="org.eclipse.cdt.core.assembly" languageName="Assembly" sourceContentType="org.eclipse.cdt.core.asmSource" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>
diff --git a/samples/android/native-activity/AndroidManifest.xml b/samples/android/native-activity/AndroidManifest.xml
index 369bc7502..55b696ba4 100644
--- a/samples/android/native-activity/AndroidManifest.xml
+++ b/samples/android/native-activity/AndroidManifest.xml
@@ -10,6 +10,7 @@
 
         <activity android:name="CvNativeActivity"
                   android:label="@string/app_name"
+                  android:screenOrientation="landscape"
                   android:configChanges="orientation|keyboardHidden">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
@@ -17,7 +18,9 @@
                 </intent-filter>
             </activity>
         <activity android:name="android.app.NativeActivity"
-                  android:label="@string/app_name">
+                  android:label="@string/app_name"
+                  android:screenOrientation="landscape"
+                  android:configChanges="keyboardHidden|orientation">
             <meta-data android:name="android.app.lib_name"
                     android:value="native_activity" />
         </activity>
diff --git a/samples/android/native-activity/jni/Android.mk b/samples/android/native-activity/jni/Android.mk
index fd4fd2bfb..7ae31e24c 100644
--- a/samples/android/native-activity/jni/Android.mk
+++ b/samples/android/native-activity/jni/Android.mk
@@ -7,7 +7,7 @@ include ../../sdk/native/jni/OpenCV.mk
 LOCAL_MODULE    := native_activity
 LOCAL_SRC_FILES := native.cpp
 LOCAL_LDLIBS    += -lm -llog -landroid
-LOCAL_STATIC_LIBRARIES := android_native_app_glue
+LOCAL_STATIC_LIBRARIES += android_native_app_glue
 
 include $(BUILD_SHARED_LIBRARY)
 
diff --git a/samples/android/native-activity/jni/native.cpp b/samples/android/native-activity/jni/native.cpp
index 5cfb3a961..052b866e6 100644
--- a/samples/android/native-activity/jni/native.cpp
+++ b/samples/android/native-activity/jni/native.cpp
@@ -11,9 +11,10 @@
 #include <math.h>
 #include <queue>
 
-#include <opencv2/core/core.hpp>
-#include <opencv2/imgproc/imgproc.hpp>
-#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/core.hpp>
+#include <opencv2/core/utility.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
 
 #define  LOG_TAG    "OCV:libnative_activity"
 #define  LOGD(...)  __android_log_print(ANDROID_LOG_DEBUG,LOG_TAG,__VA_ARGS__)
@@ -78,18 +79,29 @@ static void engine_draw_frame(Engine* engine, const cv::Mat& frame)
         return;
     }
 
-    void* pixels = buffer.bits;
+    int32_t* pixels = (int32_t*)buffer.bits;
 
     int left_indent = (buffer.width-frame.cols)/2;
     int top_indent = (buffer.height-frame.rows)/2;
 
-    for (int yy = top_indent; yy < std::min(frame.rows+top_indent, buffer.height); yy++)
+    if (top_indent > 0)
     {
-        unsigned char* line = (unsigned char*)pixels + left_indent*4*sizeof(unsigned char);
-        size_t line_size = std::min(frame.cols, buffer.width)*4*sizeof(unsigned char);
+        memset(pixels, 0, top_indent*buffer.stride*sizeof(int32_t));
+        pixels += top_indent*buffer.stride;
+    }
+
+    for (int yy = 0; yy < frame.rows; yy++)
+    {
+        if (left_indent > 0)
+        {
+            memset(pixels, 0, left_indent*sizeof(int32_t));
+            memset(pixels+left_indent+frame.cols, 0, (buffer.stride-frame.cols-left_indent)*sizeof(int32_t));
+        }
+        int32_t* line = pixels + left_indent;
+        size_t line_size = frame.cols*4*sizeof(unsigned char);
         memcpy(line, frame.ptr<unsigned char>(yy), line_size);
         // go to next line
-        pixels = (int32_t*)pixels + buffer.stride;
+        pixels += buffer.stride;
     }
     ANativeWindow_unlockAndPost(engine->app->window);
 }
@@ -104,10 +116,10 @@ static void engine_handle_cmd(android_app* app, int32_t cmd)
             {
                 LOGI("APP_CMD_INIT_WINDOW");
 
-                engine->capture = new cv::VideoCapture(0);
+                engine->capture = cv::makePtr<cv::VideoCapture>(0);
 
                 union {double prop; const char* name;} u;
-                u.prop = engine->capture->get(CV_CAP_PROP_SUPPORTED_PREVIEW_SIZES_STRING);
+                u.prop = engine->capture->get(cv::CAP_PROP_ANDROID_PREVIEW_SIZES_STRING);
 
                 int view_width = ANativeWindow_getWidth(app->window);
                 int view_height = ANativeWindow_getHeight(app->window);
@@ -124,8 +136,8 @@ static void engine_handle_cmd(android_app* app, int32_t cmd)
 
                 if ((camera_resolution.width != 0) && (camera_resolution.height != 0))
                 {
-                    engine->capture->set(CV_CAP_PROP_FRAME_WIDTH, camera_resolution.width);
-                    engine->capture->set(CV_CAP_PROP_FRAME_HEIGHT, camera_resolution.height);
+                    engine->capture->set(cv::CAP_PROP_FRAME_WIDTH, camera_resolution.width);
+                    engine->capture->set(cv::CAP_PROP_FRAME_HEIGHT, camera_resolution.height);
                 }
 
                 float scale = std::min((float)view_width/camera_resolution.width,
@@ -199,7 +211,7 @@ void android_main(android_app* app)
         if (!engine.capture.empty())
         {
             if (engine.capture->grab())
-                engine.capture->retrieve(drawing_frame, CV_CAP_ANDROID_COLOR_FRAME_RGBA);
+                engine.capture->retrieve(drawing_frame, cv::CAP_ANDROID_COLOR_FRAME_RGBA);
 
              char buffer[256];
              sprintf(buffer, "Display performance: %dx%d @ %.3f", drawing_frame.cols, drawing_frame.rows, fps);
diff --git a/samples/android/native-activity/src/org/opencv/samples/NativeActivity/CvNativeActivity.java b/samples/android/native-activity/src/org/opencv/samples/NativeActivity/CvNativeActivity.java
index 1a9302b1d..04da9a949 100644
--- a/samples/android/native-activity/src/org/opencv/samples/NativeActivity/CvNativeActivity.java
+++ b/samples/android/native-activity/src/org/opencv/samples/NativeActivity/CvNativeActivity.java
@@ -40,4 +40,4 @@ public class CvNativeActivity extends Activity {
         super.onResume();
         OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
     }
-}
\ No newline at end of file
+}
diff --git a/samples/android/tutorial-3-cameracontrol/src/org/opencv/samples/tutorial3/Tutorial3View.java b/samples/android/tutorial-3-cameracontrol/src/org/opencv/samples/tutorial3/Tutorial3View.java
index 7ba2d9f96..d102cbcde 100644
--- a/samples/android/tutorial-3-cameracontrol/src/org/opencv/samples/tutorial3/Tutorial3View.java
+++ b/samples/android/tutorial-3-cameracontrol/src/org/opencv/samples/tutorial3/Tutorial3View.java
@@ -84,4 +84,4 @@ public class Tutorial3View extends JavaCameraView implements PictureCallback {
         }
 
     }
-}
\ No newline at end of file
+}
diff --git a/samples/c/CMakeLists.txt b/samples/c/CMakeLists.txt
index dfab5e00a..7ea20b9b2 100644
--- a/samples/c/CMakeLists.txt
+++ b/samples/c/CMakeLists.txt
@@ -57,4 +57,3 @@ if (INSTALL_C_EXAMPLES AND NOT WIN32)
           DESTINATION share/OpenCV/samples/c
           PERMISSIONS OWNER_READ GROUP_READ WORLD_READ)
 endif ()
-
diff --git a/samples/c/adaptiveskindetector.cpp b/samples/c/adaptiveskindetector.cpp
index e1f777cc1..f94c7a871 100644
--- a/samples/c/adaptiveskindetector.cpp
+++ b/samples/c/adaptiveskindetector.cpp
@@ -410,4 +410,3 @@ int main(int argc, char** argv )
 
     return 0;
 }
-
diff --git a/samples/c/blobtrack_sample.cpp b/samples/c/blobtrack_sample.cpp
index 5fcd1a888..55f72eb20 100644
--- a/samples/c/blobtrack_sample.cpp
+++ b/samples/c/blobtrack_sample.cpp
@@ -751,6 +751,3 @@ int main(int argc, char* argv[])
     return 0;
 
 }   /* main() */
-
-
-
diff --git a/samples/c/build_all.sh b/samples/c/build_all.sh
index 787474aa2..94a5a055c 100755
--- a/samples/c/build_all.sh
+++ b/samples/c/build_all.sh
@@ -1,16 +1,16 @@
 #!/bin/sh
 
 if [ $# -gt 0 ] ; then
-	base=`basename $1 .c`
-	echo "compiling $base"
-	gcc -ggdb `pkg-config opencv --cflags --libs` $base.c -o $base 
+    base=`basename $1 .c`
+    echo "compiling $base"
+    gcc -ggdb `pkg-config opencv --cflags --libs` $base.c -o $base
 else
-	for i in *.c; do
-	    echo "compiling $i"
-	    gcc -ggdb `pkg-config --cflags opencv` -o `basename $i .c` $i `pkg-config --libs opencv`;
-	done
-	for i in *.cpp; do
-	    echo "compiling $i"
-	    g++ -ggdb `pkg-config --cflags opencv` -o `basename $i .cpp` $i `pkg-config --libs opencv`;
-	done
+    for i in *.c; do
+        echo "compiling $i"
+        gcc -ggdb `pkg-config --cflags opencv` -o `basename $i .c` $i `pkg-config --libs opencv`;
+    done
+    for i in *.cpp; do
+        echo "compiling $i"
+        g++ -ggdb `pkg-config --cflags opencv` -o `basename $i .cpp` $i `pkg-config --libs opencv`;
+    done
 fi
diff --git a/samples/c/example_cmake/CMakeLists.txt b/samples/c/example_cmake/CMakeLists.txt
index 85129a752..475677a09 100644
--- a/samples/c/example_cmake/CMakeLists.txt
+++ b/samples/c/example_cmake/CMakeLists.txt
@@ -13,6 +13,3 @@ ADD_EXECUTABLE(opencv_example  minarea.c)
 TARGET_LINK_LIBRARIES(opencv_example ${OpenCV_LIBS})
 
 #MESSAGE(STATUS "OpenCV_LIBS: ${OpenCV_LIBS}")
-
-
-
diff --git a/samples/c/example_cmake/README.txt b/samples/c/example_cmake/README.txt
index 7bf53e7ac..7d573e373 100644
--- a/samples/c/example_cmake/README.txt
+++ b/samples/c/example_cmake/README.txt
@@ -25,8 +25,3 @@ the CMake gui with:
  $ cmake-gui <OPENCV_SRC_PATH>/samples/c/example_cmake/
 
 And pick the correct value for OpenCV_DIR.
-
-
-
-
-
diff --git a/samples/c/example_cmake/minarea.c b/samples/c/example_cmake/minarea.c
index 916acb0ef..c6e0cf310 100644
--- a/samples/c/example_cmake/minarea.c
+++ b/samples/c/example_cmake/minarea.c
@@ -114,4 +114,3 @@ int main( int argc, char** argv )
 #ifdef _EiC
 main(1,"convexhull.c");
 #endif
-
diff --git a/samples/c/mushroom.cpp b/samples/c/mushroom.cpp
index 5763b1246..60eb9f066 100644
--- a/samples/c/mushroom.cpp
+++ b/samples/c/mushroom.cpp
@@ -90,7 +90,7 @@ static int mushroom_read_database( const char* filename, CvMat** data, CvMat** m
     }
 
     cvReleaseMemStorage( &storage );
-    delete el_ptr;
+    delete [] el_ptr;
     return 1;
 }
 
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index ab4a0d06c..4115bf9fc 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -5,7 +5,7 @@
 
 SET(OPENCV_CPP_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc
     opencv_highgui opencv_ml opencv_video opencv_objdetect opencv_photo opencv_nonfree opencv_softcascade
-    opencv_features2d opencv_calib3d opencv_legacy opencv_contrib opencv_stitching opencv_videostab)
+    opencv_features2d opencv_calib3d opencv_legacy opencv_contrib opencv_stitching opencv_videostab opencv_bioinspired)
 
 ocv_check_dependencies(${OPENCV_CPP_SAMPLES_REQUIRED_DEPS})
 
@@ -95,4 +95,3 @@ if (INSTALL_C_EXAMPLES AND NOT WIN32)
           DESTINATION share/OpenCV/samples/cpp
           PERMISSIONS OWNER_READ GROUP_READ WORLD_READ)
 endif()
-
diff --git a/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp b/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp
index 2117ede7b..539115a99 100644
--- a/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp
+++ b/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping.cpp
@@ -10,8 +10,9 @@
 #include <iostream>
 #include <cstring>
 
-#include "opencv2/contrib.hpp"
-#include "opencv2/highgui.hpp"
+#include "opencv2/bioinspired.hpp" // retina based algorithms
+#include "opencv2/imgproc.hpp" // cvCvtcolor function
+#include "opencv2/highgui.hpp" // display
 
 static void help(std::string errorMessage)
 {
@@ -127,7 +128,7 @@ static void drawPlot(const cv::Mat curve, const std::string figureTitle, const i
      normalize(imageInputRescaled, imageInputRescaled, 0.0, 255.0, cv::NORM_MINMAX);
  }
 
- cv::Ptr<cv::Retina> retina;
+ cv::Ptr<cv::bioinspired::Retina> retina;
  int retinaHcellsGain;
  int localAdaptation_photoreceptors, localAdaptation_Gcells;
  static void callBack_updateRetinaParams(int, void*)
@@ -175,6 +176,12 @@ static void drawPlot(const cv::Mat curve, const std::string figureTitle, const i
      }
 
      bool useLogSampling = !strcmp(argv[argc-1], "log"); // check if user wants retina log sampling processing
+     int chosenMethod=0;
+     if (!strcmp(argv[argc-1], "fast"))
+     {
+         chosenMethod=1;
+         std::cout<<"Using fast method (no spectral whithning), adaptation of Meylan&al 2008 method"<<std::endl;
+     }
 
      std::string inputImageName=argv[1];
 
@@ -210,17 +217,22 @@ static void drawPlot(const cv::Mat curve, const std::string figureTitle, const i
           * -> if the last parameter is 'log', then activate log sampling (favour foveal vision and subsamples peripheral vision)
           */
          if (useLogSampling)
-                {
-                     retina = cv::createRetina(inputImage.size(),true, cv::RETINA_COLOR_BAYER, true, 2.0, 10.0);
+         {
+             retina = cv::bioinspired::createRetina(inputImage.size(),true, cv::bioinspired::RETINA_COLOR_BAYER, true, 2.0, 10.0);
                  }
          else// -> else allocate "classical" retina :
-             retina = cv::createRetina(inputImage.size());
+             retina = cv::bioinspired::createRetina(inputImage.size());
 
-        // save default retina parameters file in order to let you see this and maybe modify it and reload using method "setup"
-        retina->write("RetinaDefaultParameters.xml");
+         // create a fast retina tone mapper (Meyla&al algorithm)
+         std::cout<<"Allocating fast tone mapper..."<<std::endl;
+         //cv::Ptr<cv::RetinaFastToneMapping> fastToneMapper=createRetinaFastToneMapping(inputImage.size());
+         std::cout<<"Fast tone mapper allocated"<<std::endl;
 
-                 // desactivate Magnocellular pathway processing (motion information extraction) since it is not usefull here
-                 retina->activateMovingContoursProcessing(false);
+         // save default retina parameters file in order to let you see this and maybe modify it and reload using method "setup"
+         retina->write("RetinaDefaultParameters.xml");
+
+         // desactivate Magnocellular pathway processing (motion information extraction) since it is not usefull here
+         retina->activateMovingContoursProcessing(false);
 
          // declare retina output buffers
          cv::Mat retinaOutput_parvo;
@@ -230,20 +242,19 @@ static void drawPlot(const cv::Mat curve, const std::string figureTitle, const i
          histogramClippingValue=0; // default value... updated with interface slider
          //inputRescaleMat = inputImage;
          //outputRescaleMat = imageInputRescaled;
-         cv::namedWindow("Retina input image (with cut edges histogram for basic pixels error avoidance)",1);
-         cv::createTrackbar("histogram edges clipping limit", "Retina input image (with cut edges histogram for basic pixels error avoidance)",&histogramClippingValue,50,callBack_rescaleGrayLevelMat);
+         cv::namedWindow("Processing configuration",1);
+         cv::createTrackbar("histogram edges clipping limit", "Processing configuration",&histogramClippingValue,50,callBack_rescaleGrayLevelMat);
 
-         cv::namedWindow("Retina Parvocellular pathway output : 16bit=>8bit image retina tonemapping", 1);
          colorSaturationFactor=3;
-         cv::createTrackbar("Color saturation", "Retina Parvocellular pathway output : 16bit=>8bit image retina tonemapping", &colorSaturationFactor,5,callback_saturateColors);
+         cv::createTrackbar("Color saturation", "Processing configuration", &colorSaturationFactor,5,callback_saturateColors);
 
          retinaHcellsGain=40;
-         cv::createTrackbar("Hcells gain", "Retina Parvocellular pathway output : 16bit=>8bit image retina tonemapping",&retinaHcellsGain,100,callBack_updateRetinaParams);
+         cv::createTrackbar("Hcells gain", "Processing configuration",&retinaHcellsGain,100,callBack_updateRetinaParams);
 
          localAdaptation_photoreceptors=197;
          localAdaptation_Gcells=190;
-         cv::createTrackbar("Ph sensitivity", "Retina Parvocellular pathway output : 16bit=>8bit image retina tonemapping", &localAdaptation_photoreceptors,199,callBack_updateRetinaParams);
-         cv::createTrackbar("Gcells sensitivity", "Retina Parvocellular pathway output : 16bit=>8bit image retina tonemapping", &localAdaptation_Gcells,199,callBack_updateRetinaParams);
+         cv::createTrackbar("Ph sensitivity", "Processing configuration", &localAdaptation_photoreceptors,199,callBack_updateRetinaParams);
+         cv::createTrackbar("Gcells sensitivity", "Processing configuration", &localAdaptation_Gcells,199,callBack_updateRetinaParams);
 
 
          /////////////////////////////////////////////
@@ -257,11 +268,28 @@ static void drawPlot(const cv::Mat curve, const std::string figureTitle, const i
          while(continueProcessing)
          {
              // run retina filter
-             retina->run(imageInputRescaled);
-             // Retrieve and display retina output
-             retina->getParvo(retinaOutput_parvo);
-             cv::imshow("Retina input image (with cut edges histogram for basic pixels error avoidance)", imageInputRescaled/255.0);
-             cv::imshow("Retina Parvocellular pathway output : 16bit=>8bit image retina tonemapping", retinaOutput_parvo);
+             if (!chosenMethod)
+             {
+                 retina->run(imageInputRescaled);
+                 // Retrieve and display retina output
+                 retina->getParvo(retinaOutput_parvo);
+                 cv::imshow("Retina input image (with cut edges histogram for basic pixels error avoidance)", imageInputRescaled/255.0);
+                 cv::imshow("Retina Parvocellular pathway output : 16bit=>8bit image retina tonemapping", retinaOutput_parvo);
+                 cv::imwrite("HDRinput.jpg",imageInputRescaled/255.0);
+                 cv::imwrite("RetinaToneMapping.jpg",retinaOutput_parvo);
+             }
+             else
+             {
+                 // apply the simplified hdr tone mapping method
+                 cv::Mat fastToneMappingOutput;
+                 retina->applyFastToneMapping(imageInputRescaled, fastToneMappingOutput);
+                 cv::imshow("Retina fast tone mapping output : 16bit=>8bit image retina tonemapping", fastToneMappingOutput);
+             }
+             /*cv::Mat fastToneMappingOutput_specificObject;
+             fastToneMapper->setup(3.f, 1.5f, 1.f);
+             fastToneMapper->applyFastToneMapping(imageInputRescaled, fastToneMappingOutput_specificObject);
+             cv::imshow("### Retina fast tone mapping output : 16bit=>8bit image retina tonemapping", fastToneMappingOutput_specificObject);
+*/
              cv::waitKey(10);
          }
      }catch(cv::Exception e)
@@ -274,5 +302,3 @@ static void drawPlot(const cv::Mat curve, const std::string figureTitle, const i
 
      return 0;
  }
-
-
diff --git a/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping_video.cpp b/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping_video.cpp
index a4b71391c..646b1b7b7 100644
--- a/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping_video.cpp
+++ b/samples/cpp/OpenEXRimages_HighDynamicRange_Retina_toneMapping_video.cpp
@@ -14,8 +14,9 @@
 #include <stdio.h>
 #include <cstring>
 
-#include "opencv2/contrib.hpp"
-#include "opencv2/highgui.hpp"
+#include "opencv2/bioinspired.hpp" // retina based algorithms
+#include "opencv2/imgproc.hpp" // cvCvtcolor function
+#include "opencv2/highgui.hpp" // display
 
 static void help(std::string errorMessage)
 {
@@ -160,7 +161,7 @@ static void rescaleGrayLevelMat(const cv::Mat &inputMat, cv::Mat &outputMat, con
 
  }
 
- cv::Ptr<cv::Retina> retina;
+ cv::Ptr<cv::bioinspired::Retina> retina;
  int retinaHcellsGain;
  int localAdaptation_photoreceptors, localAdaptation_Gcells;
  static void callBack_updateRetinaParams(int, void*)
@@ -280,10 +281,10 @@ static void loadNewFrame(const std::string filenamePrototype, const int currentF
           */
          if (useLogSampling)
                 {
-                     retina = cv::createRetina(inputImage.size(),true, cv::RETINA_COLOR_BAYER, true, 2.0, 10.0);
+                     retina = cv::bioinspired::createRetina(inputImage.size(),true, cv::bioinspired::RETINA_COLOR_BAYER, true, 2.0, 10.0);
                  }
          else// -> else allocate "classical" retina :
-             retina = cv::createRetina(inputImage.size());
+             retina = cv::bioinspired::createRetina(inputImage.size());
 
         // save default retina parameters file in order to let you see this and maybe modify it and reload using method "setup"
         retina->write("RetinaDefaultParameters.xml");
@@ -358,5 +359,3 @@ static void loadNewFrame(const std::string filenamePrototype, const int currentF
 
      return 0;
  }
-
-
diff --git a/samples/cpp/bagofwords_classification.cpp b/samples/cpp/bagofwords_classification.cpp
index e24a770f8..4506e5b9d 100644
--- a/samples/cpp/bagofwords_classification.cpp
+++ b/samples/cpp/bagofwords_classification.cpp
@@ -2563,19 +2563,19 @@ int main(int argc, char** argv)
     Ptr<FeatureDetector> featureDetector = FeatureDetector::create( ddmParams.detectorType );
     Ptr<DescriptorExtractor> descExtractor = DescriptorExtractor::create( ddmParams.descriptorType );
     Ptr<BOWImgDescriptorExtractor> bowExtractor;
-    if( featureDetector.empty() || descExtractor.empty() )
+    if( !featureDetector || !descExtractor )
     {
         cout << "featureDetector or descExtractor was not created" << endl;
         return -1;
     }
     {
         Ptr<DescriptorMatcher> descMatcher = DescriptorMatcher::create( ddmParams.matcherType );
-        if( featureDetector.empty() || descExtractor.empty() || descMatcher.empty() )
+        if( !featureDetector || !descExtractor || !descMatcher )
         {
             cout << "descMatcher was not created" << endl;
             return -1;
         }
-        bowExtractor = new BOWImgDescriptorExtractor( descExtractor, descMatcher );
+        bowExtractor = makePtr<BOWImgDescriptorExtractor>( descExtractor, descMatcher );
     }
 
     // Print configuration to screen
diff --git a/samples/cpp/bgfg_gmg.cpp b/samples/cpp/bgfg_gmg.cpp
index 90d4fdde6..226eea463 100644
--- a/samples/cpp/bgfg_gmg.cpp
+++ b/samples/cpp/bgfg_gmg.cpp
@@ -35,7 +35,7 @@ int main(int argc, char** argv)
     setNumThreads(8);
 
     Ptr<BackgroundSubtractor> fgbg = createBackgroundSubtractorGMG(20, 0.7);
-    if (fgbg.empty())
+    if (!fgbg)
     {
         std::cerr << "Failed to create BackgroundSubtractor.GMG Algorithm." << std::endl;
         return -1;
@@ -78,4 +78,3 @@ int main(int argc, char** argv)
 
     return 0;
 }
-
diff --git a/samples/cpp/calibration_artificial.cpp b/samples/cpp/calibration_artificial.cpp
index f77a8f30a..3023127b7 100644
--- a/samples/cpp/calibration_artificial.cpp
+++ b/samples/cpp/calibration_artificial.cpp
@@ -332,4 +332,3 @@ Mat cv::ChessBoardGenerator::operator ()(const Mat& bg, const Mat& camMat, const
 
     return generageChessBoard(bg, camMat, distCoeffs, zero, pb1, pb2, sqWidth, sqHeight,  pts3d, corners);
 }
-
diff --git a/samples/cpp/dbt_face_detection.cpp b/samples/cpp/dbt_face_detection.cpp
index c2e6d0a81..651eaff73 100644
--- a/samples/cpp/dbt_face_detection.cpp
+++ b/samples/cpp/dbt_face_detection.cpp
@@ -22,7 +22,7 @@ class CascadeDetectorAdapter: public DetectionBasedTracker::IDetector
             IDetector(),
             Detector(detector)
         {
-            CV_Assert(!detector.empty());
+            CV_Assert(detector);
         }
 
         void detect(const cv::Mat &Image, std::vector<cv::Rect> &objects)
@@ -51,11 +51,11 @@ int main(int , char** )
     }
 
     std::string cascadeFrontalfilename = "../../data/lbpcascades/lbpcascade_frontalface.xml";
-    cv::Ptr<cv::CascadeClassifier> cascade = new cv::CascadeClassifier(cascadeFrontalfilename);
-    cv::Ptr<DetectionBasedTracker::IDetector> MainDetector = new CascadeDetectorAdapter(cascade);
+    cv::Ptr<cv::CascadeClassifier> cascade = makePtr<cv::CascadeClassifier>(cascadeFrontalfilename);
+    cv::Ptr<DetectionBasedTracker::IDetector> MainDetector = makePtr<CascadeDetectorAdapter>(cascade);
 
-    cascade = new cv::CascadeClassifier(cascadeFrontalfilename);
-    cv::Ptr<DetectionBasedTracker::IDetector> TrackingDetector = new CascadeDetectorAdapter(cascade);
+    cascade = makePtr<cv::CascadeClassifier>(cascadeFrontalfilename);
+    cv::Ptr<DetectionBasedTracker::IDetector> TrackingDetector = makePtr<CascadeDetectorAdapter>(cascade);
 
     DetectionBasedTracker::Parameters params;
     DetectionBasedTracker Detector(MainDetector, TrackingDetector, params);
diff --git a/samples/cpp/descriptor_extractor_matcher.cpp b/samples/cpp/descriptor_extractor_matcher.cpp
index 43baed2fe..7df3cd02b 100644
--- a/samples/cpp/descriptor_extractor_matcher.cpp
+++ b/samples/cpp/descriptor_extractor_matcher.cpp
@@ -153,7 +153,7 @@ static void doIteration( const Mat& img1, Mat& img2, bool isWarpPerspective,
     {
         cout << "< Evaluate descriptor matcher..." << endl;
         vector<Point2f> curve;
-        Ptr<GenericDescriptorMatcher> gdm = new VectorDescriptorMatcher( descriptorExtractor, descriptorMatcher );
+        Ptr<GenericDescriptorMatcher> gdm = makePtr<VectorDescriptorMatcher>( descriptorExtractor, descriptorMatcher );
         evaluateGenericDescriptorMatcher( img1, img2, H12, keypoints1, keypoints2, 0, 0, curve, gdm );
 
         Point2f firstPoint = *curve.begin();
@@ -253,7 +253,7 @@ int main(int argc, char** argv)
     int mactherFilterType = getMatcherFilterType( argv[4] );
     bool eval = !isWarpPerspective ? false : (atoi(argv[6]) == 0 ? false : true);
     cout << ">" << endl;
-    if( detector.empty() || descriptorExtractor.empty() || descriptorMatcher.empty()  )
+    if( !detector || !descriptorExtractor || !descriptorMatcher )
     {
         cout << "Can not create detector or descriptor exstractor or descriptor matcher of given types" << endl;
         return -1;
diff --git a/samples/cpp/detection_based_tracker_sample.cpp b/samples/cpp/detection_based_tracker_sample.cpp
index 1debff7e3..81afa789c 100644
--- a/samples/cpp/detection_based_tracker_sample.cpp
+++ b/samples/cpp/detection_based_tracker_sample.cpp
@@ -67,7 +67,7 @@ class CascadeDetectorAdapter: public DetectionBasedTracker::IDetector
         CascadeDetectorAdapter(cv::Ptr<cv::CascadeClassifier> detector):
             Detector(detector)
         {
-            CV_Assert(!detector.empty());
+            CV_Assert(detector);
         }
 
         void detect(const cv::Mat &Image, std::vector<cv::Rect> &objects)
@@ -117,11 +117,11 @@ static int test_FaceDetector(int argc, char *argv[])
     }
 
     std::string cascadeFrontalfilename=cascadefile;
-    cv::Ptr<cv::CascadeClassifier> cascade = new cv::CascadeClassifier(cascadeFrontalfilename);
-    cv::Ptr<DetectionBasedTracker::IDetector> MainDetector = new CascadeDetectorAdapter(cascade);
+    cv::Ptr<cv::CascadeClassifier> cascade = makePtr<cv::CascadeClassifier>(cascadeFrontalfilename);
+    cv::Ptr<DetectionBasedTracker::IDetector> MainDetector = makePtr<CascadeDetectorAdapter>(cascade);
 
-    cascade = new cv::CascadeClassifier(cascadeFrontalfilename);
-    cv::Ptr<DetectionBasedTracker::IDetector> TrackingDetector = new CascadeDetectorAdapter(cascade);
+    cascade = makePtr<cv::CascadeClassifier>(cascadeFrontalfilename);
+    cv::Ptr<DetectionBasedTracker::IDetector> TrackingDetector = makePtr<CascadeDetectorAdapter>(cascade);
 
     DetectionBasedTracker::Parameters params;
     DetectionBasedTracker fd(MainDetector, TrackingDetector, params);
diff --git a/samples/cpp/detector_descriptor_evaluation.cpp b/samples/cpp/detector_descriptor_evaluation.cpp
index ece735a50..dd3cd800e 100644
--- a/samples/cpp/detector_descriptor_evaluation.cpp
+++ b/samples/cpp/detector_descriptor_evaluation.cpp
@@ -535,7 +535,7 @@ void DetectorQualityEvaluator::readAlgorithm ()
 {
     defaultDetector = FeatureDetector::create( algName );
     specificDetector = FeatureDetector::create( algName );
-    if( defaultDetector.empty() )
+    if( !defaultDetector )
     {
         printf( "Algorithm can not be read\n" );
         exit(-1);
@@ -769,14 +769,14 @@ void DescriptorQualityEvaluator::readAlgorithm( )
     defaultDescMatcher = GenericDescriptorMatcher::create( algName );
     specificDescMatcher = GenericDescriptorMatcher::create( algName );
 
-    if( defaultDescMatcher.empty() )
+    if( !defaultDescMatcher )
     {
         Ptr<DescriptorExtractor> extractor = DescriptorExtractor::create( algName );
         Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create( matcherName );
-        defaultDescMatcher = new VectorDescriptorMatch( extractor, matcher );
-        specificDescMatcher = new VectorDescriptorMatch( extractor, matcher );
+        defaultDescMatcher = makePtr<VectorDescriptorMatch>( extractor, matcher );
+        specificDescMatcher = makePtr<VectorDescriptorMatch>( extractor, matcher );
 
-        if( extractor.empty() || matcher.empty() )
+        if( !extractor || !matcher )
         {
             printf("Algorithm can not be read\n");
             exit(-1);
@@ -881,8 +881,9 @@ public:
     virtual void readAlgorithm( )
     {
         string classifierFile = data_path + "/features2d/calonder_classifier.rtc";
-        defaultDescMatcher = new VectorDescriptorMatch( new CalonderDescriptorExtractor<float>( classifierFile ),
-                                                        new BFMatcher(NORM_L2) );
+        defaultDescMatcher = makePtr<VectorDescriptorMatch>(
+            makePtr<CalonderDescriptorExtractor<float> >( classifierFile ),
+            makePtr<BFMatcher>(int(NORM_L2)));
         specificDescMatcher = defaultDescMatcher;
     }
 };
@@ -922,10 +923,11 @@ void OneWayDescriptorQualityTest::processRunParamsFile ()
 
     readAllDatasetsRunParams();
 
-    OneWayDescriptorBase *base = new OneWayDescriptorBase(patchSize, poseCount, pcaFilename,
-                                               trainPath, trainImagesList);
+    Ptr<OneWayDescriptorBase> base(
+        new OneWayDescriptorBase(patchSize, poseCount, pcaFilename,
+                                 trainPath, trainImagesList));
 
-    OneWayDescriptorMatch *match = new OneWayDescriptorMatch ();
+    Ptr<OneWayDescriptorMatch> match = makePtr<OneWayDescriptorMatch>();
     match->initialize( OneWayDescriptorMatch::Params (), base );
     defaultDescMatcher = match;
     writeAllDatasetsRunParams();
@@ -958,18 +960,18 @@ int main( int argc, char** argv )
 
     Ptr<BaseQualityEvaluator> evals[] =
     {
-        new DetectorQualityEvaluator( "FAST", "quality-detector-fast" ),
-        new DetectorQualityEvaluator( "GFTT", "quality-detector-gftt" ),
-        new DetectorQualityEvaluator( "HARRIS", "quality-detector-harris" ),
-        new DetectorQualityEvaluator( "MSER", "quality-detector-mser" ),
-        new DetectorQualityEvaluator( "STAR", "quality-detector-star" ),
-        new DetectorQualityEvaluator( "SIFT", "quality-detector-sift" ),
-        new DetectorQualityEvaluator( "SURF", "quality-detector-surf" ),
+        makePtr<DetectorQualityEvaluator>( "FAST", "quality-detector-fast" ),
+        makePtr<DetectorQualityEvaluator>( "GFTT", "quality-detector-gftt" ),
+        makePtr<DetectorQualityEvaluator>( "HARRIS", "quality-detector-harris" ),
+        makePtr<DetectorQualityEvaluator>( "MSER", "quality-detector-mser" ),
+        makePtr<DetectorQualityEvaluator>( "STAR", "quality-detector-star" ),
+        makePtr<DetectorQualityEvaluator>( "SIFT", "quality-detector-sift" ),
+        makePtr<DetectorQualityEvaluator>( "SURF", "quality-detector-surf" ),
 
-        new DescriptorQualityEvaluator( "SIFT", "quality-descriptor-sift", "BruteForce" ),
-        new DescriptorQualityEvaluator( "SURF", "quality-descriptor-surf", "BruteForce" ),
-        new DescriptorQualityEvaluator( "FERN", "quality-descriptor-fern"),
-        new CalonderDescriptorQualityEvaluator()
+        makePtr<DescriptorQualityEvaluator>( "SIFT", "quality-descriptor-sift", "BruteForce" ),
+        makePtr<DescriptorQualityEvaluator>( "SURF", "quality-descriptor-surf", "BruteForce" ),
+        makePtr<DescriptorQualityEvaluator>( "FERN", "quality-descriptor-fern"),
+        makePtr<CalonderDescriptorQualityEvaluator>()
     };
 
     for( size_t i = 0; i < sizeof(evals)/sizeof(evals[0]); i++ )
diff --git a/samples/cpp/dft.cpp b/samples/cpp/dft.cpp
index b94387c54..4708defab 100644
--- a/samples/cpp/dft.cpp
+++ b/samples/cpp/dft.cpp
@@ -80,4 +80,3 @@ int main(int argc, const char ** argv)
     waitKey();
     return 0;
 }
-
diff --git a/samples/cpp/erfilter.cpp b/samples/cpp/erfilter.cpp
new file mode 100644
index 000000000..69009b82a
--- /dev/null
+++ b/samples/cpp/erfilter.cpp
@@ -0,0 +1,120 @@
+
+//--------------------------------------------------------------------------------------------------
+//  A demo program of the Extremal Region Filter algorithm described in
+//  Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+//--------------------------------------------------------------------------------------------------
+
+#include  "opencv2/opencv.hpp"
+#include  "opencv2/objdetect.hpp"
+#include  "opencv2/highgui.hpp"
+#include  "opencv2/imgproc.hpp"
+
+#include  <vector>
+#include  <iostream>
+#include  <iomanip>
+
+using  namespace std;
+using  namespace cv;
+
+void  er_draw(Mat &src, Mat &dst, ERStat& er);
+
+void  er_draw(Mat &src, Mat &dst, ERStat& er)
+{
+
+    if (er.parent != NULL) // deprecate the root region
+    {
+        int newMaskVal = 255;
+        int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
+        floodFill(src,dst,Point(er.pixel%src.cols,er.pixel/src.cols),Scalar(255),0,Scalar(er.level),Scalar(0),flags);
+    }
+
+}
+
+int  main(int argc, const char * argv[])
+{
+
+
+    vector<ERStat> regions;
+
+    if (argc < 2) {
+        cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
+        cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
+        cout << "    Usage: " << argv[0] << " input_image <optional_groundtruth_image>" << endl;
+        cout << "    Default classifier files (trained_classifierNM*.xml) should be in ./" << endl;
+        return -1;
+    }
+
+    Mat original = imread(argv[1]);
+    Mat gt;
+    if (argc > 2)
+    {
+        gt = imread(argv[2]);
+        cvtColor(gt, gt, COLOR_RGB2GRAY);
+        threshold(gt, gt, 254, 255, THRESH_BINARY);
+    }
+    Mat grey(original.size(),CV_8UC1);
+    cvtColor(original,grey,COLOR_RGB2GRAY);
+
+    double t = (double)getTickCount();
+
+    // Build ER tree and filter with the 1st stage default classifier
+    Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"));
+
+    er_filter1->run(grey, regions);
+
+    t = (double)getTickCount() - t;
+    cout << " --------------------------------------------------------------------------------------------------" << endl;
+    cout << "\t FIRST STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
+    cout << " --------------------------------------------------------------------------------------------------" << endl;
+    cout << setw(9) << regions.size()+er_filter1->getNumRejected() << "\t Extremal Regions extracted " << endl;
+    cout << setw(9) << regions.size() << "\t Extremal Regions selected by the first stage of the sequential classifier." << endl;
+    cout << "\t \t (saving into out_second_stage.jpg)" << endl;
+    cout << " --------------------------------------------------------------------------------------------------" << endl;
+
+    er_filter1.release();
+
+    // draw regions
+    Mat mask = Mat::zeros(grey.rows+2,grey.cols+2,CV_8UC1);
+    for (int r=0; r<(int)regions.size(); r++)
+        er_draw(grey, mask, regions.at(r));
+    mask = 255-mask;
+    imwrite("out_first_stage.jpg", mask);
+
+    if (argc > 2)
+    {
+        Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2))));
+        cout << "Recall for the 1st stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl;
+    }
+
+    t = (double)getTickCount();
+
+    // Default second stage classifier
+    Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"));
+    er_filter2->run(grey, regions);
+
+    t = (double)getTickCount() - t;
+    cout << " --------------------------------------------------------------------------------------------------" << endl;
+    cout << "\t SECOND STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
+    cout << " --------------------------------------------------------------------------------------------------" << endl;
+    cout << setw(9) << regions.size() << "\t Extremal Regions selected by the second stage of the sequential classifier." << endl;
+    cout << "\t \t (saving into out_second_stage.jpg)" << endl;
+    cout << " --------------------------------------------------------------------------------------------------" << endl;
+
+    er_filter2.release();
+
+    // draw regions
+    mask = mask*0;
+    for (int r=0; r<(int)regions.size(); r++)
+        er_draw(grey, mask, regions.at(r));
+    mask = 255-mask;
+    imwrite("out_second_stage.jpg", mask);
+
+    if (argc > 2)
+    {
+        Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2))));
+        cout << "Recall for the 2nd stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl;
+    }
+
+    regions.clear();
+
+}
diff --git a/samples/cpp/fabmap_sample.cpp b/samples/cpp/fabmap_sample.cpp
index 75febb134..cd06b55cb 100644
--- a/samples/cpp/fabmap_sample.cpp
+++ b/samples/cpp/fabmap_sample.cpp
@@ -131,11 +131,11 @@ int main(int argc, char * argv[]) {
     //generate test data
     cout << "Extracting Test Data from images" << endl <<
         endl;
-    Ptr<FeatureDetector> detector =
+    Ptr<FeatureDetector> detector(
         new DynamicAdaptedFeatureDetector(
-        AdjusterAdapter::create("STAR"), 130, 150, 5);
-    Ptr<DescriptorExtractor> extractor =
-        new SurfDescriptorExtractor(1000, 4, 2, false, true);
+            AdjusterAdapter::create("STAR"), 130, 150, 5));
+    Ptr<DescriptorExtractor> extractor(
+        new SurfDescriptorExtractor(1000, 4, 2, false, true));
     Ptr<DescriptorMatcher> matcher =
         DescriptorMatcher::create("FlannBased");
 
@@ -183,8 +183,8 @@ int main(int argc, char * argv[]) {
         endl;
     Ptr<of2::FabMap> fabmap;
 
-    fabmap = new of2::FabMap2(tree, 0.39, 0, of2::FabMap::SAMPLED |
-        of2::FabMap::CHOW_LIU);
+    fabmap.reset(new of2::FabMap2(tree, 0.39, 0, of2::FabMap::SAMPLED |
+        of2::FabMap::CHOW_LIU));
     fabmap->addTraining(trainData);
 
     vector<of2::IMatch> matches;
diff --git a/samples/cpp/freak_demo.cpp b/samples/cpp/freak_demo.cpp
index 41604865b..fa58d2038 100644
--- a/samples/cpp/freak_demo.cpp
+++ b/samples/cpp/freak_demo.cpp
@@ -56,7 +56,7 @@ static void help( char** argv )
 }
 
 int main( int argc, char** argv ) {
-    // check http://opencv.itseez.com/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.html
+    // check http://docs.opencv.org/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.html
     // for OpenCV general detection/matching framework details
 
     if( argc != 3 ) {
diff --git a/samples/cpp/generic_descriptor_match.cpp b/samples/cpp/generic_descriptor_match.cpp
index 888c24f7e..359f3c08e 100644
--- a/samples/cpp/generic_descriptor_match.cpp
+++ b/samples/cpp/generic_descriptor_match.cpp
@@ -33,7 +33,7 @@ int main(int argc, char** argv)
     std::string params_filename = std::string(argv[4]);
 
     Ptr<GenericDescriptorMatcher> descriptorMatcher = GenericDescriptorMatcher::create(alg_name, params_filename);
-    if( descriptorMatcher.empty() )
+    if( !descriptorMatcher )
     {
         printf ("Cannot create descriptor\n");
         return 0;
diff --git a/samples/cpp/houghlines.cpp b/samples/cpp/houghlines.cpp
index 8dea5c15d..2fdabe839 100644
--- a/samples/cpp/houghlines.cpp
+++ b/samples/cpp/houghlines.cpp
@@ -61,4 +61,3 @@ int main(int argc, char** argv)
 
     return 0;
 }
-
diff --git a/samples/cpp/image.cpp b/samples/cpp/image.cpp
index 806926b04..80f80c7af 100644
--- a/samples/cpp/image.cpp
+++ b/samples/cpp/image.cpp
@@ -31,8 +31,8 @@ int main( int argc, char** argv )
     help();
     const char* imagename = argc > 1 ? argv[1] : "lena.jpg";
 #if DEMO_MIXED_API_USE
-    Ptr<IplImage> iplimg = cvLoadImage(imagename); // Ptr<T> is safe ref-conting pointer class
-    if(iplimg.empty())
+    Ptr<IplImage> iplimg(cvLoadImage(imagename)); // Ptr<T> is safe ref-counting pointer class
+    if(!iplimg)
     {
         fprintf(stderr, "Can not load image %s\n", imagename);
         return -1;
diff --git a/samples/cpp/image_sequence.cpp b/samples/cpp/image_sequence.cpp
new file mode 100644
index 000000000..d94362161
--- /dev/null
+++ b/samples/cpp/image_sequence.cpp
@@ -0,0 +1,57 @@
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+
+#include <iostream>
+
+using namespace cv;
+using namespace std;
+
+static void help(char** argv)
+{
+    cout << "\nThis sample shows you how to read a sequence of images using the VideoCapture interface.\n"
+         << "Usage: " << argv[0] << " <image_mask> (example mask: example_%%02d.jpg)\n"
+         << "Image mask defines the name variation for the input images that have to be read as a sequence. \n"
+         << "Using the mask example_%%02d.jpg will read in images labeled as 'example_00.jpg', 'example_01.jpg', etc."
+         << endl;
+}
+
+int main(int argc, char** argv)
+{
+    if(argc != 2)
+    {
+        help(argv);
+        return 1;
+    }
+
+    string first_file = argv[1];
+    VideoCapture sequence(first_file);
+
+    if (!sequence.isOpened())
+    {
+        cerr << "Failed to open the image sequence!\n" << endl;
+        return 1;
+    }
+
+    Mat image;
+    namedWindow("Image sequence | press ESC to close", 1);
+
+    for(;;)
+    {
+        // Read in image from sequence
+        sequence >> image;
+
+        // If no image was retrieved -> end of sequence
+        if(image.empty())
+        {
+            cout << "End of Sequence" << endl;
+            break;
+        }
+
+        imshow("Image sequence | press ESC to close", image);
+
+        if(waitKey(500) == 27)
+            break;
+    }
+
+    return 0;
+}
diff --git a/samples/cpp/latentsvm_multidetect.cpp b/samples/cpp/latentsvm_multidetect.cpp
index 4da5506d8..f5a8bc56d 100644
--- a/samples/cpp/latentsvm_multidetect.cpp
+++ b/samples/cpp/latentsvm_multidetect.cpp
@@ -3,7 +3,7 @@
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/contrib/contrib.hpp"
 
-#ifdef WIN32
+#if defined(WIN32) || defined(_WIN32)
 #include <io.h>
 #else
 #include <dirent.h>
@@ -59,7 +59,7 @@ static void readDirectory( const string& directoryName, vector<String>& filename
 {
     filenames.clear();
 
-#ifdef WIN32
+#if defined(WIN32) | defined(_WIN32)
     struct _finddata_t s_file;
     string str = directoryName + "\\*.*";
 
diff --git a/samples/cpp/linemod.cpp b/samples/cpp/linemod.cpp
index 08d2a0035..4d11da36b 100644
--- a/samples/cpp/linemod.cpp
+++ b/samples/cpp/linemod.cpp
@@ -114,7 +114,7 @@ private:
 // Functions to store detector and templates in single XML/YAML file
 static cv::Ptr<cv::linemod::Detector> readLinemod(const std::string& filename)
 {
-  cv::Ptr<cv::linemod::Detector> detector = new cv::linemod::Detector;
+  cv::Ptr<cv::linemod::Detector> detector = cv::makePtr<cv::linemod::Detector>();
   cv::FileStorage fs(filename, cv::FileStorage::READ);
   detector->read(fs.root());
 
diff --git a/samples/cpp/lsd_lines.cpp b/samples/cpp/lsd_lines.cpp
new file mode 100644
index 000000000..62692f70b
--- /dev/null
+++ b/samples/cpp/lsd_lines.cpp
@@ -0,0 +1,54 @@
+#include <iostream>
+#include <string>
+
+#include "opencv2/core/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/highgui/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+
+int main(int argc, char** argv)
+{
+    std::string in;
+    if (argc != 2)
+    {
+        std::cout << "Usage: lsd_lines [input image]. Now loading building.jpg" << std::endl;
+        in = "building.jpg";
+    }
+    else
+    {
+        in = argv[1];
+    }
+
+    Mat image = imread(in, IMREAD_GRAYSCALE);
+
+#if 0
+    Canny(image, image, 50, 200, 3); // Apply canny edge
+#endif
+
+    // Create and LSD detector with standard or no refinement.
+#if 1
+    Ptr<LineSegmentDetector> ls = createLineSegmentDetectorPtr(LSD_REFINE_STD);
+#else
+    Ptr<LineSegmentDetector> ls = createLineSegmentDetectorPtr(LSD_REFINE_NONE);
+#endif
+
+    double start = double(getTickCount());
+    vector<Vec4i> lines_std;
+
+    // Detect the lines
+    ls->detect(image, lines_std);
+
+    double duration_ms = (double(getTickCount()) - start) * 1000 / getTickFrequency();
+    std::cout << "It took " << duration_ms << " ms." << std::endl;
+
+    // Show found lines
+    Mat drawnLines(image);
+    ls->drawSegments(drawnLines, lines_std);
+    imshow("Standard refinement", drawnLines);
+
+    waitKey();
+    return 0;
+}
diff --git a/samples/cpp/matching_to_many_images.cpp b/samples/cpp/matching_to_many_images.cpp
index 7a346e3f4..152b40064 100644
--- a/samples/cpp/matching_to_many_images.cpp
+++ b/samples/cpp/matching_to_many_images.cpp
@@ -84,7 +84,7 @@ static bool createDetectorDescriptorMatcher( const string& detectorType, const s
     descriptorMatcher = DescriptorMatcher::create( matcherType );
     cout << ">" << endl;
 
-    bool isCreated = !( featureDetector.empty() || descriptorExtractor.empty() || descriptorMatcher.empty() );
+    bool isCreated = featureDetector && descriptorExtractor && descriptorMatcher;
     if( !isCreated )
         cout << "Can not create feature detector or descriptor extractor or descriptor matcher of given types." << endl << ">" << endl;
 
diff --git a/samples/cpp/phase_corr.cpp b/samples/cpp/phase_corr.cpp
index 2b1f2378b..cfee80941 100644
--- a/samples/cpp/phase_corr.cpp
+++ b/samples/cpp/phase_corr.cpp
@@ -43,7 +43,3 @@ int main(int, char* [])
 
     return 0;
 }
-
-
-
-
diff --git a/samples/cpp/retinaDemo.cpp b/samples/cpp/retinaDemo.cpp
index ffade70f3..790e75383 100644
--- a/samples/cpp/retinaDemo.cpp
+++ b/samples/cpp/retinaDemo.cpp
@@ -9,7 +9,7 @@
 #include <iostream>
 #include <cstring>
 
-#include "opencv2/contrib.hpp"
+#include "opencv2/bioinspired.hpp"
 #include "opencv2/highgui.hpp"
 
 static void help(std::string errorMessage)
@@ -106,15 +106,15 @@ int main(int argc, char* argv[]) {
     try
     {
         // create a retina instance with default parameters setup, uncomment the initialisation you wanna test
-        cv::Ptr<cv::Retina> myRetina;
+        cv::Ptr<cv::bioinspired::Retina> myRetina;
 
         // if the last parameter is 'log', then activate log sampling (favour foveal vision and subsamples peripheral vision)
         if (useLogSampling)
                 {
-                        myRetina = cv::createRetina(inputFrame.size(), true, cv::RETINA_COLOR_BAYER, true, 2.0, 10.0);
+                        myRetina = cv::bioinspired::createRetina(inputFrame.size(), true, cv::bioinspired::RETINA_COLOR_BAYER, true, 2.0, 10.0);
                 }
         else// -> else allocate "classical" retina :
-            myRetina = cv::createRetina(inputFrame.size());
+            myRetina = cv::bioinspired::createRetina(inputFrame.size());
 
         // save default retina parameters file in order to let you see this and maybe modify it and reload using method "setup"
         myRetina->write("RetinaDefaultParameters.xml");
@@ -143,7 +143,8 @@ int main(int argc, char* argv[]) {
             cv::imshow("retina input", inputFrame);
             cv::imshow("Retina Parvo", retinaOutput_parvo);
             cv::imshow("Retina Magno", retinaOutput_magno);
-            cv::waitKey(10);
+
+            cv::waitKey(5);
         }
     }catch(cv::Exception e)
     {
@@ -155,4 +156,3 @@ int main(int argc, char* argv[]) {
 
     return 0;
 }
-
diff --git a/samples/cpp/scenetext.jpg b/samples/cpp/scenetext.jpg
new file mode 100644
index 000000000..6a9c6bf5e
Binary files /dev/null and b/samples/cpp/scenetext.jpg differ
diff --git a/samples/cpp/scenetext_GT.png b/samples/cpp/scenetext_GT.png
new file mode 100644
index 000000000..a5c190557
Binary files /dev/null and b/samples/cpp/scenetext_GT.png differ
diff --git a/samples/cpp/starter_video.cpp b/samples/cpp/starter_video.cpp
index 87b414df6..1a3d5b0c9 100644
--- a/samples/cpp/starter_video.cpp
+++ b/samples/cpp/starter_video.cpp
@@ -41,15 +41,15 @@ namespace {
         cout << "press space to save a picture. q or esc to quit" << endl;
         namedWindow(window_name, WINDOW_KEEPRATIO); //resizable window;
         Mat frame;
-        
+
         for (;;) {
             capture >> frame;
             if (frame.empty())
                 break;
-            
+
             imshow(window_name, frame);
             char key = (char)waitKey(30); //delay N millis, usually long enough to display and capture input
-            
+
             switch (key) {
             case 'q':
             case 'Q':
diff --git a/samples/cpp/stereo_calib.cpp b/samples/cpp/stereo_calib.cpp
index 44be2d64e..367df4bfe 100644
--- a/samples/cpp/stereo_calib.cpp
+++ b/samples/cpp/stereo_calib.cpp
@@ -14,15 +14,12 @@
      Or: http://oreilly.com/catalog/9780596516130/
      ISBN-10: 0596516134 or: ISBN-13: 978-0596516130
 
-   OTHER OPENCV SITES:
-   * The source code is on sourceforge at:
-     http://sourceforge.net/projects/opencvlibrary/
-   * The OpenCV wiki page (As of Oct 1, 2008 this is down for changing over servers, but should come back):
-     http://opencvlibrary.sourceforge.net/
-   * An active user group is at:
-     http://tech.groups.yahoo.com/group/OpenCV/
-   * The minutes of weekly OpenCV development meetings are at:
-     http://code.opencv.org/projects/opencv/wiki/Meeting_notes
+   OPENCV WEBSITES:
+     Homepage:      http://opencv.org
+     Online docs:   http://docs.opencv.org
+     Q&A forum:     http://answers.opencv.org
+     Issue tracker: http://code.opencv.org
+     GitHub:        https://github.com/Itseez/opencv/
    ************************************************** */
 
 #include "opencv2/calib3d/calib3d.hpp"
@@ -404,4 +401,3 @@ int main(int argc, char** argv)
     StereoCalib(imagelist, boardSize, true, showRectified);
     return 0;
 }
-
diff --git a/samples/cpp/stitching.cpp b/samples/cpp/stitching.cpp
index 1ee6bdb7c..602edfe86 100644
--- a/samples/cpp/stitching.cpp
+++ b/samples/cpp/stitching.cpp
@@ -134,5 +134,3 @@ int parseCmdArgs(int argc, char** argv)
     }
     return 0;
 }
-
-
diff --git a/samples/cpp/stitching_detailed.cpp b/samples/cpp/stitching_detailed.cpp
index 5ee79184d..a576816d6 100644
--- a/samples/cpp/stitching_detailed.cpp
+++ b/samples/cpp/stitching_detailed.cpp
@@ -358,14 +358,14 @@ int main(int argc, char* argv[])
     {
 #ifdef HAVE_OPENCV_NONFREE
         if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
-            finder = new SurfFeaturesFinderGpu();
+            finder = makePtr<SurfFeaturesFinderGpu>();
         else
 #endif
-            finder = new SurfFeaturesFinder();
+            finder = makePtr<SurfFeaturesFinder>();
     }
     else if (features_type == "orb")
     {
-        finder = new OrbFeaturesFinder();
+        finder = makePtr<OrbFeaturesFinder>();
     }
     else
     {
@@ -469,7 +469,11 @@ int main(int argc, char* argv[])
 
     HomographyBasedEstimator estimator;
     vector<CameraParams> cameras;
-    estimator(features, pairwise_matches, cameras);
+    if (!estimator(features, pairwise_matches, cameras))
+    {
+        cout << "Homography estimation failed.\n";
+        return -1;
+    }
 
     for (size_t i = 0; i < cameras.size(); ++i)
     {
@@ -480,8 +484,8 @@ int main(int argc, char* argv[])
     }
 
     Ptr<detail::BundleAdjusterBase> adjuster;
-    if (ba_cost_func == "reproj") adjuster = new detail::BundleAdjusterReproj();
-    else if (ba_cost_func == "ray") adjuster = new detail::BundleAdjusterRay();
+    if (ba_cost_func == "reproj") adjuster = makePtr<detail::BundleAdjusterReproj>();
+    else if (ba_cost_func == "ray") adjuster = makePtr<detail::BundleAdjusterRay>();
     else
     {
         cout << "Unknown bundle adjustment cost function: '" << ba_cost_func << "'.\n";
@@ -495,7 +499,11 @@ int main(int argc, char* argv[])
     if (ba_refine_mask[3] == 'x') refine_mask(1,1) = 1;
     if (ba_refine_mask[4] == 'x') refine_mask(1,2) = 1;
     adjuster->setRefinementMask(refine_mask);
-    (*adjuster)(features, pairwise_matches, cameras);
+    if (!(*adjuster)(features, pairwise_matches, cameras))
+    {
+        cout << "Camera parameters adjusting failed.\n";
+        return -1;
+    }
 
     // Find median focal length
 
@@ -547,31 +555,49 @@ int main(int argc, char* argv[])
 #ifdef HAVE_OPENCV_GPUWARPING
     if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
     {
-        if (warp_type == "plane") warper_creator = new cv::PlaneWarperGpu();
-        else if (warp_type == "cylindrical") warper_creator = new cv::CylindricalWarperGpu();
-        else if (warp_type == "spherical") warper_creator = new cv::SphericalWarperGpu();
+        if (warp_type == "plane")
+            warper_creator = makePtr<cv::PlaneWarperGpu>();
+        else if (warp_type == "cylindrical")
+            warper_creator = makePtr<cv::CylindricalWarperGpu>();
+        else if (warp_type == "spherical")
+            warper_creator = makePtr<cv::SphericalWarperGpu>();
     }
     else
 #endif
     {
-        if (warp_type == "plane") warper_creator = new cv::PlaneWarper();
-        else if (warp_type == "cylindrical") warper_creator = new cv::CylindricalWarper();
-        else if (warp_type == "spherical") warper_creator = new cv::SphericalWarper();
-        else if (warp_type == "fisheye") warper_creator = new cv::FisheyeWarper();
-        else if (warp_type == "stereographic") warper_creator = new cv::StereographicWarper();
-        else if (warp_type == "compressedPlaneA2B1") warper_creator = new cv::CompressedRectilinearWarper(2, 1);
-        else if (warp_type == "compressedPlaneA1.5B1") warper_creator = new cv::CompressedRectilinearWarper(1.5, 1);
-        else if (warp_type == "compressedPlanePortraitA2B1") warper_creator = new cv::CompressedRectilinearPortraitWarper(2, 1);
-        else if (warp_type == "compressedPlanePortraitA1.5B1") warper_creator = new cv::CompressedRectilinearPortraitWarper(1.5, 1);
-        else if (warp_type == "paniniA2B1") warper_creator = new cv::PaniniWarper(2, 1);
-        else if (warp_type == "paniniA1.5B1") warper_creator = new cv::PaniniWarper(1.5, 1);
-        else if (warp_type == "paniniPortraitA2B1") warper_creator = new cv::PaniniPortraitWarper(2, 1);
-        else if (warp_type == "paniniPortraitA1.5B1") warper_creator = new cv::PaniniPortraitWarper(1.5, 1);
-        else if (warp_type == "mercator") warper_creator = new cv::MercatorWarper();
-        else if (warp_type == "transverseMercator") warper_creator = new cv::TransverseMercatorWarper();
+        if (warp_type == "plane")
+            warper_creator = makePtr<cv::PlaneWarper>();
+        else if (warp_type == "cylindrical")
+            warper_creator = makePtr<cv::CylindricalWarper>();
+        else if (warp_type == "spherical")
+            warper_creator = makePtr<cv::SphericalWarper>();
+        else if (warp_type == "fisheye")
+            warper_creator = makePtr<cv::FisheyeWarper>();
+        else if (warp_type == "stereographic")
+            warper_creator = makePtr<cv::StereographicWarper>();
+        else if (warp_type == "compressedPlaneA2B1")
+            warper_creator = makePtr<cv::CompressedRectilinearWarper>(2.0f, 1.0f);
+        else if (warp_type == "compressedPlaneA1.5B1")
+            warper_creator = makePtr<cv::CompressedRectilinearWarper>(1.5f, 1.0f);
+        else if (warp_type == "compressedPlanePortraitA2B1")
+            warper_creator = makePtr<cv::CompressedRectilinearPortraitWarper>(2.0f, 1.0f);
+        else if (warp_type == "compressedPlanePortraitA1.5B1")
+            warper_creator = makePtr<cv::CompressedRectilinearPortraitWarper>(1.5f, 1.0f);
+        else if (warp_type == "paniniA2B1")
+            warper_creator = makePtr<cv::PaniniWarper>(2.0f, 1.0f);
+        else if (warp_type == "paniniA1.5B1")
+            warper_creator = makePtr<cv::PaniniWarper>(1.5f, 1.0f);
+        else if (warp_type == "paniniPortraitA2B1")
+            warper_creator = makePtr<cv::PaniniPortraitWarper>(2.0f, 1.0f);
+        else if (warp_type == "paniniPortraitA1.5B1")
+            warper_creator = makePtr<cv::PaniniPortraitWarper>(1.5f, 1.0f);
+        else if (warp_type == "mercator")
+            warper_creator = makePtr<cv::MercatorWarper>();
+        else if (warp_type == "transverseMercator")
+            warper_creator = makePtr<cv::TransverseMercatorWarper>();
     }
 
-    if (warper_creator.empty())
+    if (!warper_creator)
     {
         cout << "Can't create the following warper '" << warp_type << "'\n";
         return 1;
@@ -604,32 +630,32 @@ int main(int argc, char* argv[])
 
     Ptr<SeamFinder> seam_finder;
     if (seam_find_type == "no")
-        seam_finder = new detail::NoSeamFinder();
+        seam_finder = makePtr<detail::NoSeamFinder>();
     else if (seam_find_type == "voronoi")
-        seam_finder = new detail::VoronoiSeamFinder();
+        seam_finder = makePtr<detail::VoronoiSeamFinder>();
     else if (seam_find_type == "gc_color")
     {
 #ifdef HAVE_OPENCV_GPU
         if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
-            seam_finder = new detail::GraphCutSeamFinderGpu(GraphCutSeamFinderBase::COST_COLOR);
+            seam_finder = makePtr<detail::GraphCutSeamFinderGpu>(GraphCutSeamFinderBase::COST_COLOR);
         else
 #endif
-            seam_finder = new detail::GraphCutSeamFinder(GraphCutSeamFinderBase::COST_COLOR);
+            seam_finder = makePtr<detail::GraphCutSeamFinder>(GraphCutSeamFinderBase::COST_COLOR);
     }
     else if (seam_find_type == "gc_colorgrad")
     {
 #ifdef HAVE_OPENCV_GPU
         if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
-            seam_finder = new detail::GraphCutSeamFinderGpu(GraphCutSeamFinderBase::COST_COLOR_GRAD);
+            seam_finder = makePtr<detail::GraphCutSeamFinderGpu>(GraphCutSeamFinderBase::COST_COLOR_GRAD);
         else
 #endif
-            seam_finder = new detail::GraphCutSeamFinder(GraphCutSeamFinderBase::COST_COLOR_GRAD);
+            seam_finder = makePtr<detail::GraphCutSeamFinder>(GraphCutSeamFinderBase::COST_COLOR_GRAD);
     }
     else if (seam_find_type == "dp_color")
-        seam_finder = new detail::DpSeamFinder(DpSeamFinder::COLOR);
+        seam_finder = makePtr<detail::DpSeamFinder>(DpSeamFinder::COLOR);
     else if (seam_find_type == "dp_colorgrad")
-        seam_finder = new detail::DpSeamFinder(DpSeamFinder::COLOR_GRAD);
-    if (seam_finder.empty())
+        seam_finder = makePtr<detail::DpSeamFinder>(DpSeamFinder::COLOR_GRAD);
+    if (!seam_finder)
     {
         cout << "Can't create the following seam finder '" << seam_find_type << "'\n";
         return 1;
@@ -727,7 +753,7 @@ int main(int argc, char* argv[])
         resize(dilated_mask, seam_mask, mask_warped.size());
         mask_warped = seam_mask & mask_warped;
 
-        if (blender.empty())
+        if (!blender)
         {
             blender = Blender::createDefault(blend_type, try_gpu);
             Size dst_sz = resultRoi(corners, sizes).size();
@@ -736,13 +762,13 @@ int main(int argc, char* argv[])
                 blender = Blender::createDefault(Blender::NO, try_gpu);
             else if (blend_type == Blender::MULTI_BAND)
             {
-                MultiBandBlender* mb = dynamic_cast<MultiBandBlender*>(static_cast<Blender*>(blender));
+                MultiBandBlender* mb = dynamic_cast<MultiBandBlender*>(blender.get());
                 mb->setNumBands(static_cast<int>(ceil(log(blend_width)/log(2.)) - 1.));
                 LOGLN("Multi-band blender, number of bands: " << mb->numBands());
             }
             else if (blend_type == Blender::FEATHER)
             {
-                FeatherBlender* fb = dynamic_cast<FeatherBlender*>(static_cast<Blender*>(blender));
+                FeatherBlender* fb = dynamic_cast<FeatherBlender*>(blender.get());
                 fb->setSharpness(1.f/blend_width);
                 LOGLN("Feather blender, sharpness: " << fb->sharpness());
             }
@@ -763,5 +789,3 @@ int main(int argc, char* argv[])
     LOGLN("Finished, total time: " << ((getTickCount() - app_start_time) / getTickFrequency()) << " sec");
     return 0;
 }
-
-
diff --git a/samples/cpp/trained_classifierNM1.xml b/samples/cpp/trained_classifierNM1.xml
new file mode 100644
index 000000000..c395a67df
--- /dev/null
+++ b/samples/cpp/trained_classifierNM1.xml
@@ -0,0 +1,4046 @@
+<?xml version="1.0"?>
+<opencv_storage>
+<boost type_id="opencv-ml-boost-tree">
+  <boosting_type>RealAdaboost</boosting_type>
+  <splitting_criteria>Gini</splitting_criteria>
+  <ntrees>100</ntrees>
+  <weight_trimming_rate>1.</weight_trimming_rate>
+  <is_classifier>1</is_classifier>
+  <var_all>5</var_all>
+  <var_count>4</var_count>
+  <ord_var_count>4</ord_var_count>
+  <cat_var_count>0</cat_var_count>
+  <training_params>
+    <use_surrogates>0</use_surrogates>
+    <max_categories>10</max_categories>
+    <max_depth>1</max_depth>
+    <min_sample_count>10</min_sample_count>
+    <cross_validation_folds>0</cross_validation_folds>
+    <priors type_id="opencv-matrix">
+      <rows>1</rows>
+      <cols>2</cols>
+      <dt>d</dt>
+      <data>
+        1. 1.</data></priors></training_params>
+  <var_idx type_id="opencv-matrix">
+    <rows>1</rows>
+    <cols>4</cols>
+    <dt>i</dt>
+    <data>
+      1 2 3 4</data></var_idx>
+  <var_type>
+    0 0 0 0</var_type>
+  <cat_count type_id="opencv-matrix">
+    <rows>1</rows>
+    <cols>1</cols>
+    <dt>i</dt>
+    <data>
+      2</data></cat_count>
+  <cat_map type_id="opencv-matrix">
+    <rows>1</rows>
+    <cols>2</cols>
+    <dt>i</dt>
+    <data>
+      1 2</data></cat_map>
+  <trees>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-8.7333809708193622e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>8.9537560939788818e-01</quality>
+              <le>8.5429996252059937e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>281</sample_count>
+          <value>6.7336438045685210e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1578</sample_count>
+          <value>-1.6797288877447023e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>6.8833827526759232e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>6.0157060623168945e-01</quality>
+              <le>1.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1728</sample_count>
+          <value>-2.3830233314576746e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>131</sample_count>
+          <value>1.3270520927417431e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.1546319456101495e-14</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>6.0729509592056274e-01</quality>
+              <le>1.0963299870491028e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>781</sample_count>
+          <value>2.8488485626451149e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1078</sample_count>
+          <value>-1.0290340538001230e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>8.8817841970011734e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.8338499069213867e-01</quality>
+              <le>9.5209401845932007e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1331</sample_count>
+          <value>-4.6437394337436783e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>528</sample_count>
+          <value>4.0542316486679569e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.5931700403371249e-14</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.5465269088745117e-01</quality>
+              <le>3.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>856</sample_count>
+          <value>5.0630658667623052e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1003</sample_count>
+          <value>-2.3844591468675169e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-4.0523140398818379e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.5272746086120605e-01</quality>
+              <le>5.0000000000000000e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1046</sample_count>
+          <value>-3.5086409511139033e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>813</sample_count>
+          <value>3.2363636038761090e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>3.5527136788004883e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.4958724975585938e-01</quality>
+              <le>1.6267855167388916e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1811</sample_count>
+          <value>-1.0723654593445622e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>48</sample_count>
+          <value>1.6463996332936000e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.5543122344752215e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.5254465341567993e-01</quality>
+              <le>7.4758999049663544e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>149</sample_count>
+          <value>1.0273978441887677e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1710</sample_count>
+          <value>-1.3682146780021626e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.1990408665951547e-14</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.4537707567214966e-01</quality>
+              <le>6.6345453262329102e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>455</sample_count>
+          <value>-1.0072489119231773e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1404</sample_count>
+          <value>1.1925375544044788e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-7.1054273576010523e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.2146106958389282e-01</quality>
+              <le>2.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1791</sample_count>
+          <value>-4.7305821386203636e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>68</sample_count>
+          <value>1.5160735804641536e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>3.7747582837255180e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.2081108093261719e-01</quality>
+              <le>5.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1690</sample_count>
+          <value>9.1796156088740075e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>169</sample_count>
+          <value>-4.9059916762377265e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-7.1054273576010523e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.2672868967056274e-01</quality>
+              <le>1.0963299870491028e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>781</sample_count>
+          <value>1.0385059515501381e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1078</sample_count>
+          <value>-5.7169325870124055e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.1102230246251577e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.2609002590179443e-01</quality>
+              <le>3.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>856</sample_count>
+          <value>3.2591023278276837e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1003</sample_count>
+          <value>-1.6727738809775036e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>3.5527136788004883e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.1409310102462769e-01</quality>
+              <le>7.1857005357742310e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>114</sample_count>
+          <value>6.4622038086485267e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1745</sample_count>
+          <value>-4.9566471799398701e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-7.4940054162198634e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.1351374387741089e-01</quality>
+              <le>1.4143149554729462e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1571</sample_count>
+          <value>-4.6424296372866977e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>288</sample_count>
+          <value>6.6639156578772774e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>2.6645352591003686e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.2261954545974731e-01</quality>
+              <le>1.0177600383758545e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>590</sample_count>
+          <value>1.3091283509473614e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1269</sample_count>
+          <value>-3.6264100845476954e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.2181720733642578e-01</quality>
+              <le>8.9620999991893768e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>352</sample_count>
+          <value>-2.4183425262980360e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1507</sample_count>
+          <value>1.8605210755965146e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.1102230246251554e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0836616754531860e-01</quality>
+              <le>1.0963299870491028e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>781</sample_count>
+          <value>5.1954271677409611e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1078</sample_count>
+          <value>-3.3426585985028628e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>3.7747582837255180e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0880891084671021e-01</quality>
+              <le>1.1348484754562378e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1664</sample_count>
+          <value>-7.4858358240636261e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>195</sample_count>
+          <value>2.4031047447238377e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.3322676295501896e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.1081919670104980e-01</quality>
+              <le>1.2661495208740234e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1730</sample_count>
+          <value>5.6719022662652843e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>129</sample_count>
+          <value>-4.0229379476194682e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>5.5511151231257519e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0904816389083862e-01</quality>
+              <le>2.3689103126525879e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1838</sample_count>
+          <value>-1.9140961693183144e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>21</sample_count>
+          <value>1.7878855228391193e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-9.2703622556201423e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0664407014846802e-01</quality>
+              <le>2.2119045257568359e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1836</sample_count>
+          <value>1.8191803445960011e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>23</sample_count>
+          <value>-9.2985231739132745e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.1102230246251577e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0936371088027954e-01</quality>
+              <le>1.2460045814514160e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1724</sample_count>
+          <value>-5.7918483977216939e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>135</sample_count>
+          <value>3.3577728337370422e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>2.4424906541753385e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0752711296081543e-01</quality>
+              <le>9.5505046844482422e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1341</sample_count>
+          <value>1.1992717133073164e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>518</sample_count>
+          <value>-1.2680428752841727e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-4.8849813083507124e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0749230384826660e-01</quality>
+              <le>7.8355848789215088e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>764</sample_count>
+          <value>-2.4384603502165489e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1095</sample_count>
+          <value>6.2746713049518996e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.9984014443252778e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0703763961791992e-01</quality>
+              <le>6.3535496592521667e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>59</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1800</sample_count>
+          <value>-1.4076159604991374e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>4.4512436177016777e-05</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0712978839874268e-01</quality>
+              <le>6.4906999468803406e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>61</sample_count>
+          <value>-2.8822016897573164e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1798</sample_count>
+          <value>1.4394134992880541e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>4.4408920985006242e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.1009607315063477e-01</quality>
+              <le>6.9339498877525330e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>87</sample_count>
+          <value>1.6547355644710868e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1772</sample_count>
+          <value>-2.1726950528099288e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.1102230246251577e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.0710922479629517e-01</quality>
+              <le>7.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1823</sample_count>
+          <value>1.7568593940350565e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>36</sample_count>
+          <value>-1.1252698341588880e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.7763568394002536e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0767201185226440e-01</quality>
+              <le>8.0150999128818512e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>206</sample_count>
+          <value>2.7041738749866351e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1653</sample_count>
+          <value>-5.8184179168035623e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>3.3306690738754586e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0510478019714355e-01</quality>
+              <le>6.2720644474029541e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>374</sample_count>
+          <value>-4.5462988716181019e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1485</sample_count>
+          <value>2.3987712659202257e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-4.2188474935756130e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.0439256429672241e-01</quality>
+              <le>2.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1791</sample_count>
+          <value>-1.5635519241144060e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>68</sample_count>
+          <value>6.3562787613239691e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>6.6613381477508951e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.0438237190246582e-01</quality>
+              <le>3.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>856</sample_count>
+          <value>1.3081676957882793e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1003</sample_count>
+          <value>-6.7484142743913345e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.1102230246251554e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0724387168884277e-01</quality>
+              <le>1.1533749848604202e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>921</sample_count>
+          <value>4.0250806766256482e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>938</sample_count>
+          <value>-3.7703668263161400e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-5.8286708792821058e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0915646553039551e-01</quality>
+              <le>1.4143149554729462e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1571</sample_count>
+          <value>-3.3928131204983447e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>288</sample_count>
+          <value>6.0410410576585261e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>4.2188474935755767e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0659310817718506e-01</quality>
+              <le>1.4145749807357788e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1572</sample_count>
+          <value>2.0368450770510211e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>287</sample_count>
+          <value>-7.7093695859120004e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.3877787807814476e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0616967678070068e-01</quality>
+              <le>1.6267855167388916e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1811</sample_count>
+          <value>-2.4173163361906828e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>48</sample_count>
+          <value>5.6348494257358028e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-3.8857805861880494e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0677150487899780e-01</quality>
+              <le>1.2661495208740234e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1730</sample_count>
+          <value>4.0402378477571901e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>129</sample_count>
+          <value>-3.4888339817180891e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>4.4408920985006242e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0588327646255493e-01</quality>
+              <le>1.0797724723815918e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1608</sample_count>
+          <value>-6.8287991647037555e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>251</sample_count>
+          <value>1.7431967242459881e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>4.6629367034256354e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0476020574569702e-01</quality>
+              <le>9.2052650451660156e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1253</sample_count>
+          <value>1.0513502662517235e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>606</sample_count>
+          <value>-9.1138849679050823e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.5543122344752168e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0690639019012451e-01</quality>
+              <le>9.0829002857208252e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1219</sample_count>
+          <value>-1.4636537853223561e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>640</sample_count>
+          <value>9.5332675553986543e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>4.2188474935755767e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0534439086914062e-01</quality>
+              <le>9.2052650451660156e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1253</sample_count>
+          <value>1.1293790129864213e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>606</sample_count>
+          <value>-9.5332675553986113e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-2.1649348980190600e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0435024499893188e-01</quality>
+              <le>9.3664005398750305e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>411</sample_count>
+          <value>8.4555682036086433e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1448</sample_count>
+          <value>-1.0351006034230040e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0704753398895264e-01</quality>
+              <le>9.9698998034000397e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>544</sample_count>
+          <value>-8.5042709024257673e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1315</sample_count>
+          <value>1.6769533771522532e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.5543122344752168e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0574851036071777e-01</quality>
+              <le>1.0386450588703156e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>639</sample_count>
+          <value>5.3643508899729207e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1220</sample_count>
+          <value>-2.1791341988666471e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0597953796386719e-01</quality>
+              <le>1.4143149554729462e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1571</sample_count>
+          <value>-2.3199011211312900e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>288</sample_count>
+          <value>5.7032019870719552e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.5543122344752168e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0620901584625244e-01</quality>
+              <le>1.1533749848604202e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>921</sample_count>
+          <value>3.0260573388611104e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>938</sample_count>
+          <value>-4.3620446423872927e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0698792934417725e-01</quality>
+              <le>1.0789400339126587e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>742</sample_count>
+          <value>-4.7900146843168623e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1117</sample_count>
+          <value>3.0074316296999687e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0459891557693481e-01</quality>
+              <le>9.0829002857208252e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1219</sample_count>
+          <value>-1.1779072393798255e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>640</sample_count>
+          <value>7.8608870331688729e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.3322676295501861e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0399231910705566e-01</quality>
+              <le>3.3933651447296143e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>59</sample_count>
+          <value>1.3770357317310018e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1800</sample_count>
+          <value>-9.0707243801608061e-03</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-2.6090241078691246e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0383007526397705e-01</quality>
+              <le>1.0386450588703156e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>639</sample_count>
+          <value>4.0916632352681503e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1220</sample_count>
+          <value>-1.8955755646648090e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0380486249923706e-01</quality>
+              <le>9.9698998034000397e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>544</sample_count>
+          <value>-5.8888670701558621e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1315</sample_count>
+          <value>1.3009980239826119e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>2.2204460492503083e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0560057163238525e-01</quality>
+              <le>9.4531498849391937e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>431</sample_count>
+          <value>8.6647133637941770e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1428</sample_count>
+          <value>-1.3032934253469194e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.3877787807814476e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0492835044860840e-01</quality>
+              <le>8.9620999991893768e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>352</sample_count>
+          <value>-1.0506312397672390e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1507</sample_count>
+          <value>9.4441739612300818e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-9.4368957093138405e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0368678569793701e-01</quality>
+              <le>9.2722505331039429e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>392</sample_count>
+          <value>7.6022467408684444e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1467</sample_count>
+          <value>-9.7486085972466607e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>3.1086244689504288e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0421810150146484e-01</quality>
+              <le>9.9444493651390076e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>536</sample_count>
+          <value>-6.3752797298286562e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1323</sample_count>
+          <value>1.3328967944519532e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>4.4408920985006242e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0323176383972168e-01</quality>
+              <le>1.0177600383758545e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>590</sample_count>
+          <value>4.3831310206729070e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1269</sample_count>
+          <value>-1.4864401441489694e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-8.8817841970012602e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0349098443984985e-01</quality>
+              <le>1.0789400339126587e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>742</sample_count>
+          <value>-3.4075730824220232e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1117</sample_count>
+          <value>2.0791874238488089e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-4.4408920985006281e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0392138957977295e-01</quality>
+              <le>1.0963299870491028e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>781</sample_count>
+          <value>2.6986027157566792e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1078</sample_count>
+          <value>-2.9932407189155552e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0362765789031982e-01</quality>
+              <le>2.3689103126525879e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1838</sample_count>
+          <value>-9.2384036977927863e-03</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>21</sample_count>
+          <value>1.0592165467278325e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.9984014443252857e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0543153285980225e-01</quality>
+              <le>2.2119045257568359e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1836</sample_count>
+          <value>1.5914551618823981e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>23</sample_count>
+          <value>-8.3405482949082688e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>3.5527136788004883e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0523018836975098e-01</quality>
+              <le>1.6267855167388916e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1811</sample_count>
+          <value>-1.9824411253917307e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>48</sample_count>
+          <value>5.8698394468331438e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-4.2188474935756130e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0369459390640259e-01</quality>
+              <le>1.2661495208740234e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1730</sample_count>
+          <value>2.8318382799977653e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>129</sample_count>
+          <value>-2.6718445379285333e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0479084253311157e-01</quality>
+              <le>1.2460045814514160e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1724</sample_count>
+          <value>-3.7451522137317672e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>135</sample_count>
+          <value>2.6178246916992864e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>6.6613381477509353e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0358188152313232e-01</quality>
+              <le>1.2661495208740234e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1730</sample_count>
+          <value>2.7995062956094632e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>129</sample_count>
+          <value>-2.6178246916992898e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-5.5511151231257827e-17</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.0313872098922729e-01</quality>
+              <le>5.0000000000000000e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1046</sample_count>
+          <value>-8.7344555737785268e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>813</sample_count>
+          <value>7.2176940461025546e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.9984014443252857e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0293076038360596e-01</quality>
+              <le>1.2460045814514160e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1724</sample_count>
+          <value>-2.8268637265518849e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>135</sample_count>
+          <value>2.1045949672703843e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>4.2188474935755767e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.0332474708557129e-01</quality>
+              <le>3.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>856</sample_count>
+          <value>1.2230679161917310e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1003</sample_count>
+          <value>-5.4692581535459049e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.9984014443252857e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0446850061416626e-01</quality>
+              <le>1.1533749848604202e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>921</sample_count>
+          <value>2.3320665865473968e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>938</sample_count>
+          <value>-4.0391049572091403e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.5543122344752168e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0324887037277222e-01</quality>
+              <le>8.0150999128818512e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>206</sample_count>
+          <value>1.6178738318585617e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1653</sample_count>
+          <value>-4.0534413064780048e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.5543122344752168e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0377529859542847e-01</quality>
+              <le>1.4143149554729462e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1571</sample_count>
+          <value>-1.6597530926694307e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>288</sample_count>
+          <value>4.9094014377722600e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-3.1086244689504478e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0313144922256470e-01</quality>
+              <le>4.5337548851966858e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>110</sample_count>
+          <value>-8.1199669103862704e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1749</sample_count>
+          <value>9.3381880402274662e-03</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>3.7747582837255180e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0475740432739258e-01</quality>
+              <le>4.5797997713088989e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>113</sample_count>
+          <value>6.8212891092482997e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1746</sample_count>
+          <value>-1.6049266927207175e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0308030843734741e-01</quality>
+              <le>6.4906999468803406e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>61</sample_count>
+          <value>-1.8287112564925503e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1798</sample_count>
+          <value>6.4869817401182221e-03</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>6.6613381477509353e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0292497873306274e-01</quality>
+              <le>1.4145749807357788e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1572</sample_count>
+          <value>1.1075480383243890e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>287</sample_count>
+          <value>-5.8766713493584966e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0272303819656372e-01</quality>
+              <le>1.0132449865341187e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>577</sample_count>
+          <value>-4.4462051722396978e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1282</sample_count>
+          <value>1.2318755109692310e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.5543122344752168e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0431984663009644e-01</quality>
+              <le>9.4531498849391937e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>431</sample_count>
+          <value>7.5132879390893734e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1428</sample_count>
+          <value>-1.1572265040778912e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0449663400650024e-01</quality>
+              <le>8.3246499300003052e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>254</sample_count>
+          <value>-1.3886315377806810e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1605</sample_count>
+          <value>6.5272198323505989e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0269138813018799e-01</quality>
+              <le>8.0150999128818512e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>206</sample_count>
+          <value>1.4709439757444326e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1653</sample_count>
+          <value>-3.6874150180988774e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.0234586000442505e-01</quality>
+              <le>5.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1819</sample_count>
+          <value>-6.0288987564491008e-03</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>40</sample_count>
+          <value>1.0408276196291879e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.0341212749481201e-01</quality>
+              <le>4.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1810</sample_count>
+          <value>1.0137680632639267e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>49</sample_count>
+          <value>-8.1653792840371642e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.2767564783189316e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0258362293243408e-01</quality>
+              <le>9.2052650451660156e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1253</sample_count>
+          <value>7.8639624293829910e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>606</sample_count>
+          <value>-6.5938476875607277e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>2.8865798640253987e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0378650426864624e-01</quality>
+              <le>6.9388854503631592e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>533</sample_count>
+          <value>-2.7381819728273915e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1326</sample_count>
+          <value>2.8352394260546955e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.1102230246251577e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.0225412845611572e-01</quality>
+              <le>2.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1791</sample_count>
+          <value>-1.0029134530916375e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>68</sample_count>
+          <value>4.8411186971158215e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-2.2204460492503178e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0205397605895996e-01</quality>
+              <le>2.3689103126525879e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1838</sample_count>
+          <value>-6.5406523866491815e-03</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>21</sample_count>
+          <value>7.3822507765214740e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>2.8865798640253987e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0392109155654907e-01</quality>
+              <le>2.2119045257568359e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1836</sample_count>
+          <value>1.2851614092359220e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>23</sample_count>
+          <value>-7.0930664915331199e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-1.7208456881689956e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0325584411621094e-01</quality>
+              <le>1.6267855167388916e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1811</sample_count>
+          <value>-1.4416639629038558e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>48</sample_count>
+          <value>4.8685109113945280e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>1.5543122344752168e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0334036350250244e-01</quality>
+              <le>1.4522725343704224e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1776</sample_count>
+          <value>2.1134121503482612e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>83</sample_count>
+          <value>-3.2737254080837347e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0246149301528931e-01</quality>
+              <le>9.0829002857208252e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1219</sample_count>
+          <value>-8.5346673521825803e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>640</sample_count>
+          <value>5.7886290686531831e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>4.4408920985006242e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0454872846603394e-01</quality>
+              <le>7.8601944446563721e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>774</sample_count>
+          <value>1.8066758954429543e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1085</sample_count>
+          <value>-5.0945701357667621e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>3.1086244689504288e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.0212800502777100e-01</quality>
+              <le>1.5322914123535156e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1796</sample_count>
+          <value>-1.4167158667728857e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>63</sample_count>
+          <value>3.0999551339753734e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-8.8817841970012602e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0266414880752563e-01</quality>
+              <le>1.0847599804401398e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>754</sample_count>
+          <value>2.2656461251734570e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1105</sample_count>
+          <value>-2.3970407856964623e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>8.8817841970012444e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0370788574218750e-01</quality>
+              <le>1.0789400339126587e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>742</sample_count>
+          <value>-3.1940419009583935e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1117</sample_count>
+          <value>2.3657134249111228e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0368273258209229e-01</quality>
+              <le>1.0177600383758545e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>590</sample_count>
+          <value>4.4494384607886871e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1269</sample_count>
+          <value>-1.6718599716109608e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0391924381256104e-01</quality>
+              <le>1.0132449865341187e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>577</sample_count>
+          <value>-5.2562531380568607e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1282</sample_count>
+          <value>1.5038797351826086e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-2.1649348980190600e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0355648994445801e-01</quality>
+              <le>9.2722505331039429e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>392</sample_count>
+          <value>7.2872834808621612e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1467</sample_count>
+          <value>-9.8094551697467572e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>6.6613381477509353e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0229591131210327e-01</quality>
+              <le>1.4143149554729462e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1571</sample_count>
+          <value>-1.1560090798064190e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>288</sample_count>
+          <value>4.2036013274394679e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0365710258483887e-01</quality>
+              <le>1.0963299870491028e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>781</sample_count>
+          <value>2.1235893234003624e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1078</sample_count>
+          <value>-3.5916730278930570e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>-2.2204460492503178e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0305408239364624e-01</quality>
+              <le>9.1055497527122498e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>376</sample_count>
+          <value>-7.5233897759775223e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1483</sample_count>
+          <value>8.1521687774440590e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>1859</sample_count>
+          <value>2.4424906541753385e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.0231134891510010e-01</quality>
+              <le>8.8118501007556915e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>322</sample_count>
+          <value>7.4755757692778257e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1537</sample_count>
+          <value>-6.2031478956482825e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_></trees></boost>
+</opencv_storage>
diff --git a/samples/cpp/trained_classifierNM2.xml b/samples/cpp/trained_classifierNM2.xml
new file mode 100644
index 000000000..eae66f01a
--- /dev/null
+++ b/samples/cpp/trained_classifierNM2.xml
@@ -0,0 +1,4046 @@
+<?xml version="1.0"?>
+<opencv_storage>
+<boost type_id="opencv-ml-boost-tree">
+  <boosting_type>RealAdaboost</boosting_type>
+  <splitting_criteria>Gini</splitting_criteria>
+  <ntrees>100</ntrees>
+  <weight_trimming_rate>1.</weight_trimming_rate>
+  <is_classifier>1</is_classifier>
+  <var_all>8</var_all>
+  <var_count>7</var_count>
+  <ord_var_count>7</ord_var_count>
+  <cat_var_count>0</cat_var_count>
+  <training_params>
+    <use_surrogates>0</use_surrogates>
+    <max_categories>10</max_categories>
+    <max_depth>1</max_depth>
+    <min_sample_count>10</min_sample_count>
+    <cross_validation_folds>0</cross_validation_folds>
+    <priors type_id="opencv-matrix">
+      <rows>1</rows>
+      <cols>2</cols>
+      <dt>d</dt>
+      <data>
+        1. 1.</data></priors></training_params>
+  <var_idx type_id="opencv-matrix">
+    <rows>1</rows>
+    <cols>7</cols>
+    <dt>i</dt>
+    <data>
+      1 2 3 4 5 6 7</data></var_idx>
+  <var_type>
+    0 0 0 0 0 0 0</var_type>
+  <cat_count type_id="opencv-matrix">
+    <rows>1</rows>
+    <cols>1</cols>
+    <dt>i</dt>
+    <data>
+      2</data></cat_count>
+  <cat_map type_id="opencv-matrix">
+    <rows>1</rows>
+    <cols>2</cols>
+    <dt>i</dt>
+    <data>
+      1 2</data></cat_map>
+  <trees>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.0078929470944956e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>6</var>
+              <quality>8.7790262699127197e-01</quality>
+              <le>8.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2105</sample_count>
+          <value>-1.1353035616351272e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1110</sample_count>
+          <value>2.0526971992043412e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.0428103653102464e-14</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>6</var>
+              <quality>6.9283843040466309e-01</quality>
+              <le>4.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1558</sample_count>
+          <value>-1.1814132639953403e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1657</sample_count>
+          <value>5.0475867936025332e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2259971643734884e-14</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>7.1280795335769653e-01</quality>
+              <le>1.2044999748468399e-03</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1475</sample_count>
+          <value>-7.9375129344950335e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1740</sample_count>
+          <value>7.6558743305188104e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.4424906541753385e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.9598809480667114e-01</quality>
+              <le>1.6245440244674683e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1385</sample_count>
+          <value>-5.4645841888834568e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1830</sample_count>
+          <value>4.0662339031566280e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>6.2172489379008380e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.5169683694839478e-01</quality>
+              <le>1.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2960</sample_count>
+          <value>-1.2615933276314473e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>255</sample_count>
+          <value>1.1688135468772842e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-8.4376949871512607e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.4812711477279663e-01</quality>
+              <le>5.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2891</sample_count>
+          <value>1.3088588195700304e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>324</sample_count>
+          <value>-9.4959634248367153e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.8865798640253987e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.9334284067153931e-01</quality>
+              <le>9.2769998311996460e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1964</sample_count>
+          <value>-4.5236837327316332e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1251</sample_count>
+          <value>4.7279968034956410e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-1.3322676295501896e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.6080043315887451e-01</quality>
+              <le>2.2512035369873047e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2464</sample_count>
+          <value>-2.1052010276554461e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>751</sample_count>
+          <value>6.7174941695746326e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-7.3274719625260868e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.3896796703338623e-01</quality>
+              <le>1.6267855167388916e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3041</sample_count>
+          <value>-8.1739798958620380e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>174</sample_count>
+          <value>1.8924547997011532e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>3.3306690738754586e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>6</var>
+              <quality>5.3723198175430298e-01</quality>
+              <le>2.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>851</sample_count>
+          <value>-9.3661052654285037e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2364</sample_count>
+          <value>1.0184709367515142e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>1.2656542480726624e-14</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.4598015546798706e-01</quality>
+              <le>3.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1494</sample_count>
+          <value>3.5820791601148183e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1721</sample_count>
+          <value>-2.7428866405164359e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-1.3322676295501896e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>5.5166077613830566e-01</quality>
+              <le>2.9999999242136255e-05</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1467</sample_count>
+          <value>-3.3467424181887540e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1748</sample_count>
+          <value>3.3182965736372233e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>1.3322676295501861e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>6</var>
+              <quality>5.4394942522048950e-01</quality>
+              <le>6.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1817</sample_count>
+          <value>-2.9822080188111177e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1398</sample_count>
+          <value>3.1329368107827943e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.6645352591003686e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.3358083963394165e-01</quality>
+              <le>1.3833449780941010e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2635</sample_count>
+          <value>8.0265603900023089e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>580</sample_count>
+          <value>-1.2162305066944930e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-6.4392935428259497e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>5.3098827600479126e-01</quality>
+              <le>1.2702149152755737e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2676</sample_count>
+          <value>1.3513810711631610e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>539</sample_count>
+          <value>-4.9909506904561546e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>3.7747582837255180e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>6</var>
+              <quality>5.3376436233520508e-01</quality>
+              <le>9.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2193</sample_count>
+          <value>1.3117433044170448e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1022</sample_count>
+          <value>-5.7326180584233111e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>1.7763568394002473e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.2698892354965210e-01</quality>
+              <le>2.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3127</sample_count>
+          <value>-5.4553392906591960e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>88</sample_count>
+          <value>2.6688100460888684e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-3.0531133177191900e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>6</var>
+              <quality>5.2742463350296021e-01</quality>
+              <le>1.2500000000000000e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2620</sample_count>
+          <value>-5.4904310687255702e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>595</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>1.7368061883308035e-04</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.2537131309509277e-01</quality>
+              <le>3.7997097969055176e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3184</sample_count>
+          <value>-5.4291952000757461e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>31</sample_count>
+          <value>1.6789882514261603e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>1.9984014443252778e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.3548330068588257e-01</quality>
+              <le>6.6345453262329102e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>777</sample_count>
+          <value>-9.1576813577644156e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2438</sample_count>
+          <value>9.8351123166090529e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.2158576250076294e-01</quality>
+              <le>5.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2891</sample_count>
+          <value>8.1007865418959935e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>324</sample_count>
+          <value>-5.9585687406274601e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-6.6613381477509432e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.2358013391494751e-01</quality>
+              <le>1.0659420490264893e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2457</sample_count>
+          <value>-1.3607641362335321e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>758</sample_count>
+          <value>3.6397269406151606e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>3.7747582837255180e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.2605414390563965e-01</quality>
+              <le>1.3934500217437744e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>909</sample_count>
+          <value>-4.4217410871376006e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2306</sample_count>
+          <value>1.2609161734369648e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>1.5543122344752168e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>5.2353090047836304e-01</quality>
+              <le>1.2044999748468399e-03</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1475</sample_count>
+          <value>-2.0545012432588597e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1740</sample_count>
+          <value>2.3659867388791755e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-1.8318679906315118e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.2399057149887085e-01</quality>
+              <le>4.7306199073791504e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3204</sample_count>
+          <value>-4.8506775757358948e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>11</sample_count>
+          <value>2.6435685747751951e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503083e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>6</var>
+              <quality>5.2452313899993896e-01</quality>
+              <le>6.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1817</sample_count>
+          <value>-2.0830922304142732e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1398</sample_count>
+          <value>2.4354914812770342e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>3.7747582837255180e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>1</var>
+              <quality>5.1760864257812500e-01</quality>
+              <le>7.4752002954483032e-02</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>270</sample_count>
+          <value>7.4056582493755241e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2945</sample_count>
+          <value>-5.6004456612259952e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>1.5543122344752168e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.1595693826675415e-01</quality>
+              <le>5.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2891</sample_count>
+          <value>6.8929447086892887e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>324</sample_count>
+          <value>-5.0204800200277599e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-8.8262730457700718e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>5.1432991027832031e-01</quality>
+              <le>2.5954151153564453e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2981</sample_count>
+          <value>3.1556263241584975e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>234</sample_count>
+          <value>-1.5189501708336401e+00</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.1588785648345947e-01</quality>
+              <le>1.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2960</sample_count>
+          <value>-6.0429432019906144e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>255</sample_count>
+          <value>5.8525011139658878e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.4424906541753385e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.2001929283142090e-01</quality>
+              <le>1.3934500217437744e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>909</sample_count>
+          <value>-4.6709016373000739e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2306</sample_count>
+          <value>9.2123924914545782e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.6645352591003686e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>5.1656109094619751e-01</quality>
+              <le>1.2044999748468399e-03</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1475</sample_count>
+          <value>-1.6749125053548522e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1740</sample_count>
+          <value>2.0231653132989663e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-3.7747582837255464e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>3</var>
+              <quality>5.1900207996368408e-01</quality>
+              <le>3.</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1494</sample_count>
+          <value>2.5948749710951657e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1721</sample_count>
+          <value>-1.5086548637238759e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-1.3322676295501896e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>0</var>
+              <quality>5.1032912731170654e-01</quality>
+              <le>1.1417410373687744e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2628</sample_count>
+          <value>-6.3461321700044010e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>587</sample_count>
+          <value>3.3830370061028520e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>1.1102230246251554e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.1085340976715088e-01</quality>
+              <le>1.3021064996719360e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>734</sample_count>
+          <value>-3.7065768373801600e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2481</sample_count>
+          <value>6.1297280870936215e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>4.4408920985006242e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>2</var>
+              <quality>5.1171976327896118e-01</quality>
+              <le>1.5000000000000000e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2960</sample_count>
+          <value>-5.1679907432686661e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>255</sample_count>
+          <value>4.8967202182224279e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-3.3306690738754807e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>5.1276576519012451e-01</quality>
+              <le>1.4958450198173523e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2755</sample_count>
+          <value>7.7575074151822049e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>460</sample_count>
+          <value>-3.4258234102830631e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>5.0944852828979492e-01</quality>
+              <le>2.5842750072479248e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2980</sample_count>
+          <value>-2.8188779664253410e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>235</sample_count>
+          <value>8.1175021374635614e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-5.2735593669695212e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.1038962602615356e-01</quality>
+              <le>1.4510664939880371e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1007</sample_count>
+          <value>-3.3150262977164136e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2208</sample_count>
+          <value>6.5052948330937457e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>1.3322676295501861e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>5.1113504171371460e-01</quality>
+              <le>2.4550000671297312e-04</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1471</sample_count>
+          <value>-1.2860595713867043e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1744</sample_count>
+          <value>1.7591144586528801e-01</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-8.8817841970012602e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>4</var>
+              <quality>5.1135826110839844e-01</quality>
+              <le>1.9797500967979431e-01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>2857</sample_count>
+          <value>4.5695890739118755e-02</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>358</sample_count>
+          <value>-5.4593965595422900e-01</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.8865798640253987e-15</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.0861865282058716e-01</quality>
+              <le>4.4535398483276367e+00</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3204</sample_count>
+          <value>-1.7238960071156845e-02</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>11</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>5.4514379135116734e-05</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.1613724231719971e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>5.4514379101591658e-05</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-4.2188474935756130e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>5.8550304174423218e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-4.2188474935756130e-15</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.1834394931793213e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2149631977081299e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2519973516464233e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3510566949844360e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3096535205841064e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.0966378450393677e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3168644905090332e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3053596019744873e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.1062115430831909e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2805336713790894e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3325423002243042e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.0798364877700806e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2765526771545410e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3239699602127075e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.0819625854492188e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2792611122131348e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3205927610397339e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.0529828071594238e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2831163406372070e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3197416067123413e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.0644149780273438e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2831127643585205e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2990629673004150e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.1229735612869263e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2957435846328735e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2827998399734497e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.1507642269134521e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>4.4408920985006242e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3906562328338623e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>4.4408920985006242e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-4.4408920985006281e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2384939193725586e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-4.4408920985006281e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3710451126098633e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.3055968284606934e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>0.</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2085735797882080e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>-2.2204460492503136e-16</value>
+          <norm_class_idx>0</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_>
+    <_>
+      <best_tree_idx>-1</best_tree_idx>
+      <nodes>
+        <_>
+          <depth>0</depth>
+          <sample_count>3215</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error>
+          <splits>
+            <_><var>5</var>
+              <quality>6.2932032346725464e-01</quality>
+              <le>1.3783290863037109e+01</le></_></splits></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>3214</sample_count>
+          <value>2.2204460492503126e-16</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_>
+        <_>
+          <depth>1</depth>
+          <sample_count>1</sample_count>
+          <value>5.7564577324623896e+00</value>
+          <norm_class_idx>1</norm_class_idx>
+          <Tn>0</Tn>
+          <complexity>0</complexity>
+          <alpha>0.</alpha>
+          <node_risk>0.</node_risk>
+          <tree_risk>0.</tree_risk>
+          <tree_error>0.</tree_error></_></nodes></_></trees></boost>
+</opencv_storage>
diff --git a/samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp b/samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp
index ec0076803..67147b3a0 100644
--- a/samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp
+++ b/samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp
@@ -76,5 +76,3 @@ void Morphology_Operations( int, void* )
   morphologyEx( src, dst, operation, element );
   imshow( window_name, dst );
 }
-
-
diff --git a/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp b/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp
index 48ec8286c..fc98d1c21 100644
--- a/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp
+++ b/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp
@@ -66,10 +66,3 @@ int main( void )
 
    return 0;
 }
-
-
-
-
-
-
-
diff --git a/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp b/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
index a5498f715..f8c97c411 100644
--- a/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
@@ -61,5 +61,3 @@ int main( int, char** argv )
 
   return 0;
 }
-
-
diff --git a/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp b/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
index 53120b780..0a441ccdf 100644
--- a/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
@@ -15,7 +15,6 @@ using namespace cv;
 Mat src, dst;
 int top, bottom, left, right;
 int borderType;
-Scalar value;
 const char* window_name = "copyMakeBorder Demo";
 RNG rng(12345);
 
@@ -64,7 +63,7 @@ int main( int, char** argv )
          else if( (char)c == 'r' )
            { borderType = BORDER_REPLICATE; }
 
-         value = Scalar( rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255) );
+         Scalar value( rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255) );
          copyMakeBorder( src, dst, top, bottom, left, right, borderType, value );
 
          imshow( window_name, dst );
@@ -72,5 +71,3 @@ int main( int, char** argv )
 
   return 0;
 }
-
-
diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp
index 61dcd7001..9ca230f87 100644
--- a/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp
+++ b/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp
@@ -92,4 +92,3 @@ void thresh_callback(int, void* )
        circle( drawing, mc[i], 4, color, -1, 8, 0 );
      }
 }
-
diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp
index 23f794a90..f55f8f687 100644
--- a/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp
+++ b/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp
@@ -79,5 +79,3 @@ int main( void )
   waitKey(0);
   return(0);
 }
-
-
diff --git a/samples/cpp/tutorial_code/TrackingMotion/cornerDetector_Demo.cpp b/samples/cpp/tutorial_code/TrackingMotion/cornerDetector_Demo.cpp
index dafc4be94..31c700478 100644
--- a/samples/cpp/tutorial_code/TrackingMotion/cornerDetector_Demo.cpp
+++ b/samples/cpp/tutorial_code/TrackingMotion/cornerDetector_Demo.cpp
@@ -120,4 +120,3 @@ void myHarris_function( int, void* )
      }
   imshow( myHarris_window, myHarris_copy );
 }
-
diff --git a/samples/cpp/tutorial_code/TrackingMotion/cornerSubPix_Demo.cpp b/samples/cpp/tutorial_code/TrackingMotion/cornerSubPix_Demo.cpp
index 320eb100a..5957a95d6 100644
--- a/samples/cpp/tutorial_code/TrackingMotion/cornerSubPix_Demo.cpp
+++ b/samples/cpp/tutorial_code/TrackingMotion/cornerSubPix_Demo.cpp
@@ -102,4 +102,3 @@ void goodFeaturesToTrack_Demo( int, void* )
   for( size_t i = 0; i < corners.size(); i++ )
      { cout<<" -- Refined Corner ["<<i<<"]  ("<<corners[i].x<<","<<corners[i].y<<")"<<endl; }
 }
-
diff --git a/samples/cpp/tutorial_code/TrackingMotion/goodFeaturesToTrack_Demo.cpp b/samples/cpp/tutorial_code/TrackingMotion/goodFeaturesToTrack_Demo.cpp
index a0f6d125c..b45d60a08 100644
--- a/samples/cpp/tutorial_code/TrackingMotion/goodFeaturesToTrack_Demo.cpp
+++ b/samples/cpp/tutorial_code/TrackingMotion/goodFeaturesToTrack_Demo.cpp
@@ -90,4 +90,3 @@ void goodFeaturesToTrack_Demo( int, void* )
   namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, copy );
 }
-
diff --git a/samples/cpp/tutorial_code/contrib/retina_tutorial.cpp b/samples/cpp/tutorial_code/bioinspired/retina_tutorial.cpp
similarity index 95%
rename from samples/cpp/tutorial_code/contrib/retina_tutorial.cpp
rename to samples/cpp/tutorial_code/bioinspired/retina_tutorial.cpp
index 284bbf5d8..994d881de 100644
--- a/samples/cpp/tutorial_code/contrib/retina_tutorial.cpp
+++ b/samples/cpp/tutorial_code/bioinspired/retina_tutorial.cpp
@@ -9,7 +9,7 @@
 #include <iostream>
 #include <cstring>
 
-#include "opencv2/contrib.hpp"
+#include "opencv2/bioinspired.hpp"
 #include "opencv2/highgui.hpp"
 
 static void help(std::string errorMessage)
@@ -95,16 +95,16 @@ int main(int argc, char* argv[]) {
     try
     {
         // create a retina instance with default parameters setup, uncomment the initialisation you wanna test
-        cv::Ptr<cv::Retina> myRetina;
+        cv::Ptr<cv::bioinspired::Retina> myRetina;
 
         // if the last parameter is 'log', then activate log sampling (favour foveal vision and subsamples peripheral vision)
         if (useLogSampling)
         {
-            myRetina = cv::createRetina(inputFrame.size(), true, cv::RETINA_COLOR_BAYER, true, 2.0, 10.0);
+            myRetina = cv::bioinspired::createRetina(inputFrame.size(), true, cv::bioinspired::RETINA_COLOR_BAYER, true, 2.0, 10.0);
         }
         else// -> else allocate "classical" retina :
         {
-            myRetina = cv::createRetina(inputFrame.size());
+            myRetina = cv::bioinspired::createRetina(inputFrame.size());
         }
 
         // save default retina parameters file in order to let you see this and maybe modify it and reload using method "setup"
diff --git a/samples/cpp/tutorial_code/core/Matrix/Drawing_1.cpp b/samples/cpp/tutorial_code/core/Matrix/Drawing_1.cpp
index f10c4963b..5cad99208 100644
--- a/samples/cpp/tutorial_code/core/Matrix/Drawing_1.cpp
+++ b/samples/cpp/tutorial_code/core/Matrix/Drawing_1.cpp
@@ -168,5 +168,3 @@ void MyLine( Mat img, Point start, Point end )
     thickness,
     lineType );
 }
-
-
diff --git a/samples/cpp/tutorial_code/core/discrete_fourier_transform/discrete_fourier_transform.cpp b/samples/cpp/tutorial_code/core/discrete_fourier_transform/discrete_fourier_transform.cpp
index dd77bb2eb..de7a32ca6 100644
--- a/samples/cpp/tutorial_code/core/discrete_fourier_transform/discrete_fourier_transform.cpp
+++ b/samples/cpp/tutorial_code/core/discrete_fourier_transform/discrete_fourier_transform.cpp
@@ -75,4 +75,4 @@ int main(int argc, char ** argv)
     waitKey();
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp b/samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp
index 9c527a8aa..388b7744f 100644
--- a/samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp
+++ b/samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp
@@ -151,4 +151,4 @@ int main(int ac, char** av)
         << "Tip: Open up " << filename << " with a text editor to see the serialized data." << endl;
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/samples/cpp/tutorial_code/core/how_to_scan_images/how_to_scan_images.cpp b/samples/cpp/tutorial_code/core/how_to_scan_images/how_to_scan_images.cpp
index 7bd91ddf6..f257d6ed6 100644
--- a/samples/cpp/tutorial_code/core/how_to_scan_images/how_to_scan_images.cpp
+++ b/samples/cpp/tutorial_code/core/how_to_scan_images/how_to_scan_images.cpp
@@ -214,4 +214,4 @@ Mat& ScanImageAndReduceRandomAccess(Mat& I, const uchar* const table)
     }
 
     return I;
-}
\ No newline at end of file
+}
diff --git a/samples/cpp/tutorial_code/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.cpp b/samples/cpp/tutorial_code/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.cpp
index 6c681d643..e13f2b696 100644
--- a/samples/cpp/tutorial_code/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.cpp
+++ b/samples/cpp/tutorial_code/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.cpp
@@ -32,8 +32,8 @@ int main( int argc, char** argv )
     const char* imagename = argc > 1 ? argv[1] : "lena.jpg";
 
 #ifdef DEMO_MIXED_API_USE
-    Ptr<IplImage> IplI = cvLoadImage(imagename);      // Ptr<T> is safe ref-counting pointer class
-    if(IplI.empty())
+    Ptr<IplImage> IplI(cvLoadImage(imagename));      // Ptr<T> is a safe ref-counting pointer class
+    if(!IplI)
     {
         cerr << "Can not load image " <<  imagename << endl;
         return -1;
diff --git a/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp b/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp
index 1c476453d..0310b973a 100644
--- a/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp
+++ b/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp
@@ -84,4 +84,4 @@ void Sharpen(const Mat& myImage,Mat& Result)
     Result.row(Result.rows-1).setTo(Scalar(0));
     Result.col(0).setTo(Scalar(0));
     Result.col(Result.cols-1).setTo(Scalar(0));
-}
\ No newline at end of file
+}
diff --git a/samples/cpp/tutorial_code/core/mat_the_basic_image_container/mat_the_basic_image_container.cpp b/samples/cpp/tutorial_code/core/mat_the_basic_image_container/mat_the_basic_image_container.cpp
index 9c51ec857..91cc64f9b 100644
--- a/samples/cpp/tutorial_code/core/mat_the_basic_image_container/mat_the_basic_image_container.cpp
+++ b/samples/cpp/tutorial_code/core/mat_the_basic_image_container/mat_the_basic_image_container.cpp
@@ -82,4 +82,4 @@ int main(int,char**)
 
     cout << "A vector of 2D Points = " << vPoints << endl << endl;
     return 0;
-}
\ No newline at end of file
+}
diff --git a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
index 1815cc6de..e8e961031 100644
--- a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
+++ b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
@@ -430,4 +430,3 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)
     }
     return mssim;
 }
-
diff --git a/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp b/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp
index 44ef32c0a..847cd8f6d 100644
--- a/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp
+++ b/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp
@@ -27,4 +27,4 @@ int main( int argc, char** argv )
 
     waitKey(0); // Wait for a keystroke in the window
     return 0;
-}
\ No newline at end of file
+}
diff --git a/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp b/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp
index b1bac5bab..b5470efa3 100644
--- a/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp
+++ b/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp
@@ -203,4 +203,4 @@ Scalar getMSSIM( const Mat& i1, const Mat& i2)
 
     Scalar mssim = mean( ssim_map ); // mssim = average of ssim map
     return mssim;
-}
\ No newline at end of file
+}
diff --git a/samples/cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp b/samples/cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp
index a24029a23..4f6daecaa 100644
--- a/samples/cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp
+++ b/samples/cpp/tutorial_code/ml/non_linear_svms/non_linear_svms.cpp
@@ -127,4 +127,4 @@ int main()
     imwrite("result.png", I);	                   // save the Image
     imshow("SVM for Non-Linear Training Data", I); // show it to the user
     waitKey(0);
-}
\ No newline at end of file
+}
diff --git a/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp b/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp
index 2f1575e6e..e7dc3e98f 100644
--- a/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp
+++ b/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp
@@ -1,14 +1,6 @@
-/**
- * @file objectDetection.cpp
- * @author A. Huaman ( based in the classic facedetect.cpp in samples/c )
- * @brief A simplified version of facedetect.cpp, show how to load a cascade classifier and how to find objects (Face + eyes) in a video stream
- */
-#include "opencv2/objdetect/objdetect.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/core/utility.hpp"
-
-#include "opencv2/highgui/highgui_c.h"
+#include "opencv2/objdetect.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
 
 #include <iostream>
 #include <stdio.h>
@@ -20,79 +12,73 @@ using namespace cv;
 void detectAndDisplay( Mat frame );
 
 /** Global variables */
-//-- Note, either copy these two files from opencv/data/haarscascades to your current folder, or change these locations
-string face_cascade_name = "haarcascade_frontalface_alt.xml";
-string eyes_cascade_name = "haarcascade_eye_tree_eyeglasses.xml";
+String face_cascade_name = "haarcascade_frontalface_alt.xml";
+String eyes_cascade_name = "haarcascade_eye_tree_eyeglasses.xml";
 CascadeClassifier face_cascade;
 CascadeClassifier eyes_cascade;
-string window_name = "Capture - Face detection";
-RNG rng(12345);
+String window_name = "Capture - Face detection";
 
-/**
- * @function main
- */
+/** @function main */
 int main( void )
 {
-  CvCapture* capture;
-  Mat frame;
+    VideoCapture capture;
+    Mat frame;
 
-  //-- 1. Load the cascades
-  if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
-  if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
+    //-- 1. Load the cascades
+    if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading face cascade\n"); return -1; };
+    if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading eyes cascade\n"); return -1; };
 
-  //-- 2. Read the video stream
-  capture = cvCaptureFromCAM( -1 );
-  if( capture )
-  {
-    for(;;)
+    //-- 2. Read the video stream
+    capture.open( -1 );
+    if ( ! capture.isOpened() ) { printf("--(!)Error opening video capture\n"); return -1; }
+
+    while ( capture.read(frame) )
     {
-      frame = cv::cvarrToMat(cvQueryFrame( capture ));
+        if( frame.empty() )
+        {
+            printf(" --(!) No captured frame -- Break!");
+            break;
+        }
 
-      //-- 3. Apply the classifier to the frame
-      if( !frame.empty() )
-       { detectAndDisplay( frame ); }
-      else
-       { printf(" --(!) No captured frame -- Break!"); break; }
-
-      int c = waitKey(10);
-      if( (char)c == 'c' ) { break; }
+        //-- 3. Apply the classifier to the frame
+        detectAndDisplay( frame );
 
+        int c = waitKey(10);
+        if( (char)c == 27 ) { break; } // escape
     }
-  }
-  return 0;
+    return 0;
 }
 
-/**
- * @function detectAndDisplay
- */
+/** @function detectAndDisplay */
 void detectAndDisplay( Mat frame )
 {
-   std::vector<Rect> faces;
-   Mat frame_gray;
+    std::vector<Rect> faces;
+    Mat frame_gray;
 
-   cvtColor( frame, frame_gray, COLOR_BGR2GRAY );
-   equalizeHist( frame_gray, frame_gray );
-   //-- Detect faces
-   face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CASCADE_SCALE_IMAGE, Size(30, 30) );
+    cvtColor( frame, frame_gray, COLOR_BGR2GRAY );
+    equalizeHist( frame_gray, frame_gray );
 
-   for( size_t i = 0; i < faces.size(); i++ )
+    //-- Detect faces
+    face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CASCADE_SCALE_IMAGE, Size(30, 30) );
+
+    for ( size_t i = 0; i < faces.size(); i++ )
     {
-      Point center( faces[i].x + faces[i].width/2, faces[i].y + faces[i].height/2 );
-      ellipse( frame, center, Size( faces[i].width/2, faces[i].height/2), 0, 0, 360, Scalar( 255, 0, 255 ), 2, 8, 0 );
+        Point center( faces[i].x + faces[i].width/2, faces[i].y + faces[i].height/2 );
+        ellipse( frame, center, Size( faces[i].width/2, faces[i].height/2 ), 0, 0, 360, Scalar( 255, 0, 255 ), 4, 8, 0 );
 
-      Mat faceROI = frame_gray( faces[i] );
-      std::vector<Rect> eyes;
+        Mat faceROI = frame_gray( faces[i] );
+        std::vector<Rect> eyes;
 
-      //-- In each face, detect eyes
-      eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CASCADE_SCALE_IMAGE, Size(30, 30) );
+        //-- In each face, detect eyes
+        eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CASCADE_SCALE_IMAGE, Size(30, 30) );
 
-      for( size_t j = 0; j < eyes.size(); j++ )
-       {
-         Point eye_center( faces[i].x + eyes[j].x + eyes[j].width/2, faces[i].y + eyes[j].y + eyes[j].height/2 );
-         int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 );
-         circle( frame, eye_center, radius, Scalar( 255, 0, 0 ), 3, 8, 0 );
-       }
+        for ( size_t j = 0; j < eyes.size(); j++ )
+        {
+            Point eye_center( faces[i].x + eyes[j].x + eyes[j].width/2, faces[i].y + eyes[j].y + eyes[j].height/2 );
+            int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 );
+            circle( frame, eye_center, radius, Scalar( 255, 0, 0 ), 4, 8, 0 );
+        }
     }
-   //-- Show what you got
-   imshow( window_name, frame );
+    //-- Show what you got
+    imshow( window_name, frame );
 }
diff --git a/samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp b/samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp
index 0cd21966e..326485839 100644
--- a/samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp
+++ b/samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp
@@ -3,12 +3,9 @@
  * @author A. Huaman ( based in the classic facedetect.cpp in samples/c )
  * @brief A simplified version of facedetect.cpp, show how to load a cascade classifier and how to find objects (Face + eyes) in a video stream - Using LBP here
  */
-#include "opencv2/objdetect/objdetect.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/core/utility.hpp"
-
-#include "opencv2/highgui/highgui_c.h"
+#include "opencv2/objdetect.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
 
 #include <iostream>
 #include <stdio.h>
@@ -20,46 +17,43 @@ using namespace cv;
 void detectAndDisplay( Mat frame );
 
 /** Global variables */
-string face_cascade_name = "lbpcascade_frontalface.xml";
-string eyes_cascade_name = "haarcascade_eye_tree_eyeglasses.xml";
+String face_cascade_name = "lbpcascade_frontalface.xml";
+String eyes_cascade_name = "haarcascade_eye_tree_eyeglasses.xml";
 CascadeClassifier face_cascade;
 CascadeClassifier eyes_cascade;
-string window_name = "Capture - Face detection";
-
-RNG rng(12345);
-
+String window_name = "Capture - Face detection";
 /**
  * @function main
  */
 int main( void )
 {
-  CvCapture* capture;
-  Mat frame;
+    VideoCapture capture;
+    Mat frame;
 
-  //-- 1. Load the cascade
-  if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
-  if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
+    //-- 1. Load the cascade
+    if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading face cascade\n"); return -1; };
+    if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading eyes cascade\n"); return -1; };
 
-  //-- 2. Read the video stream
-  capture = cvCaptureFromCAM( -1 );
-  if( capture )
-  {
-    for(;;)
+    //-- 2. Read the video stream
+    capture.open( -1 );
+    if ( ! capture.isOpened() ) { printf("--(!)Error opening video capture\n"); return -1; }
+
+    while ( capture.read(frame) )
     {
-      frame = cv::cvarrToMat(cvQueryFrame( capture ));
+        if( frame.empty() )
+        {
+            printf(" --(!) No captured frame -- Break!");
+            break;
+        }
 
-      //-- 3. Apply the classifier to the frame
-      if( !frame.empty() )
-       { detectAndDisplay( frame ); }
-      else
-       { printf(" --(!) No captured frame -- Break!"); break; }
-
-      int c = waitKey(10);
-      if( (char)c == 'c' ) { break; }
+        //-- 3. Apply the classifier to the frame
+        detectAndDisplay( frame );
 
+        //-- bail out if escape was pressed
+        int c = waitKey(10);
+        if( (char)c == 27 ) { break; }
     }
-  }
-  return 0;
+    return 0;
 }
 
 /**
@@ -67,37 +61,37 @@ int main( void )
  */
 void detectAndDisplay( Mat frame )
 {
-   std::vector<Rect> faces;
-   Mat frame_gray;
+    std::vector<Rect> faces;
+    Mat frame_gray;
 
-   cvtColor( frame, frame_gray, COLOR_BGR2GRAY );
-   equalizeHist( frame_gray, frame_gray );
+    cvtColor( frame, frame_gray, COLOR_BGR2GRAY );
+    equalizeHist( frame_gray, frame_gray );
 
-   //-- Detect faces
-   face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0, Size(80, 80) );
+    //-- Detect faces
+    face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0, Size(80, 80) );
 
-   for( size_t i = 0; i < faces.size(); i++ )
+    for( size_t i = 0; i < faces.size(); i++ )
     {
-      Mat faceROI = frame_gray( faces[i] );
-      std::vector<Rect> eyes;
+        Mat faceROI = frame_gray( faces[i] );
+        std::vector<Rect> eyes;
 
-      //-- In each face, detect eyes
-      eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CASCADE_SCALE_IMAGE, Size(30, 30) );
-      if( eyes.size() == 2)
-      {
-         //-- Draw the face
-         Point center( faces[i].x + faces[i].width/2, faces[i].y + faces[i].height/2 );
-         ellipse( frame, center, Size( faces[i].width/2, faces[i].height/2), 0, 0, 360, Scalar( 255, 0, 0 ), 2, 8, 0 );
+        //-- In each face, detect eyes
+        eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CASCADE_SCALE_IMAGE, Size(30, 30) );
+        if( eyes.size() == 2)
+        {
+            //-- Draw the face
+            Point center( faces[i].x + faces[i].width/2, faces[i].y + faces[i].height/2 );
+            ellipse( frame, center, Size( faces[i].width/2, faces[i].height/2 ), 0, 0, 360, Scalar( 255, 0, 0 ), 2, 8, 0 );
 
-         for( size_t j = 0; j < eyes.size(); j++ )
-          { //-- Draw the eyes
-            Point eye_center( faces[i].x + eyes[j].x + eyes[j].width/2, faces[i].y + eyes[j].y + eyes[j].height/2 );
-            int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 );
-            circle( frame, eye_center, radius, Scalar( 255, 0, 255 ), 3, 8, 0 );
-          }
-       }
+            for( size_t j = 0; j < eyes.size(); j++ )
+            { //-- Draw the eyes
+                Point eye_center( faces[i].x + eyes[j].x + eyes[j].width/2, faces[i].y + eyes[j].y + eyes[j].height/2 );
+                int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 );
+                circle( frame, eye_center, radius, Scalar( 255, 0, 255 ), 3, 8, 0 );
+            }
+        }
 
     }
-   //-- Show what you got
-   imshow( window_name, frame );
+    //-- Show what you got
+    imshow( window_name, frame );
 }
diff --git a/samples/cpp/video_homography.cpp b/samples/cpp/video_homography.cpp
index c8388007d..bf2559fba 100644
--- a/samples/cpp/video_homography.cpp
+++ b/samples/cpp/video_homography.cpp
@@ -152,7 +152,7 @@ int main(int ac, char ** av)
 
     Mat train_desc, query_desc;
     const int DESIRED_FTRS = 500;
-    GridAdaptedFeatureDetector detector(new FastFeatureDetector(10, true), DESIRED_FTRS, 4, 4);
+    GridAdaptedFeatureDetector detector(makePtr<FastFeatureDetector>(10, true), DESIRED_FTRS, 4, 4);
 
     Mat H_prev = Mat::eye(3, 3, CV_32FC1);
     for (;;)
diff --git a/samples/cpp/videostab.cpp b/samples/cpp/videostab.cpp
index 21606d495..01cdcf37f 100644
--- a/samples/cpp/videostab.cpp
+++ b/samples/cpp/videostab.cpp
@@ -193,7 +193,7 @@ public:
 
     virtual Ptr<ImageMotionEstimatorBase> build()
     {
-        MotionEstimatorRansacL2 *est = new MotionEstimatorRansacL2(motionModel(arg(prefix + "model")));
+        Ptr<MotionEstimatorRansacL2> est = makePtr<MotionEstimatorRansacL2>(motionModel(arg(prefix + "model")));
 
         RansacParams ransac = est->ransacParams();
         if (arg(prefix + "subset") != "auto")
@@ -205,10 +205,10 @@ public:
 
         est->setMinInlierRatio(argf(prefix + "min-inlier-ratio"));
 
-        Ptr<IOutlierRejector> outlierRejector = new NullOutlierRejector();
+        Ptr<IOutlierRejector> outlierRejector = makePtr<NullOutlierRejector>();
         if (arg(prefix + "local-outlier-rejection") == "yes")
         {
-            TranslationBasedLocalOutlierRejector *tblor = new TranslationBasedLocalOutlierRejector();
+            Ptr<TranslationBasedLocalOutlierRejector> tblor = makePtr<TranslationBasedLocalOutlierRejector>();
             RansacParams ransacParams = tblor->ransacParams();
             if (arg(prefix + "thresh") != "auto")
                 ransacParams.thresh = argf(prefix + "thresh");
@@ -219,14 +219,14 @@ public:
 #if defined(HAVE_OPENCV_GPUIMGPROC) && defined(HAVE_OPENCV_GPU) && defined(HAVE_OPENCV_GPUOPTFLOW)
         if (gpu)
         {
-            KeypointBasedMotionEstimatorGpu *kbest = new KeypointBasedMotionEstimatorGpu(est);
+            Ptr<KeypointBasedMotionEstimatorGpu> kbest = makePtr<KeypointBasedMotionEstimatorGpu>(est);
             kbest->setOutlierRejector(outlierRejector);
             return kbest;
         }
 #endif
 
-        KeypointBasedMotionEstimator *kbest = new KeypointBasedMotionEstimator(est);
-        kbest->setDetector(new GoodFeaturesToTrackDetector(argi(prefix + "nkps")));
+        Ptr<KeypointBasedMotionEstimator> kbest = makePtr<KeypointBasedMotionEstimator>(est);
+        kbest->setDetector(makePtr<GoodFeaturesToTrackDetector>(argi(prefix + "nkps")));
         kbest->setOutlierRejector(outlierRejector);
         return kbest;
     }
@@ -244,12 +244,12 @@ public:
 
     virtual Ptr<ImageMotionEstimatorBase> build()
     {
-        MotionEstimatorL1 *est = new MotionEstimatorL1(motionModel(arg(prefix + "model")));
+        Ptr<MotionEstimatorL1> est = makePtr<MotionEstimatorL1>(motionModel(arg(prefix + "model")));
 
-        Ptr<IOutlierRejector> outlierRejector = new NullOutlierRejector();
+        Ptr<IOutlierRejector> outlierRejector = makePtr<NullOutlierRejector>();
         if (arg(prefix + "local-outlier-rejection") == "yes")
         {
-            TranslationBasedLocalOutlierRejector *tblor = new TranslationBasedLocalOutlierRejector();
+            Ptr<TranslationBasedLocalOutlierRejector> tblor = makePtr<TranslationBasedLocalOutlierRejector>();
             RansacParams ransacParams = tblor->ransacParams();
             if (arg(prefix + "thresh") != "auto")
                 ransacParams.thresh = argf(prefix + "thresh");
@@ -260,14 +260,14 @@ public:
 #if defined(HAVE_OPENCV_GPUIMGPROC) && defined(HAVE_OPENCV_GPU) && defined(HAVE_OPENCV_GPUOPTFLOW)
         if (gpu)
         {
-            KeypointBasedMotionEstimatorGpu *kbest = new KeypointBasedMotionEstimatorGpu(est);
+            Ptr<KeypointBasedMotionEstimatorGpu> kbest = makePtr<KeypointBasedMotionEstimatorGpu>(est);
             kbest->setOutlierRejector(outlierRejector);
             return kbest;
         }
 #endif
 
-        KeypointBasedMotionEstimator *kbest = new KeypointBasedMotionEstimator(est);
-        kbest->setDetector(new GoodFeaturesToTrackDetector(argi(prefix + "nkps")));
+        Ptr<KeypointBasedMotionEstimator> kbest = makePtr<KeypointBasedMotionEstimator>(est);
+        kbest->setDetector(makePtr<GoodFeaturesToTrackDetector>(argi(prefix + "nkps")));
         kbest->setOutlierRejector(outlierRejector);
         return kbest;
     }
@@ -363,7 +363,7 @@ int main(int argc, const char **argv)
 
         // get source video parameters
 
-        VideoFileSource *source = new VideoFileSource(inputPath);
+        Ptr<VideoFileSource> source = makePtr<VideoFileSource>(inputPath);
         cout << "frame count (rough): " << source->count() << endl;
         if (arg("fps") == "auto")
             outputFps = source->fps();
@@ -374,15 +374,15 @@ int main(int argc, const char **argv)
 
         Ptr<IMotionEstimatorBuilder> motionEstBuilder;
         if (arg("lin-prog-motion-est") == "yes")
-            motionEstBuilder = new MotionEstimatorL1Builder(cmd, arg("gpu") == "yes");
+            motionEstBuilder.reset(new MotionEstimatorL1Builder(cmd, arg("gpu") == "yes"));
         else
-            motionEstBuilder = new MotionEstimatorRansacL2Builder(cmd, arg("gpu") == "yes");
+            motionEstBuilder.reset(new MotionEstimatorRansacL2Builder(cmd, arg("gpu") == "yes"));
 
         Ptr<IMotionEstimatorBuilder> wsMotionEstBuilder;
         if (arg("ws-lp") == "yes")
-            wsMotionEstBuilder = new MotionEstimatorL1Builder(cmd, arg("gpu") == "yes", "ws-");
+            wsMotionEstBuilder.reset(new MotionEstimatorL1Builder(cmd, arg("gpu") == "yes", "ws-"));
         else
-            wsMotionEstBuilder = new MotionEstimatorRansacL2Builder(cmd, arg("gpu") == "yes", "ws-");
+            wsMotionEstBuilder.reset(new MotionEstimatorRansacL2Builder(cmd, arg("gpu") == "yes", "ws-"));
 
         // determine whether we must use one pass or two pass stabilizer
         bool isTwoPass =
@@ -400,7 +400,7 @@ int main(int argc, const char **argv)
 
             if (arg("lin-prog-stab") == "yes")
             {
-                LpMotionStabilizer *stab = new LpMotionStabilizer();
+                Ptr<LpMotionStabilizer> stab = makePtr<LpMotionStabilizer>();
                 stab->setFrameSize(Size(source->width(), source->height()));
                 stab->setTrimRatio(arg("lps-trim-ratio") == "auto" ? argf("trim-ratio") : argf("lps-trim-ratio"));
                 stab->setWeight1(argf("lps-w1"));
@@ -410,18 +410,18 @@ int main(int argc, const char **argv)
                 twoPassStabilizer->setMotionStabilizer(stab);
             }
             else if (arg("stdev") == "auto")
-                twoPassStabilizer->setMotionStabilizer(new GaussianMotionFilter(argi("radius")));
+                twoPassStabilizer->setMotionStabilizer(makePtr<GaussianMotionFilter>(argi("radius")));
             else
-                twoPassStabilizer->setMotionStabilizer(new GaussianMotionFilter(argi("radius"), argf("stdev")));
+                twoPassStabilizer->setMotionStabilizer(makePtr<GaussianMotionFilter>(argi("radius"), argf("stdev")));
 
             // init wobble suppressor if necessary
 
             if (arg("wobble-suppress") == "yes")
             {
-                MoreAccurateMotionWobbleSuppressorBase *ws = new MoreAccurateMotionWobbleSuppressor();
+                Ptr<MoreAccurateMotionWobbleSuppressorBase> ws = makePtr<MoreAccurateMotionWobbleSuppressor>();
                 if (arg("gpu") == "yes")
 #ifdef HAVE_OPENCV_GPU
-                    ws = new MoreAccurateMotionWobbleSuppressorGpu();
+                    ws = makePtr<MoreAccurateMotionWobbleSuppressorGpu>();
 #else
                     throw runtime_error("OpenCV is built without GPU support");
 #endif
@@ -433,12 +433,12 @@ int main(int argc, const char **argv)
                 MotionModel model = ws->motionEstimator()->motionModel();
                 if (arg("load-motions2") != "no")
                 {
-                    ws->setMotionEstimator(new FromFileMotionReader(arg("load-motions2")));
+                    ws->setMotionEstimator(makePtr<FromFileMotionReader>(arg("load-motions2")));
                     ws->motionEstimator()->setMotionModel(model);
                 }
                 if (arg("save-motions2") != "no")
                 {
-                    ws->setMotionEstimator(new ToFileMotionWriter(arg("save-motions2"), ws->motionEstimator()));
+                    ws->setMotionEstimator(makePtr<ToFileMotionWriter>(arg("save-motions2"), ws->motionEstimator()));
                     ws->motionEstimator()->setMotionModel(model);
                 }
             }
@@ -450,26 +450,26 @@ int main(int argc, const char **argv)
             OnePassStabilizer *onePassStabilizer = new OnePassStabilizer();
             stabilizer = onePassStabilizer;
             if (arg("stdev") == "auto")
-                onePassStabilizer->setMotionFilter(new GaussianMotionFilter(argi("radius")));
+                onePassStabilizer->setMotionFilter(makePtr<GaussianMotionFilter>(argi("radius")));
             else
-                onePassStabilizer->setMotionFilter(new GaussianMotionFilter(argi("radius"), argf("stdev")));
+                onePassStabilizer->setMotionFilter(makePtr<GaussianMotionFilter>(argi("radius"), argf("stdev")));
         }
 
         stabilizer->setFrameSource(source);
         stabilizer->setMotionEstimator(motionEstBuilder->build());
 
         // cast stabilizer to simple frame source interface to read stabilized frames
-        stabilizedFrames = dynamic_cast<IFrameSource*>(stabilizer);
+        stabilizedFrames.reset(dynamic_cast<IFrameSource*>(stabilizer));
 
         MotionModel model = stabilizer->motionEstimator()->motionModel();
         if (arg("load-motions") != "no")
         {
-            stabilizer->setMotionEstimator(new FromFileMotionReader(arg("load-motions")));
+            stabilizer->setMotionEstimator(makePtr<FromFileMotionReader>(arg("load-motions")));
             stabilizer->motionEstimator()->setMotionModel(model);
         }
         if (arg("save-motions") != "no")
         {
-            stabilizer->setMotionEstimator(new ToFileMotionWriter(arg("save-motions"), stabilizer->motionEstimator()));
+            stabilizer->setMotionEstimator(makePtr<ToFileMotionWriter>(arg("save-motions"), stabilizer->motionEstimator()));
             stabilizer->motionEstimator()->setMotionModel(model);
         }
 
@@ -478,7 +478,7 @@ int main(int argc, const char **argv)
         // init deblurer
         if (arg("deblur") == "yes")
         {
-            WeightingDeblurer *deblurer = new WeightingDeblurer();
+            Ptr<WeightingDeblurer> deblurer = makePtr<WeightingDeblurer>();
             deblurer->setRadius(argi("radius"));
             deblurer->setSensitivity(argf("deblur-sens"));
             stabilizer->setDeblurer(deblurer);
@@ -503,22 +503,22 @@ int main(int argc, const char **argv)
         Ptr<InpainterBase> inpainters_(inpainters);
         if (arg("mosaic") == "yes")
         {
-            ConsistentMosaicInpainter *inp = new ConsistentMosaicInpainter();
+            Ptr<ConsistentMosaicInpainter> inp = makePtr<ConsistentMosaicInpainter>();
             inp->setStdevThresh(argf("mosaic-stdev"));
             inpainters->pushBack(inp);
         }
         if (arg("motion-inpaint") == "yes")
         {
-            MotionInpainter *inp = new MotionInpainter();
+            Ptr<MotionInpainter> inp = makePtr<MotionInpainter>();
             inp->setDistThreshold(argf("mi-dist-thresh"));
             inpainters->pushBack(inp);
         }
         if (arg("color-inpaint") == "average")
-            inpainters->pushBack(new ColorAverageInpainter());
+            inpainters->pushBack(makePtr<ColorAverageInpainter>());
         else if (arg("color-inpaint") == "ns")
-            inpainters->pushBack(new ColorInpainter(INPAINT_NS, argd("ci-radius")));
+            inpainters->pushBack(makePtr<ColorInpainter>(int(INPAINT_NS), argd("ci-radius")));
         else if (arg("color-inpaint") == "telea")
-            inpainters->pushBack(new ColorInpainter(INPAINT_TELEA, argd("ci-radius")));
+            inpainters->pushBack(makePtr<ColorInpainter>(int(INPAINT_TELEA), argd("ci-radius")));
         else if (arg("color-inpaint") != "no")
             throw runtime_error("unknown color inpainting method: " + arg("color-inpaint"));
         if (!inpainters->empty())
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index a14c5907a..b953627b1 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -5,7 +5,6 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
                                      opencv_gpufeatures2d opencv_gpuoptflow opencv_gpubgsegm
                                      opencv_gpustereo opencv_gpulegacy)
-
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 
 if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
@@ -32,6 +31,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories(${CUDA_INCLUDE_DIRS})
   endif()
 
+  if(HAVE_OPENCL)
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/ocl/include")
+  endif()
+
   if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function")
   endif()
@@ -44,6 +47,11 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     add_executable(${the_target} ${srcs})
 
     target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
+
+    if(HAVE_CUDA)
+        target_link_libraries(${the_target} ${CUDA_CUDA_LIBRARY})
+    endif()
+
     if(HAVE_opencv_nonfree)
       target_link_libraries(${the_target} opencv_nonfree)
     endif()
@@ -51,6 +59,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
       target_link_libraries(${the_target} opencv_gpucodec)
     endif()
 
+    if(HAVE_OPENCL)
+      target_link_libraries(${the_target} opencv_ocl)
+    endif()
+
     set_target_properties(${the_target} PROPERTIES
       OUTPUT_NAME "${project}-example-${name}"
       PROJECT_LABEL "(EXAMPLE_${project_upper}) ${name}")
@@ -84,4 +96,3 @@ if (INSTALL_C_EXAMPLES AND NOT WIN32)
           DESTINATION share/OpenCV/samples/${project}
           PERMISSIONS OWNER_READ GROUP_READ WORLD_READ)
 endif()
-
diff --git a/samples/gpu/bgfg_segm.cpp b/samples/gpu/bgfg_segm.cpp
index 4c2d8dc18..1b8e53271 100644
--- a/samples/gpu/bgfg_segm.cpp
+++ b/samples/gpu/bgfg_segm.cpp
@@ -18,10 +18,10 @@ using namespace cv::gpu;
 
 enum Method
 {
-    FGD_STAT,
     MOG,
     MOG2,
-    GMG
+    GMG,
+    FGD_STAT
 };
 
 int main(int argc, const char** argv)
@@ -29,7 +29,7 @@ int main(int argc, const char** argv)
     cv::CommandLineParser cmd(argc, argv,
         "{ c camera |             | use camera }"
         "{ f file   | 768x576.avi | input video file }"
-        "{ m method | mog         | method (fgd, mog, mog2, gmg) }"
+        "{ m method | mog         | method (mog, mog2, gmg, fgd) }"
         "{ h help   |             | print help message }");
 
     if (cmd.has("help") || !cmd.check())
@@ -43,18 +43,18 @@ int main(int argc, const char** argv)
     string file = cmd.get<string>("file");
     string method = cmd.get<string>("method");
 
-    if (method != "fgd"
-        && method != "mog"
+    if (method != "mog"
         && method != "mog2"
-        && method != "gmg")
+        && method != "gmg"
+        && method != "fgd")
     {
         cerr << "Incorrect method" << endl;
         return -1;
     }
 
-    Method m = method == "fgd" ? FGD_STAT :
-               method == "mog" ? MOG :
+    Method m = method == "mog" ? MOG :
                method == "mog2" ? MOG2 :
+               method == "fgd" ? FGD_STAT :
                                   GMG;
 
     VideoCapture cap;
@@ -75,11 +75,10 @@ int main(int argc, const char** argv)
 
     GpuMat d_frame(frame);
 
-    FGDStatModel fgd_stat;
-    MOG_GPU mog;
-    MOG2_GPU mog2;
-    GMG_GPU gmg;
-    gmg.numInitializationFrames = 40;
+    Ptr<BackgroundSubtractor> mog = gpu::createBackgroundSubtractorMOG();
+    Ptr<BackgroundSubtractor> mog2 = gpu::createBackgroundSubtractorMOG2();
+    Ptr<BackgroundSubtractor> gmg = gpu::createBackgroundSubtractorGMG(40);
+    Ptr<BackgroundSubtractor> fgd = gpu::createBackgroundSubtractorFGD();
 
     GpuMat d_fgmask;
     GpuMat d_fgimg;
@@ -91,20 +90,20 @@ int main(int argc, const char** argv)
 
     switch (m)
     {
-    case FGD_STAT:
-        fgd_stat.create(d_frame);
-        break;
-
     case MOG:
-        mog(d_frame, d_fgmask, 0.01f);
+        mog->apply(d_frame, d_fgmask, 0.01);
         break;
 
     case MOG2:
-        mog2(d_frame, d_fgmask);
+        mog2->apply(d_frame, d_fgmask);
         break;
 
     case GMG:
-        gmg.initialize(d_frame.size());
+        gmg->apply(d_frame, d_fgmask);
+        break;
+
+    case FGD_STAT:
+        fgd->apply(d_frame, d_fgmask);
         break;
     }
 
@@ -128,24 +127,23 @@ int main(int argc, const char** argv)
         //update the model
         switch (m)
         {
-        case FGD_STAT:
-            fgd_stat.update(d_frame);
-            d_fgmask = fgd_stat.foreground;
-            d_bgimg = fgd_stat.background;
-            break;
-
         case MOG:
-            mog(d_frame, d_fgmask, 0.01f);
-            mog.getBackgroundImage(d_bgimg);
+            mog->apply(d_frame, d_fgmask, 0.01);
+            mog->getBackgroundImage(d_bgimg);
             break;
 
         case MOG2:
-            mog2(d_frame, d_fgmask);
-            mog2.getBackgroundImage(d_bgimg);
+            mog2->apply(d_frame, d_fgmask);
+            mog2->getBackgroundImage(d_bgimg);
             break;
 
         case GMG:
-            gmg(d_frame, d_fgmask);
+            gmg->apply(d_frame, d_fgmask);
+            break;
+
+        case FGD_STAT:
+            fgd->apply(d_frame, d_fgmask);
+            fgd->getBackgroundImage(d_bgimg);
             break;
         }
 
diff --git a/samples/gpu/brox_optical_flow.cpp b/samples/gpu/brox_optical_flow.cpp
index 1d92e4b3f..1fb85c903 100644
--- a/samples/gpu/brox_optical_flow.cpp
+++ b/samples/gpu/brox_optical_flow.cpp
@@ -85,8 +85,8 @@ int main(int argc, const char* argv[])
 
         Mat frame0Gray, frame1Gray;
 
-        cvtColor(frame0Color, frame0Gray, COLOR_BGR2GRAY);
-        cvtColor(frame1Color, frame1Gray, COLOR_BGR2GRAY);
+        cv::cvtColor(frame0Color, frame0Gray, COLOR_BGR2GRAY);
+        cv::cvtColor(frame1Color, frame1Gray, COLOR_BGR2GRAY);
 
         GpuMat d_frame0(frame0Gray);
         GpuMat d_frame1(frame1Gray);
diff --git a/samples/gpu/cascadeclassifier.cpp b/samples/gpu/cascadeclassifier.cpp
index 271e7dca8..e27186c27 100644
--- a/samples/gpu/cascadeclassifier.cpp
+++ b/samples/gpu/cascadeclassifier.cpp
@@ -24,12 +24,11 @@ static void help()
 }
 
 
-template<class T>
-void convertAndResize(const T& src, T& gray, T& resized, double scale)
+static void convertAndResize(const Mat& src, Mat& gray, Mat& resized, double scale)
 {
     if (src.channels() == 3)
     {
-        cvtColor( src, gray, COLOR_BGR2GRAY );
+        cv::cvtColor( src, gray, COLOR_BGR2GRAY );
     }
     else
     {
@@ -40,7 +39,30 @@ void convertAndResize(const T& src, T& gray, T& resized, double scale)
 
     if (scale != 1)
     {
-        resize(gray, resized, sz);
+        cv::resize(gray, resized, sz);
+    }
+    else
+    {
+        resized = gray;
+    }
+}
+
+static void convertAndResize(const GpuMat& src, GpuMat& gray, GpuMat& resized, double scale)
+{
+    if (src.channels() == 3)
+    {
+        cv::gpu::cvtColor( src, gray, COLOR_BGR2GRAY );
+    }
+    else
+    {
+        gray = src;
+    }
+
+    Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));
+
+    if (scale != 1)
+    {
+        cv::gpu::resize(gray, resized, sz);
     }
     else
     {
@@ -272,7 +294,7 @@ int main(int argc, const char *argv[])
         }
         cout << endl;
 
-        cvtColor(resized_cpu, frameDisp, COLOR_GRAY2BGR);
+        cv::cvtColor(resized_cpu, frameDisp, COLOR_GRAY2BGR);
         displayState(frameDisp, helpScreen, useGPU, findLargestObject, filterRects, fps);
         imshow("result", frameDisp);
 
diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp
index 1dfe2123c..e78f7e54f 100644
--- a/samples/gpu/driver_api_multi.cpp
+++ b/samples/gpu/driver_api_multi.cpp
@@ -86,8 +86,8 @@ int main()
         if (!dev_info.isCompatible())
         {
             std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.major()
-                 << dev_info.minor() << "\n";
+                 << dev_info.name() << ", CC " << dev_info.majorVersion()
+                 << dev_info.minorVersion() << "\n";
             return -1;
         }
     }
diff --git a/samples/gpu/driver_api_stereo_multi.cpp b/samples/gpu/driver_api_stereo_multi.cpp
index c49fc8564..d40c20c1e 100644
--- a/samples/gpu/driver_api_stereo_multi.cpp
+++ b/samples/gpu/driver_api_stereo_multi.cpp
@@ -116,8 +116,8 @@ int main(int argc, char** argv)
         if (!dev_info.isCompatible())
         {
             std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.major()
-                 << dev_info.minor() << "\n";
+                 << dev_info.name() << ", CC " << dev_info.majorVersion()
+                 << dev_info.minorVersion() << "\n";
             return -1;
         }
     }
diff --git a/samples/gpu/generalized_hough.cpp b/samples/gpu/generalized_hough.cpp
index c8fae7c41..186308525 100644
--- a/samples/gpu/generalized_hough.cpp
+++ b/samples/gpu/generalized_hough.cpp
@@ -5,13 +5,12 @@
 #include "opencv2/core.hpp"
 #include "opencv2/core/utility.hpp"
 #include "opencv2/imgproc.hpp"
-#include "opencv2/gpu.hpp"
+#include "opencv2/gpuimgproc.hpp"
 #include "opencv2/highgui.hpp"
 #include "opencv2/contrib.hpp"
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
 
 static Mat loadImage(const string& name)
 {
@@ -29,8 +28,7 @@ int main(int argc, const char* argv[])
     CommandLineParser cmd(argc, argv,
         "{ image i        | pic1.png  | input image }"
         "{ template t     | templ.png | template image }"
-        "{ scale s        |           | estimate scale }"
-        "{ rotation r     |           | estimate rotation }"
+        "{ full           |           | estimate scale and rotation }"
         "{ gpu            |           | use gpu version }"
         "{ minDist        | 100       | minimum distance between the centers of the detected objects }"
         "{ levels         | 360       | R-Table levels }"
@@ -45,7 +43,7 @@ int main(int argc, const char* argv[])
         "{ minAngle       | 0         | minimal rotation angle to detect in degrees }"
         "{ maxAngle       | 360       | maximal rotation angle to detect in degrees }"
         "{ angleStep      | 1         | angle step in degrees }"
-        "{ maxSize        | 1000      | maximal size of inner buffers }"
+        "{ maxBufSize     | 1000      | maximal size of inner buffers }"
         "{ help h ?       |           | print help message }"
     );
 
@@ -59,8 +57,7 @@ int main(int argc, const char* argv[])
 
     const string templName = cmd.get<string>("template");
     const string imageName = cmd.get<string>("image");
-    const bool estimateScale = cmd.has("scale");
-    const bool estimateRotation = cmd.has("rotation");
+    const bool full = cmd.has("full");
     const bool useGpu = cmd.has("gpu");
     const double minDist = cmd.get<double>("minDist");
     const int levels = cmd.get<int>("levels");
@@ -75,7 +72,7 @@ int main(int argc, const char* argv[])
     const double minAngle = cmd.get<double>("minAngle");
     const double maxAngle = cmd.get<double>("maxAngle");
     const double angleStep = cmd.get<double>("angleStep");
-    const int maxSize = cmd.get<int>("maxSize");
+    const int maxBufSize = cmd.get<int>("maxBufSize");
 
     if (!cmd.check())
     {
@@ -86,93 +83,69 @@ int main(int argc, const char* argv[])
     Mat templ = loadImage(templName);
     Mat image = loadImage(imageName);
 
-    int method = cv::GeneralizedHough::GHT_POSITION;
-    if (estimateScale)
-        method += cv::GeneralizedHough::GHT_SCALE;
-    if (estimateRotation)
-        method += cv::GeneralizedHough::GHT_ROTATION;
+    Ptr<GeneralizedHough> alg;
+
+    if (!full)
+    {
+        Ptr<GeneralizedHoughBallard> ballard = useGpu ? gpu::createGeneralizedHoughBallard() : createGeneralizedHoughBallard();
+
+        ballard->setMinDist(minDist);
+        ballard->setLevels(levels);
+        ballard->setDp(dp);
+        ballard->setMaxBufferSize(maxBufSize);
+        ballard->setVotesThreshold(votesThreshold);
+
+        alg = ballard;
+    }
+    else
+    {
+        Ptr<GeneralizedHoughGuil> guil = useGpu ? gpu::createGeneralizedHoughGuil() : createGeneralizedHoughGuil();
+
+        guil->setMinDist(minDist);
+        guil->setLevels(levels);
+        guil->setDp(dp);
+        guil->setMaxBufferSize(maxBufSize);
+
+        guil->setMinAngle(minAngle);
+        guil->setMaxAngle(maxAngle);
+        guil->setAngleStep(angleStep);
+        guil->setAngleThresh(angleThresh);
+
+        guil->setMinScale(minScale);
+        guil->setMaxScale(maxScale);
+        guil->setScaleStep(scaleStep);
+        guil->setScaleThresh(scaleThresh);
+
+        guil->setPosThresh(posThresh);
+
+        alg = guil;
+    }
 
     vector<Vec4f> position;
-    cv::TickMeter tm;
+    TickMeter tm;
 
     if (useGpu)
     {
-        GpuMat d_templ(templ);
-        GpuMat d_image(image);
-        GpuMat d_position;
+        gpu::GpuMat d_templ(templ);
+        gpu::GpuMat d_image(image);
+        gpu::GpuMat d_position;
 
-        Ptr<GeneralizedHough_GPU> d_hough = GeneralizedHough_GPU::create(method);
-        d_hough->set("minDist", minDist);
-        d_hough->set("levels", levels);
-        d_hough->set("dp", dp);
-        d_hough->set("maxSize", maxSize);
-        if (estimateScale && estimateRotation)
-        {
-            d_hough->set("angleThresh", angleThresh);
-            d_hough->set("scaleThresh", scaleThresh);
-            d_hough->set("posThresh", posThresh);
-        }
-        else
-        {
-            d_hough->set("votesThreshold", votesThreshold);
-        }
-        if (estimateScale)
-        {
-            d_hough->set("minScale", minScale);
-            d_hough->set("maxScale", maxScale);
-            d_hough->set("scaleStep", scaleStep);
-        }
-        if (estimateRotation)
-        {
-            d_hough->set("minAngle", minAngle);
-            d_hough->set("maxAngle", maxAngle);
-            d_hough->set("angleStep", angleStep);
-        }
-
-        d_hough->setTemplate(d_templ);
+        alg->setTemplate(d_templ);
 
         tm.start();
 
-        d_hough->detect(d_image, d_position);
-        d_hough->download(d_position, position);
+        alg->detect(d_image, d_position);
+        d_position.download(position);
 
         tm.stop();
     }
     else
     {
-        Ptr<GeneralizedHough> hough = GeneralizedHough::create(method);
-        hough->set("minDist", minDist);
-        hough->set("levels", levels);
-        hough->set("dp", dp);
-        if (estimateScale && estimateRotation)
-        {
-            hough->set("angleThresh", angleThresh);
-            hough->set("scaleThresh", scaleThresh);
-            hough->set("posThresh", posThresh);
-            hough->set("maxSize", maxSize);
-        }
-        else
-        {
-            hough->set("votesThreshold", votesThreshold);
-        }
-        if (estimateScale)
-        {
-            hough->set("minScale", minScale);
-            hough->set("maxScale", maxScale);
-            hough->set("scaleStep", scaleStep);
-        }
-        if (estimateRotation)
-        {
-            hough->set("minAngle", minAngle);
-            hough->set("maxAngle", maxAngle);
-            hough->set("angleStep", angleStep);
-        }
-
-        hough->setTemplate(templ);
+        alg->setTemplate(templ);
 
         tm.start();
 
-        hough->detect(image, position);
+        alg->detect(image, position);
 
         tm.stop();
     }
@@ -181,7 +154,7 @@ int main(int argc, const char* argv[])
     cout << "Detection time : " << tm.getTimeMilli() << " ms" << endl;
 
     Mat out;
-    cvtColor(image, out, COLOR_GRAY2BGR);
+    cv::cvtColor(image, out, COLOR_GRAY2BGR);
 
     for (size_t i = 0; i < position.size(); ++i)
     {
diff --git a/samples/gpu/hog.cpp b/samples/gpu/hog.cpp
index a6a1c0b2a..c90cc2333 100644
--- a/samples/gpu/hog.cpp
+++ b/samples/gpu/hog.cpp
@@ -458,4 +458,3 @@ inline string App::workFps() const
     ss << work_fps;
     return ss.str();
 }
-
diff --git a/samples/gpu/houghlines.cpp b/samples/gpu/houghlines.cpp
index f53724ecc..14245e5fd 100644
--- a/samples/gpu/houghlines.cpp
+++ b/samples/gpu/houghlines.cpp
@@ -31,17 +31,17 @@ int main(int argc, const char* argv[])
     }
 
     Mat mask;
-    Canny(src, mask, 100, 200, 3);
+    cv::Canny(src, mask, 100, 200, 3);
 
     Mat dst_cpu;
-    cvtColor(mask, dst_cpu, COLOR_GRAY2BGR);
+    cv::cvtColor(mask, dst_cpu, COLOR_GRAY2BGR);
     Mat dst_gpu = dst_cpu.clone();
 
     vector<Vec4i> lines_cpu;
     {
         const int64 start = getTickCount();
 
-        HoughLinesP(mask, lines_cpu, 1, CV_PI / 180, 50, 60, 5);
+        cv::HoughLinesP(mask, lines_cpu, 1, CV_PI / 180, 50, 60, 5);
 
         const double timeSec = (getTickCount() - start) / getTickFrequency();
         cout << "CPU Time : " << timeSec * 1000 << " ms" << endl;
@@ -56,11 +56,12 @@ int main(int argc, const char* argv[])
 
     GpuMat d_src(mask);
     GpuMat d_lines;
-    HoughLinesBuf d_buf;
     {
         const int64 start = getTickCount();
 
-        gpu::HoughLinesP(d_src, d_lines, d_buf, 1.0f, (float) (CV_PI / 180.0f), 50, 5);
+        Ptr<gpu::HoughSegmentDetector> hough = gpu::createHoughSegmentDetector(1.0f, (float) (CV_PI / 180.0f), 50, 5);
+
+        hough->detect(d_src, d_lines);
 
         const double timeSec = (getTickCount() - start) / getTickFrequency();
         cout << "GPU Time : " << timeSec * 1000 << " ms" << endl;
@@ -87,4 +88,3 @@ int main(int argc, const char* argv[])
 
     return 0;
 }
-
diff --git a/samples/gpu/multi.cpp b/samples/gpu/multi.cpp
index c6e6aa398..b83fd2ce4 100644
--- a/samples/gpu/multi.cpp
+++ b/samples/gpu/multi.cpp
@@ -62,8 +62,8 @@ int main()
         if (!dev_info.isCompatible())
         {
             std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.major()
-                 << dev_info.minor() << "\n";
+                 << dev_info.name() << ", CC " << dev_info.majorVersion()
+                 << dev_info.minorVersion() << "\n";
             return -1;
         }
     }
diff --git a/samples/gpu/performance/performance.cpp b/samples/gpu/performance/performance.cpp
index 42fd978c1..8af0b3d0d 100644
--- a/samples/gpu/performance/performance.cpp
+++ b/samples/gpu/performance/performance.cpp
@@ -191,7 +191,7 @@ int main(int argc, const char* argv[])
     DeviceInfo dev_info(device);
     if (!dev_info.isCompatible())
     {
-        cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.major() << '.' << dev_info.minor() << endl;
+        cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
         return -1;
     }
     setDevice(device);
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index 4333b7625..a3df43c6e 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -17,24 +17,16 @@
 using namespace std;
 using namespace cv;
 
-static void InitMatchTemplate()
-{
-    Mat src; gen(src, 500, 500, CV_32F, 0, 1);
-    Mat templ; gen(templ, 500, 500, CV_32F, 0, 1);
-    gpu::GpuMat d_src(src), d_templ(templ), d_dst;
-    gpu::matchTemplate(d_src, d_templ, d_dst, TM_CCORR);
-}
-
 
 TEST(matchTemplate)
 {
-    InitMatchTemplate();
-
     Mat src, templ, dst;
     gen(src, 3000, 3000, CV_32F, 0, 1);
 
     gpu::GpuMat d_src(src), d_templ, d_dst;
 
+    Ptr<gpu::TemplateMatching> alg = gpu::createTemplateMatching(src.type(), TM_CCORR);
+
     for (int templ_size = 5; templ_size < 200; templ_size *= 5)
     {
         SUBTEST << src.cols << 'x' << src.rows << ", 32FC1" << ", templ " << templ_size << 'x' << templ_size << ", CCORR";
@@ -47,10 +39,10 @@ TEST(matchTemplate)
         CPU_OFF;
 
         d_templ.upload(templ);
-        gpu::matchTemplate(d_src, d_templ, d_dst, TM_CCORR);
+        alg->match(d_src, d_templ, d_dst);
 
         GPU_ON;
-        gpu::matchTemplate(d_src, d_templ, d_dst, TM_CCORR);
+        alg->match(d_src, d_templ, d_dst);
         GPU_OFF;
     }
 }
@@ -176,10 +168,12 @@ TEST(cornerHarris)
 
         d_src.upload(src);
 
-        gpu::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT101);
+        Ptr<gpu::CornernessCriteria> harris = gpu::createHarrisCorner(src.type(), 5, 7, 0.1, BORDER_REFLECT101);
+
+        harris->compute(d_src, d_dst);
 
         GPU_ON;
-        gpu::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT101);
+        harris->compute(d_src, d_dst);
         GPU_OFF;
     }
 }
@@ -1047,13 +1041,12 @@ TEST(equalizeHist)
 
         gpu::GpuMat d_src(src);
         gpu::GpuMat d_dst;
-        gpu::GpuMat d_hist;
         gpu::GpuMat d_buf;
 
-        gpu::equalizeHist(d_src, d_dst, d_hist, d_buf);
+        gpu::equalizeHist(d_src, d_dst, d_buf);
 
         GPU_ON;
-        gpu::equalizeHist(d_src, d_dst, d_hist, d_buf);
+        gpu::equalizeHist(d_src, d_dst, d_buf);
         GPU_OFF;
     }
 }
@@ -1073,12 +1066,13 @@ TEST(Canny)
 
     gpu::GpuMat d_img(img);
     gpu::GpuMat d_edges;
-    gpu::CannyBuf d_buf;
 
-    gpu::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
+    Ptr<gpu::CannyEdgeDetector> canny = gpu::createCannyEdgeDetector(50.0, 100.0);
+
+    canny->detect(d_img, d_edges);
 
     GPU_ON;
-    gpu::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
+    canny->detect(d_img, d_edges);
     GPU_OFF;
 }
 
@@ -1172,15 +1166,15 @@ TEST(GoodFeaturesToTrack)
     goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0);
     CPU_OFF;
 
-    gpu::GoodFeaturesToTrackDetector_GPU detector(8000, 0.01, 0.0);
+    Ptr<gpu::CornersDetector> detector = gpu::createGoodFeaturesToTrackDetector(src.type(), 8000, 0.01, 0.0);
 
     gpu::GpuMat d_src(src);
     gpu::GpuMat d_pts;
 
-    detector(d_src, d_pts);
+    detector->detect(d_src, d_pts);
 
     GPU_ON;
-    detector(d_src, d_pts);
+    detector->detect(d_src, d_pts);
     GPU_OFF;
 }
 
@@ -1267,7 +1261,7 @@ TEST(FarnebackOpticalFlow)
 
 namespace cv
 {
-    template<> void Ptr<CvBGStatModel>::delete_obj()
+    template<> void DefaultDeleter<CvBGStatModel>::operator ()(CvBGStatModel* obj) const
     {
         cvReleaseBGStatModel(&obj);
     }
@@ -1277,14 +1271,14 @@ TEST(FGDStatModel)
 {
     const std::string inputFile = abspath("768x576.avi");
 
-    cv::VideoCapture cap(inputFile);
+    VideoCapture cap(inputFile);
     if (!cap.isOpened()) throw runtime_error("can't open 768x576.avi");
 
-    cv::Mat frame;
+    Mat frame;
     cap >> frame;
 
     IplImage ipl_frame = frame;
-    cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
+    Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
 
     while (!TestSystem::instance().stop())
     {
@@ -1303,8 +1297,10 @@ TEST(FGDStatModel)
 
     cap >> frame;
 
-    cv::gpu::GpuMat d_frame(frame);
-    cv::gpu::FGDStatModel d_model(d_frame);
+    gpu::GpuMat d_frame(frame), d_fgmask;
+    Ptr<BackgroundSubtractor> d_fgd = gpu::createBackgroundSubtractorFGD();
+
+    d_fgd->apply(d_frame, d_fgmask);
 
     while (!TestSystem::instance().stop())
     {
@@ -1313,7 +1309,7 @@ TEST(FGDStatModel)
 
         TestSystem::instance().gpuOn();
 
-        d_model.update(d_frame);
+        d_fgd->apply(d_frame, d_fgmask);
 
         TestSystem::instance().gpuOff();
     }
@@ -1352,10 +1348,10 @@ TEST(MOG)
     cap >> frame;
 
     cv::gpu::GpuMat d_frame(frame);
-    cv::gpu::MOG_GPU d_mog;
+    cv::Ptr<cv::BackgroundSubtractor> d_mog = cv::gpu::createBackgroundSubtractorMOG();
     cv::gpu::GpuMat d_foreground;
 
-    d_mog(d_frame, d_foreground, 0.01f);
+    d_mog->apply(d_frame, d_foreground, 0.01);
 
     while (!TestSystem::instance().stop())
     {
@@ -1364,7 +1360,7 @@ TEST(MOG)
 
         TestSystem::instance().gpuOn();
 
-        d_mog(d_frame, d_foreground, 0.01f);
+        d_mog->apply(d_frame, d_foreground, 0.01);
 
         TestSystem::instance().gpuOff();
     }
@@ -1405,13 +1401,13 @@ TEST(MOG2)
 
     cap >> frame;
 
+    cv::Ptr<cv::BackgroundSubtractor> d_mog2 = cv::gpu::createBackgroundSubtractorMOG2();
     cv::gpu::GpuMat d_frame(frame);
-    cv::gpu::MOG2_GPU d_mog2;
     cv::gpu::GpuMat d_foreground;
     cv::gpu::GpuMat d_background;
 
-    d_mog2(d_frame, d_foreground);
-    d_mog2.getBackgroundImage(d_background);
+    d_mog2->apply(d_frame, d_foreground);
+    d_mog2->getBackgroundImage(d_background);
 
     while (!TestSystem::instance().stop())
     {
@@ -1420,8 +1416,8 @@ TEST(MOG2)
 
         TestSystem::instance().gpuOn();
 
-        d_mog2(d_frame, d_foreground);
-        d_mog2.getBackgroundImage(d_background);
+        d_mog2->apply(d_frame, d_foreground);
+        d_mog2->getBackgroundImage(d_background);
 
         TestSystem::instance().gpuOff();
     }
diff --git a/samples/gpu/pyrlk_optical_flow.cpp b/samples/gpu/pyrlk_optical_flow.cpp
index 95170cc7e..08717292c 100644
--- a/samples/gpu/pyrlk_optical_flow.cpp
+++ b/samples/gpu/pyrlk_optical_flow.cpp
@@ -170,18 +170,18 @@ int main(int argc, const char* argv[])
     cout << endl;
 
     Mat frame0Gray;
-    cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+    cv::cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
     Mat frame1Gray;
-    cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
+    cv::cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
 
     // goodFeaturesToTrack
 
-    GoodFeaturesToTrackDetector_GPU detector(points, 0.01, minDist);
-
     GpuMat d_frame0Gray(frame0Gray);
     GpuMat d_prevPts;
 
-    detector(d_frame0Gray, d_prevPts);
+    Ptr<gpu::CornersDetector> detector = gpu::createGoodFeaturesToTrackDetector(d_frame0Gray.type(), points, 0.01, minDist);
+
+    detector->detect(d_frame0Gray, d_prevPts);
 
     // Sparse
 
diff --git a/samples/gpu/stereo_match.cpp b/samples/gpu/stereo_match.cpp
index a080153a6..e40447640 100644
--- a/samples/gpu/stereo_match.cpp
+++ b/samples/gpu/stereo_match.cpp
@@ -379,5 +379,3 @@ void App::handleKey(char key)
         break;
     }
 }
-
-
diff --git a/samples/gpu/stereo_multi.cpp b/samples/gpu/stereo_multi.cpp
index 1bb09b22b..430e15bd7 100644
--- a/samples/gpu/stereo_multi.cpp
+++ b/samples/gpu/stereo_multi.cpp
@@ -1,158 +1,499 @@
-/* This sample demonstrates working on one piece of data using two GPUs.
-   It splits input into two parts and processes them separately on different
-   GPUs. */
+// This sample demonstrates working on one piece of data using two GPUs.
+// It splits input into two parts and processes them separately on different GPUs.
 
-// Disable some warnings which are caused with CUDA headers
-#if defined(_MSC_VER)
-#pragma warning(disable: 4201 4408 4100)
+#ifdef WIN32
+    #define NOMINMAX
+    #include <windows.h>
+#else
+    #include <pthread.h>
+    #include <unistd.h>
 #endif
 
 #include <iostream>
-#include "cvconfig.h"
-#include "opencv2/core/core.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/gpu/gpu.hpp"
+#include <iomanip>
 
-#ifdef HAVE_TBB
-#  include "tbb/tbb_stddef.h"
-#  if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
-#    include "tbb/tbb.h"
-#    include "tbb/task.h"
-#    undef min
-#    undef max
-#  else
-#    undef HAVE_TBB
-#  endif
-#endif
-
-#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)
-
-int main()
-{
-#if !defined(HAVE_CUDA)
-    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
-#endif
-
-#if !defined(HAVE_TBB)
-    std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
-#endif
-
-    return 0;
-}
-
-#else
+#include "opencv2/core.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/contrib.hpp"
+#include "opencv2/gpustereo.hpp"
 
 using namespace std;
 using namespace cv;
 using namespace cv::gpu;
 
-struct Worker { void operator()(int device_id) const; };
+///////////////////////////////////////////////////////////
+// Thread
+// OS-specific wrappers for multi-threading
 
-// GPUs data
-GpuMat d_left[2];
-GpuMat d_right[2];
-Ptr<gpu::StereoBM> bm[2];
-GpuMat d_result[2];
-
-static void printHelp()
+#ifdef WIN32
+class Thread
 {
-    std::cout << "Usage: stereo_multi_gpu --left <image> --right <image>\n";
+    struct UserData
+    {
+        void (*func)(void* userData);
+        void* param;
+    };
+
+    static DWORD WINAPI WinThreadFunction(LPVOID lpParam)
+    {
+        UserData* userData = static_cast<UserData*>(lpParam);
+
+        userData->func(userData->param);
+
+        return 0;
+    }
+
+    UserData userData_;
+    HANDLE thread_;
+    DWORD threadId_;
+
+public:
+    Thread(void (*func)(void* userData), void* userData)
+    {
+        userData_.func = func;
+        userData_.param = userData;
+
+        thread_ = CreateThread(
+            NULL,                   // default security attributes
+            0,                      // use default stack size
+            WinThreadFunction,      // thread function name
+            &userData_,             // argument to thread function
+            0,                      // use default creation flags
+            &threadId_);            // returns the thread identifier
+    }
+
+    ~Thread()
+    {
+        CloseHandle(thread_);
+    }
+
+    void wait()
+    {
+        WaitForSingleObject(thread_, INFINITE);
+    }
+};
+#else
+class Thread
+{
+    struct UserData
+    {
+        void (*func)(void* userData);
+        void* param;
+    };
+
+    static void* PThreadFunction(void* lpParam)
+    {
+        UserData* userData = static_cast<UserData*>(lpParam);
+
+        userData->func(userData->param);
+
+        return 0;
+    }
+
+    pthread_t thread_;
+    UserData userData_;
+
+public:
+    Thread(void (*func)(void* userData), void* userData)
+    {
+        userData_.func = func;
+        userData_.param = userData;
+
+        pthread_create(&thread_, NULL, PThreadFunction, &userData_);
+    }
+
+    ~Thread()
+    {
+        pthread_detach(thread_);
+    }
+
+    void wait()
+    {
+        pthread_join(thread_, NULL);
+    }
+};
+#endif
+
+///////////////////////////////////////////////////////////
+// StereoSingleGpu
+// Run Stereo algorithm on single GPU
+
+class StereoSingleGpu
+{
+public:
+    explicit StereoSingleGpu(int deviceId = 0);
+    ~StereoSingleGpu();
+
+    void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity);
+
+private:
+    int deviceId_;
+    GpuMat d_leftFrame;
+    GpuMat d_rightFrame;
+    GpuMat d_disparity;
+    Ptr<gpu::StereoBM> d_alg;
+};
+
+StereoSingleGpu::StereoSingleGpu(int deviceId) : deviceId_(deviceId)
+{
+    gpu::setDevice(deviceId_);
+    d_alg = gpu::createStereoBM(256);
 }
 
+StereoSingleGpu::~StereoSingleGpu()
+{
+    gpu::setDevice(deviceId_);
+    d_leftFrame.release();
+    d_rightFrame.release();
+    d_disparity.release();
+    d_alg.release();
+}
+
+void StereoSingleGpu::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity)
+{
+    gpu::setDevice(deviceId_);
+    d_leftFrame.upload(leftFrame);
+    d_rightFrame.upload(rightFrame);
+    d_alg->compute(d_leftFrame, d_rightFrame, d_disparity);
+    d_disparity.download(disparity);
+}
+
+///////////////////////////////////////////////////////////
+// StereoMultiGpuThread
+// Run Stereo algorithm on two GPUs using different host threads
+
+class StereoMultiGpuThread
+{
+public:
+    StereoMultiGpuThread();
+    ~StereoMultiGpuThread();
+
+    void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity);
+
+private:
+    GpuMat d_leftFrames[2];
+    GpuMat d_rightFrames[2];
+    GpuMat d_disparities[2];
+    Ptr<gpu::StereoBM> d_algs[2];
+
+    struct StereoLaunchData
+    {
+        int deviceId;
+        Mat leftFrame;
+        Mat rightFrame;
+        Mat disparity;
+        GpuMat* d_leftFrame;
+        GpuMat* d_rightFrame;
+        GpuMat* d_disparity;
+        Ptr<gpu::StereoBM> d_alg;
+    };
+
+    static void launchGpuStereoAlg(void* userData);
+};
+
+StereoMultiGpuThread::StereoMultiGpuThread()
+{
+    gpu::setDevice(0);
+    d_algs[0] = gpu::createStereoBM(256);
+
+    gpu::setDevice(1);
+    d_algs[1] = gpu::createStereoBM(256);
+}
+
+StereoMultiGpuThread::~StereoMultiGpuThread()
+{
+    gpu::setDevice(0);
+    d_leftFrames[0].release();
+    d_rightFrames[0].release();
+    d_disparities[0].release();
+    d_algs[0].release();
+
+    gpu::setDevice(1);
+    d_leftFrames[1].release();
+    d_rightFrames[1].release();
+    d_disparities[1].release();
+    d_algs[1].release();
+}
+
+void StereoMultiGpuThread::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity)
+{
+    disparity.create(leftFrame.size(), CV_8UC1);
+
+    // Split input data onto two parts for each GPUs.
+    // We add small border for each part,
+    // because original algorithm doesn't calculate disparity on image borders.
+    // With such padding we will get output in the middle of final result.
+
+    StereoLaunchData launchDatas[2];
+
+    launchDatas[0].deviceId = 0;
+    launchDatas[0].leftFrame = leftFrame.rowRange(0, leftFrame.rows / 2 + 32);
+    launchDatas[0].rightFrame = rightFrame.rowRange(0, rightFrame.rows / 2 + 32);
+    launchDatas[0].disparity = disparity.rowRange(0, leftFrame.rows / 2);
+    launchDatas[0].d_leftFrame = &d_leftFrames[0];
+    launchDatas[0].d_rightFrame = &d_rightFrames[0];
+    launchDatas[0].d_disparity = &d_disparities[0];
+    launchDatas[0].d_alg = d_algs[0];
+
+    launchDatas[1].deviceId = 1;
+    launchDatas[1].leftFrame = leftFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows);
+    launchDatas[1].rightFrame = rightFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows);
+    launchDatas[1].disparity = disparity.rowRange(leftFrame.rows / 2, leftFrame.rows);
+    launchDatas[1].d_leftFrame = &d_leftFrames[1];
+    launchDatas[1].d_rightFrame = &d_rightFrames[1];
+    launchDatas[1].d_disparity = &d_disparities[1];
+    launchDatas[1].d_alg = d_algs[1];
+
+    Thread thread0(launchGpuStereoAlg, &launchDatas[0]);
+    Thread thread1(launchGpuStereoAlg, &launchDatas[1]);
+
+    thread0.wait();
+    thread1.wait();
+}
+
+void StereoMultiGpuThread::launchGpuStereoAlg(void* userData)
+{
+    StereoLaunchData* data = static_cast<StereoLaunchData*>(userData);
+
+    gpu::setDevice(data->deviceId);
+    data->d_leftFrame->upload(data->leftFrame);
+    data->d_rightFrame->upload(data->rightFrame);
+    data->d_alg->compute(*data->d_leftFrame, *data->d_rightFrame, *data->d_disparity);
+
+    if (data->deviceId == 0)
+        data->d_disparity->rowRange(0, data->d_disparity->rows - 32).download(data->disparity);
+    else
+        data->d_disparity->rowRange(32, data->d_disparity->rows).download(data->disparity);
+}
+
+///////////////////////////////////////////////////////////
+// StereoMultiGpuStream
+// Run Stereo algorithm on two GPUs from single host thread using async API
+
+class StereoMultiGpuStream
+{
+public:
+    StereoMultiGpuStream();
+    ~StereoMultiGpuStream();
+
+    void compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity);
+
+private:
+    GpuMat d_leftFrames[2];
+    GpuMat d_rightFrames[2];
+    GpuMat d_disparities[2];
+    Ptr<gpu::StereoBM> d_algs[2];
+    Ptr<Stream> streams[2];
+};
+
+StereoMultiGpuStream::StereoMultiGpuStream()
+{
+    gpu::setDevice(0);
+    d_algs[0] = gpu::createStereoBM(256);
+    streams[0] = makePtr<Stream>();
+
+    gpu::setDevice(1);
+    d_algs[1] = gpu::createStereoBM(256);
+    streams[1] = makePtr<Stream>();
+}
+
+StereoMultiGpuStream::~StereoMultiGpuStream()
+{
+    gpu::setDevice(0);
+    d_leftFrames[0].release();
+    d_rightFrames[0].release();
+    d_disparities[0].release();
+    d_algs[0].release();
+    streams[0].release();
+
+    gpu::setDevice(1);
+    d_leftFrames[1].release();
+    d_rightFrames[1].release();
+    d_disparities[1].release();
+    d_algs[1].release();
+    streams[1].release();
+}
+
+void StereoMultiGpuStream::compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity)
+{
+    disparity.create(leftFrame.size(), CV_8UC1);
+
+    // Split input data onto two parts for each GPUs.
+    // We add small border for each part,
+    // because original algorithm doesn't calculate disparity on image borders.
+    // With such padding we will get output in the middle of final result.
+
+    Mat leftFrameHdr = leftFrame.createMatHeader();
+    Mat rightFrameHdr = rightFrame.createMatHeader();
+    Mat disparityHdr = disparity.createMatHeader();
+    Mat disparityPart0 = disparityHdr.rowRange(0, leftFrame.rows / 2);
+    Mat disparityPart1 = disparityHdr.rowRange(leftFrame.rows / 2, leftFrame.rows);
+
+    gpu::setDevice(0);
+    d_leftFrames[0].upload(leftFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), *streams[0]);
+    d_rightFrames[0].upload(rightFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), *streams[0]);
+    d_algs[0]->compute(d_leftFrames[0], d_rightFrames[0], d_disparities[0], *streams[0]);
+    d_disparities[0].rowRange(0, leftFrame.rows / 2).download(disparityPart0, *streams[0]);
+
+    gpu::setDevice(1);
+    d_leftFrames[1].upload(leftFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), *streams[1]);
+    d_rightFrames[1].upload(rightFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), *streams[1]);
+    d_algs[1]->compute(d_leftFrames[1], d_rightFrames[1], d_disparities[1], *streams[1]);
+    d_disparities[1].rowRange(32, d_disparities[1].rows).download(disparityPart1, *streams[1]);
+
+    gpu::setDevice(0);
+    streams[0]->waitForCompletion();
+
+    gpu::setDevice(1);
+    streams[1]->waitForCompletion();
+}
+
+///////////////////////////////////////////////////////////
+// main
+
 int main(int argc, char** argv)
 {
-    if (argc < 5)
+    if (argc != 3)
     {
-        printHelp();
+        cerr << "Usage: stereo_multi_gpu <left_video> <right_video>" << endl;
         return -1;
     }
 
-    int num_devices = getCudaEnabledDeviceCount();
-    if (num_devices < 2)
+    const int numDevices = getCudaEnabledDeviceCount();
+    if (numDevices != 2)
     {
-        std::cout << "Two or more GPUs are required\n";
+        cerr << "Two GPUs are required" << endl;
         return -1;
     }
-    for (int i = 0; i < num_devices; ++i)
-    {
-        cv::gpu::printShortCudaDeviceInfo(i);
 
-        DeviceInfo dev_info(i);
-        if (!dev_info.isCompatible())
+    for (int i = 0; i < numDevices; ++i)
+    {
+        DeviceInfo devInfo(i);
+        if (!devInfo.isCompatible())
         {
-            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.major()
-                 << dev_info.minor() << "\n";
+            cerr << "GPU module was't built for GPU #" << i << " ("
+                 << devInfo.name() << ", CC " << devInfo.majorVersion()
+                 << devInfo.minorVersion() << endl;
             return -1;
         }
+
+        printShortCudaDeviceInfo(i);
     }
 
-    // Load input data
-    Mat left, right;
-    for (int i = 1; i < argc; ++i)
+    VideoCapture leftVideo(argv[1]);
+    VideoCapture rightVideo(argv[2]);
+
+    if (!leftVideo.isOpened())
     {
-        if (string(argv[i]) == "--left")
+         cerr << "Can't open " << argv[1] << " video file" << endl;
+         return -1;
+    }
+
+    if (!rightVideo.isOpened())
+    {
+         cerr << "Can't open " << argv[2] << " video file" << endl;
+         return -1;
+    }
+
+    cout << endl;
+    cout << "This sample demonstrates working on one piece of data using two GPUs." << endl;
+    cout << "It splits input into two parts and processes them separately on different GPUs." << endl;
+    cout << endl;
+
+    Mat leftFrame, rightFrame;
+    CudaMem leftGrayFrame, rightGrayFrame;
+
+    StereoSingleGpu gpu0Alg(0);
+    StereoSingleGpu gpu1Alg(1);
+    StereoMultiGpuThread multiThreadAlg;
+    StereoMultiGpuStream multiStreamAlg;
+
+    Mat disparityGpu0;
+    Mat disparityGpu1;
+    Mat disparityMultiThread;
+    CudaMem disparityMultiStream;
+
+    Mat disparityGpu0Show;
+    Mat disparityGpu1Show;
+    Mat disparityMultiThreadShow;
+    Mat disparityMultiStreamShow;
+
+    TickMeter tm;
+
+    cout << "-------------------------------------------------------------------" << endl;
+    cout << "| Frame | GPU 0 ms | GPU 1 ms | Multi Thread ms | Multi Stream ms |" << endl;
+    cout << "-------------------------------------------------------------------" << endl;
+
+    for (int i = 0;; ++i)
+    {
+        leftVideo >> leftFrame;
+        rightVideo >> rightFrame;
+
+        if (leftFrame.empty() || rightFrame.empty())
+            break;
+
+        if (leftFrame.size() != rightFrame.size())
         {
-            left = imread(argv[++i], cv::IMREAD_GRAYSCALE);
-            CV_Assert(!left.empty());
-        }
-        else if (string(argv[i]) == "--right")
-        {
-            right = imread(argv[++i], cv::IMREAD_GRAYSCALE);
-            CV_Assert(!right.empty());
-        }
-        else if (string(argv[i]) == "--help")
-        {
-            printHelp();
+            cerr << "Frames have different sizes" << endl;
             return -1;
         }
+
+        leftGrayFrame.create(leftFrame.size(), CV_8UC1);
+        rightGrayFrame.create(leftFrame.size(), CV_8UC1);
+
+        cvtColor(leftFrame, leftGrayFrame.createMatHeader(), COLOR_BGR2GRAY);
+        cvtColor(rightFrame, rightGrayFrame.createMatHeader(), COLOR_BGR2GRAY);
+
+        tm.reset(); tm.start();
+        gpu0Alg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(),
+                        disparityGpu0);
+        tm.stop();
+
+        const double gpu0Time = tm.getTimeMilli();
+
+        tm.reset(); tm.start();
+        gpu1Alg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(),
+                        disparityGpu1);
+        tm.stop();
+
+        const double gpu1Time = tm.getTimeMilli();
+
+        tm.reset(); tm.start();
+        multiThreadAlg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(),
+                               disparityMultiThread);
+        tm.stop();
+
+        const double multiThreadTime = tm.getTimeMilli();
+
+        tm.reset(); tm.start();
+        multiStreamAlg.compute(leftGrayFrame, rightGrayFrame, disparityMultiStream);
+        tm.stop();
+
+        const double multiStreamTime = tm.getTimeMilli();
+
+        cout << "| " << setw(5) << i << " | "
+             << setw(8) << setprecision(1) << fixed << gpu0Time << " | "
+             << setw(8) << setprecision(1) << fixed << gpu1Time << " | "
+             << setw(15) << setprecision(1) << fixed << multiThreadTime << " | "
+             << setw(15) << setprecision(1) << fixed << multiStreamTime << " |" << endl;
+
+        resize(disparityGpu0, disparityGpu0Show, Size(1024, 768), 0, 0, INTER_AREA);
+        resize(disparityGpu1, disparityGpu1Show, Size(1024, 768), 0, 0, INTER_AREA);
+        resize(disparityMultiThread, disparityMultiThreadShow, Size(1024, 768), 0, 0, INTER_AREA);
+        resize(disparityMultiStream.createMatHeader(), disparityMultiStreamShow, Size(1024, 768), 0, 0, INTER_AREA);
+
+        imshow("disparityGpu0", disparityGpu0Show);
+        imshow("disparityGpu1", disparityGpu1Show);
+        imshow("disparityMultiThread", disparityMultiThreadShow);
+        imshow("disparityMultiStream", disparityMultiStreamShow);
+
+        const int key = waitKey(30) & 0xff;
+        if (key == 27)
+            break;
     }
 
-    // Split source images for processing on the GPU #0
-    setDevice(0);
-    d_left[0].upload(left.rowRange(0, left.rows / 2));
-    d_right[0].upload(right.rowRange(0, right.rows / 2));
-    bm[0] = gpu::createStereoBM();
+    cout << "-------------------------------------------------------------------" << endl;
 
-    // Split source images for processing on the GPU #1
-    setDevice(1);
-    d_left[1].upload(left.rowRange(left.rows / 2, left.rows));
-    d_right[1].upload(right.rowRange(right.rows / 2, right.rows));
-    bm[1] = gpu::createStereoBM();
-
-    // Execute calculation in two threads using two GPUs
-    int devices[] = {0, 1};
-    tbb::parallel_do(devices, devices + 2, Worker());
-
-    // Release the first GPU resources
-    setDevice(0);
-    imshow("GPU #0 result", Mat(d_result[0]));
-    d_left[0].release();
-    d_right[0].release();
-    d_result[0].release();
-    bm[0].release();
-
-    // Release the second GPU resources
-    setDevice(1);
-    imshow("GPU #1 result", Mat(d_result[1]));
-    d_left[1].release();
-    d_right[1].release();
-    d_result[1].release();
-    bm[1].release();
-
-    waitKey();
     return 0;
 }
-
-
-void Worker::operator()(int device_id) const
-{
-    setDevice(device_id);
-
-    bm[device_id]->compute(d_left[device_id], d_right[device_id], d_result[device_id]);
-
-    std::cout << "GPU #" << device_id << " (" << DeviceInfo().name()
-        << "): finished\n";
-}
-
-#endif
diff --git a/samples/gpu/super_resolution.cpp b/samples/gpu/super_resolution.cpp
index 4f3b4e20e..e124b375e 100644
--- a/samples/gpu/super_resolution.cpp
+++ b/samples/gpu/super_resolution.cpp
@@ -8,11 +8,16 @@
 #include "opencv2/contrib.hpp"
 #include "opencv2/superres.hpp"
 #include "opencv2/superres/optical_flow.hpp"
+#include "opencv2/opencv_modules.hpp"
+
+#if defined(HAVE_OPENCV_OCL)
+#include "opencv2/ocl/ocl.hpp"
+#endif
 
 using namespace std;
 using namespace cv;
 using namespace cv::superres;
-
+bool useOclChanged;
 #define MEASURE_TIME(op) \
     { \
         TickMeter tm; \
@@ -48,11 +53,40 @@ static Ptr<DenseOpticalFlowExt> createOptFlow(const string& name, bool useGpu)
     {
         cerr << "Incorrect Optical Flow algorithm - " << name << endl;
     }
-    return 0;
+    return Ptr<DenseOpticalFlowExt>();
 }
-
+#if defined(HAVE_OPENCV_OCL)
+static Ptr<DenseOpticalFlowExt> createOptFlow(const string& name)
+{
+    if (name == "farneback")
+    {
+        return createOptFlow_Farneback_OCL();
+    }
+    else if (name == "simple")
+    {
+        useOclChanged = true;
+        std::cout<<"simple on OpenCL has not been implemented. Use CPU instead!\n";
+        return createOptFlow_Simple();
+    }
+    else if (name == "tvl1")
+        return createOptFlow_DualTVL1_OCL();
+    else if (name == "brox")
+    {
+        std::cout<<"brox has not been implemented!\n";
+        return Ptr<DenseOpticalFlowExt>();
+    }
+    else if (name == "pyrlk")
+        return createOptFlow_PyrLK_OCL();
+    else
+    {
+        cerr << "Incorrect Optical Flow algorithm - " << name << endl;
+    }
+    return Ptr<DenseOpticalFlowExt>();
+}
+#endif
 int main(int argc, const char* argv[])
 {
+    useOclChanged = false;
     CommandLineParser cmd(argc, argv,
         "{ v video      |           | Input video }"
         "{ o output     |           | Output video }"
@@ -60,7 +94,7 @@ int main(int argc, const char* argv[])
         "{ i iterations | 180       | Iteration count }"
         "{ t temporal   | 4         | Radius of the temporal search area }"
         "{ f flow       | farneback | Optical flow algorithm (farneback, simple, tvl1, brox, pyrlk) }"
-        "{ gpu          | false     | Use GPU }"
+        "{ g            | false     | CPU as default device, cuda for CUDA and ocl for OpenCL }"
         "{ h help       | false     | Print help message }"
     );
 
@@ -77,25 +111,79 @@ int main(int argc, const char* argv[])
     const int iterations = cmd.get<int>("iterations");
     const int temporalAreaRadius = cmd.get<int>("temporal");
     const string optFlow = cmd.get<string>("flow");
-    const bool useGpu = cmd.get<bool>("gpu");
+    string gpuOption = cmd.get<string>("gpu");
 
+    std::transform(gpuOption.begin(), gpuOption.end(), gpuOption.begin(), ::tolower);
+
+    bool useCuda = false;
+    bool useOcl = false;
+
+    if(gpuOption.compare("ocl") == 0)
+        useOcl = true;
+    else if(gpuOption.compare("cuda") == 0)
+        useCuda = true;
+
+#ifndef HAVE_OPENCV_OCL
+    if(useOcl)
+    {
+        {
+            cout<<"OPENCL is not compiled\n";
+            return 0;
+        }
+    }
+#endif
+#if defined(HAVE_OPENCV_OCL)
+    std::vector<cv::ocl::Info>info;
+    if(useCuda)
+    {
+        CV_Assert(!useOcl);
+        info.clear();
+    }
+
+    if(useOcl)
+    {
+        CV_Assert(!useCuda);
+        cv::ocl::getDevice(info);
+    }
+#endif
     Ptr<SuperResolution> superRes;
-    if (useGpu)
-        superRes = createSuperResolution_BTVL1_GPU();
+
+
+#if defined(HAVE_OPENCV_OCL)
+    if(useOcl)
+    {
+        Ptr<DenseOpticalFlowExt> of = createOptFlow(optFlow);
+        if (of.empty())
+            exit(-1);
+        if(useOclChanged)
+        {
+            superRes = createSuperResolution_BTVL1();
+            useOcl = !useOcl;
+        }else
+            superRes = createSuperResolution_BTVL1_OCL();
+        superRes->set("opticalFlow", of);
+    }
     else
-        superRes = createSuperResolution_BTVL1();
+#endif
+    {
+        if (useCuda)
+            superRes = createSuperResolution_BTVL1_GPU();
+        else
+            superRes = createSuperResolution_BTVL1();
+
+        Ptr<DenseOpticalFlowExt> of = createOptFlow(optFlow, useCuda);
+
+        if (of.empty())
+            exit(-1);
+        superRes->set("opticalFlow", of);
+    }
 
     superRes->set("scale", scale);
     superRes->set("iterations", iterations);
     superRes->set("temporalAreaRadius", temporalAreaRadius);
 
-    Ptr<DenseOpticalFlowExt> of = createOptFlow(optFlow, useGpu);
-    if (of.empty())
-        exit(-1);
-    superRes->set("opticalFlow", of);
-
     Ptr<FrameSource> frameSource;
-    if (useGpu)
+    if (useCuda)
     {
         // Try to use gpu Video Decoding
         try
@@ -109,7 +197,7 @@ int main(int argc, const char* argv[])
             frameSource.release();
         }
     }
-    if (frameSource.empty())
+    if (!frameSource)
         frameSource = createFrameSource_Video(inputVideoName);
 
     // skip first frame, it is usually corrupted
@@ -121,7 +209,11 @@ int main(int argc, const char* argv[])
         cout << "Iterations      : " << iterations << endl;
         cout << "Temporal radius : " << temporalAreaRadius << endl;
         cout << "Optical Flow    : " << optFlow << endl;
-        cout << "Mode            : " << (useGpu ? "GPU" : "CPU") << endl;
+#if defined(HAVE_OPENCV_OCL)
+        cout << "Mode            : " << (useCuda ? "CUDA" : useOcl? "OpenCL" : "CPU") << endl;
+#else
+        cout << "Mode            : " << (useCuda ? "CUDA" : "CPU") << endl;
+#endif
     }
 
     superRes->setInput(frameSource);
@@ -131,10 +223,30 @@ int main(int argc, const char* argv[])
     for (int i = 0;; ++i)
     {
         cout << '[' << setw(3) << i << "] : ";
-
         Mat result;
-        MEASURE_TIME(superRes->nextFrame(result));
 
+#if defined(HAVE_OPENCV_OCL)
+        cv::ocl::oclMat result_;
+
+        if(useOcl)
+        {
+            MEASURE_TIME(superRes->nextFrame(result_));
+        }
+        else
+#endif
+        {
+            MEASURE_TIME(superRes->nextFrame(result));
+        }
+
+#ifdef HAVE_OPENCV_OCL
+        if(useOcl)
+        {
+            if(!result_.empty())
+            {
+                result_.download(result);
+            }
+        }
+#endif
         if (result.empty())
             break;
 
diff --git a/samples/java/sbt/src/main/java/DetectFaceDemo.java b/samples/java/sbt/src/main/java/DetectFaceDemo.java
index fb0856723..07b4202e6 100644
--- a/samples/java/sbt/src/main/java/DetectFaceDemo.java
+++ b/samples/java/sbt/src/main/java/DetectFaceDemo.java
@@ -41,4 +41,4 @@ public class DetectFaceDemo {
         System.out.println(String.format("Writing %s", filename));
         Highgui.imwrite(filename, image);
     }
-}
\ No newline at end of file
+}
diff --git a/samples/java/sbt/src/main/scala/ScalaCorrespondenceMatchingDemo.scala b/samples/java/sbt/src/main/scala/ScalaCorrespondenceMatchingDemo.scala
index 30ab0553e..ea50200c9 100644
--- a/samples/java/sbt/src/main/scala/ScalaCorrespondenceMatchingDemo.scala
+++ b/samples/java/sbt/src/main/scala/ScalaCorrespondenceMatchingDemo.scala
@@ -66,4 +66,4 @@ object ScalaCorrespondenceMatchingDemo {
     println(s"Writing ${filename}")
     assert(Highgui.imwrite(filename, correspondenceImage))
   }
-}
\ No newline at end of file
+}
diff --git a/samples/java/sbt/src/main/scala/ScalaDetectFaceDemo.scala b/samples/java/sbt/src/main/scala/ScalaDetectFaceDemo.scala
index a35eeb778..85fdeb0d7 100644
--- a/samples/java/sbt/src/main/scala/ScalaDetectFaceDemo.scala
+++ b/samples/java/sbt/src/main/scala/ScalaDetectFaceDemo.scala
@@ -40,4 +40,4 @@ object ScalaDetectFaceDemo {
     println(s"Writing ${filename}")
     assert(Highgui.imwrite(filename, image))
   }
-}
\ No newline at end of file
+}
diff --git a/samples/ocl/CMakeLists.txt b/samples/ocl/CMakeLists.txt
index cdcf2f3e5..9b04dc397 100644
--- a/samples/ocl/CMakeLists.txt
+++ b/samples/ocl/CMakeLists.txt
@@ -27,7 +27,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_OCL_SAMPLES_REQUIRED_DEPS})
 
     set_target_properties(${the_target} PROPERTIES
-      OUTPUT_NAME "${name}_${project}"
+      OUTPUT_NAME "${project}-example-${name}"
       PROJECT_LABEL "(EXAMPLE_${project_upper}) ${name}")
 
     if(ENABLE_SOLUTION_FOLDERS)
@@ -54,7 +54,6 @@ endif()
 if (INSTALL_C_EXAMPLES AND NOT WIN32)
   file(GLOB install_list *.c *.cpp *.jpg *.png *.data makefile.* build_all.sh *.dsp *.cmd )
   install(FILES ${install_list}
-          DESTINATION share/opencv/samples/${project}
+          DESTINATION share/OpenCV/samples/${project}
           PERMISSIONS OWNER_READ GROUP_READ WORLD_READ)
 endif()
-
diff --git a/samples/ocl/adaptive_bilateral_filter.cpp b/samples/ocl/adaptive_bilateral_filter.cpp
new file mode 100644
index 000000000..d6a2415f9
--- /dev/null
+++ b/samples/ocl/adaptive_bilateral_filter.cpp
@@ -0,0 +1,52 @@
+// This sample shows the difference of adaptive bilateral filter and bilateral filter.
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/ocl.hpp"
+
+using namespace cv;
+using namespace std;
+
+
+int main( int argc, const char** argv )
+{
+    const char* keys =
+        "{ i input   |          | specify input image }"
+        "{ k ksize   |     5    | specify kernel size }";
+    CommandLineParser cmd(argc, argv, keys);
+    string src_path = cmd.get<string>("i");
+    int ks = cmd.get<int>("k");
+    const char * winName[] = {"input", "adaptive bilateral CPU", "adaptive bilateral OpenCL", "bilateralFilter OpenCL"};
+
+    Mat src = imread(src_path);
+    Mat abFilterCPU;
+    if(src.empty()){
+        //cout << "error read image: " << src_path << endl;
+        return -1;
+    }
+
+    std::vector<ocl::Info> infos;
+    ocl::getDevice(infos);
+
+    ocl::oclMat dsrc(src), dABFilter, dBFilter;
+
+    Size ksize(ks, ks);
+    adaptiveBilateralFilter(src,abFilterCPU, ksize, 10);
+    ocl::adaptiveBilateralFilter(dsrc, dABFilter, ksize, 10);
+    ocl::bilateralFilter(dsrc, dBFilter, ks, 30, 9);
+
+    Mat abFilter = dABFilter;
+    Mat bFilter = dBFilter;
+    imshow(winName[0], src);
+
+    imshow(winName[1], abFilterCPU);
+
+    imshow(winName[2], abFilter);
+
+    imshow(winName[3], bFilter);
+
+    waitKey();
+    return 0;
+
+}
diff --git a/samples/ocl/bgfg_segm.cpp b/samples/ocl/bgfg_segm.cpp
new file mode 100644
index 000000000..87b3ea4ee
--- /dev/null
+++ b/samples/ocl/bgfg_segm.cpp
@@ -0,0 +1,136 @@
+#include <iostream>
+#include <string>
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/ocl.hpp"
+#include "opencv2/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+#define M_MOG  1
+#define M_MOG2 2
+
+int main(int argc, const char** argv)
+{
+
+    cv::CommandLineParser cmd(argc, argv,
+        "{ c camera | false       | use camera }"
+        "{ f file   | 768x576.avi | input video file }"
+        "{ m method | mog         | method (mog, mog2) }"
+        "{ h help   | false       | print help message }");
+
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Usage : bgfg_segm [options]" << endl;
+        cout << "Avaible options:" << endl;
+        cmd.printMessage();
+        return 0;
+    }
+
+    bool useCamera = cmd.get<bool>("camera");
+    string file = cmd.get<string>("file");
+    string method = cmd.get<string>("method");
+
+    if (method != "mog" && method != "mog2")
+    {
+        cerr << "Incorrect method" << endl;
+        return -1;
+    }
+
+    int m = method == "mog" ? M_MOG : M_MOG2;
+
+    VideoCapture cap;
+
+    if (useCamera)
+        cap.open(0);
+    else
+        cap.open(file);
+
+    if (!cap.isOpened())
+    {
+        cerr << "can not open camera or video file" << endl;
+        return -1;
+    }
+
+    std::vector<cv::ocl::Info>info;
+    cv::ocl::getDevice(info);
+
+    Mat frame;
+    cap >> frame;
+
+    oclMat d_frame(frame);
+
+    cv::ocl::MOG mog;
+    cv::ocl::MOG2 mog2;
+
+    oclMat d_fgmask;
+    oclMat d_fgimg;
+    oclMat d_bgimg;
+
+    d_fgimg.create(d_frame.size(), d_frame.type());
+
+    Mat fgmask;
+    Mat fgimg;
+    Mat bgimg;
+
+    switch (m)
+    {
+    case M_MOG:
+        mog(d_frame, d_fgmask, 0.01f);
+        break;
+
+    case M_MOG2:
+        mog2(d_frame, d_fgmask);
+        break;
+    }
+
+    for(;;)
+    {
+        cap >> frame;
+        if (frame.empty())
+            break;
+        d_frame.upload(frame);
+
+        int64 start = cv::getTickCount();
+
+        //update the model
+        switch (m)
+        {
+        case M_MOG:
+            mog(d_frame, d_fgmask, 0.01f);
+            mog.getBackgroundImage(d_bgimg);
+            break;
+
+        case M_MOG2:
+            mog2(d_frame, d_fgmask);
+            mog2.getBackgroundImage(d_bgimg);
+            break;
+        }
+
+        double fps = cv::getTickFrequency() / (cv::getTickCount() - start);
+        std::cout << "FPS : " << fps << std::endl;
+
+        d_fgimg.setTo(Scalar::all(0));
+        d_frame.copyTo(d_fgimg, d_fgmask);
+
+        d_fgmask.download(fgmask);
+        d_fgimg.download(fgimg);
+        if (!d_bgimg.empty())
+            d_bgimg.download(bgimg);
+
+        imshow("image", frame);
+        imshow("foreground mask", fgmask);
+        imshow("foreground image", fgimg);
+        if (!bgimg.empty())
+            imshow("mean background image", bgimg);
+
+        int key = waitKey(30);
+        if (key == 27)
+            break;
+    }
+
+    return 0;
+}
diff --git a/samples/ocl/clahe.cpp b/samples/ocl/clahe.cpp
new file mode 100644
index 000000000..1ae2c8655
--- /dev/null
+++ b/samples/ocl/clahe.cpp
@@ -0,0 +1,113 @@
+#include <iostream>
+#include "opencv2/core/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/ocl/ocl.hpp"
+using namespace cv;
+using namespace std;
+
+Ptr<CLAHE> pFilter;
+int tilesize;
+int cliplimit;
+string outfile;
+
+static void TSize_Callback(int pos)
+{
+    if(pos==0)
+    {
+        pFilter->setTilesGridSize(Size(1,1));
+    }
+    pFilter->setTilesGridSize(Size(tilesize,tilesize));
+}
+
+static void Clip_Callback(int)
+{
+    pFilter->setClipLimit(cliplimit);
+}
+
+int main(int argc, char** argv)
+{
+    const char* keys =
+        "{ i input   |                    | specify input image }"
+        "{ c camera  |    0               | specify camera id   }"
+        "{ s use_cpu |    false           | use cpu algorithm   }"
+        "{ o output  | clahe_output.jpg   | specify output save path}";
+
+    CommandLineParser cmd(argc, argv, keys);
+    string infile = cmd.get<string>("i");
+    outfile = cmd.get<string>("o");
+    int camid = cmd.get<int>("c");
+    bool use_cpu = cmd.get<bool>("s");
+    VideoCapture capture;
+    bool running = true;
+
+    namedWindow("CLAHE");
+    createTrackbar("Tile Size", "CLAHE", &tilesize, 32, (TrackbarCallback)TSize_Callback);
+    createTrackbar("Clip Limit", "CLAHE", &cliplimit, 20, (TrackbarCallback)Clip_Callback);
+
+    vector<ocl::Info> info;
+    CV_Assert(ocl::getDevice(info));
+
+    Mat frame, outframe;
+    ocl::oclMat d_outframe;
+
+    int cur_clip;
+    Size cur_tilesize;
+    if(use_cpu)
+    {
+        pFilter = createCLAHE();
+    }
+    else
+    {
+        pFilter = ocl::createCLAHE();
+    }
+    cur_clip = (int)pFilter->getClipLimit();
+    cur_tilesize = pFilter->getTilesGridSize();
+    setTrackbarPos("Tile Size", "CLAHE", cur_tilesize.width);
+    setTrackbarPos("Clip Limit", "CLAHE", cur_clip);
+    if(infile != "")
+    {
+        frame = imread(infile);
+        if(frame.empty())
+        {
+            cout << "error read image: " << infile << endl;
+            return -1;
+        }
+    }
+    else
+    {
+        capture.open(camid);
+    }
+    cout << "\nControls:\n"
+         << "\to - save output image\n"
+         << "\tESC - exit\n";
+    while(running)
+    {
+        if(capture.isOpened())
+            capture.read(frame);
+        else
+            frame = imread(infile);
+        if(frame.empty())
+        {
+            continue;
+        }
+        if(use_cpu)
+        {
+            cvtColor(frame, frame, COLOR_BGR2GRAY);
+            pFilter->apply(frame, outframe);
+        }
+        else
+        {
+            ocl::oclMat d_frame(frame);
+            ocl::cvtColor(d_frame, d_outframe, COLOR_BGR2GRAY);
+            pFilter->apply(d_outframe, d_outframe);
+            d_outframe.download(outframe);
+        }
+        imshow("CLAHE", outframe);
+        char key = (char)waitKey(3);
+        if(key == 'o') imwrite(outfile, outframe);
+        else if(key == 27) running = false;
+    }
+    return 0;
+}
diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp
index 5ffed2e40..eef4c9be9 100644
--- a/samples/ocl/facedetect.cpp
+++ b/samples/ocl/facedetect.cpp
@@ -8,25 +8,25 @@
 #include <iostream>
 #include <stdio.h>
 
-int main( int, const char** ) { return 0; }
-
-#if 0
 
 using namespace std;
 using namespace cv;
 #define LOOP_NUM 10
 
 const static Scalar colors[] =  { CV_RGB(0,0,255),
-        CV_RGB(0,128,255),
-        CV_RGB(0,255,255),
-        CV_RGB(0,255,0),
-        CV_RGB(255,128,0),
-        CV_RGB(255,255,0),
-        CV_RGB(255,0,0),
-        CV_RGB(255,0,255)} ;
+                                  CV_RGB(0,128,255),
+                                  CV_RGB(0,255,255),
+                                  CV_RGB(0,255,0),
+                                  CV_RGB(255,128,0),
+                                  CV_RGB(255,255,0),
+                                  CV_RGB(255,0,0),
+                                  CV_RGB(255,0,255)
+                                } ;
+
 
 int64 work_begin = 0;
 int64 work_end = 0;
+string outputName;
 
 static void workBegin()
 {
@@ -37,40 +37,45 @@ static void workEnd()
     work_end += (getTickCount() - work_begin);
 }
 
-
-static double getTime(){
+static double getTime()
+{
     return work_end /((double)cvGetTickFrequency() * 1000.);
 }
 
 void detect( Mat& img, vector<Rect>& faces,
-    cv::ocl::OclCascadeClassifierBuf& cascade,
-    double scale, bool calTime);
+             ocl::OclCascadeClassifier& cascade,
+             double scale, bool calTime);
+
 
 void detectCPU( Mat& img, vector<Rect>& faces,
-    CascadeClassifier& cascade,
-    double scale, bool calTime);
+                CascadeClassifier& cascade,
+                double scale, bool calTime);
 
 void Draw(Mat& img, vector<Rect>& faces, double scale);
 
+
 // This function test if gpu_rst matches cpu_rst.
 // If the two vectors are not equal, it will return the difference in vector size
 // Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
-double checkRectSimilarity(Size sz, std::vector<Rect>& cpu_rst, std::vector<Rect>& gpu_rst);
+double checkRectSimilarity(Size sz, vector<Rect>& cpu_rst, vector<Rect>& gpu_rst);
+
 
 int main( int argc, const char** argv )
 {
     const char* keys =
-        "{ h | help       | false       | print help message }"
-        "{ i | input      |             | specify input image }"
-        "{ t | template   | ../../../data/haarcascades/haarcascade_frontalface_alt.xml  | specify template file }"
-        "{ c | scale      |   1.0       | scale image }"
-        "{ s | use_cpu    | false       | use cpu or gpu to process the image }";
+        "{ h  help       | false       | print help message }"
+        "{ i  input      |             | specify input image }"
+        "{ t  template   | haarcascade_frontalface_alt.xml |"
+        " specify template file path }"
+        "{ c  scale      |   1.0       | scale image }"
+        "{ s  use_cpu    | false       | use cpu or gpu to process the image }"
+        "{ o  output     | facedetect_output.jpg  |"
+        " specify output image save path(only works when input is images) }";
 
     CommandLineParser cmd(argc, argv, keys);
     if (cmd.get<bool>("help"))
     {
         cout << "Avaible options:" << endl;
-        cmd.printParams();
         return 0;
     }
     CvCapture* capture = 0;
@@ -78,9 +83,10 @@ int main( int argc, const char** argv )
 
     bool useCPU = cmd.get<bool>("s");
     string inputName = cmd.get<string>("i");
+    outputName = cmd.get<string>("o");
     string cascadeName = cmd.get<string>("t");
     double scale = cmd.get<double>("c");
-    cv::ocl::OclCascadeClassifierBuf cascade;
+    ocl::OclCascadeClassifier cascade;
     CascadeClassifier  cpu_cascade;
 
     if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
@@ -114,9 +120,10 @@ int main( int argc, const char** argv )
         return -1;
     }
 
+
     cvNamedWindow( "result", 1 );
-    std::vector<cv::ocl::Info> oclinfo;
-    int devnums = cv::ocl::getDevice(oclinfo);
+    vector<ocl::Info> oclinfo;
+    int devnums = ocl::getDevice(oclinfo);
     if( devnums < 1 )
     {
         std::cout << "no device found\n";
@@ -139,10 +146,12 @@ int main( int argc, const char** argv )
                 frame.copyTo( frameCopy );
             else
                 flip( frame, frameCopy, 0 );
-            if(useCPU){
+            if(useCPU)
+            {
                 detectCPU(frameCopy, faces, cpu_cascade, scale, false);
             }
-            else{
+            else
+            {
                 detect(frameCopy, faces, cascade, scale, false);
             }
             Draw(frameCopy, faces, scale);
@@ -150,8 +159,10 @@ int main( int argc, const char** argv )
                 goto _cleanup_;
         }
 
+
         waitKey(0);
 
+
 _cleanup_:
         cvReleaseCapture( &capture );
     }
@@ -161,15 +172,18 @@ _cleanup_:
         vector<Rect> faces;
         vector<Rect> ref_rst;
         double accuracy = 0.;
-        for(int i = 0; i <= LOOP_NUM;i ++)
+        for(int i = 0; i <= LOOP_NUM; i ++)
         {
             cout << "loop" << i << endl;
-            if(useCPU){
+            if(useCPU)
+            {
                 detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);
             }
-            else{
+            else
+            {
                 detect(image, faces, cascade, scale, i==0?false:true);
-                if(i == 0){
+                if(i == 0)
+                {
                     detectCPU(image, ref_rst, cpu_cascade, scale, false);
                     accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
                 }
@@ -189,43 +203,43 @@ _cleanup_:
     }
 
     cvDestroyWindow("result");
-
     return 0;
 }
 
 void detect( Mat& img, vector<Rect>& faces,
-    cv::ocl::OclCascadeClassifierBuf& cascade,
-    double scale, bool calTime)
+             ocl::OclCascadeClassifier& cascade,
+             double scale, bool calTime)
 {
-    cv::ocl::oclMat image(img);
-    cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
+    ocl::oclMat image(img);
+    ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
     if(calTime) workBegin();
-    cv::ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
-    cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
-    cv::ocl::equalizeHist( smallImg, smallImg );
+    ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
+    ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
+    ocl::equalizeHist( smallImg, smallImg );
 
     cascade.detectMultiScale( smallImg, faces, 1.1,
-        3, 0
-        |CV_HAAR_SCALE_IMAGE
-        , Size(30,30), Size(0, 0) );
+                              3, 0
+                              |CASCADE_SCALE_IMAGE
+                              , Size(30,30), Size(0, 0) );
     if(calTime) workEnd();
 }
 
 void detectCPU( Mat& img, vector<Rect>& faces,
-    CascadeClassifier& cascade,
-    double scale, bool calTime)
+                CascadeClassifier& cascade,
+                double scale, bool calTime)
 {
     if(calTime) workBegin();
     Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
-    cvtColor(img, cpu_gray, CV_BGR2GRAY);
+    cvtColor(img, cpu_gray, COLOR_BGR2GRAY);
     resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR);
     equalizeHist(cpu_smallImg, cpu_smallImg);
     cascade.detectMultiScale(cpu_smallImg, faces, 1.1,
-        3, 0 | CV_HAAR_SCALE_IMAGE,
-        Size(30, 30), Size(0, 0));
+                             3, 0 | CASCADE_SCALE_IMAGE,
+                             Size(30, 30), Size(0, 0));
     if(calTime) workEnd();
 }
 
+
 void Draw(Mat& img, vector<Rect>& faces, double scale)
 {
     int i = 0;
@@ -239,31 +253,43 @@ void Draw(Mat& img, vector<Rect>& faces, double scale)
         radius = cvRound((r->width + r->height)*0.25*scale);
         circle( img, center, radius, color, 3, 8, 0 );
     }
-    cv::imshow( "result", img );
+    imwrite( outputName, img );
+    if(abs(scale-1.0)>.001)
+    {
+        resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale)));
+    }
+    imshow( "result", img );
+
 }
 
-double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
+
+double checkRectSimilarity(Size sz, vector<Rect>& ob1, vector<Rect>& ob2)
 {
     double final_test_result = 0.0;
     size_t sz1 = ob1.size();
     size_t sz2 = ob2.size();
 
     if(sz1 != sz2)
+    {
         return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+    }
     else
     {
-        cv::Mat cpu_result(sz, CV_8UC1);
+        if(sz1==0 && sz2==0)
+            return 0;
+        Mat cpu_result(sz, CV_8UC1);
         cpu_result.setTo(0);
 
         for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
         {
-            cv::Mat cpu_result_roi(cpu_result, *r);
+            Mat cpu_result_roi(cpu_result, *r);
             cpu_result_roi.setTo(1);
             cpu_result.copyTo(cpu_result);
         }
-        int cpu_area = cv::countNonZero(cpu_result > 0);
+        int cpu_area = countNonZero(cpu_result > 0);
 
-        cv::Mat gpu_result(sz, CV_8UC1);
+
+        Mat gpu_result(sz, CV_8UC1);
         gpu_result.setTo(0);
         for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
         {
@@ -272,12 +298,13 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
             gpu_result.copyTo(gpu_result);
         }
 
-        cv::Mat result_;
+        Mat result_;
         multiply(cpu_result, gpu_result, result_);
-        int result = cv::countNonZero(result_ > 0);
-
-        final_test_result = 1.0 - (double)result/(double)cpu_area;
+        int result = countNonZero(result_ > 0);
+        if(cpu_area!=0 && result!=0)
+            final_test_result = 1.0 - (double)result/(double)cpu_area;
+        else if(cpu_area==0 && result!=0)
+            final_test_result = -1;
     }
     return final_test_result;
 }
-#endif
diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp
index daff26771..12280b1f8 100644
--- a/samples/ocl/hog.cpp
+++ b/samples/ocl/hog.cpp
@@ -11,75 +11,39 @@
 using namespace std;
 using namespace cv;
 
-bool help_showed = false;
-
-class Args
-{
-public:
-    Args();
-    static Args read(int argc, char** argv);
-
-    string src;
-    bool src_is_video;
-    bool src_is_camera;
-    int camera_id;
-
-    bool write_video;
-    string dst_video;
-    double dst_video_fps;
-
-    bool make_gray;
-
-    bool resize_src;
-    int width, height;
-
-    double scale;
-    int nlevels;
-    int gr_threshold;
-
-    double hit_threshold;
-    bool hit_threshold_auto;
-
-    int win_width;
-    int win_stride_width, win_stride_height;
-
-    bool gamma_corr;
-};
-
 class App
 {
 public:
-    App(const Args& s);
+    App(CommandLineParser& cmd);
     void run();
-
     void handleKey(char key);
-
     void hogWorkBegin();
     void hogWorkEnd();
     string hogWorkFps() const;
-
     void workBegin();
     void workEnd();
     string workFps() const;
-
     string message() const;
 
+
 // This function test if gpu_rst matches cpu_rst.
 // If the two vectors are not equal, it will return the difference in vector size
-// Else if will return 
+// Else if will return
 // (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
-    double checkRectSimilarity(Size sz, 
-                               std::vector<Rect>& cpu_rst, 
+    double checkRectSimilarity(Size sz,
+                               std::vector<Rect>& cpu_rst,
                                std::vector<Rect>& gpu_rst);
 private:
     App operator=(App&);
 
-    Args args;
+    //Args args;
     bool running;
-
     bool use_gpu;
     bool make_gray;
     double scale;
+    double resize_scale;
+    int win_width;
+    int win_stride_width, win_stride_height;
     int gr_threshold;
     int nlevels;
     double hit_threshold;
@@ -87,179 +51,112 @@ private:
 
     int64 hog_work_begin;
     double hog_work_fps;
-
     int64 work_begin;
     double work_fps;
-};
 
-static void printHelp()
-{
-    cout << "Histogram of Oriented Gradients descriptor and detector sample.\n"
-         << "\nUsage: hog_gpu\n"
-         << "  (<image>|--video <vide>|--camera <camera_id>) # frames source\n"
-         << "  [--make_gray <true/false>] # convert image to gray one or not\n"
-         << "  [--resize_src <true/false>] # do resize of the source image or not\n"
-         << "  [--width <int>] # resized image width\n"
-         << "  [--height <int>] # resized image height\n"
-         << "  [--hit_threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
-         << "  [--scale <double>] # HOG window scale factor\n"
-         << "  [--nlevels <int>] # max number of HOG window scales\n"
-         << "  [--win_width <int>] # width of the window (48 or 64)\n"
-         << "  [--win_stride_width <int>] # distance by OX axis between neighbour wins\n"
-         << "  [--win_stride_height <int>] # distance by OY axis between neighbour wins\n"
-         << "  [--gr_threshold <int>] # merging similar rects constant\n"
-         << "  [--gamma_correct <int>] # do gamma correction or not\n"
-         << "  [--write_video <bool>] # write video or not\n"
-         << "  [--dst_video <path>] # output video path\n"
-         << "  [--dst_video_fps <double>] # output video fps\n";
-    help_showed = true;
-}
+    string img_source;
+    string vdo_source;
+    string output;
+    int camera_id;
+    bool write_once;
+};
 
 int main(int argc, char** argv)
 {
+    const char* keys =
+        "{ h |  help    | false          | print help message }"
+        "{ i |  input   |                | specify input image}"
+        "{ c | camera   | -1             | enable camera capturing }"
+        "{ v | video    |                | use video as input }"
+        "{ g |  gray    | false          | convert image to gray one or not}"
+        "{ s |  scale   | 1.0            | resize the image before detect}"
+        "{ l |larger_win| false          | use 64x128 window}"
+        "{ o |  output  |                | specify output path when input is images}";
+    CommandLineParser cmd(argc, argv, keys);
+    App app(cmd);
     try
     {
-        if (argc < 2)
-            printHelp();
-        Args args = Args::read(argc, argv);
-        if (help_showed)
-            return -1;
-        App app(args);
         app.run();
     }
-    catch (const Exception& e) { return cout << "error: "  << e.what() << endl, 1; }
-    catch (const exception& e) { return cout << "error: "  << e.what() << endl, 1; }
-    catch(...) { return cout << "unknown exception" << endl, 1; }
+    catch (const Exception& e)
+    {
+        return cout << "error: "  << e.what() << endl, 1;
+    }
+    catch (const exception& e)
+    {
+        return cout << "error: "  << e.what() << endl, 1;
+    }
+    catch(...)
+    {
+        return cout << "unknown exception" << endl, 1;
+    }
     return 0;
 }
 
-
-Args::Args()
+App::App(CommandLineParser& cmd)
 {
-    src_is_video = false;
-    src_is_camera = false;
-    camera_id = 0;
-
-    write_video = false;
-    dst_video_fps = 24.;
-
-    make_gray = false;
-
-    resize_src = false;
-    width = 640;
-    height = 480;
-
-    scale = 1.05;
-    nlevels = 13;
-    gr_threshold = 8;
-    hit_threshold = 1.4;
-    hit_threshold_auto = true;
-
-    win_width = 48;
-    win_stride_width = 8;
-    win_stride_height = 8;
-
-    gamma_corr = true;
-}
-
-
-Args Args::read(int argc, char** argv)
-{
-    Args args;
-    for (int i = 1; i < argc; i++)
-    {
-        if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true");
-        else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true");
-        else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]);
-        else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]);
-        else if (string(argv[i]) == "--hit_threshold")
-        {
-            args.hit_threshold = atof(argv[++i]);
-            args.hit_threshold_auto = false;
-        }
-        else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]);
-        else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]);
-        else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]);
-        else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]);
-        else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]);
-        else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]);
-        else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true");
-        else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true");
-        else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i];
-        else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]);
-        else if (string(argv[i]) == "--help") printHelp();
-        else if (string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; }
-        else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; }
-        else if (args.src.empty()) args.src = argv[i];
-        else throw runtime_error((string("unknown key: ") + argv[i]));
-    }
-    return args;
-}
-
-
-App::App(const Args& s)
-{
-    args = s;
     cout << "\nControls:\n"
          << "\tESC - exit\n"
          << "\tm - change mode GPU <-> CPU\n"
          << "\tg - convert image to gray or not\n"
+         << "\to - save output image once, or switch on/off video save\n"
          << "\t1/q - increase/decrease HOG scale\n"
          << "\t2/w - increase/decrease levels count\n"
          << "\t3/e - increase/decrease HOG group threshold\n"
          << "\t4/r - increase/decrease hit threshold\n"
          << endl;
 
+
     use_gpu = true;
-    make_gray = args.make_gray;
-    scale = args.scale;
-    gr_threshold = args.gr_threshold;
-    nlevels = args.nlevels;
+    make_gray = cmd.get<bool>("g");
+    resize_scale = cmd.get<double>("s");
+    win_width = cmd.get<bool>("l") == true ? 64 : 48;
+    vdo_source = cmd.get<string>("v");
+    img_source = cmd.get<string>("i");
+    output = cmd.get<string>("o");
+    camera_id = cmd.get<int>("c");
 
-    if (args.hit_threshold_auto)
-        args.hit_threshold = args.win_width == 48 ? 1.4 : 0.;
-    hit_threshold = args.hit_threshold;
+    win_stride_width = 8;
+    win_stride_height = 8;
+    gr_threshold = 8;
+    nlevels = 13;
+    hit_threshold = win_width == 48 ? 1.4 : 0.;
+    scale = 1.05;
+    gamma_corr = true;
+    write_once = false;
 
-    gamma_corr = args.gamma_corr;
-
-    if (args.win_width != 64 && args.win_width != 48)
-        args.win_width = 64;
-
-    cout << "Scale: " << scale << endl;
-    if (args.resize_src)
-        cout << "Resized source: (" << args.width << ", " << args.height << ")\n";
     cout << "Group threshold: " << gr_threshold << endl;
     cout << "Levels number: " << nlevels << endl;
-    cout << "Win width: " << args.win_width << endl;
-    cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n";
+    cout << "Win width: " << win_width << endl;
+    cout << "Win stride: (" << win_stride_width << ", " << win_stride_height << ")\n";
     cout << "Hit threshold: " << hit_threshold << endl;
     cout << "Gamma correction: " << gamma_corr << endl;
     cout << endl;
 }
 
-
 void App::run()
 {
-    std::vector<ocl::Info> oclinfo;
+    vector<ocl::Info> oclinfo;
     ocl::getDevice(oclinfo);
     running = true;
-    cv::VideoWriter video_writer;
+    VideoWriter video_writer;
 
-    Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
-    Size win_stride(args.win_stride_width, args.win_stride_height);
+    Size win_size(win_width, win_width * 2);
+    Size win_stride(win_stride_width, win_stride_height);
 
     // Create HOG descriptors and detectors here
     vector<float> detector;
     if (win_size == Size(64, 128))
-        detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128();
+        detector = ocl::HOGDescriptor::getPeopleDetector64x128();
     else
-        detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96();
+        detector = ocl::HOGDescriptor::getPeopleDetector48x96();
 
-    cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
-                                   cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
-                                   cv::ocl::HOGDescriptor::DEFAULT_NLEVELS);
-    cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
-                              HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
+
+    ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
+                               ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
+                               ocl::HOGDescriptor::DEFAULT_NLEVELS);
+    HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
+                          HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
     gpu_hog.setSVMDetector(detector);
     cpu_hog.setSVMDetector(detector);
 
@@ -268,29 +165,29 @@ void App::run()
         VideoCapture vc;
         Mat frame;
 
-        if (args.src_is_video)
+        if (vdo_source!="")
         {
-            vc.open(args.src.c_str());
+            vc.open(vdo_source.c_str());
             if (!vc.isOpened())
-                throw runtime_error(string("can't open video file: " + args.src));
+                throw runtime_error(string("can't open video file: " + vdo_source));
             vc >> frame;
         }
-        else if (args.src_is_camera)
+        else if (camera_id != -1)
         {
-            vc.open(args.camera_id);
+            vc.open(camera_id);
             if (!vc.isOpened())
             {
                 stringstream msg;
-                msg << "can't open camera: " << args.camera_id;
+                msg << "can't open camera: " << camera_id;
                 throw runtime_error(msg.str());
             }
             vc >> frame;
         }
         else
         {
-            frame = imread(args.src);
+            frame = imread(img_source);
             if (frame.empty())
-                throw runtime_error(string("can't open image file: " + args.src));
+                throw runtime_error(string("can't open image file: " + img_source));
         }
 
         Mat img_aux, img, img_to_show;
@@ -308,13 +205,15 @@ void App::run()
             else frame.copyTo(img_aux);
 
             // Resize image
-            if (args.resize_src) resize(img_aux, img, Size(args.width, args.height));
+            if (abs(scale-1.0)>0.001)
+            {
+                Size sz((int)((double)img_aux.cols/resize_scale), (int)((double)img_aux.rows/resize_scale));
+                resize(img_aux, img, sz);
+            }
             else img = img_aux;
             img_to_show = img;
-
             gpu_hog.nlevels = nlevels;
             cpu_hog.nlevels = nlevels;
-
             vector<Rect> found;
 
             // Perform HOG classification
@@ -331,15 +230,16 @@ void App::run()
                     vector<Rect> ref_rst;
                     cvtColor(img, img, COLOR_BGRA2BGR);
                     cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride,
-                                              Size(0, 0), scale, gr_threshold-2);
+                                             Size(0, 0), scale, gr_threshold-2);
                     double accuracy = checkRectSimilarity(img.size(), ref_rst, found);
-                    cout << "\naccuracy value: " << accuracy << endl;           
-                } 
-           }
+                    cout << "\naccuracy value: " << accuracy << endl;
+                }
+            }
             else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
-                                          Size(0, 0), scale, gr_threshold);
+                                              Size(0, 0), scale, gr_threshold);
             hogWorkEnd();
 
+
             // Draw positive classified windows
             for (size_t i = 0; i < found.size(); i++)
             {
@@ -354,25 +254,32 @@ void App::run()
             putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
             putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
             imshow("opencv_gpu_hog", img_to_show);
-
-            if (args.src_is_video || args.src_is_camera) vc >> frame;
+            if (vdo_source!="" || camera_id!=-1) vc >> frame;
 
             workEnd();
 
-            if (args.write_video)
+            if (output!="" && write_once)
             {
-                if (!video_writer.isOpened())
+                if (img_source!="")     // wirte image
                 {
-                    video_writer.open(args.dst_video, VideoWriter::fourcc('x','v','i','d'), args.dst_video_fps,
-                                      img_to_show.size(), true);
-                    if (!video_writer.isOpened())
-                        throw std::runtime_error("can't create video writer");
+                    write_once = false;
+                    imwrite(output, img_to_show);
                 }
+                else                    //write video
+                {
+                    if (!video_writer.isOpened())
+                    {
+                        video_writer.open(output, VideoWriter::fourcc('x','v','i','d'), 24,
+                                          img_to_show.size(), true);
+                        if (!video_writer.isOpened())
+                            throw std::runtime_error("can't create video writer");
+                    }
 
-                if (make_gray) cvtColor(img_to_show, img, COLOR_GRAY2BGR);
-                else cvtColor(img_to_show, img, COLOR_BGRA2BGR);
+                    if (make_gray) cvtColor(img_to_show, img, COLOR_GRAY2BGR);
+                    else cvtColor(img_to_show, img, COLOR_BGRA2BGR);
 
-                video_writer << img;
+                    video_writer << img;
+                }
             }
 
             handleKey((char)waitKey(3));
@@ -380,7 +287,6 @@ void App::run()
     }
 }
 
-
 void App::handleKey(char key)
 {
     switch (key)
@@ -439,11 +345,18 @@ void App::handleKey(char key)
         gamma_corr = !gamma_corr;
         cout << "Gamma correction: " << gamma_corr << endl;
         break;
+    case 'o':
+    case 'O':
+        write_once = !write_once;
+        break;
     }
 }
 
 
-inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); }
+inline void App::hogWorkBegin()
+{
+    hog_work_begin = getTickCount();
+}
 
 inline void App::hogWorkEnd()
 {
@@ -459,8 +372,10 @@ inline string App::hogWorkFps() const
     return ss.str();
 }
 
-
-inline void App::workBegin() { work_begin = getTickCount(); }
+inline void App::workBegin()
+{
+    work_begin = getTickCount();
+}
 
 inline void App::workEnd()
 {
@@ -476,8 +391,9 @@ inline string App::workFps() const
     return ss.str();
 }
 
-double App::checkRectSimilarity(Size sz, 
-                                std::vector<Rect>& ob1, 
+
+double App::checkRectSimilarity(Size sz,
+                                std::vector<Rect>& ob1,
                                 std::vector<Rect>& ob2)
 {
     double final_test_result = 0.0;
@@ -485,20 +401,26 @@ double App::checkRectSimilarity(Size sz,
     size_t sz2 = ob2.size();
 
     if(sz1 != sz2)
+    {
         return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+    }
     else
     {
+        if(sz1==0 && sz2==0)
+            return 0;
         cv::Mat cpu_result(sz, CV_8UC1);
         cpu_result.setTo(0);
 
+
         for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
-        {      
+        {
             cv::Mat cpu_result_roi(cpu_result, *r);
             cpu_result_roi.setTo(1);
             cpu_result.copyTo(cpu_result);
         }
         int cpu_area = cv::countNonZero(cpu_result > 0);
 
+
         cv::Mat gpu_result(sz, CV_8UC1);
         gpu_result.setTo(0);
         for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
@@ -511,10 +433,10 @@ double App::checkRectSimilarity(Size sz,
         cv::Mat result_;
         multiply(cpu_result, gpu_result, result_);
         int result = cv::countNonZero(result_ > 0);
-
-        final_test_result = 1.0 - (double)result/(double)cpu_area;
+        if(cpu_area!=0 && result!=0)
+            final_test_result = 1.0 - (double)result/(double)cpu_area;
+        else if(cpu_area==0 && result!=0)
+            final_test_result = -1;
     }
     return final_test_result;
-
 }
-
diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp
index 3ce0edc8f..f7d066171 100644
--- a/samples/ocl/pyrlk_optical_flow.cpp
+++ b/samples/ocl/pyrlk_optical_flow.cpp
@@ -12,19 +12,20 @@ using namespace cv;
 using namespace cv::ocl;
 
 typedef unsigned char uchar;
-#define LOOP_NUM 10 
+#define LOOP_NUM 10
 int64 work_begin = 0;
 int64 work_end = 0;
 
-static void workBegin() 
-{ 
+static void workBegin()
+{
     work_begin = getTickCount();
 }
 static void workEnd()
 {
     work_end += (getTickCount() - work_begin);
 }
-static double getTime(){
+static double getTime()
+{
     return work_end * 1000. / getTickFrequency();
 }
 
@@ -94,14 +95,15 @@ int main(int argc, const char* argv[])
     //set this to save kernel compile time from second time you run
     ocl::setBinpath("./");
     const char* keys =
-        "{ help h           | false | print help message }"
-        "{ left l           |       | specify left image }"
-        "{ right r          |       | specify right image }"
-        "{ camera c         | 0     | enable camera capturing }"
-        "{ use_cpu s        | false | use cpu or gpu to process the image }"
-        "{ video v          |       | use video as input }"
-        "{ points           | 1000  | specify points count [GoodFeatureToTrack] }"
-        "{ min_dist         | 0     | specify minimal distance between points [GoodFeatureToTrack] }";
+        "{ help h           | false           | print help message }"
+        "{ left l           |                 | specify left image }"
+        "{ right r          |                 | specify right image }"
+        "{ camera c         | 0               | enable camera capturing }"
+        "{ use_cpu s        | false           | use cpu or gpu to process the image }"
+        "{ video v          |                 | use video as input }"
+        "{ output o         | pyrlk_output.jpg| specify output save path when input is images }"
+        "{ points           | 1000            | specify points count [GoodFeatureToTrack] }"
+        "{ min_dist         | 0               | specify minimal distance between points [GoodFeatureToTrack] }";
 
     CommandLineParser cmd(argc, argv, keys);
 
@@ -115,10 +117,10 @@ int main(int argc, const char* argv[])
     string fname0 = cmd.get<string>("left");
     string fname1 = cmd.get<string>("right");
     string vdofile = cmd.get<string>("video");
+    string outfile = cmd.get<string>("output");
     int points = cmd.get<int>("points");
     double minDist = cmd.get<double>("min_dist");
     bool useCPU = cmd.has("s");
-    bool useCamera = cmd.has("c");
     int inputName = cmd.get<int>("c");
 
     oclMat d_nextPts, d_status;
@@ -131,21 +133,9 @@ int main(int argc, const char* argv[])
     vector<unsigned char> status(points);
     vector<float> err;
 
-    if (frame0.empty() || frame1.empty())
-    {
-        useCamera = true;
-        defaultPicturesFail = true;
-        VideoCapture capture(inputName);
-        if (!capture.isOpened())
-        {
-            cout << "Can't load input images" << endl;
-            return -1;
-        }
-    }
-
     cout << "Points count : " << points << endl << endl;
 
-    if (useCamera)
+    if (frame0.empty() || frame1.empty())
     {
         VideoCapture capture;
         Mat frame, frameCopy;
@@ -238,10 +228,10 @@ _cleanup_:
     else
     {
 nocamera:
-        for(int i = 0; i <= LOOP_NUM;i ++) 
+        for(int i = 0; i <= LOOP_NUM; i ++)
         {
             cout << "loop" << i << endl;
-            if (i > 0) workBegin();     
+            if (i > 0) workBegin();
 
             if (useCPU)
             {
@@ -271,8 +261,8 @@ nocamera:
                 cout << getTime() / LOOP_NUM << " ms" << endl;
 
                 drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0));
-
                 imshow("PyrLK [Sparse]", frame0);
+                imwrite(outfile, frame0);
             }
         }
     }
diff --git a/samples/ocl/squares.cpp b/samples/ocl/squares.cpp
index 40d60fe2c..d31e36007 100644
--- a/samples/ocl/squares.cpp
+++ b/samples/ocl/squares.cpp
@@ -2,11 +2,11 @@
 // It loads several images sequentially and tries to find squares in
 // each image
 
-#include "opencv2/core/core.hpp"
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/ocl/ocl.hpp"
-
 #include <iostream>
 #include <math.h>
 #include <string.h>
@@ -14,23 +14,50 @@
 using namespace cv;
 using namespace std;
 
-static void help()
-{
-    cout <<
-        "\nA program using OCL module pyramid scaling, Canny, dilate functions, threshold, split; cpu contours, contour simpification and\n"
-        "memory storage (it's got it all folks) to find\n"
-        "squares in a list of images pic1-6.png\n"
-        "Returns sequence of squares detected on the image.\n"
-        "the sequence is stored in the specified memory storage\n"
-        "Call:\n"
-        "./squares\n"
-        "Using OpenCV version %s\n" << CV_VERSION << "\n" << endl;
-}
+#define ACCURACY_CHECK 1
 
+#if ACCURACY_CHECK
+// check if two vectors of vector of points are near or not
+// prior assumption is that they are in correct order
+static bool checkPoints(
+    vector< vector<Point> > set1,
+    vector< vector<Point> > set2,
+    int maxDiff = 5)
+{
+    if(set1.size() != set2.size())
+    {
+        return false;
+    }
+
+    for(vector< vector<Point> >::iterator it1 = set1.begin(), it2 = set2.begin();
+            it1 < set1.end() && it2 < set2.end(); it1 ++, it2 ++)
+    {
+        vector<Point> pts1 = *it1;
+        vector<Point> pts2 = *it2;
+
+
+        if(pts1.size() != pts2.size())
+        {
+            return false;
+        }
+        for(size_t i = 0; i < pts1.size(); i ++)
+        {
+            Point pt1 = pts1[i], pt2 = pts2[i];
+            if(std::abs(pt1.x - pt2.x) > maxDiff ||
+                    std::abs(pt1.y - pt2.y) > maxDiff)
+            {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+#endif
 
 int thresh = 50, N = 11;
 const char* wndname = "OpenCL Square Detection Demo";
 
+
 // helper function:
 // finds a cosine of angle between vectors
 // from pt0->pt1 and from pt0->pt2
@@ -43,9 +70,92 @@ static double angle( Point pt1, Point pt2, Point pt0 )
     return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
 }
 
+
 // returns sequence of squares detected on the image.
 // the sequence is stored in the specified memory storage
 static void findSquares( const Mat& image, vector<vector<Point> >& squares )
+{
+    squares.clear();
+    Mat pyr, timg, gray0(image.size(), CV_8U), gray;
+
+    // down-scale and upscale the image to filter out the noise
+    pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
+    pyrUp(pyr, timg, image.size());
+    vector<vector<Point> > contours;
+
+    // find squares in every color plane of the image
+    for( int c = 0; c < 3; c++ )
+    {
+        int ch[] = {c, 0};
+        mixChannels(&timg, 1, &gray0, 1, ch, 1);
+
+        // try several threshold levels
+        for( int l = 0; l < N; l++ )
+        {
+            // hack: use Canny instead of zero threshold level.
+            // Canny helps to catch squares with gradient shading
+            if( l == 0 )
+            {
+                // apply Canny. Take the upper threshold from slider
+                // and set the lower to 0 (which forces edges merging)
+                Canny(gray0, gray, 0, thresh, 5);
+                // dilate canny output to remove potential
+                // holes between edge segments
+                dilate(gray, gray, Mat(), Point(-1,-1));
+            }
+            else
+            {
+                // apply threshold if l!=0:
+                //     tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
+                cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY);
+            }
+
+            // find contours and store them all as a list
+            findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
+
+            vector<Point> approx;
+
+            // test each contour
+            for( size_t i = 0; i < contours.size(); i++ )
+            {
+                // approximate contour with accuracy proportional
+                // to the contour perimeter
+                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
+
+                // square contours should have 4 vertices after approximation
+                // relatively large area (to filter out noisy contours)
+                // and be convex.
+                // Note: absolute value of an area is used because
+                // area may be positive or negative - in accordance with the
+                // contour orientation
+                if( approx.size() == 4 &&
+                        fabs(contourArea(Mat(approx))) > 1000 &&
+                        isContourConvex(Mat(approx)) )
+                {
+                    double maxCosine = 0;
+
+                    for( int j = 2; j < 5; j++ )
+                    {
+                        // find the maximum cosine of the angle between joint edges
+                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
+                        maxCosine = MAX(maxCosine, cosine);
+                    }
+
+                    // if cosines of all angles are small
+                    // (all angles are ~90 degree) then write quandrange
+                    // vertices to resultant sequence
+                    if( maxCosine < 0.3 )
+                        squares.push_back(approx);
+                }
+            }
+        }
+    }
+}
+
+
+// returns sequence of squares detected on the image.
+// the sequence is stored in the specified memory storage
+static void findSquares_ocl( const Mat& image, vector<vector<Point> >& squares )
 {
     squares.clear();
 
@@ -91,7 +201,6 @@ static void findSquares( const Mat& image, vector<vector<Point> >& squares )
             findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
 
             vector<Point> approx;
-
             // test each contour
             for( size_t i = 0; i < contours.size(); i++ )
             {
@@ -106,11 +215,10 @@ static void findSquares( const Mat& image, vector<vector<Point> >& squares )
                 // area may be positive or negative - in accordance with the
                 // contour orientation
                 if( approx.size() == 4 &&
-                    fabs(contourArea(Mat(approx))) > 1000 &&
-                    isContourConvex(Mat(approx)) )
+                        fabs(contourArea(Mat(approx))) > 1000 &&
+                        isContourConvex(Mat(approx)) )
                 {
                     double maxCosine = 0;
-
                     for( int j = 2; j < 5; j++ )
                     {
                         // find the maximum cosine of the angle between joint edges
@@ -139,40 +247,93 @@ static void drawSquares( Mat& image, const vector<vector<Point> >& squares )
         int n = (int)squares[i].size();
         polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, LINE_AA);
     }
-
-    imshow(wndname, image);
 }
 
 
-int main(int /*argc*/, char** /*argv*/)
+// draw both pure-C++ and ocl square results onto a single image
+static Mat drawSquaresBoth( const Mat& image,
+                            const vector<vector<Point> >& sqsCPP,
+                            const vector<vector<Point> >& sqsOCL
+)
 {
+    Mat imgToShow(Size(image.cols * 2, image.rows), image.type());
+    Mat lImg = imgToShow(Rect(Point(0, 0), image.size()));
+    Mat rImg = imgToShow(Rect(Point(image.cols, 0), image.size()));
+    image.copyTo(lImg);
+    image.copyTo(rImg);
+    drawSquares(lImg, sqsCPP);
+    drawSquares(rImg, sqsOCL);
+    float fontScale = 0.8f;
+    Scalar white = Scalar::all(255), black = Scalar::all(0);
+
+    putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2);
+    putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2);
+    putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1);
+    putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1);
+
+    return imgToShow;
+}
+
+
+int main(int argc, char** argv)
+{
+    const char* keys =
+        "{ i | input   |                    | specify input image }"
+        "{ o | output  | squares_output.jpg | specify output save path}";
+    CommandLineParser cmd(argc, argv, keys);
+    string inputName = cmd.get<string>("i");
+    string outfile = cmd.get<string>("o");
+    if(inputName.empty())
+    {
+        cout << "Avaible options:" << endl;
+        cmd.printMessage();
+        return 0;
+    }
 
-    //ocl::setBinpath("F:/kernel_bin");
     vector<ocl::Info> info;
     CV_Assert(ocl::getDevice(info));
-
-    static const char* names[] = { "pic1.png", "pic2.png", "pic3.png",
-        "pic4.png", "pic5.png", "pic6.png", 0 };
-    help();
+    int iterations = 10;
     namedWindow( wndname, 1 );
-    vector<vector<Point> > squares;
+    vector<vector<Point> > squares_cpu, squares_ocl;
 
-    for( int i = 0; names[i] != 0; i++ )
+    Mat image = imread(inputName, 1);
+    if( image.empty() )
     {
-        Mat image = imread(names[i], 1);
-        if( image.empty() )
-        {
-            cout << "Couldn't load " << names[i] << endl;
-            continue;
-        }
-
-        findSquares(image, squares);
-        drawSquares(image, squares);
-
-        int c = waitKey();
-        if( (char)c == 27 )
-            break;
+        cout << "Couldn't load " << inputName << endl;
+        return -1;
     }
+    int j = iterations;
+    int64 t_ocl = 0, t_cpp = 0;
+    //warm-ups
+    cout << "warming up ..." << endl;
+    findSquares(image, squares_cpu);
+    findSquares_ocl(image, squares_ocl);
+
+
+#if ACCURACY_CHECK
+    cout << "Checking ocl accuracy ... " << endl;
+    cout << (checkPoints(squares_cpu, squares_ocl) ? "Pass" : "Failed") << endl;
+#endif
+    do
+    {
+        int64 t_start = cv::getTickCount();
+        findSquares(image, squares_cpu);
+        t_cpp += cv::getTickCount() - t_start;
+
+
+        t_start  = cv::getTickCount();
+        findSquares_ocl(image, squares_ocl);
+        t_ocl += cv::getTickCount() - t_start;
+        cout << "run loop: " << j << endl;
+    }
+    while(--j);
+    cout << "cpp average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl;
+    cout << "ocl average time: " << 1000.0f * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl;
+
+    Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl);
+    imshow(wndname, result);
+    imwrite(outfile, result);
+    waitKey(0);
 
     return 0;
 }
diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp
index 8cc6530d5..100e62a5b 100644
--- a/samples/ocl/stereo_match.cpp
+++ b/samples/ocl/stereo_match.cpp
@@ -12,57 +12,46 @@ using namespace cv;
 using namespace std;
 using namespace ocl;
 
-bool help_showed = false;
-
-struct Params
-{
-    Params();
-    static Params read(int argc, char** argv);
-
-    string left;
-    string right;
-
-    string method_str() const
-    {
-        switch (method)
-        {
-        case BM: return "BM";
-        case BP: return "BP";
-        case CSBP: return "CSBP";
-        }
-        return "";
-    }
-    enum {BM, BP, CSBP} method;
-    int ndisp; // Max disparity + 1
-    enum {GPU, CPU} type;
-};
-
 
 struct App
 {
-    App(const Params& p);
+    App(CommandLineParser& cmd);
     void run();
     void handleKey(char key);
     void printParams() const;
 
-    void workBegin() { work_begin = getTickCount(); }
+    void workBegin()
+    {
+        work_begin = getTickCount();
+    }
     void workEnd()
     {
         int64 d = getTickCount() - work_begin;
         double f = getTickFrequency();
         work_fps = f / d;
     }
-
+    string method_str() const
+    {
+        switch (method)
+        {
+        case BM:
+            return "BM";
+        case BP:
+            return "BP";
+        case CSBP:
+            return "CSBP";
+        }
+        return "";
+    }
     string text() const
     {
         stringstream ss;
-        ss << "(" << p.method_str() << ") FPS: " << setiosflags(ios::left)
-            << setprecision(4) << work_fps;
+        ss << "(" << method_str() << ") FPS: " << setiosflags(ios::left)
+           << setprecision(4) << work_fps;
         return ss.str();
     }
 private:
-    Params p;
-    bool running;
+    bool running, write_once;
 
     Mat left_src, right_src;
     Mat left, right;
@@ -74,42 +63,45 @@ private:
 
     int64 work_begin;
     double work_fps;
-};
 
-static void printHelp()
-{
-    cout << "Usage: stereo_match_gpu\n"
-        << "\t--left <left_view> --right <right_view> # must be rectified\n"
-        << "\t--method <stereo_match_method> # BM | BP | CSBP\n"
-        << "\t--ndisp <number> # number of disparity levels\n"
-        << "\t--type <device_type> # cpu | CPU | gpu | GPU\n";
-    help_showed = true;
-}
+    string l_img, r_img;
+    string out_img;
+    enum {BM, BP, CSBP} method;
+    int ndisp; // Max disparity + 1
+    enum {GPU, CPU} type;
+};
 
 int main(int argc, char** argv)
 {
+    const char* keys =
+        "{ h | help     | false                     | print help message }"
+        "{ l | left     |                           | specify left image }"
+        "{ r | right    |                           | specify right image }"
+        "{ m | method   | BM                        | specify match method(BM/BP/CSBP) }"
+        "{ n | ndisp    | 64                        |  specify number of disparity levels }"
+        "{ s | cpu_ocl  | false                     | use cpu or gpu as ocl device to process the image }"
+        "{ o | output   | stereo_match_output.jpg   | specify output path when input is images}";
+    CommandLineParser cmd(argc, argv, keys);
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Avaible options:" << endl;
+        cmd.printMessage();
+        return 0;
+    }
     try
     {
-        if (argc < 2)
-        {
-            printHelp();
-            return 1;
-        }
+        App app(cmd);
+        int flag = CVCL_DEVICE_TYPE_GPU;
+        if(cmd.get<bool>("s") == true)
+            flag = CVCL_DEVICE_TYPE_CPU;
 
-        Params args = Params::read(argc, argv);
-        if (help_showed)
-            return -1;
-
-        int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU };
         vector<Info> info;
-
-        if(getDevice(info, flags[args.type]) == 0)
+        if(getDevice(info, flag) == 0)
         {
             throw runtime_error("Error: Did not find a valid OpenCL device!");
         }
         cout << "Device name:" << info[0].DeviceName[0] << endl;
 
-        App app(args);
         app.run();
     }
     catch (const exception& e)
@@ -119,77 +111,41 @@ int main(int argc, char** argv)
     return 0;
 }
 
-
-Params::Params()
-{
-    method = BM;
-    ndisp = 64;
-    type = GPU;
-}
-
-
-Params Params::read(int argc, char** argv)
-{
-    Params p;
-
-    for (int i = 1; i < argc; i++)
-    {
-        if (string(argv[i]) == "--left") p.left = argv[++i];
-        else if (string(argv[i]) == "--right") p.right = argv[++i];
-        else if (string(argv[i]) == "--method")
-        {
-            if (string(argv[i + 1]) == "BM") p.method = BM;
-            else if (string(argv[i + 1]) == "BP") p.method = BP;
-            else if (string(argv[i + 1]) == "CSBP") p.method = CSBP;
-            else throw runtime_error("unknown stereo match method: " + string(argv[i + 1]));
-            i++;
-        }
-        else if (string(argv[i]) == "--ndisp") p.ndisp = atoi(argv[++i]);
-        else if (string(argv[i]) == "--type")
-        {
-            string t(argv[++i]);
-            if (t == "cpu" || t == "CPU")
-            {
-                p.type = CPU;
-            } 
-            else if (t == "gpu" || t == "GPU")
-            {
-                p.type = GPU;
-            }
-            else throw runtime_error("unknown device type: " + t);
-        }
-        else if (string(argv[i]) == "--help") printHelp();
-        else throw runtime_error("unknown key: " + string(argv[i]));
-    }
-
-    return p;
-}
-
-
-App::App(const Params& params)
-    : p(params), running(false)
+App::App(CommandLineParser& cmd)
+    : running(false),method(BM)
 {
     cout << "stereo_match_ocl sample\n";
     cout << "\nControls:\n"
-        << "\tesc - exit\n"
-        << "\tp - print current parameters\n"
-        << "\tg - convert source images into gray\n"
-        << "\tm - change stereo match method\n"
-        << "\ts - change Sobel prefiltering flag (for BM only)\n"
-        << "\t1/q - increase/decrease maximum disparity\n"
-        << "\t2/w - increase/decrease window size (for BM only)\n"
-        << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
-        << "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
+         << "\tesc - exit\n"
+         << "\to - save output image once\n"
+         << "\tp - print current parameters\n"
+         << "\tg - convert source images into gray\n"
+         << "\tm - change stereo match method\n"
+         << "\ts - change Sobel prefiltering flag (for BM only)\n"
+         << "\t1/q - increase/decrease maximum disparity\n"
+         << "\t2/w - increase/decrease window size (for BM only)\n"
+         << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
+         << "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
+    l_img = cmd.get<string>("l");
+    r_img = cmd.get<string>("r");
+    string mstr = cmd.get<string>("m");
+    if(mstr == "BM") method = BM;
+    else if(mstr == "BP") method = BP;
+    else if(mstr == "CSBP") method = CSBP;
+    else cout << "unknown method!\n";
+    ndisp = cmd.get<int>("n");
+    out_img = cmd.get<string>("o");
+    write_once = false;
 }
 
 
 void App::run()
 {
     // Load images
-    left_src = imread(p.left);
-    right_src = imread(p.right);
-    if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\"");
-    if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\"");
+    left_src = imread(l_img);
+    right_src = imread(r_img);
+    if (left_src.empty()) throw runtime_error("can't open file \"" + l_img + "\"");
+    if (right_src.empty()) throw runtime_error("can't open file \"" + r_img + "\"");
 
     cvtColor(left_src, left, COLOR_BGR2GRAY);
     cvtColor(right_src, right, COLOR_BGR2GRAY);
@@ -201,9 +157,9 @@ void App::run()
     imshow("right", right);
 
     // Set common parameters
-    bm.ndisp = p.ndisp;
-    bp.ndisp = p.ndisp;
-    csbp.ndisp = p.ndisp;
+    bm.ndisp = ndisp;
+    bp.ndisp = ndisp;
+    csbp.ndisp = ndisp;
 
     cout << endl;
     printParams();
@@ -211,14 +167,13 @@ void App::run()
     running = true;
     while (running)
     {
-
         // Prepare disparity map of specified type
         Mat disp;
         oclMat d_disp;
         workBegin();
-        switch (p.method)
+        switch (method)
         {
-        case Params::BM:
+        case BM:
             if (d_left.channels() > 1 || d_right.channels() > 1)
             {
                 cout << "BM doesn't support color images\n";
@@ -232,25 +187,29 @@ void App::run()
             }
             bm(d_left, d_right, d_disp);
             break;
-        case Params::BP:
+        case BP:
             bp(d_left, d_right, d_disp);
             break;
-        case Params::CSBP:
+        case CSBP:
             csbp(d_left, d_right, d_disp);
             break;
         }
-        ocl::finish();
-        workEnd();
 
         // Show results
         d_disp.download(disp);
-        if (p.method != Params::BM)
+        workEnd();
+
+        if (method != BM)
         {
             disp.convertTo(disp, 0);
         }
         putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
         imshow("disparity", disp);
-
+        if(write_once)
+        {
+            imwrite(out_img, disp);
+            write_once = false;
+        }
         handleKey((char)waitKey(3));
     }
 }
@@ -261,19 +220,19 @@ void App::printParams() const
     cout << "--- Parameters ---\n";
     cout << "image_size: (" << left.cols << ", " << left.rows << ")\n";
     cout << "image_channels: " << left.channels() << endl;
-    cout << "method: " << p.method_str() << endl
-        << "ndisp: " << p.ndisp << endl;
-    switch (p.method)
+    cout << "method: " << method_str() << endl
+         << "ndisp: " << ndisp << endl;
+    switch (method)
     {
-    case Params::BM:
+    case BM:
         cout << "win_size: " << bm.winSize << endl;
         cout << "prefilter_sobel: " << bm.preset << endl;
         break;
-    case Params::BP:
+    case BP:
         cout << "iter_count: " << bp.iters << endl;
         cout << "level_count: " << bp.levels << endl;
         break;
-    case Params::CSBP:
+    case CSBP:
         cout << "iter_count: " << csbp.iters << endl;
         cout << "level_count: " << csbp.levels << endl;
         break;
@@ -289,11 +248,13 @@ void App::handleKey(char key)
     case 27:
         running = false;
         break;
-    case 'p': case 'P':
+    case 'p':
+    case 'P':
         printParams();
         break;
-    case 'g': case 'G':
-        if (left.channels() == 1 && p.method != Params::BM)
+    case 'g':
+    case 'G':
+        if (left.channels() == 1 && method != BM)
         {
             left = left_src;
             right = right_src;
@@ -309,23 +270,25 @@ void App::handleKey(char key)
         imshow("left", left);
         imshow("right", right);
         break;
-    case 'm': case 'M':
-        switch (p.method)
+    case 'm':
+    case 'M':
+        switch (method)
         {
-        case Params::BM:
-            p.method = Params::BP;
+        case BM:
+            method = BP;
             break;
-        case Params::BP:
-            p.method = Params::CSBP;
+        case BP:
+            method = CSBP;
             break;
-        case Params::CSBP:
-            p.method = Params::BM;
+        case CSBP:
+            method = BM;
             break;
         }
-        cout << "method: " << p.method_str() << endl;
+        cout << "method: " << method_str() << endl;
         break;
-    case 's': case 'S':
-        if (p.method == Params::BM)
+    case 's':
+    case 'S':
+        if (method == BM)
         {
             switch (bm.preset)
             {
@@ -340,82 +303,88 @@ void App::handleKey(char key)
         }
         break;
     case '1':
-        p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8;
-        cout << "ndisp: " << p.ndisp << endl;
-        bm.ndisp = p.ndisp;
-        bp.ndisp = p.ndisp;
-        csbp.ndisp = p.ndisp;
+        ndisp == 1 ? ndisp = 8 : ndisp += 8;
+        cout << "ndisp: " << ndisp << endl;
+        bm.ndisp = ndisp;
+        bp.ndisp = ndisp;
+        csbp.ndisp = ndisp;
         break;
-    case 'q': case 'Q':
-        p.ndisp = max(p.ndisp - 8, 1);
-        cout << "ndisp: " << p.ndisp << endl;
-        bm.ndisp = p.ndisp;
-        bp.ndisp = p.ndisp;
-        csbp.ndisp = p.ndisp;
+    case 'q':
+    case 'Q':
+        ndisp = max(ndisp - 8, 1);
+        cout << "ndisp: " << ndisp << endl;
+        bm.ndisp = ndisp;
+        bp.ndisp = ndisp;
+        csbp.ndisp = ndisp;
         break;
     case '2':
-        if (p.method == Params::BM)
+        if (method == BM)
         {
             bm.winSize = min(bm.winSize + 1, 51);
             cout << "win_size: " << bm.winSize << endl;
         }
         break;
-    case 'w': case 'W':
-        if (p.method == Params::BM)
+    case 'w':
+    case 'W':
+        if (method == BM)
         {
             bm.winSize = max(bm.winSize - 1, 2);
             cout << "win_size: " << bm.winSize << endl;
         }
         break;
     case '3':
-        if (p.method == Params::BP)
+        if (method == BP)
         {
             bp.iters += 1;
             cout << "iter_count: " << bp.iters << endl;
         }
-        else if (p.method == Params::CSBP)
+        else if (method == CSBP)
         {
             csbp.iters += 1;
             cout << "iter_count: " << csbp.iters << endl;
         }
         break;
-    case 'e': case 'E':
-        if (p.method == Params::BP)
+    case 'e':
+    case 'E':
+        if (method == BP)
         {
             bp.iters = max(bp.iters - 1, 1);
             cout << "iter_count: " << bp.iters << endl;
         }
-        else if (p.method == Params::CSBP)
+        else if (method == CSBP)
         {
             csbp.iters = max(csbp.iters - 1, 1);
             cout << "iter_count: " << csbp.iters << endl;
         }
         break;
     case '4':
-        if (p.method == Params::BP)
+        if (method == BP)
         {
             bp.levels += 1;
             cout << "level_count: " << bp.levels << endl;
         }
-        else if (p.method == Params::CSBP)
+        else if (method == CSBP)
         {
             csbp.levels += 1;
             cout << "level_count: " << csbp.levels << endl;
         }
         break;
-    case 'r': case 'R':
-        if (p.method == Params::BP)
+    case 'r':
+    case 'R':
+        if (method == BP)
         {
             bp.levels = max(bp.levels - 1, 1);
             cout << "level_count: " << bp.levels << endl;
         }
-        else if (p.method == Params::CSBP)
+        else if (method == CSBP)
         {
             csbp.levels = max(csbp.levels - 1, 1);
             cout << "level_count: " << csbp.levels << endl;
         }
         break;
+    case 'o':
+    case 'O':
+        write_once = true;
+        break;
     }
 }
-
-
diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp
index e938a77c2..29619808a 100644
--- a/samples/ocl/surf_matcher.cpp
+++ b/samples/ocl/surf_matcher.cpp
@@ -1,48 +1,3 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
 #include <iostream>
 #include <stdio.h>
 #include "opencv2/core/core.hpp"
@@ -62,14 +17,6 @@ const float GOOD_PORTION = 0.15f;
 
 namespace
 {
-void help();
-
-void help()
-{
-    std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl;
-    std::cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2> [-c]" << std::endl;
-    std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl;
-}
 
 int64 work_begin = 0;
 int64 work_end = 0;
@@ -82,7 +29,8 @@ void workEnd()
 {
     work_end = getTickCount() - work_begin;
 }
-double getTime(){
+double getTime()
+{
     return work_end /((double)getTickFrequency() * 1000.);
 }
 
@@ -125,7 +73,7 @@ Mat drawGoodMatches(
     std::sort(matches.begin(), matches.end());
     std::vector< DMatch > good_matches;
     double minDist = matches.front().distance,
-        maxDist = matches.back().distance;
+           maxDist = matches.back().distance;
 
     const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION));
     for( int i = 0; i < ptsPairs; i++ )
@@ -140,8 +88,8 @@ Mat drawGoodMatches(
     // drawing the results
     Mat img_matches;
     drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2,
-        good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
-        std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
+                 good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
+                 std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
 
     //-- Localize the object
     std::vector<Point2f> obj;
@@ -155,8 +103,10 @@ Mat drawGoodMatches(
     }
     //-- Get the corners from the image_1 ( the object to be "detected" )
     std::vector<Point2f> obj_corners(4);
-    obj_corners[0] = Point(0,0); obj_corners[1] = Point( cpu_img1.cols, 0 );
-    obj_corners[2] = Point( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = Point( 0, cpu_img1.rows );
+    obj_corners[0] = Point(0,0);
+    obj_corners[1] = Point( cpu_img1.cols, 0 );
+    obj_corners[2] = Point( cpu_img1.cols, cpu_img1.rows );
+    obj_corners[3] = Point( 0, cpu_img1.rows );
     std::vector<Point2f> scene_corners(4);
 
     Mat H = findHomography( obj, scene, RANSAC );
@@ -166,17 +116,17 @@ Mat drawGoodMatches(
 
     //-- Draw lines between the corners (the mapped object in the scene - image_2 )
     line( img_matches,
-        scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
-        Scalar( 0, 255, 0), 2, LINE_AA );
+          scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
+          Scalar( 0, 255, 0), 2, LINE_AA );
     line( img_matches,
-        scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
-        Scalar( 0, 255, 0), 2, LINE_AA );
+          scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
+          Scalar( 0, 255, 0), 2, LINE_AA );
     line( img_matches,
-        scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
-        Scalar( 0, 255, 0), 2, LINE_AA );
+          scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
+          Scalar( 0, 255, 0), 2, LINE_AA );
     line( img_matches,
-        scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
-        Scalar( 0, 255, 0), 2, LINE_AA );
+          scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
+          Scalar( 0, 255, 0), 2, LINE_AA );
     return img_matches;
 }
 
@@ -186,6 +136,21 @@ Mat drawGoodMatches(
 // use cpu findHomography interface to calculate the transformation matrix
 int main(int argc, char* argv[])
 {
+    const char* keys =
+        "{ help h    | false           | print help message  }"
+        "{ left l    |                 | specify left image  }"
+        "{ right r   |                 | specify right image }"
+        "{ output o  | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }"
+        "{ use_cpu c | false           | use CPU algorithms  }"
+        "{ use_all a | false           | use both CPU and GPU algorithms}";
+    CommandLineParser cmd(argc, argv, keys);
+    if (cmd.get<bool>("help"))
+    {
+        std::cout << "Avaible options:" << std::endl;
+        cmd.printMessage();
+        return 0;
+    }
+
     std::vector<cv::ocl::Info> info;
     if(cv::ocl::getDevice(info) == 0)
     {
@@ -196,54 +161,38 @@ int main(int argc, char* argv[])
 
     Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
     oclMat img1, img2;
-    bool useCPU = false;
+    bool useCPU = cmd.get<bool>("c");
     bool useGPU = false;
-    bool useALL = false;
+    bool useALL = cmd.get<bool>("a");
 
-    for (int i = 1; i < argc; ++i)
+    std::string outpath = cmd.get<std::string>("o");
+
+    cpu_img1 = imread(cmd.get<std::string>("l"));
+    CV_Assert(!cpu_img1.empty());
+    cvtColor(cpu_img1, cpu_img1_grey, COLOR_BGR2GRAY);
+    img1 = cpu_img1_grey;
+
+    cpu_img2 = imread(cmd.get<std::string>("r"));
+    CV_Assert(!cpu_img2.empty());
+    cvtColor(cpu_img2, cpu_img2_grey, COLOR_BGR2GRAY);
+    img2 = cpu_img2_grey;
+
+    if(useALL)
     {
-        if (String(argv[i]) == "--left")
-        {
-            cpu_img1 = imread(argv[++i]);
-            CV_Assert(!cpu_img1.empty());
-            cvtColor(cpu_img1, cpu_img1_grey, COLOR_BGR2GRAY);
-            img1 = cpu_img1_grey;
-        }
-        else if (String(argv[i]) == "--right")
-        {
-            cpu_img2 = imread(argv[++i]);
-            CV_Assert(!cpu_img2.empty());
-            cvtColor(cpu_img2, cpu_img2_grey, COLOR_BGR2GRAY);
-            img2 = cpu_img2_grey;
-        }
-        else if (String(argv[i]) == "-c")
-        {
-            useCPU = true;
-            useGPU = false;
-            useALL = false;
-        }else if(String(argv[i]) == "-g")
-        {
-            useGPU = true;
-            useCPU = false;
-            useALL = false;
-        }else if(String(argv[i]) == "-a")
-        {
-            useALL = true;
-            useCPU = false;
-            useGPU = false;
-        }
-        else if (String(argv[i]) == "--help")
-        {
-            help();
-            return -1;
-        }
+        useCPU = false;
+        useGPU = false;
     }
+    else if(useCPU==false && useALL==false)
+    {
+        useGPU = true;
+    }
+
     if(!useCPU)
     {
         std::cout
-            << "Device name:"
-            << info[0].DeviceName[0]
-        << std::endl;
+                << "Device name:"
+                << info[0].DeviceName[0]
+                << std::endl;
     }
     double surf_time = 0.;
 
@@ -299,7 +248,8 @@ int main(int argc, char* argv[])
 
         surf_time = getTime();
         std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
-    }else
+    }
+    else
     {
         //cpu runs
         for (int i = 0; i <= LOOP_NUM; i++)
@@ -354,7 +304,7 @@ int main(int argc, char* argv[])
             for(size_t i = 0; i < cpu_corner.size(); i++)
             {
                 if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10)
-                    ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
+                        ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
                 {
                     std::cout<<"Failed\n";
                     result = false;
@@ -372,12 +322,15 @@ int main(int argc, char* argv[])
     {
         namedWindow("cpu surf matches", 0);
         imshow("cpu surf matches", img_matches);
+        imwrite(outpath, img_matches);
     }
     else if(useGPU)
     {
         namedWindow("ocl surf matches", 0);
         imshow("ocl surf matches", img_matches);
-    }else
+        imwrite(outpath, img_matches);
+    }
+    else
     {
         namedWindow("cpu surf matches", 0);
         imshow("cpu surf matches", img_matches);
diff --git a/samples/ocl/tvl1_optical_flow.cpp b/samples/ocl/tvl1_optical_flow.cpp
new file mode 100644
index 000000000..478979d95
--- /dev/null
+++ b/samples/ocl/tvl1_optical_flow.cpp
@@ -0,0 +1,264 @@
+#include <iostream>
+#include <vector>
+#include <iomanip>
+
+#include "opencv2/core/utility.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/video/video.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+typedef unsigned char uchar;
+#define LOOP_NUM 10
+int64 work_begin = 0;
+int64 work_end = 0;
+
+static void workBegin()
+{
+    work_begin = getTickCount();
+}
+static void workEnd()
+{
+    work_end += (getTickCount() - work_begin);
+}
+static double getTime()
+{
+    return work_end * 1000. / getTickFrequency();
+}
+
+template <typename T> inline T clamp (T x, T a, T b)
+{
+    return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a));
+}
+
+template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
+{
+    x = clamp(x, a, b);
+    return c + (d - c) * (x - a) / (b - a);
+}
+
+static void getFlowField(const Mat& u, const Mat& v, Mat& flowField)
+{
+    float maxDisplacement = 1.0f;
+
+    for (int i = 0; i < u.rows; ++i)
+    {
+        const float* ptr_u = u.ptr<float>(i);
+        const float* ptr_v = v.ptr<float>(i);
+
+        for (int j = 0; j < u.cols; ++j)
+        {
+            float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j]));
+
+            if (d > maxDisplacement)
+                maxDisplacement = d;
+        }
+    }
+
+    flowField.create(u.size(), CV_8UC4);
+
+    for (int i = 0; i < flowField.rows; ++i)
+    {
+        const float* ptr_u = u.ptr<float>(i);
+        const float* ptr_v = v.ptr<float>(i);
+
+
+        Vec4b* row = flowField.ptr<Vec4b>(i);
+
+        for (int j = 0; j < flowField.cols; ++j)
+        {
+            row[j][0] = 0;
+            row[j][1] = static_cast<unsigned char> (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
+            row[j][2] = static_cast<unsigned char> (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
+            row[j][3] = 255;
+        }
+    }
+}
+
+
+int main(int argc, const char* argv[])
+{
+    static std::vector<Info> ocl_info;
+    ocl::getDevice(ocl_info);
+    //if you want to use undefault device, set it here
+    setDevice(ocl_info[0]);
+
+    //set this to save kernel compile time from second time you run
+    ocl::setBinpath("./");
+    const char* keys =
+        "{ h   | help       | false           | print help message }"
+        "{ l   | left       |                 | specify left image }"
+        "{ r   | right      |                 | specify right image }"
+        "{ o   | output     | tvl1_output.jpg | specify output save path }"
+        "{ c   | camera     | 0               | enable camera capturing }"
+        "{ s   | use_cpu    | false           | use cpu or gpu to process the image }"
+        "{ v   | video      |                 | use video as input }";
+
+    CommandLineParser cmd(argc, argv, keys);
+
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Usage: pyrlk_optical_flow [options]" << endl;
+        cout << "Avaible options:" << endl;
+        cmd.printMessage();
+        return 0;
+    }
+
+    bool defaultPicturesFail = false;
+    string fname0 = cmd.get<string>("l");
+    string fname1 = cmd.get<string>("r");
+    string vdofile = cmd.get<string>("v");
+    string outpath = cmd.get<string>("o");
+    bool useCPU = cmd.get<bool>("s");
+    bool useCamera = cmd.get<bool>("c");
+    int inputName = cmd.get<int>("c");
+
+    Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
+    Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
+    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
+    cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
+
+
+    Mat flow, show_flow;
+    Mat flow_vec[2];
+    if (frame0.empty() || frame1.empty())
+    {
+        useCamera = true;
+        defaultPicturesFail = true;
+        VideoCapture capture( inputName );
+        if (!capture.isOpened())
+        {
+            cout << "Can't load input images" << endl;
+            return -1;
+        }
+    }
+
+
+    if (useCamera)
+    {
+        VideoCapture capture;
+        Mat frame, frameCopy;
+        Mat frame0Gray, frame1Gray;
+        Mat ptr0, ptr1;
+
+        if(vdofile == "")
+            capture.open( inputName );
+        else
+            capture.open(vdofile.c_str());
+
+        int c = inputName ;
+        if(!capture.isOpened())
+        {
+            if(vdofile == "")
+                cout << "Capture from CAM " << c << " didn't work" << endl;
+            else
+                cout << "Capture from file " << vdofile << " failed" <<endl;
+            if (defaultPicturesFail)
+            {
+                return -1;
+            }
+            goto nocamera;
+        }
+
+        cout << "In capture ..." << endl;
+        for(int i = 0;; i++)
+        {
+            if( !capture.read(frame) )
+                break;
+
+            if (i == 0)
+            {
+                frame.copyTo( frame0 );
+                cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+            }
+            else
+            {
+                if (i%2 == 1)
+                {
+                    frame.copyTo(frame1);
+                    cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame0Gray;
+                    ptr1 = frame1Gray;
+                }
+                else
+                {
+                    frame.copyTo(frame0);
+                    cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame1Gray;
+                    ptr1 = frame0Gray;
+                }
+
+                if (useCPU)
+                {
+                    alg->calc(ptr0, ptr1, flow);
+                    split(flow, flow_vec);
+                }
+                else
+                {
+                    oclMat d_flowx, d_flowy;
+                    d_alg(oclMat(ptr0), oclMat(ptr1), d_flowx, d_flowy);
+                    d_flowx.download(flow_vec[0]);
+                    d_flowy.download(flow_vec[1]);
+                }
+                if (i%2 == 1)
+                    frame1.copyTo(frameCopy);
+                else
+                    frame0.copyTo(frameCopy);
+                getFlowField(flow_vec[0], flow_vec[1], show_flow);
+                imshow("PyrLK [Sparse]", show_flow);
+            }
+
+            if( waitKey( 10 ) >= 0 )
+                goto _cleanup_;
+        }
+
+        waitKey(0);
+
+_cleanup_:
+        capture.release();
+    }
+    else
+    {
+nocamera:
+        oclMat d_flowx, d_flowy;
+        for(int i = 0; i <= LOOP_NUM; i ++)
+        {
+            cout << "loop" << i << endl;
+
+            if (i > 0) workBegin();
+            if (useCPU)
+            {
+                alg->calc(frame0, frame1, flow);
+                split(flow, flow_vec);
+            }
+            else
+            {
+                d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
+                d_flowx.download(flow_vec[0]);
+                d_flowy.download(flow_vec[1]);
+            }
+            if (i > 0 && i <= LOOP_NUM)
+                workEnd();
+
+            if (i == LOOP_NUM)
+            {
+                if (useCPU)
+                    cout << "average CPU time (noCamera) : ";
+                else
+                    cout << "average GPU time (noCamera) : ";
+                cout << getTime() / LOOP_NUM << " ms" << endl;
+
+                getFlowField(flow_vec[0], flow_vec[1], show_flow);
+                imshow("PyrLK [Sparse]", show_flow);
+                imwrite(outpath, show_flow);
+            }
+        }
+    }
+
+    waitKey();
+
+    return 0;
+}
diff --git a/samples/python2/asift.py b/samples/python2/asift.py
index bdcd3c989..ae044d527 100755
--- a/samples/python2/asift.py
+++ b/samples/python2/asift.py
@@ -119,19 +119,19 @@ if __name__ == '__main__':
     img1 = cv2.imread(fn1, 0)
     img2 = cv2.imread(fn2, 0)
     detector, matcher = init_feature(feature_name)
-    
+
     if img1 is None:
         print 'Failed to load fn1:', fn1
         sys.exit(1)
-        
+
     if img2 is None:
         print 'Failed to load fn2:', fn2
         sys.exit(1)
-    
+
     if detector is None:
         print 'unknown feature:', feature_name
         sys.exit(1)
-        
+
     print 'using', feature_name
 
     pool=ThreadPool(processes = cv2.getNumberOfCPUs())
@@ -158,4 +158,3 @@ if __name__ == '__main__':
     match_and_draw('affine find_obj')
     cv2.waitKey()
     cv2.destroyAllWindows()
-
diff --git a/samples/python2/calibrate.py b/samples/python2/calibrate.py
index e23cc323c..2c759ff97 100755
--- a/samples/python2/calibrate.py
+++ b/samples/python2/calibrate.py
@@ -70,4 +70,3 @@ if __name__ == '__main__':
     print "camera matrix:\n", camera_matrix
     print "distortion coefficients: ", dist_coefs.ravel()
     cv2.destroyAllWindows()
-
diff --git a/samples/python2/camshift.py b/samples/python2/camshift.py
index 910fd82ad..6e9402095 100755
--- a/samples/python2/camshift.py
+++ b/samples/python2/camshift.py
@@ -102,7 +102,7 @@ class App(object):
                     vis[:] = prob[...,np.newaxis]
                 try:
                     cv2.ellipse(vis, track_box, (0, 0, 255), 2)
-                except: 
+                except:
                     print track_box
 
             cv2.imshow('camshift', vis)
@@ -119,8 +119,7 @@ if __name__ == '__main__':
     import sys
     try:
         video_src = sys.argv[1]
-    except: 
+    except:
         video_src = 0
     print __doc__
     App(video_src).run()
-
diff --git a/samples/python2/coherence.py b/samples/python2/coherence.py
index 2db26e63d..92122e876 100755
--- a/samples/python2/coherence.py
+++ b/samples/python2/coherence.py
@@ -40,9 +40,9 @@ def coherence_filter(img, sigma = 11, str_sigma = 11, blend = 0.5, iter_n = 4):
 
 if __name__ == '__main__':
     import sys
-    try: 
+    try:
         fn = sys.argv[1]
-    except: 
+    except:
         fn = '../cpp/baboon.jpg'
 
     src = cv2.imread(fn)
diff --git a/samples/python2/common.py b/samples/python2/common.py
index 5b13ca5c4..e418b703c 100755
--- a/samples/python2/common.py
+++ b/samples/python2/common.py
@@ -217,4 +217,3 @@ def draw_keypoints(vis, keypoints, color = (0, 255, 255)):
     for kp in keypoints:
             x, y = kp.pt
             cv2.circle(vis, (int(x), int(y)), 2, color)
-
diff --git a/samples/python2/deconvolution.py b/samples/python2/deconvolution.py
index 1cf5fb41e..abc8dc181 100755
--- a/samples/python2/deconvolution.py
+++ b/samples/python2/deconvolution.py
@@ -125,4 +125,3 @@ if __name__ == '__main__':
         if ch == ord(' '):
             defocus = not defocus
             update(None)
-
diff --git a/samples/python2/demo.py b/samples/python2/demo.py
index c69de825b..03d624ddc 100755
--- a/samples/python2/demo.py
+++ b/samples/python2/demo.py
@@ -141,7 +141,7 @@ class App:
         count = tk.IntVar()
         while True:
             match_index = text.search(pattern, 'matchPos', count=count, regexp=regexp, stopindex='end')
-            if not match_index: 
+            if not match_index:
                 break
             end_index = text.index( "%s+%sc" % (match_index, count.get()) )
             text.mark_set('matchPos', end_index)
@@ -161,4 +161,3 @@ class App:
 
 if __name__ == '__main__':
     App().run()
-
diff --git a/samples/python2/distrans.py b/samples/python2/distrans.py
index 8e6fff238..dc2a137c5 100755
--- a/samples/python2/distrans.py
+++ b/samples/python2/distrans.py
@@ -67,4 +67,3 @@ if __name__ == '__main__':
         if need_update:
             update()
     cv2.destroyAllWindows()
-
diff --git a/samples/python2/edge.py b/samples/python2/edge.py
index 2cc1631a2..bd0c8bde7 100755
--- a/samples/python2/edge.py
+++ b/samples/python2/edge.py
@@ -49,4 +49,3 @@ if __name__ == '__main__':
         if ch == 27:
             break
     cv2.destroyAllWindows()
-
diff --git a/samples/python2/facedetect.py b/samples/python2/facedetect.py
index a1b60d0a8..ab82f9a9f 100755
--- a/samples/python2/facedetect.py
+++ b/samples/python2/facedetect.py
@@ -62,4 +62,3 @@ if __name__ == '__main__':
         if 0xFF & cv2.waitKey(5) == 27:
             break
     cv2.destroyAllWindows()
-
diff --git a/samples/python2/facerec_demo.py b/samples/python2/facerec_demo.py
index 1b0adcc21..9eeb04e0b 100755
--- a/samples/python2/facerec_demo.py
+++ b/samples/python2/facerec_demo.py
@@ -31,6 +31,11 @@
 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
+# ------------------------------------------------------------------------------------------------
+# Note:
+# When using the FaceRecognizer interface in combination with Python, please stick to Python 2.
+# Some underlying scripts like create_csv will not work in other versions, like Python 3.
+# ------------------------------------------------------------------------------------------------
 
 import os
 import sys
diff --git a/samples/python2/find_obj.py b/samples/python2/find_obj.py
index ccab39214..908da68fe 100755
--- a/samples/python2/find_obj.py
+++ b/samples/python2/find_obj.py
@@ -143,15 +143,15 @@ if __name__ == '__main__':
     if img1 is None:
         print 'Failed to load fn1:', fn1
         sys.exit(1)
-        
+
     if img2 is None:
         print 'Failed to load fn2:', fn2
         sys.exit(1)
-    
+
     if detector is None:
         print 'unknown feature:', feature_name
         sys.exit(1)
-    
+
     print 'using', feature_name
 
     kp1, desc1 = detector.detectAndCompute(img1, None)
diff --git a/samples/python2/gabor_threads.py b/samples/python2/gabor_threads.py
index 1f56a9beb..6d10ffdb4 100755
--- a/samples/python2/gabor_threads.py
+++ b/samples/python2/gabor_threads.py
@@ -51,14 +51,14 @@ if __name__ == '__main__':
     print __doc__
     try:
         img_fn = sys.argv[1]
-    except: 
+    except:
         img_fn = '../cpp/baboon.jpg'
 
     img = cv2.imread(img_fn)
     if img is None:
         print 'Failed to load image file:', img_fn
         sys.exit(1)
-    
+
     filters = build_filters()
 
     with Timer('running single-threaded'):
diff --git a/samples/python2/grabcut.py b/samples/python2/grabcut.py
index 9fc1280ac..1d5b823dd 100644
--- a/samples/python2/grabcut.py
+++ b/samples/python2/grabcut.py
@@ -8,12 +8,12 @@ This sample shows interactive image segmentation using grabcut algorithm.
 USAGE :
     python grabcut.py <filename>
 
-README FIRST:    
+README FIRST:
     Two windows will show up, one for input and one for output.
-    
-    At first, in input window, draw a rectangle around the object using 
+
+    At first, in input window, draw a rectangle around the object using
 mouse right button. Then press 'n' to segment the object (once or a few times)
-For any finer touch-ups, you can press any of the keys below and draw lines on 
+For any finer touch-ups, you can press any of the keys below and draw lines on
 the areas you want. Then again press 'n' for updating the output.
 
 Key '0' - To select areas of sure background
@@ -53,7 +53,7 @@ thickness = 3           # brush thickness
 
 def onmouse(event,x,y,flags,param):
     global img,img2,drawing,value,mask,rectangle,rect,rect_or_mask,ix,iy,rect_over
-    
+
     # Draw Rectangle
     if event == cv2.EVENT_RBUTTONDOWN:
         rectangle = True
@@ -73,9 +73,9 @@ def onmouse(event,x,y,flags,param):
         rect = (ix,iy,abs(ix-x),abs(iy-y))
         rect_or_mask = 0
         print " Now press the key 'n' a few times until no further change \n"
-        
+
     # draw touchup curves
-    
+
     if event == cv2.EVENT_LBUTTONDOWN:
         if rect_over == False:
             print "first draw rectangle \n"
@@ -94,7 +94,7 @@ def onmouse(event,x,y,flags,param):
             drawing = False
             cv2.circle(img,(x,y),thickness,value['color'],-1)
             cv2.circle(mask,(x,y),thickness,value['val'],-1)
-        
+
 # print documentation
 print __doc__
 
@@ -125,7 +125,7 @@ while(1):
     cv2.imshow('output',output)
     cv2.imshow('input',img)
     k = 0xFF & cv2.waitKey(1)
-    
+
     # key bindings
     if k == 27:         # esc to exit
         break
@@ -147,11 +147,11 @@ while(1):
     elif k == ord('r'): # reset everything
         print "resetting \n"
         rect = (0,0,1,1)
-        drawing = False         
-        rectangle = False       
-        rect_or_mask = 100 
-        rect_over = False     
-        value = DRAW_FG         
+        drawing = False
+        rectangle = False
+        rect_or_mask = 100
+        rect_over = False
+        value = DRAW_FG
         img = img2.copy()
         mask = np.zeros(img.shape[:2],dtype = np.uint8) # mask initialized to PR_BG
         output = np.zeros(img.shape,np.uint8)           # output image to be shown
@@ -160,15 +160,15 @@ while(1):
         and again press 'n' \n"""
         if (rect_or_mask == 0):         # grabcut with rect
             bgdmodel = np.zeros((1,65),np.float64)
-            fgdmodel = np.zeros((1,65),np.float64)    
+            fgdmodel = np.zeros((1,65),np.float64)
             cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_RECT)
             rect_or_mask = 1
         elif rect_or_mask == 1:         # grabcut with mask
             bgdmodel = np.zeros((1,65),np.float64)
-            fgdmodel = np.zeros((1,65),np.float64) 
+            fgdmodel = np.zeros((1,65),np.float64)
             cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_MASK)
 
     mask2 = np.where((mask==1) + (mask==3),255,0).astype('uint8')
-    output = cv2.bitwise_and(img2,img2,mask=mask2)   
+    output = cv2.bitwise_and(img2,img2,mask=mask2)
 
 cv2.destroyAllWindows()
diff --git a/samples/python2/hist.py b/samples/python2/hist.py
index c5e21c4d7..41eec1d3a 100755
--- a/samples/python2/hist.py
+++ b/samples/python2/hist.py
@@ -61,7 +61,7 @@ if __name__ == '__main__':
         print "usage : python hist.py <image_file>"
 
     im = cv2.imread(fname)
-    
+
     if im is None:
         print 'Failed to load image file:', fname
         sys.exit(1)
@@ -114,4 +114,3 @@ if __name__ == '__main__':
             cv2.destroyAllWindows()
             break
     cv2.destroyAllWindows()
-
diff --git a/samples/python2/houghcircles.py b/samples/python2/houghcircles.py
index 620118cce..38ed6f303 100755
--- a/samples/python2/houghcircles.py
+++ b/samples/python2/houghcircles.py
@@ -2,7 +2,7 @@
 
 '''
 This example illustrates how to use cv2.HoughCircles() function.
-Usage: ./houghcircles.py [<image_name>] 
+Usage: ./houghcircles.py [<image_name>]
 image argument defaults to ../cpp/board.jpg
 '''
 
@@ -13,9 +13,9 @@ import sys
 
 print __doc__
 try:
-	fn = sys.argv[1]
+    fn = sys.argv[1]
 except:
-	fn = "../cpp/board.jpg"
+    fn = "../cpp/board.jpg"
 
 src = cv2.imread(fn, 1)
 img = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
diff --git a/samples/python2/inpaint.py b/samples/python2/inpaint.py
index 15497d406..5044afb89 100755
--- a/samples/python2/inpaint.py
+++ b/samples/python2/inpaint.py
@@ -23,16 +23,16 @@ if __name__ == '__main__':
     import sys
     try:
         fn = sys.argv[1]
-    except: 
+    except:
         fn = '../cpp/fruits.jpg'
-        
+
     print __doc__
 
     img = cv2.imread(fn)
     if img is None:
         print 'Failed to load image file:', fn
         sys.exit(1)
-        
+
     img_mark = img.copy()
     mark = np.zeros(img.shape[:2], np.uint8)
     sketch = Sketcher('img', [img_mark, mark], lambda : ((255, 255, 255), 255))
@@ -49,4 +49,3 @@ if __name__ == '__main__':
             mark[:] = 0
             sketch.show()
     cv2.destroyAllWindows()
-
diff --git a/samples/python2/lappyr.py b/samples/python2/lappyr.py
index 929d513da..0c08484de 100755
--- a/samples/python2/lappyr.py
+++ b/samples/python2/lappyr.py
@@ -64,5 +64,3 @@ if __name__ == '__main__':
 
         if cv2.waitKey(1) == 27:
             break
-
-
diff --git a/samples/python2/morphology.py b/samples/python2/morphology.py
index cfed9b8b0..d9bdb7f1c 100755
--- a/samples/python2/morphology.py
+++ b/samples/python2/morphology.py
@@ -27,13 +27,13 @@ if __name__ == '__main__':
         fn = sys.argv[1]
     except:
         fn = '../cpp/baboon.jpg'
-    
+
     img = cv2.imread(fn)
-    
+
     if img is None:
         print 'Failed to load image file:', fn
         sys.exit(1)
-    
+
     cv2.imshow('original', img)
 
     modes = cycle(['erode/dilate', 'open/close', 'blackhat/tophat', 'gradient'])
diff --git a/samples/python2/opt_flow.py b/samples/python2/opt_flow.py
index 14efbfa8f..c8a6086ed 100755
--- a/samples/python2/opt_flow.py
+++ b/samples/python2/opt_flow.py
@@ -85,4 +85,3 @@ if __name__ == '__main__':
                 cur_glitch = img.copy()
             print 'glitch is', ['off', 'on'][show_glitch]
     cv2.destroyAllWindows()
-
diff --git a/samples/winrt/ImageManipulations/AdvancedCapture.xaml b/samples/winrt/ImageManipulations/AdvancedCapture.xaml
new file mode 100644
index 000000000..07db96f27
--- /dev/null
+++ b/samples/winrt/ImageManipulations/AdvancedCapture.xaml
@@ -0,0 +1,75 @@
+﻿<!--
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+-->
+
+<common:LayoutAwarePage
+    x:Class="SDKSample.MediaCapture.AdvancedCapture"
+    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
+    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
+    xmlns:local="using:$rootsnamespace$"
+    xmlns:common="using:SDKSample.Common"
+    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
+    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
+    mc:Ignorable="d">
+
+    <Grid x:Name="LayoutRoot" Background="White" HorizontalAlignment="Left" VerticalAlignment="Top">
+        <Grid.RowDefinitions>
+            <RowDefinition Height="Auto"/>
+            <RowDefinition Height="*"/>
+        </Grid.RowDefinitions>
+        <Grid x:Name="Input" Grid.Row="0">
+            <Grid.RowDefinitions>
+                <RowDefinition Height="Auto"/>
+                <RowDefinition Height="Auto"/>
+                <RowDefinition Height="*"/>
+            </Grid.RowDefinitions>
+            <TextBlock TextWrapping="Wrap" Grid.Row="0"  Text="This scenario shows how to enumerate cameras in the system. Choose a camera from the list to preview, record or take a photo from the chosen camera.  You can add the gray scale effect using the checkbox provided." Style="{StaticResource BasicTextStyle}" HorizontalAlignment="Left"/>
+            <StackPanel Orientation="Horizontal" Grid.Row="1" Margin="0,10,0,0">
+                <ListBox x:Name="EnumedDeviceList2" SelectionChanged="lstEnumedDevices_SelectionChanged" />
+                <Button x:Name="btnStartDevice2" Click="btnStartDevice_Click" IsEnabled="true"  Margin="0,0,10,0" Content="StartDevice"/>
+                <Button x:Name="btnStartPreview2" Click="btnStartPreview_Click" IsEnabled="true"  Margin="0,0,10,0" Content="StartPreview"/>
+                <ComboBox x:Name="EffectTypeCombo" Width="120" SelectedIndex="0">
+                    <ComboBoxItem Content="Preview"/>
+                    <ComboBoxItem Content="Grayscale"/>
+                    <ComboBoxItem Content="Canny"/>
+                    <ComboBoxItem Content="Sobel"/>
+                    <ComboBoxItem Content="Histogram"/>
+                </ComboBox>
+                <Button Content="Apply" HorizontalAlignment="Stretch" VerticalAlignment="Top" Click="Button_Click"/>
+            </StackPanel>
+            <StackPanel x:Name="EffectTypeCombo1" Orientation="Horizontal" Grid.Row="1" Margin="324,5,-324,7"/>
+        </Grid>
+
+        <Grid x:Name="Output" HorizontalAlignment="Left" VerticalAlignment="Top" Grid.Row="1">
+            <StackPanel Orientation="Horizontal" Margin="0,10,0,0">
+                <StackPanel>
+                    <TextBlock Style="{StaticResource BasicTextStyle}"  HorizontalAlignment='Center'  VerticalAlignment='Center'  TextAlignment='Center'	Text='Preview' />
+                    <Canvas x:Name="previewCanvas2" Background="Gray">
+                        <CaptureElement x:Name="previewElement2" />
+                    </Canvas>
+                </StackPanel>
+                <StackPanel/>
+                <StackPanel/>
+            </StackPanel>
+        </Grid>
+
+        <!-- Add Storyboards to the visual states below as necessary for supporting the various layouts -->
+        <VisualStateManager.VisualStateGroups>
+            <VisualStateGroup>
+                <VisualState x:Name="FullScreenLandscape"/>
+                <VisualState x:Name="Filled"/>
+                <VisualState x:Name="FullScreenPortrait"/>
+                <VisualState x:Name="Snapped"/>
+            </VisualStateGroup>
+        </VisualStateManager.VisualStateGroups>
+    </Grid>
+
+</common:LayoutAwarePage>
diff --git a/samples/winrt/ImageManipulations/AdvancedCapture.xaml.cpp b/samples/winrt/ImageManipulations/AdvancedCapture.xaml.cpp
new file mode 100644
index 000000000..cff0a5a79
--- /dev/null
+++ b/samples/winrt/ImageManipulations/AdvancedCapture.xaml.cpp
@@ -0,0 +1,613 @@
+﻿//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// AdvancedCapture.xaml.cpp
+// Implementation of the AdvancedCapture class
+//
+
+#include "pch.h"
+#include "AdvancedCapture.xaml.h"
+
+using namespace SDKSample::MediaCapture;
+
+using namespace Windows::UI::Xaml;
+using namespace Windows::UI::Xaml::Navigation;
+using namespace Windows::UI::Xaml::Data;
+using namespace Windows::System;
+using namespace Windows::Foundation;
+using namespace Windows::Foundation::Collections;
+using namespace Platform;
+using namespace Windows::UI;
+using namespace Windows::UI::Core;
+using namespace Windows::UI::Xaml;
+using namespace Windows::UI::Xaml::Controls;
+using namespace Windows::UI::Xaml::Data;
+using namespace Windows::UI::Xaml::Media;
+using namespace Windows::Storage;
+using namespace Windows::Media::MediaProperties;
+using namespace Windows::Storage::Streams;
+using namespace Windows::System;
+using namespace Windows::UI::Xaml::Media::Imaging;
+using namespace Windows::Devices::Enumeration;
+
+ref class ReencodeState sealed
+{
+public:
+    ReencodeState()
+    {
+    }
+
+    virtual ~ReencodeState()
+    {
+        if (InputStream != nullptr)
+        {
+            delete InputStream;
+        }
+        if (OutputStream != nullptr)
+        {
+            delete OutputStream;
+        }
+    }
+
+internal:
+    Windows::Storage::Streams::IRandomAccessStream ^InputStream;
+    Windows::Storage::Streams::IRandomAccessStream ^OutputStream;
+    Windows::Storage::StorageFile ^PhotoStorage;
+    Windows::Graphics::Imaging::BitmapDecoder ^Decoder;
+    Windows::Graphics::Imaging::BitmapEncoder ^Encoder;
+};
+
+AdvancedCapture::AdvancedCapture()
+{
+    InitializeComponent();
+    ScenarioInit();
+}
+
+/// <summary>
+/// Invoked when this page is about to be displayed in a Frame.
+/// </summary>
+/// <param name="e">Event data that describes how this page was reached.  The Parameter
+/// property is typically used to configure the page.</param>
+void AdvancedCapture::OnNavigatedTo(NavigationEventArgs^ e)
+{
+    // A pointer back to the main page.  This is needed if you want to call methods in MainPage such
+    // as NotifyUser()
+    rootPage = MainPage::Current;
+
+    m_orientationChangedEventToken = Windows::Graphics::Display::DisplayProperties::OrientationChanged += ref new Windows::Graphics::Display::DisplayPropertiesEventHandler(this, &AdvancedCapture::DisplayProperties_OrientationChanged);
+}
+
+void AdvancedCapture::OnNavigatedFrom(NavigationEventArgs^ e)
+{
+    Windows::Media::MediaControl::SoundLevelChanged -= m_eventRegistrationToken;
+    Windows::Graphics::Display::DisplayProperties::OrientationChanged  -= m_orientationChangedEventToken;
+}
+
+void  AdvancedCapture::ScenarioInit()
+{
+    rootPage = MainPage::Current;
+    btnStartDevice2->IsEnabled = true;
+    btnStartPreview2->IsEnabled = false;
+    m_bRecording = false;
+    m_bPreviewing = false;
+    m_bEffectAdded = false;
+    previewElement2->Source = nullptr;
+    ShowStatusMessage("");
+    EffectTypeCombo->IsEnabled = false;
+    previewCanvas2->Visibility = Windows::UI::Xaml::Visibility::Collapsed;
+    EnumerateWebcamsAsync();
+    m_bSuspended = false;
+}
+
+void AdvancedCapture::ScenarioReset()
+{
+    previewCanvas2->Visibility = Windows::UI::Xaml::Visibility::Collapsed;
+    ScenarioInit();
+}
+
+void AdvancedCapture::Failed(Windows::Media::Capture::MediaCapture ^currentCaptureObject, Windows::Media::Capture::MediaCaptureFailedEventArgs^ currentFailure)
+{
+    String ^message = "Fatal error" + currentFailure->Message;
+    create_task(Dispatcher->RunAsync(Windows::UI::Core::CoreDispatcherPriority::High,
+        ref new Windows::UI::Core::DispatchedHandler([this, message]()
+    {
+        ShowStatusMessage(message);
+    })));
+}
+
+void AdvancedCapture::btnStartDevice_Click(Platform::Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e)
+{
+    try
+    {
+        EnableButton(false, "StartDevice");
+        ShowStatusMessage("Starting device");
+        auto mediaCapture = ref new Windows::Media::Capture::MediaCapture();
+        m_mediaCaptureMgr = mediaCapture;
+        auto settings = ref new Windows::Media::Capture::MediaCaptureInitializationSettings();
+        auto chosenDevInfo = m_devInfoCollection->GetAt(EnumedDeviceList2->SelectedIndex);
+        settings->VideoDeviceId = chosenDevInfo->Id;
+        if (chosenDevInfo->EnclosureLocation != nullptr && chosenDevInfo->EnclosureLocation->Panel == Windows::Devices::Enumeration::Panel::Back)
+        {
+            m_bRotateVideoOnOrientationChange = true;
+            m_bReversePreviewRotation = false;
+        }
+        else if (chosenDevInfo->EnclosureLocation != nullptr && chosenDevInfo->EnclosureLocation->Panel == Windows::Devices::Enumeration::Panel::Front)
+        {
+            m_bRotateVideoOnOrientationChange = true;
+            m_bReversePreviewRotation = true;
+        }
+        else
+        {
+            m_bRotateVideoOnOrientationChange = false;
+        }
+
+        create_task(mediaCapture->InitializeAsync(settings)).then([this](task<void> initTask)
+        {
+            try
+            {
+                initTask.get();
+
+                auto mediaCapture =  m_mediaCaptureMgr.Get();
+
+                DisplayProperties_OrientationChanged(nullptr);
+
+                EnableButton(true, "StartPreview");
+                EnableButton(true, "StartStopRecord");
+                EnableButton(true, "TakePhoto");
+                ShowStatusMessage("Device initialized successful");
+                EffectTypeCombo->IsEnabled = true;
+                mediaCapture->Failed += ref new Windows::Media::Capture::MediaCaptureFailedEventHandler(this, &AdvancedCapture::Failed);
+            }
+            catch (Exception ^ e)
+            {
+                ShowExceptionMessage(e);
+            }
+        });
+    }
+    catch (Platform::Exception^ e)
+    {
+        ShowExceptionMessage(e);
+    }
+}
+
+void AdvancedCapture::btnStartPreview_Click(Platform::Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e)
+{
+    m_bPreviewing = false;
+    try
+    {
+        ShowStatusMessage("Starting preview");
+        EnableButton(false, "StartPreview");
+
+        auto mediaCapture = m_mediaCaptureMgr.Get();
+        previewCanvas2->Visibility = Windows::UI::Xaml::Visibility::Visible;
+        previewElement2->Source = mediaCapture;
+        create_task(mediaCapture->StartPreviewAsync()).then([this](task<void> previewTask)
+        {
+            try
+            {
+                previewTask.get();
+                m_bPreviewing = true;
+                ShowStatusMessage("Start preview successful");
+            }
+            catch (Exception ^e)
+            {
+                ShowExceptionMessage(e);
+            }
+        });
+    }
+    catch (Platform::Exception^ e)
+    {
+        m_bPreviewing = false;
+        previewElement2->Source = nullptr;
+        EnableButton(true, "StartPreview");
+        ShowExceptionMessage(e);
+    }
+}
+
+void AdvancedCapture::lstEnumedDevices_SelectionChanged(Platform::Object^ sender, Windows::UI::Xaml::Controls::SelectionChangedEventArgs^ e)
+{
+     if ( m_bPreviewing )
+     {
+         create_task(m_mediaCaptureMgr->StopPreviewAsync()).then([this](task<void> previewTask)
+         {
+             try
+             {
+                 previewTask.get();
+                 m_bPreviewing = false;
+             }
+             catch (Exception ^e)
+             {
+                ShowExceptionMessage(e);
+             }
+         });
+    }
+
+    btnStartDevice2->IsEnabled = true;
+    btnStartPreview2->IsEnabled = false;
+    m_bRecording = false;
+    previewElement2->Source = nullptr;
+    EffectTypeCombo->IsEnabled = false;
+    m_bEffectAdded = false;
+    m_bEffectAddedToRecord = false;
+    m_bEffectAddedToPhoto = false;
+    ShowStatusMessage("");
+}
+
+void AdvancedCapture::EnumerateWebcamsAsync()
+{
+    try
+    {
+        ShowStatusMessage("Enumerating Webcams...");
+        m_devInfoCollection = nullptr;
+
+        EnumedDeviceList2->Items->Clear();
+
+        task<DeviceInformationCollection^>(DeviceInformation::FindAllAsync(DeviceClass::VideoCapture)).then([this](task<DeviceInformationCollection^> findTask)
+        {
+            try
+            {
+                m_devInfoCollection = findTask.get();
+                if (m_devInfoCollection == nullptr || m_devInfoCollection->Size == 0)
+                {
+                    ShowStatusMessage("No WebCams found.");
+                }
+                else
+                {
+                    for(unsigned int i = 0; i < m_devInfoCollection->Size; i++)
+                    {
+                        auto devInfo = m_devInfoCollection->GetAt(i);
+                        EnumedDeviceList2->Items->Append(devInfo->Name);
+                    }
+                    EnumedDeviceList2->SelectedIndex = 0;
+                    ShowStatusMessage("Enumerating Webcams completed successfully.");
+                    btnStartDevice2->IsEnabled = true;
+                }
+            }
+            catch (Exception ^e)
+            {
+                ShowExceptionMessage(e);
+            }
+        });
+    }
+    catch (Platform::Exception^ e)
+    {
+        ShowExceptionMessage(e);
+    }
+}
+
+void AdvancedCapture::AddEffectToImageStream()
+{
+    auto mediaCapture = m_mediaCaptureMgr.Get();
+    Windows::Media::Capture::VideoDeviceCharacteristic charecteristic = mediaCapture->MediaCaptureSettings->VideoDeviceCharacteristic;
+
+    if((charecteristic != Windows::Media::Capture::VideoDeviceCharacteristic::AllStreamsIdentical) &&
+        (charecteristic != Windows::Media::Capture::VideoDeviceCharacteristic::PreviewPhotoStreamsIdentical) &&
+        (charecteristic != Windows::Media::Capture::VideoDeviceCharacteristic::RecordPhotoStreamsIdentical))
+    {
+        Windows::Media::MediaProperties::IMediaEncodingProperties ^props = mediaCapture->VideoDeviceController->GetMediaStreamProperties(Windows::Media::Capture::MediaStreamType::Photo);
+        if(props->Type->Equals("Image"))
+        {
+            //Switch to a video media type instead since we cant add an effect to a image media type
+            Windows::Foundation::Collections::IVectorView<Windows::Media::MediaProperties::IMediaEncodingProperties^>^ supportedPropsList = mediaCapture->VideoDeviceController->GetAvailableMediaStreamProperties(Windows::Media::Capture::MediaStreamType::Photo);
+            {
+                unsigned int i = 0;
+                while (i < supportedPropsList->Size)
+                {
+                    Windows::Media::MediaProperties::IMediaEncodingProperties^ props = supportedPropsList->GetAt(i);
+
+                    String^ s = props->Type;
+                    if(props->Type->Equals("Video"))
+                    {
+                        task<void>(mediaCapture->VideoDeviceController->SetMediaStreamPropertiesAsync(Windows::Media::Capture::MediaStreamType::Photo,props)).then([this](task<void> changeTypeTask)
+                        {
+                            try
+                            {
+                                changeTypeTask.get();
+                                ShowStatusMessage("Change type on photo stream successful");
+                                //Now add the effect on the image pin
+                                task<void>(m_mediaCaptureMgr->AddEffectAsync(Windows::Media::Capture::MediaStreamType::Photo,"OcvTransform.OcvImageManipulations", nullptr)).then([this](task<void> effectTask3)
+                                {
+                                    try
+                                    {
+                                        effectTask3.get();
+                                        m_bEffectAddedToPhoto = true;
+                                        ShowStatusMessage("Adding effect to photo stream successful");
+                                        EffectTypeCombo->IsEnabled = true;
+
+                                    }
+                                    catch(Exception ^e)
+                                    {
+                                        ShowExceptionMessage(e);
+                                        EffectTypeCombo->IsEnabled = true;
+                                    }
+                                });
+
+                            }
+                            catch(Exception ^e)
+                            {
+                                ShowExceptionMessage(e);
+                                EffectTypeCombo->IsEnabled = true;
+                            }
+
+                        });
+                        break;
+
+                    }
+                    i++;
+                }
+            }
+        }
+        else
+        {
+            //Add the effect to the image pin if the type is already "Video"
+            task<void>(mediaCapture->AddEffectAsync(Windows::Media::Capture::MediaStreamType::Photo,"OcvTransform.OcvImageManipulations", nullptr)).then([this](task<void> effectTask3)
+            {
+                try
+                {
+                    effectTask3.get();
+                    m_bEffectAddedToPhoto = true;
+                    ShowStatusMessage("Adding effect to photo stream successful");
+                    EffectTypeCombo->IsEnabled = true;
+
+                }
+                catch(Exception ^e)
+                {
+                    ShowExceptionMessage(e);
+                    EffectTypeCombo->IsEnabled = true;
+                }
+            });
+        }
+    }
+}
+
+void AdvancedCapture::ShowStatusMessage(Platform::String^ text)
+{
+    rootPage->NotifyUser(text, NotifyType::StatusMessage);
+}
+
+void AdvancedCapture::ShowExceptionMessage(Platform::Exception^ ex)
+{
+    rootPage->NotifyUser(ex->Message, NotifyType::ErrorMessage);
+}
+
+void AdvancedCapture::EnableButton(bool enabled, String^ name)
+{
+    if (name->Equals("StartDevice"))
+    {
+        btnStartDevice2->IsEnabled = enabled;
+    }
+    else if (name->Equals("StartPreview"))
+    {
+        btnStartPreview2->IsEnabled = enabled;
+    }
+}
+
+task<Windows::Storage::StorageFile^> AdvancedCapture::ReencodePhotoAsync(
+    Windows::Storage::StorageFile ^tempStorageFile,
+    Windows::Storage::FileProperties::PhotoOrientation photoRotation)
+{
+    ReencodeState ^state = ref new ReencodeState();
+
+    return create_task(tempStorageFile->OpenAsync(Windows::Storage::FileAccessMode::Read)).then([state](Windows::Storage::Streams::IRandomAccessStream ^stream)
+    {
+        state->InputStream = stream;
+        return Windows::Graphics::Imaging::BitmapDecoder::CreateAsync(state->InputStream);
+    }).then([state](Windows::Graphics::Imaging::BitmapDecoder ^decoder)
+    {
+        state->Decoder = decoder;
+        return Windows::Storage::KnownFolders::PicturesLibrary->CreateFileAsync(PHOTO_FILE_NAME, Windows::Storage::CreationCollisionOption::GenerateUniqueName);
+    }).then([state](Windows::Storage::StorageFile ^storageFile)
+    {
+        state->PhotoStorage = storageFile;
+        return state->PhotoStorage->OpenAsync(Windows::Storage::FileAccessMode::ReadWrite);
+    }).then([state](Windows::Storage::Streams::IRandomAccessStream ^stream)
+    {
+        state->OutputStream = stream;
+        state->OutputStream->Size = 0;
+        return Windows::Graphics::Imaging::BitmapEncoder::CreateForTranscodingAsync(state->OutputStream, state->Decoder);
+    }).then([state, photoRotation](Windows::Graphics::Imaging::BitmapEncoder ^encoder)
+    {
+        state->Encoder = encoder;
+        auto properties = ref new Windows::Graphics::Imaging::BitmapPropertySet();
+        properties->Insert("System.Photo.Orientation",
+            ref new Windows::Graphics::Imaging::BitmapTypedValue((unsigned short)photoRotation, Windows::Foundation::PropertyType::UInt16));
+        return create_task(state->Encoder->BitmapProperties->SetPropertiesAsync(properties));
+    }).then([state]()
+    {
+        return state->Encoder->FlushAsync();
+    }).then([tempStorageFile, state](task<void> previousTask)
+    {
+        auto result = state->PhotoStorage;
+        delete state;
+
+        tempStorageFile->DeleteAsync(Windows::Storage::StorageDeleteOption::PermanentDelete);
+
+        previousTask.get();
+
+        return result;
+    });
+}
+
+Windows::Storage::FileProperties::PhotoOrientation AdvancedCapture::GetCurrentPhotoRotation()
+{
+    bool counterclockwiseRotation = m_bReversePreviewRotation;
+
+    if (m_bRotateVideoOnOrientationChange)
+    {
+        return PhotoRotationLookup(Windows::Graphics::Display::DisplayProperties::CurrentOrientation, counterclockwiseRotation);
+    }
+    else
+    {
+        return Windows::Storage::FileProperties::PhotoOrientation::Normal;
+    }
+}
+
+void AdvancedCapture::PrepareForVideoRecording()
+{
+    Windows::Media::Capture::MediaCapture ^mediaCapture = m_mediaCaptureMgr.Get();
+    if (mediaCapture == nullptr)
+    {
+        return;
+    }
+
+    bool counterclockwiseRotation = m_bReversePreviewRotation;
+
+    if (m_bRotateVideoOnOrientationChange)
+    {
+        mediaCapture->SetRecordRotation(VideoRotationLookup(Windows::Graphics::Display::DisplayProperties::CurrentOrientation, counterclockwiseRotation));
+    }
+    else
+    {
+        mediaCapture->SetRecordRotation(Windows::Media::Capture::VideoRotation::None);
+    }
+}
+
+void AdvancedCapture::DisplayProperties_OrientationChanged(Platform::Object^ sender)
+{
+    Windows::Media::Capture::MediaCapture ^mediaCapture = m_mediaCaptureMgr.Get();
+    if (mediaCapture == nullptr)
+    {
+        return;
+    }
+
+    bool previewMirroring = mediaCapture->GetPreviewMirroring();
+    bool counterclockwiseRotation = (previewMirroring && !m_bReversePreviewRotation) ||
+        (!previewMirroring && m_bReversePreviewRotation);
+
+    if (m_bRotateVideoOnOrientationChange)
+    {
+        mediaCapture->SetPreviewRotation(VideoRotationLookup(Windows::Graphics::Display::DisplayProperties::CurrentOrientation, counterclockwiseRotation));
+    }
+    else
+    {
+        mediaCapture->SetPreviewRotation(Windows::Media::Capture::VideoRotation::None);
+    }
+}
+
+Windows::Storage::FileProperties::PhotoOrientation AdvancedCapture::PhotoRotationLookup(
+    Windows::Graphics::Display::DisplayOrientations displayOrientation, bool counterclockwise)
+{
+    switch (displayOrientation)
+    {
+    case Windows::Graphics::Display::DisplayOrientations::Landscape:
+        return Windows::Storage::FileProperties::PhotoOrientation::Normal;
+
+    case Windows::Graphics::Display::DisplayOrientations::Portrait:
+        return (counterclockwise) ? Windows::Storage::FileProperties::PhotoOrientation::Rotate270:
+            Windows::Storage::FileProperties::PhotoOrientation::Rotate90;
+
+    case Windows::Graphics::Display::DisplayOrientations::LandscapeFlipped:
+        return Windows::Storage::FileProperties::PhotoOrientation::Rotate180;
+
+    case Windows::Graphics::Display::DisplayOrientations::PortraitFlipped:
+        return (counterclockwise) ? Windows::Storage::FileProperties::PhotoOrientation::Rotate90 :
+            Windows::Storage::FileProperties::PhotoOrientation::Rotate270;
+
+    default:
+        return Windows::Storage::FileProperties::PhotoOrientation::Unspecified;
+    }
+}
+
+Windows::Media::Capture::VideoRotation AdvancedCapture::VideoRotationLookup(
+    Windows::Graphics::Display::DisplayOrientations displayOrientation, bool counterclockwise)
+{
+    switch (displayOrientation)
+    {
+    case Windows::Graphics::Display::DisplayOrientations::Landscape:
+        return Windows::Media::Capture::VideoRotation::None;
+
+    case Windows::Graphics::Display::DisplayOrientations::Portrait:
+        return (counterclockwise) ? Windows::Media::Capture::VideoRotation::Clockwise270Degrees :
+            Windows::Media::Capture::VideoRotation::Clockwise90Degrees;
+
+    case Windows::Graphics::Display::DisplayOrientations::LandscapeFlipped:
+        return Windows::Media::Capture::VideoRotation::Clockwise180Degrees;
+
+    case Windows::Graphics::Display::DisplayOrientations::PortraitFlipped:
+        return (counterclockwise) ? Windows::Media::Capture::VideoRotation::Clockwise90Degrees:
+            Windows::Media::Capture::VideoRotation::Clockwise270Degrees ;
+
+    default:
+        return Windows::Media::Capture::VideoRotation::None;
+    }
+}
+
+void SDKSample::MediaCapture::AdvancedCapture::Button_Click(Platform::Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e)
+{
+    try
+    {
+        create_task(m_mediaCaptureMgr->ClearEffectsAsync(Windows::Media::Capture::MediaStreamType::VideoPreview)).then([this](task<void> cleanTask)
+        {
+            m_bEffectAdded = true;
+            int index = EffectTypeCombo->SelectedIndex;
+            PropertySet^ props = ref new PropertySet();
+            props->Insert(L"{698649BE-8EAE-4551-A4CB-3EC98FBD3D86}", index);
+            create_task(m_mediaCaptureMgr->AddEffectAsync(Windows::Media::Capture::MediaStreamType::VideoPreview,"OcvTransform.OcvImageManipulations", props)).then([this](task<void> effectTask)
+            {
+                try
+                {
+                    effectTask.get();
+
+                    auto mediaCapture = m_mediaCaptureMgr.Get();
+                    Windows::Media::Capture::VideoDeviceCharacteristic charecteristic = mediaCapture->MediaCaptureSettings->VideoDeviceCharacteristic;
+
+                    ShowStatusMessage("Add effect successful to preview stream successful");
+                    if((charecteristic != Windows::Media::Capture::VideoDeviceCharacteristic::AllStreamsIdentical) &&
+                        (charecteristic != Windows::Media::Capture::VideoDeviceCharacteristic::PreviewRecordStreamsIdentical))
+                    {
+                        Windows::Media::MediaProperties::IMediaEncodingProperties ^props = mediaCapture->VideoDeviceController->GetMediaStreamProperties(Windows::Media::Capture::MediaStreamType::VideoRecord);
+                        Windows::Media::MediaProperties::VideoEncodingProperties ^videoEncodingProperties  = static_cast<Windows::Media::MediaProperties::VideoEncodingProperties ^>(props);
+                        if(!videoEncodingProperties->Subtype->Equals("H264")) //Cant add an effect to an H264 stream
+                        {
+                            task<void>(mediaCapture->AddEffectAsync(Windows::Media::Capture::MediaStreamType::VideoRecord,"OcvTransform.OcvImageManipulations", nullptr)).then([this](task<void> effectTask2)
+                            {
+                                try
+                                {
+                                    effectTask2.get();
+                                    ShowStatusMessage("Add effect successful to record stream successful");
+                                    m_bEffectAddedToRecord = true;
+                                    AddEffectToImageStream();
+                                    EffectTypeCombo->IsEnabled = true;
+                                }
+                                catch(Exception ^e)
+                                {
+                                    ShowExceptionMessage(e);
+                                    EffectTypeCombo->IsEnabled = true;
+                                }
+                            });
+                        }
+                        else
+                        {
+                            AddEffectToImageStream();
+                            EffectTypeCombo->IsEnabled = true;
+                        }
+
+                    }
+                    else
+                    {
+                        AddEffectToImageStream();
+                        EffectTypeCombo->IsEnabled = true;
+                    }
+                }
+                catch (Exception ^e)
+                {
+                    ShowExceptionMessage(e);
+                    EffectTypeCombo->IsEnabled = true;
+                }
+            });
+        });
+    }
+    catch (Platform::Exception ^e)
+    {
+        ShowExceptionMessage(e);
+        EffectTypeCombo->IsEnabled = true;
+    }
+}
diff --git a/samples/winrt/ImageManipulations/AdvancedCapture.xaml.h b/samples/winrt/ImageManipulations/AdvancedCapture.xaml.h
new file mode 100644
index 000000000..94fa87c61
--- /dev/null
+++ b/samples/winrt/ImageManipulations/AdvancedCapture.xaml.h
@@ -0,0 +1,95 @@
+﻿//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// AdvancedCapture.xaml.h
+// Declaration of the AdvancedCapture class
+//
+
+#pragma once
+
+#include "pch.h"
+#include "AdvancedCapture.g.h"
+#include "MainPage.xaml.h"
+#include <ppl.h>
+
+#define VIDEO_FILE_NAME "video.mp4"
+#define PHOTO_FILE_NAME "photo.jpg"
+#define TEMP_PHOTO_FILE_NAME "photoTmp.jpg"
+
+using namespace concurrency;
+using namespace Windows::Devices::Enumeration;
+
+namespace SDKSample
+{
+    namespace MediaCapture
+    {
+        /// <summary>
+        /// An empty page that can be used on its own or navigated to within a Frame.
+        /// </summary>
+        [Windows::Foundation::Metadata::WebHostHidden]
+        public ref class AdvancedCapture sealed
+        {
+        public:
+            AdvancedCapture();
+
+        protected:
+            virtual void OnNavigatedTo(Windows::UI::Xaml::Navigation::NavigationEventArgs^ e) override;
+            virtual void OnNavigatedFrom(Windows::UI::Xaml::Navigation::NavigationEventArgs^ e) override;
+
+        private:
+            MainPage^ rootPage;
+            void ScenarioInit();
+            void ScenarioReset();
+
+            void Failed(Windows::Media::Capture::MediaCapture ^ mediaCapture, Windows::Media::Capture::MediaCaptureFailedEventArgs ^ args);
+
+            void btnStartDevice_Click(Platform::Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+
+            void btnStartPreview_Click(Platform::Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+
+            void lstEnumedDevices_SelectionChanged(Platform::Object^ sender, Windows::UI::Xaml::Controls::SelectionChangedEventArgs^ e);
+            void EnumerateWebcamsAsync();
+
+            void AddEffectToImageStream();
+
+            void ShowStatusMessage(Platform::String^ text);
+            void ShowExceptionMessage(Platform::Exception^ ex);
+
+            void EnableButton(bool enabled, Platform::String ^name);
+
+            task<Windows::Storage::StorageFile^> ReencodePhotoAsync(
+                Windows::Storage::StorageFile ^tempStorageFile,
+                Windows::Storage::FileProperties::PhotoOrientation photoRotation);
+            Windows::Storage::FileProperties::PhotoOrientation GetCurrentPhotoRotation();
+            void PrepareForVideoRecording();
+            void DisplayProperties_OrientationChanged(Platform::Object^ sender);
+            Windows::Storage::FileProperties::PhotoOrientation PhotoRotationLookup(
+                Windows::Graphics::Display::DisplayOrientations displayOrientation, bool counterclockwise);
+            Windows::Media::Capture::VideoRotation VideoRotationLookup(
+                Windows::Graphics::Display::DisplayOrientations displayOrientation, bool counterclockwise);
+
+            Platform::Agile<Windows::Media::Capture::MediaCapture> m_mediaCaptureMgr;
+            Windows::Storage::StorageFile^ m_recordStorageFile;
+            bool m_bRecording;
+            bool m_bEffectAdded;
+            bool m_bEffectAddedToRecord;
+            bool m_bEffectAddedToPhoto;
+            bool m_bSuspended;
+            bool m_bPreviewing;
+            DeviceInformationCollection^ m_devInfoCollection;
+            Windows::Foundation::EventRegistrationToken m_eventRegistrationToken;
+            bool m_bRotateVideoOnOrientationChange;
+            bool m_bReversePreviewRotation;
+            Windows::Foundation::EventRegistrationToken m_orientationChangedEventToken;
+            void Button_Click(Platform::Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+        };
+    }
+}
diff --git a/samples/winrt/ImageManipulations/App.xaml b/samples/winrt/ImageManipulations/App.xaml
new file mode 100644
index 000000000..769f4400d
--- /dev/null
+++ b/samples/winrt/ImageManipulations/App.xaml
@@ -0,0 +1,30 @@
+﻿<!--
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+-->
+
+<Application xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
+    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
+    x:Class="SDKSample.App"
+    RequestedTheme="Light">
+    <Application.Resources>
+
+        <ResourceDictionary>
+            <ResourceDictionary.MergedDictionaries>
+                <!--
+                    Styles that define common aspects of the platform look and feel
+                    Required by Visual Studio project and item templates
+                 -->
+                <ResourceDictionary Source="Common/StandardStyles.xaml"/>
+                <ResourceDictionary Source="Sample-Utils/SampleTemplateStyles.xaml"/>
+            </ResourceDictionary.MergedDictionaries>
+        </ResourceDictionary>
+    </Application.Resources>
+</Application>
diff --git a/samples/winrt/ImageManipulations/App.xaml.cpp b/samples/winrt/ImageManipulations/App.xaml.cpp
new file mode 100644
index 000000000..a24a4f9c0
--- /dev/null
+++ b/samples/winrt/ImageManipulations/App.xaml.cpp
@@ -0,0 +1,116 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// App.xaml.cpp
+// Implementation of the App.xaml class.
+//
+
+#include "pch.h"
+#include "MainPage.xaml.h"
+#include "AdvancedCapture.xaml.h"
+#include "Common\SuspensionManager.h"
+
+using namespace SDKSample;
+using namespace SDKSample::Common;
+using namespace SDKSample::MediaCapture;
+
+using namespace Concurrency;
+using namespace Platform;
+using namespace Windows::ApplicationModel;
+using namespace Windows::ApplicationModel::Activation;
+using namespace Windows::Foundation;
+using namespace Windows::Foundation::Collections;
+using namespace Windows::UI::Core;
+using namespace Windows::UI::Xaml;
+using namespace Windows::UI::Xaml::Controls;
+using namespace Windows::UI::Xaml::Controls::Primitives;
+using namespace Windows::UI::Xaml::Data;
+using namespace Windows::UI::Xaml::Input;
+using namespace Windows::UI::Xaml::Interop;
+using namespace Windows::UI::Xaml::Media;
+using namespace Windows::UI::Xaml::Navigation;
+
+/// <summary>
+/// Initializes the singleton application object.  This is the first line of authored code
+/// executed, and as such is the logical equivalent of main() or WinMain().
+/// </summary>
+App::App()
+{
+    InitializeComponent();
+    this->Suspending += ref new SuspendingEventHandler(this, &SDKSample::App::OnSuspending);
+}
+
+/// <summary>
+/// Invoked when the application is launched normally by the end user.  Other entry points will
+/// be used when the application is launched to open a specific file, to display search results,
+/// and so forth.
+/// </summary>
+/// <param name="pArgs">Details about the launch request and process.</param>
+void App::OnLaunched(LaunchActivatedEventArgs^ pArgs)
+{
+    this->LaunchArgs = pArgs;
+
+    // Do not repeat app initialization when already running, just ensure that
+    // the window is active
+    if (pArgs->PreviousExecutionState == ApplicationExecutionState::Running)
+    {
+        Window::Current->Activate();
+        return;
+    }
+
+    // Create a Frame to act as the navigation context and associate it with
+    // a SuspensionManager key
+    auto rootFrame = ref new Frame();
+    SuspensionManager::RegisterFrame(rootFrame, "AppFrame");
+
+    auto prerequisite = task<void>([](){});
+    if (pArgs->PreviousExecutionState == ApplicationExecutionState::Terminated)
+    {
+        // Restore the saved session state only when appropriate, scheduling the
+        // final launch steps after the restore is complete
+        prerequisite = SuspensionManager::RestoreAsync();
+    }
+    prerequisite.then([=]()
+    {
+        // When the navigation stack isn't restored navigate to the first page,
+        // configuring the new page by passing required information as a navigation
+        // parameter
+        if (rootFrame->Content == nullptr)
+        {
+            if (!rootFrame->Navigate(TypeName(MainPage::typeid)))
+            {
+                throw ref new FailureException("Failed to create initial page");
+            }
+        }
+
+        // Place the frame in the current Window and ensure that it is active
+        Window::Current->Content = rootFrame;
+        Window::Current->Activate();
+    }, task_continuation_context::use_current());
+}
+
+/// <summary>
+/// Invoked when application execution is being suspended.  Application state is saved
+/// without knowing whether the application will be terminated or resumed with the contents
+/// of memory still intact.
+/// </summary>
+/// <param name="sender">The source of the suspend request.</param>
+/// <param name="e">Details about the suspend request.</param>
+void App::OnSuspending(Object^ sender, SuspendingEventArgs^ e)
+{
+    (void) sender;	// Unused parameter
+
+    auto deferral = e->SuspendingOperation->GetDeferral();
+    SuspensionManager::SaveAsync().then([=]()
+    {
+        deferral->Complete();
+    });
+}
diff --git a/samples/winrt/ImageManipulations/App.xaml.h b/samples/winrt/ImageManipulations/App.xaml.h
new file mode 100644
index 000000000..a8b606424
--- /dev/null
+++ b/samples/winrt/ImageManipulations/App.xaml.h
@@ -0,0 +1,35 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// App.xaml.h
+// Declaration of the App.xaml class.
+//
+
+#pragma once
+
+#include "pch.h"
+#include "App.g.h"
+#include "MainPage.g.h"
+
+namespace SDKSample
+{
+    ref class App
+    {
+    internal:
+        App();
+        virtual void OnSuspending(Platform::Object^ sender, Windows::ApplicationModel::SuspendingEventArgs^ pArgs);
+        Windows::ApplicationModel::Activation::LaunchActivatedEventArgs^ LaunchArgs;
+    protected:
+        virtual void OnLaunched(Windows::ApplicationModel::Activation::LaunchActivatedEventArgs^ pArgs) override;
+    private:
+        Windows::UI::Xaml::Controls::Frame^ rootFrame;
+    };
+}
diff --git a/samples/winrt/ImageManipulations/Constants.cpp b/samples/winrt/ImageManipulations/Constants.cpp
new file mode 100644
index 000000000..a26634272
--- /dev/null
+++ b/samples/winrt/ImageManipulations/Constants.cpp
@@ -0,0 +1,22 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+#include "pch.h"
+#include "MainPage.xaml.h"
+#include "Constants.h"
+
+using namespace SDKSample;
+
+Platform::Array<Scenario>^ MainPage::scenariosInner = ref new Platform::Array<Scenario>
+{
+    // The format here is the following:
+    //     { "Description for the sample", "Fully quaified name for the class that implements the scenario" }
+    { "Enumerate cameras and add a video effect", "SDKSample.MediaCapture.AdvancedCapture" },
+};
diff --git a/samples/winrt/ImageManipulations/Constants.h b/samples/winrt/ImageManipulations/Constants.h
new file mode 100644
index 000000000..143f06960
--- /dev/null
+++ b/samples/winrt/ImageManipulations/Constants.h
@@ -0,0 +1,45 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+#pragma once
+
+#include <collection.h>
+namespace SDKSample
+{
+    public value struct Scenario
+    {
+        Platform::String^ Title;
+        Platform::String^ ClassName;
+    };
+
+    partial ref class MainPage
+    {
+    public:
+        static property Platform::String^ FEATURE_NAME
+        {
+            Platform::String^ get()
+            {
+                return ref new Platform::String(L"OpenCV Image Manipulations sample");
+            }
+        }
+
+        static property Platform::Array<Scenario>^ scenarios
+        {
+            Platform::Array<Scenario>^ get()
+            {
+                return scenariosInner;
+            }
+        }
+    private:
+        static Platform::Array<Scenario>^ scenariosInner;
+    };
+
+
+}
diff --git a/samples/winrt/ImageManipulations/MainPage.xaml b/samples/winrt/ImageManipulations/MainPage.xaml
new file mode 100644
index 000000000..66ce5715f
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MainPage.xaml
@@ -0,0 +1,156 @@
+﻿<!--
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+-->
+
+<common:LayoutAwarePage
+    x:Class="SDKSample.MainPage"
+    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
+    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
+    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
+    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
+    xmlns:common="using:SDKSample.Common"
+    mc:Ignorable="d"
+    x:Name="RootPage">
+
+    <common:LayoutAwarePage.Resources>
+        <Style x:Key="BaseStatusStyle" TargetType="TextBlock">
+            <Setter Property="FontFamily" Value="Segoe UI Semilight"/>
+            <Setter Property="FontSize" Value="14.667"/>
+            <Setter Property="Margin" Value="0,0,0,5"/>
+        </Style>
+        <Style x:Key="StatusStyle" BasedOn="{StaticResource BaseStatusStyle}" TargetType="TextBlock">
+            <Setter Property="Foreground" Value="Green"/>
+        </Style>
+        <Style x:Key="ErrorStyle" BasedOn="{StaticResource BaseStatusStyle}" TargetType="TextBlock">
+            <Setter Property="Foreground" Value="Blue"/>
+        </Style>
+    </common:LayoutAwarePage.Resources>
+
+
+    <Grid x:Name="LayoutRoot" Background="{StaticResource ApplicationPageBackgroundThemeBrush}">
+
+        <Grid x:Name="ContentRoot" Background="{StaticResource ApplicationPageBackgroundThemeBrush}" Margin="100,20,100,20">
+            <Grid.RowDefinitions>
+                <RowDefinition Height="Auto"/>
+                <RowDefinition Height="*"/>
+                <RowDefinition Height="Auto"/>
+            </Grid.RowDefinitions>
+
+            <!-- Header -->
+            <StackPanel Orientation="Horizontal" Grid.Row="0">
+                <Image x:Name="WindowsLogo" Stretch="None" Source="Assets/windows-sdk.png" AutomationProperties.Name="Windows Logo" HorizontalAlignment="Left" Grid.Column="0"/>
+                <TextBlock VerticalAlignment="Bottom" Style="{StaticResource TitleTextStyle}" TextWrapping="Wrap" Grid.Column="1" Text="OpenCV for Windows RT"/>
+            </StackPanel>
+            <ScrollViewer x:Name="MainScrollViewer" Grid.Row="1" ZoomMode="Disabled" IsTabStop="False" VerticalScrollBarVisibility="Auto" HorizontalScrollBarVisibility="Auto" Padding="0,0,0,20" >
+                <Grid>
+                    <Grid.RowDefinitions>
+                        <RowDefinition Height="Auto"/>
+                        <RowDefinition Height="*"/>
+                    </Grid.RowDefinitions>
+                    <TextBlock x:Name="FeatureName" Grid.Row="0"  Text="Add Sample Title Here" Style="{StaticResource HeaderTextStyle}" TextWrapping="Wrap"/>
+
+                    <!-- Content -->
+                    <Grid Grid.Row="1">
+
+                        <!-- All XAML in this section is purely for design time so you can see sample content in the designer. -->
+                        <!-- This will be repaced at runtime by live content.                                                  -->
+                        <Grid>
+                            <Grid.RowDefinitions>
+                                <RowDefinition Height="Auto"/>
+                                <RowDefinition Height="Auto"/>
+                                <RowDefinition Height="Auto"/>
+                                <RowDefinition Height="Auto"/>
+                                <RowDefinition Height="Auto"/>
+                                <RowDefinition Height="Auto"/>
+                                <RowDefinition Height="Auto"/>
+                                <RowDefinition Height="*"/>
+                            </Grid.RowDefinitions>
+                            <Grid.ColumnDefinitions>
+                                <ColumnDefinition Width="Auto"/>
+                                <ColumnDefinition Width="*"/>
+                            </Grid.ColumnDefinitions>
+                            <TextBlock Grid.Row="0" Text="Input" Style="{StaticResource H2Style}"/>
+
+                            <TextBlock x:Name="ScenarioListLabel" Text="Select Scenario:" Grid.Row="1"  Style="{StaticResource SubheaderTextStyle}" Margin="0,5,0,0" />
+                            <ListBox x:Name="Scenarios" Margin="0,0,20,0" Grid.Row="2" AutomationProperties.Name="Scenarios" HorizontalAlignment="Left"
+                                         VerticalAlignment="Top" ScrollViewer.HorizontalScrollBarVisibility="Auto"
+                                         AutomationProperties.LabeledBy="{Binding ElementName=ScenarioListLabel}" MaxHeight="125">
+                                <ListBox.ItemTemplate>
+                                    <DataTemplate>
+                                        <TextBlock Text="{Binding Name}"/>
+                                    </DataTemplate>
+                                </ListBox.ItemTemplate>
+                            </ListBox>
+                            <TextBlock x:Name="DescriptionText" Margin="0,5,0,0" Text="Description:" Style="{StaticResource SubheaderTextStyle}" Grid.Row="1" Grid.Column="1"/>
+                            <!-- Input Scenarios -->
+                            <UserControl x:Name="InputSection" Margin="0,5,0,0" IsTabStop="False" Grid.Row="2" Grid.Column="1" HorizontalAlignment="Left" VerticalAlignment="Top"/>
+
+                            <!-- Output section -->
+                            <TextBlock Text="Output" Grid.Row="5"  Margin="0,25,0,20" Style="{StaticResource H2Style}" Grid.ColumnSpan="2"/>
+                            <TextBlock x:Name="StatusBlock" Grid.Row="6" Margin="0,0,0,5" Grid.ColumnSpan="2"/>
+
+                            <!-- Output Scenarios -->
+                            <UserControl x:Name="OutputSection" Grid.Row="7" Grid.ColumnSpan="2" BorderThickness="0"/>
+                        </Grid>
+                    </Grid>
+                </Grid>
+            </ScrollViewer>
+
+            <!-- Footer -->
+            <Grid x:Name="Footer"  Grid.Row="3" Margin="0,10,0,10" VerticalAlignment="Bottom" >
+                <Grid.RowDefinitions>
+                    <RowDefinition Height="Auto"/>
+                    <RowDefinition Height="Auto"/>
+                    <RowDefinition Height="Auto"/>
+                </Grid.RowDefinitions>
+
+                <Grid.ColumnDefinitions>
+                    <ColumnDefinition Width="Auto"/>
+                    <ColumnDefinition Width="*"/>
+                </Grid.ColumnDefinitions>
+                <StackPanel x:Name="FooterPanel" Orientation="Horizontal" Grid.Row="1" Grid.Column="1" HorizontalAlignment="Right"/>
+            </Grid>
+
+
+        </Grid>
+
+        <VisualStateManager.VisualStateGroups>
+            <!-- Visual states reflect the application's view state -->
+            <VisualStateGroup>
+                <VisualState x:Name="FullScreenLandscape">
+                    <Storyboard>
+                    </Storyboard>
+                </VisualState>
+                <VisualState x:Name="Filled">
+                    <Storyboard>
+                    </Storyboard>
+                </VisualState>
+
+                <VisualState x:Name="FullScreenPortrait">
+                    <Storyboard>
+                    </Storyboard>
+                </VisualState>
+
+                <VisualState x:Name="Snapped">
+                    <Storyboard>
+                        <ObjectAnimationUsingKeyFrames Storyboard.TargetProperty="(FrameworkElement.Margin)" Storyboard.TargetName="ContentRoot">
+                            <DiscreteObjectKeyFrame KeyTime="0">
+                                <DiscreteObjectKeyFrame.Value>
+                                    <Thickness>20,20,20,20</Thickness>
+                                </DiscreteObjectKeyFrame.Value>
+                            </DiscreteObjectKeyFrame>
+                        </ObjectAnimationUsingKeyFrames>
+                    </Storyboard>
+                </VisualState>
+            </VisualStateGroup>
+        </VisualStateManager.VisualStateGroups>
+    </Grid>
+</common:LayoutAwarePage>
diff --git a/samples/winrt/ImageManipulations/MainPage.xaml.cpp b/samples/winrt/ImageManipulations/MainPage.xaml.cpp
new file mode 100644
index 000000000..bd897fcc0
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MainPage.xaml.cpp
@@ -0,0 +1,315 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// MainPage.xaml.cpp
+// Implementation of the MainPage.xaml class.
+//
+
+#include "pch.h"
+#include "MainPage.xaml.h"
+#include "App.xaml.h"
+
+#include <collection.h>
+
+using namespace Windows::UI::Xaml;
+using namespace Windows::UI::Xaml::Controls;
+using namespace Windows::Foundation;
+using namespace Windows::Foundation::Collections;
+using namespace Platform;
+using namespace SDKSample;
+using namespace Windows::UI::Xaml::Navigation;
+using namespace Windows::UI::Xaml::Interop;
+using namespace Windows::Graphics::Display;
+using namespace Windows::UI::ViewManagement;
+
+MainPage^ MainPage::Current = nullptr;
+
+MainPage::MainPage()
+{
+    InitializeComponent();
+
+    // This frame is hidden, meaning it is never shown.  It is simply used to load
+    // each scenario page and then pluck out the input and output sections and
+    // place them into the UserControls on the main page.
+    HiddenFrame = ref new Windows::UI::Xaml::Controls::Frame();
+    HiddenFrame->Visibility = Windows::UI::Xaml::Visibility::Collapsed;
+    ContentRoot->Children->Append(HiddenFrame);
+
+    FeatureName->Text = FEATURE_NAME;
+
+    this->SizeChanged += ref new SizeChangedEventHandler(this, &MainPage::MainPage_SizeChanged);
+    Scenarios->SelectionChanged += ref new SelectionChangedEventHandler(this, &MainPage::Scenarios_SelectionChanged);
+
+    MainPage::Current = this;
+    autoSizeInputSectionWhenSnapped = true;
+}
+
+/// <summary>
+/// We need to handle SizeChanged so that we can make the sample layout property
+/// in the various layouts.
+/// </summary>
+/// <param name="sender"></param>
+/// <param name="e"></param>
+void MainPage::MainPage_SizeChanged(Object^ sender, SizeChangedEventArgs^ e)
+{
+    InvalidateSize();
+    MainPageSizeChangedEventArgs^ args = ref new MainPageSizeChangedEventArgs();
+    args->ViewState = ApplicationView::Value;
+    MainPageResized(this, args);
+
+}
+
+void MainPage::InvalidateSize()
+{
+    // Get the window width
+    double windowWidth = this->ActualWidth;
+
+    if (windowWidth != 0.0)
+    {
+        // Get the width of the ListBox.
+        double listBoxWidth = Scenarios->ActualWidth;
+
+        // Is the ListBox using any margins that we need to consider?
+        double listBoxMarginLeft = Scenarios->Margin.Left;
+        double listBoxMarginRight = Scenarios->Margin.Right;
+
+        // Figure out how much room is left after considering the list box width
+        double availableWidth = windowWidth - listBoxWidth;
+
+        // Is the top most child using margins?
+        double layoutRootMarginLeft = ContentRoot->Margin.Left;
+        double layoutRootMarginRight = ContentRoot->Margin.Right;
+
+        // We have different widths to use depending on the view state
+        if (ApplicationView::Value != ApplicationViewState::Snapped)
+        {
+            // Make us as big as the the left over space, factoring in the ListBox width, the ListBox margins.
+            // and the LayoutRoot's margins
+            InputSection->Width = ((availableWidth) -
+                (layoutRootMarginLeft + layoutRootMarginRight + listBoxMarginLeft + listBoxMarginRight));
+        }
+        else
+        {
+            // Make us as big as the left over space, factoring in just the LayoutRoot's margins.
+            if (autoSizeInputSectionWhenSnapped)
+            {
+                InputSection->Width = (windowWidth - (layoutRootMarginLeft + layoutRootMarginRight));
+            }
+        }
+    }
+    InvalidateViewState();
+}
+
+void MainPage::InvalidateViewState()
+{
+    // Are we going to snapped mode?
+    if (ApplicationView::Value == ApplicationViewState::Snapped)
+    {
+        Grid::SetRow(DescriptionText, 3);
+        Grid::SetColumn(DescriptionText, 0);
+
+        Grid::SetRow(InputSection, 4);
+        Grid::SetColumn(InputSection, 0);
+
+        Grid::SetRow(FooterPanel, 2);
+        Grid::SetColumn(FooterPanel, 0);
+    }
+    else
+    {
+        Grid::SetRow(DescriptionText, 1);
+        Grid::SetColumn(DescriptionText, 1);
+
+        Grid::SetRow(InputSection, 2);
+        Grid::SetColumn(InputSection, 1);
+
+        Grid::SetRow(FooterPanel, 1);
+        Grid::SetColumn(FooterPanel, 1);
+    }
+
+    //  Since we don't load the scenario page in the traditional manner (we just pluck out the
+    // input and output sections from the page) we need to ensure that any VSM code used
+    // by the scenario's input and output sections is fired.
+    VisualStateManager::GoToState(InputSection, "Input" + LayoutAwarePage::DetermineVisualState(ApplicationView::Value), false);
+    VisualStateManager::GoToState(OutputSection, "Output" + LayoutAwarePage::DetermineVisualState(ApplicationView::Value), false);
+}
+
+void MainPage::PopulateScenarios()
+{
+    ScenarioList = ref new Platform::Collections::Vector<Object^>();
+
+    // Populate the ListBox with the list of scenarios as defined in Constants.cpp.
+    for (unsigned int i = 0; i < scenarios->Length; ++i)
+    {
+        Scenario s = scenarios[i];
+        ListBoxItem^ item = ref new ListBoxItem();
+        item->Name = s.ClassName;
+        item->Content = (i + 1).ToString() + ") " + s.Title;
+        ScenarioList->Append(item);
+    }
+
+    // Bind the ListBox to the scenario list.
+    Scenarios->ItemsSource = ScenarioList;
+    Scenarios->ScrollIntoView(Scenarios->SelectedItem);
+}
+
+/// <summary>
+/// This method is responsible for loading the individual input and output sections for each scenario.  This
+/// is based on navigating a hidden Frame to the ScenarioX.xaml page and then extracting out the input
+/// and output sections into the respective UserControl on the main page.
+/// </summary>
+/// <param name="scenarioName"></param>
+void MainPage::LoadScenario(String^ scenarioName)
+{
+    autoSizeInputSectionWhenSnapped = true;
+
+    // Load the ScenarioX.xaml file into the Frame.
+    TypeName scenarioType = {scenarioName, TypeKind::Custom};
+    HiddenFrame->Navigate(scenarioType, this);
+
+    // Get the top element, the Page, so we can look up the elements
+    // that represent the input and output sections of the ScenarioX file.
+    Page^ hiddenPage = safe_cast<Page^>(HiddenFrame->Content);
+
+    // Get each element.
+    UIElement^ input = safe_cast<UIElement^>(hiddenPage->FindName("Input"));
+    UIElement^ output = safe_cast<UIElement^>(hiddenPage->FindName("Output"));
+
+    if (input == nullptr)
+    {
+        // Malformed input section.
+        NotifyUser("Cannot load scenario input section for " + scenarioName +
+            "  Make sure root of input section markup has x:Name of 'Input'", NotifyType::ErrorMessage);
+        return;
+    }
+
+    if (output == nullptr)
+    {
+        // Malformed output section.
+        NotifyUser("Cannot load scenario output section for " + scenarioName +
+            "  Make sure root of output section markup has x:Name of 'Output'", NotifyType::ErrorMessage);
+        return;
+    }
+
+    // Find the LayoutRoot which parents the input and output sections in the main page.
+    Panel^ panel = safe_cast<Panel^>(hiddenPage->FindName("LayoutRoot"));
+
+    if (panel != nullptr)
+    {
+        unsigned int index = 0;
+        UIElementCollection^ collection = panel->Children;
+
+        // Get rid of the content that is currently in the intput and output sections.
+        collection->IndexOf(input, &index);
+        collection->RemoveAt(index);
+
+        collection->IndexOf(output, &index);
+        collection->RemoveAt(index);
+
+        // Populate the input and output sections with the newly loaded content.
+        InputSection->Content = input;
+        OutputSection->Content = output;
+
+        ScenarioLoaded(this, nullptr);
+    }
+    else
+    {
+        // Malformed Scenario file.
+        NotifyUser("Cannot load scenario: " + scenarioName + ".  Make sure root tag in the '" +
+            scenarioName + "' file has an x:Name of 'LayoutRoot'", NotifyType::ErrorMessage);
+    }
+}
+
+void MainPage::Scenarios_SelectionChanged(Object^ sender, SelectionChangedEventArgs^ e)
+{
+    if (Scenarios->SelectedItem != nullptr)
+    {
+        NotifyUser("", NotifyType::StatusMessage);
+
+        LoadScenario((safe_cast<ListBoxItem^>(Scenarios->SelectedItem))->Name);
+        InvalidateSize();
+    }
+}
+
+void MainPage::NotifyUser(String^ strMessage, NotifyType type)
+{
+    switch (type)
+    {
+    case NotifyType::StatusMessage:
+        // Use the status message style.
+        StatusBlock->Style = safe_cast<Windows::UI::Xaml::Style^>(this->Resources->Lookup("StatusStyle"));
+        break;
+    case NotifyType::ErrorMessage:
+        // Use the error message style.
+        StatusBlock->Style = safe_cast<Windows::UI::Xaml::Style^>(this->Resources->Lookup("ErrorStyle"));
+        break;
+    default:
+        break;
+    }
+    StatusBlock->Text = strMessage;
+
+    // Collapsed the StatusBlock if it has no text to conserve real estate.
+    if (StatusBlock->Text != "")
+    {
+        StatusBlock->Visibility = Windows::UI::Xaml::Visibility::Visible;
+    }
+    else
+    {
+        StatusBlock->Visibility = Windows::UI::Xaml::Visibility::Collapsed;
+    }
+}
+
+void MainPage::Footer_Click(Object^ sender, RoutedEventArgs^ e)
+{
+    auto uri = ref new Uri((String^)((HyperlinkButton^)sender)->Tag);
+    Windows::System::Launcher::LaunchUriAsync(uri);
+}
+
+
+/// <summary>
+/// Populates the page with content passed during navigation.  Any saved state is also
+/// provided when recreating a page from a prior session.
+/// </summary>
+/// <param name="navigationParameter">The parameter value passed to
+/// <see cref="Frame::Navigate(Type, Object)"/> when this page was initially requested.
+/// </param>
+/// <param name="pageState">A map of state preserved by this page during an earlier
+/// session.  This will be null the first time a page is visited.</param>
+void MainPage::LoadState(Object^ navigationParameter, IMap<String^, Object^>^ pageState)
+{
+    (void) navigationParameter;    // Unused parameter
+
+    PopulateScenarios();
+
+    // Starting scenario is the first or based upon a previous state.
+    ListBoxItem^ startingScenario = nullptr;
+    int startingScenarioIndex = -1;
+
+    if (pageState != nullptr && pageState->HasKey("SelectedScenarioIndex"))
+    {
+        startingScenarioIndex = safe_cast<int>(pageState->Lookup("SelectedScenarioIndex"));
+    }
+
+    Scenarios->SelectedIndex = startingScenarioIndex != -1 ? startingScenarioIndex : 0;
+
+    InvalidateViewState();
+}
+
+/// <summary>
+/// Preserves state associated with this page in case the application is suspended or the
+/// page is discarded from the navigation cache.  Values must conform to the serialization
+/// requirements of <see cref="SuspensionManager::SessionState"/>.
+/// </summary>
+/// <param name="pageState">An empty map to be populated with serializable state.</param>
+void MainPage::SaveState(IMap<String^, Object^>^ pageState)
+{
+    int selectedListBoxItemIndex = Scenarios->SelectedIndex;
+    pageState->Insert("SelectedScenarioIndex", selectedListBoxItemIndex);
+}
diff --git a/samples/winrt/ImageManipulations/MainPage.xaml.h b/samples/winrt/ImageManipulations/MainPage.xaml.h
new file mode 100644
index 000000000..36fb7796a
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MainPage.xaml.h
@@ -0,0 +1,105 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// MainPage.xaml.h
+// Declaration of the MainPage.xaml class.
+//
+
+#pragma once
+
+#include "pch.h"
+#include "MainPage.g.h"
+#include "Common\LayoutAwarePage.h" // Required by generated header
+#include "Constants.h"
+
+namespace SDKSample
+{
+    public enum class NotifyType
+    {
+        StatusMessage,
+        ErrorMessage
+    };
+
+    public ref class MainPageSizeChangedEventArgs sealed
+    {
+    public:
+        property Windows::UI::ViewManagement::ApplicationViewState ViewState
+        {
+            Windows::UI::ViewManagement::ApplicationViewState get()
+            {
+                return viewState;
+            }
+
+            void set(Windows::UI::ViewManagement::ApplicationViewState value)
+            {
+                viewState = value;
+            }
+        }
+
+    private:
+        Windows::UI::ViewManagement::ApplicationViewState viewState;
+    };
+
+    public ref class MainPage sealed
+    {
+    public:
+        MainPage();
+
+    protected:
+        virtual void LoadState(Platform::Object^ navigationParameter,
+            Windows::Foundation::Collections::IMap<Platform::String^, Platform::Object^>^ pageState) override;
+        virtual void SaveState(Windows::Foundation::Collections::IMap<Platform::String^, Platform::Object^>^ pageState) override;
+
+    internal:
+        property bool AutoSizeInputSectionWhenSnapped
+        {
+            bool get()
+            {
+                return autoSizeInputSectionWhenSnapped;
+            }
+
+            void set(bool value)
+            {
+                autoSizeInputSectionWhenSnapped = value;
+            }
+        }
+
+        property Windows::ApplicationModel::Activation::LaunchActivatedEventArgs^ LaunchArgs
+       {
+            Windows::ApplicationModel::Activation::LaunchActivatedEventArgs^ get()
+            {
+                return safe_cast<App^>(App::Current)->LaunchArgs;
+            }
+        }
+
+        void NotifyUser(Platform::String^ strMessage, NotifyType type);
+        void LoadScenario(Platform::String^ scenarioName);
+        event Windows::Foundation::EventHandler<Platform::Object^>^ ScenarioLoaded;
+        event Windows::Foundation::EventHandler<MainPageSizeChangedEventArgs^>^ MainPageResized;
+
+    private:
+        void PopulateScenarios();
+        void InvalidateSize();
+        void InvalidateViewState();
+
+        Platform::Collections::Vector<Object^>^ ScenarioList;
+        Windows::UI::Xaml::Controls::Frame^ HiddenFrame;
+        void Footer_Click(Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+        bool autoSizeInputSectionWhenSnapped;
+
+        void MainPage_SizeChanged(Object^ sender, Windows::UI::Xaml::SizeChangedEventArgs^ e);
+        void Scenarios_SelectionChanged(Object^ sender, Windows::UI::Xaml::Controls::SelectionChangedEventArgs^ e);
+
+    internal:
+        static MainPage^ Current;
+
+    };
+}
diff --git a/samples/winrt/ImageManipulations/MediaCapture.sln b/samples/winrt/ImageManipulations/MediaCapture.sln
new file mode 100644
index 000000000..e1f9aa385
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaCapture.sln
@@ -0,0 +1,52 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 11 Express for Windows 8
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MediaCapture", "MediaCapture.vcxproj", "{C5B886A7-8300-46FF-B533-9613DE2AF637}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GrayscaleTransform", "MediaExtensions\OcvTransform\OcvTransform.vcxproj", "{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|ARM = Debug|ARM
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|ARM = Release|ARM
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|ARM.ActiveCfg = Debug|ARM
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|ARM.Build.0 = Debug|ARM
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|Win32.ActiveCfg = Debug|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|Win32.Build.0 = Debug|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|x64.ActiveCfg = Debug|x64
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|x64.Build.0 = Debug|x64
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|ARM.ActiveCfg = Release|ARM
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|ARM.Build.0 = Release|ARM
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|Win32.ActiveCfg = Release|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|Win32.Build.0 = Release|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|x64.ActiveCfg = Release|x64
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|x64.Build.0 = Release|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|ARM.ActiveCfg = Debug|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|ARM.Build.0 = Debug|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|ARM.Deploy.0 = Debug|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|Win32.ActiveCfg = Debug|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|Win32.Build.0 = Debug|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|Win32.Deploy.0 = Debug|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|x64.ActiveCfg = Debug|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|x64.Build.0 = Debug|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|x64.Deploy.0 = Debug|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|ARM.ActiveCfg = Release|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|ARM.Build.0 = Release|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|ARM.Deploy.0 = Release|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|Win32.ActiveCfg = Release|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|Win32.Build.0 = Release|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|Win32.Deploy.0 = Release|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|x64.ActiveCfg = Release|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|x64.Build.0 = Release|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|x64.Deploy.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/samples/winrt/ImageManipulations/MediaCapture.vcxproj b/samples/winrt/ImageManipulations/MediaCapture.vcxproj
new file mode 100644
index 000000000..a0f8c84f9
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaCapture.vcxproj
@@ -0,0 +1,310 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|ARM">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM">
+      <Configuration>Release</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{C5B886A7-8300-46FF-B533-9613DE2AF637}</ProjectGuid>
+    <RootNamespace>SDKSample</RootNamespace>
+    <DefaultLanguage>en-US</DefaultLanguage>
+    <VCTargetsPath Condition="'$(VCTargetsPath11)' != '' and '$(VSVersion)' == '' and '$(VisualStudioVersion)' == ''">$(VCTargetsPath11)</VCTargetsPath>
+    <MinimumVisualStudioVersion>11.0</MinimumVisualStudioVersion>
+    <AppContainerApplication>true</AppContainerApplication>
+    <ProjectName>MediaCapture</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="AdvancedCapture.xaml.h">
+      <DependentUpon>AdvancedCapture.xaml</DependentUpon>
+      <SubType>Code</SubType>
+    </ClInclude>
+    <ClInclude Include="Constants.h" />
+    <ClInclude Include="MainPage.xaml.h">
+      <DependentUpon>MainPage.xaml</DependentUpon>
+    </ClInclude>
+    <ClInclude Include="pch.h" />
+    <ClInclude Include="Common\LayoutAwarePage.h" />
+    <ClInclude Include="Common\SuspensionManager.h" />
+    <ClInclude Include="App.xaml.h">
+      <DependentUpon>App.xaml</DependentUpon>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ApplicationDefinition Include="App.xaml">
+      <SubType>Designer</SubType>
+    </ApplicationDefinition>
+    <Page Include="AdvancedCapture.xaml">
+      <SubType>Designer</SubType>
+    </Page>
+    <Page Include="Common\StandardStyles.xaml">
+      <SubType>Designer</SubType>
+    </Page>
+    <Page Include="MainPage.xaml" />
+    <Page Include="Sample-Utils\SampleTemplateStyles.xaml">
+      <SubType>Designer</SubType>
+    </Page>
+  </ItemGroup>
+  <ItemGroup>
+    <AppxManifest Include="Package.appxmanifest">
+      <SubType>Designer</SubType>
+    </AppxManifest>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="AdvancedCapture.xaml.cpp">
+      <DependentUpon>AdvancedCapture.xaml</DependentUpon>
+      <SubType>Code</SubType>
+    </ClCompile>
+    <ClCompile Include="App.xaml.cpp">
+      <DependentUpon>App.xaml</DependentUpon>
+    </ClCompile>
+    <ClCompile Include="Common\LayoutAwarePage.cpp" />
+    <ClCompile Include="Constants.cpp" />
+    <ClCompile Include="Common\SuspensionManager.cpp" />
+    <ClCompile Include="MainPage.xaml.cpp">
+      <DependentUpon>MainPage.xaml</DependentUpon>
+    </ClCompile>
+    <ClCompile Include="pch.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <Image Include="assets\opencv-logo-150.png" />
+    <Image Include="assets\opencv-logo-30.png" />
+    <Image Include="Assets\splash-sdk.png" />
+    <Image Include="Assets\windows-sdk.png" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="MediaExtensions\OcvTransform\OcvTransform.vcxproj">
+      <Project>{ba69218f-da5c-4d14-a78d-21a9e4dec669}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="$(OPENCV_DIR)\bin\opencv_calib3d245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_contrib245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_core245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_features2d245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_flann245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_highgui245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_imgproc245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_legacy245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_ml245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_nonfree245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_objdetect245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_photo245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_stitching245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_superres245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_ts245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_video245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+    <None Include="$(OPENCV_DIR)\bin\opencv_videostab245.dll">
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+    </None>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/Common/AsyncCB.h b/samples/winrt/ImageManipulations/MediaExtensions/Common/AsyncCB.h
new file mode 100644
index 000000000..3321b47c9
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/Common/AsyncCB.h
@@ -0,0 +1,81 @@
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////
+//  AsyncCallback [template]
+//
+//  Description:
+//  Helper class that routes IMFAsyncCallback::Invoke calls to a class
+//  method on the parent class.
+//
+//  Usage:
+//  Add this class as a member variable. In the parent class constructor,
+//  initialize the AsyncCallback class like this:
+//  	m_cb(this, &CYourClass::OnInvoke)
+//  where
+//      m_cb       = AsyncCallback object
+//      CYourClass = parent class
+//      OnInvoke   = Method in the parent class to receive Invoke calls.
+//
+//  The parent's OnInvoke method (you can name it anything you like) must
+//  have a signature that matches the InvokeFn typedef below.
+//////////////////////////////////////////////////////////////////////////
+
+// T: Type of the parent object
+template<class T>
+class AsyncCallback : public IMFAsyncCallback
+{
+public:
+    typedef HRESULT (T::*InvokeFn)(IMFAsyncResult *pAsyncResult);
+
+    AsyncCallback(T *pParent, InvokeFn fn) : m_pParent(pParent), m_pInvokeFn(fn)
+    {
+    }
+
+    // IUnknown
+    STDMETHODIMP_(ULONG) AddRef() {
+        // Delegate to parent class.
+        return m_pParent->AddRef();
+    }
+    STDMETHODIMP_(ULONG) Release() {
+        // Delegate to parent class.
+        return m_pParent->Release();
+    }
+    STDMETHODIMP QueryInterface(REFIID iid, void** ppv)
+    {
+        if (!ppv)
+        {
+            return E_POINTER;
+        }
+        if (iid == __uuidof(IUnknown))
+        {
+            *ppv = static_cast<IUnknown*>(static_cast<IMFAsyncCallback*>(this));
+        }
+        else if (iid == __uuidof(IMFAsyncCallback))
+        {
+            *ppv = static_cast<IMFAsyncCallback*>(this);
+        }
+        else
+        {
+            *ppv = NULL;
+            return E_NOINTERFACE;
+        }
+        AddRef();
+        return S_OK;
+    }
+
+
+    // IMFAsyncCallback methods
+    STDMETHODIMP GetParameters(DWORD*, DWORD*)
+    {
+        // Implementation of this method is optional.
+        return E_NOTIMPL;
+    }
+
+    STDMETHODIMP Invoke(IMFAsyncResult* pAsyncResult)
+    {
+        return (m_pParent->*m_pInvokeFn)(pAsyncResult);
+    }
+
+    T *m_pParent;
+    InvokeFn m_pInvokeFn;
+};
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/Common/BufferLock.h b/samples/winrt/ImageManipulations/MediaExtensions/Common/BufferLock.h
new file mode 100644
index 000000000..1c47deabd
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/Common/BufferLock.h
@@ -0,0 +1,101 @@
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved
+
+
+#pragma once
+
+
+//////////////////////////////////////////////////////////////////////////
+//  VideoBufferLock
+//
+//  Description:
+//  Locks a video buffer that might or might not support IMF2DBuffer.
+//
+//////////////////////////////////////////////////////////////////////////
+
+class VideoBufferLock
+{
+public:
+    VideoBufferLock(IMFMediaBuffer *pBuffer) : m_p2DBuffer(NULL)
+    {
+        m_pBuffer = pBuffer;
+        m_pBuffer->AddRef();
+
+        // Query for the 2-D buffer interface. OK if this fails.
+        m_pBuffer->QueryInterface(IID_PPV_ARGS(&m_p2DBuffer));
+    }
+
+    ~VideoBufferLock()
+    {
+        UnlockBuffer();
+        SafeRelease(&m_pBuffer);
+        SafeRelease(&m_p2DBuffer);
+    }
+
+    // LockBuffer:
+    // Locks the buffer. Returns a pointer to scan line 0 and returns the stride.
+
+    // The caller must provide the default stride as an input parameter, in case
+    // the buffer does not expose IMF2DBuffer. You can calculate the default stride
+    // from the media type.
+
+    HRESULT LockBuffer(
+        LONG  lDefaultStride,    // Minimum stride (with no padding).
+        DWORD dwHeightInPixels,  // Height of the image, in pixels.
+        BYTE  **ppbScanLine0,    // Receives a pointer to the start of scan line 0.
+        LONG  *plStride          // Receives the actual stride.
+        )
+    {
+        HRESULT hr = S_OK;
+
+        // Use the 2-D version if available.
+        if (m_p2DBuffer)
+        {
+            hr = m_p2DBuffer->Lock2D(ppbScanLine0, plStride);
+        }
+        else
+        {
+            // Use non-2D version.
+            BYTE *pData = NULL;
+
+            hr = m_pBuffer->Lock(&pData, NULL, NULL);
+            if (SUCCEEDED(hr))
+            {
+                *plStride = lDefaultStride;
+                if (lDefaultStride < 0)
+                {
+                    // Bottom-up orientation. Return a pointer to the start of the
+                    // last row *in memory* which is the top row of the image.
+                    *ppbScanLine0 = pData + abs(lDefaultStride) * (dwHeightInPixels - 1);
+                }
+                else
+                {
+                    // Top-down orientation. Return a pointer to the start of the
+                    // buffer.
+                    *ppbScanLine0 = pData;
+                }
+            }
+        }
+        return hr;
+    }
+
+    HRESULT UnlockBuffer()
+    {
+        if (m_p2DBuffer)
+        {
+            return m_p2DBuffer->Unlock2D();
+        }
+        else
+        {
+            return m_pBuffer->Unlock();
+        }
+    }
+
+private:
+    IMFMediaBuffer  *m_pBuffer;
+    IMF2DBuffer     *m_p2DBuffer;
+};
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/Common/CritSec.h b/samples/winrt/ImageManipulations/MediaExtensions/Common/CritSec.h
new file mode 100644
index 000000000..88851f9f0
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/Common/CritSec.h
@@ -0,0 +1,62 @@
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////
+//  CritSec
+//  Description: Wraps a critical section.
+//////////////////////////////////////////////////////////////////////////
+
+class CritSec
+{
+public:
+    CRITICAL_SECTION m_criticalSection;
+public:
+    CritSec()
+    {
+        InitializeCriticalSectionEx(&m_criticalSection, 100, 0);
+    }
+
+    ~CritSec()
+    {
+        DeleteCriticalSection(&m_criticalSection);
+    }
+
+    _Acquires_lock_(m_criticalSection)
+    void Lock()
+    {
+        EnterCriticalSection(&m_criticalSection);
+    }
+
+    _Releases_lock_(m_criticalSection)
+    void Unlock()
+    {
+        LeaveCriticalSection(&m_criticalSection);
+    }
+};
+
+
+//////////////////////////////////////////////////////////////////////////
+//  AutoLock
+//  Description: Provides automatic locking and unlocking of a
+//               of a critical section.
+//
+//  Note: The AutoLock object must go out of scope before the CritSec.
+//////////////////////////////////////////////////////////////////////////
+
+class AutoLock
+{
+private:
+    CritSec *m_pCriticalSection;
+public:
+    _Acquires_lock_(m_pCriticalSection)
+    AutoLock(CritSec& crit)
+    {
+        m_pCriticalSection = &crit;
+        m_pCriticalSection->Lock();
+    }
+
+    _Releases_lock_(m_pCriticalSection)
+    ~AutoLock()
+    {
+        m_pCriticalSection->Unlock();
+    }
+};
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/Common/LinkList.h b/samples/winrt/ImageManipulations/MediaExtensions/Common/LinkList.h
new file mode 100644
index 000000000..3657b3d7c
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/Common/LinkList.h
@@ -0,0 +1,516 @@
+//-----------------------------------------------------------------------------
+// File: Linklist.h
+// Desc: Linked list class.
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+//  Copyright (C) Microsoft Corporation. All rights reserved.
+//-----------------------------------------------------------------------------
+
+#pragma once
+
+// Notes:
+//
+// The List class template implements a simple double-linked list.
+// It uses STL's copy semantics.
+
+// There are two versions of the Clear() method:
+//  Clear(void) clears the list w/out cleaning up the object.
+//  Clear(FN fn) takes a functor object that releases the objects, if they need cleanup.
+
+// The List class supports enumeration. Example of usage:
+//
+// List<T>::POSIITON pos = list.GetFrontPosition();
+// while (pos != list.GetEndPosition())
+// {
+//     T item;
+//     hr = list.GetItemPos(&item);
+//     pos = list.Next(pos);
+// }
+
+// The ComPtrList class template derives from List<> and implements a list of COM pointers.
+
+template <class T>
+struct NoOp
+{
+    void operator()(T& t)
+    {
+    }
+};
+
+template <class T>
+class List
+{
+protected:
+
+    // Nodes in the linked list
+    struct Node
+    {
+        Node *prev;
+        Node *next;
+        T    item;
+
+        Node() : prev(nullptr), next(nullptr)
+        {
+        }
+
+        Node(T item) : prev(nullptr), next(nullptr)
+        {
+            this->item = item;
+        }
+
+        T Item() const { return item; }
+    };
+
+public:
+
+    // Object for enumerating the list.
+    class POSITION
+    {
+        friend class List<T>;
+
+    public:
+        POSITION() : pNode(nullptr)
+        {
+        }
+
+        bool operator==(const POSITION &p) const
+        {
+            return pNode == p.pNode;
+        }
+
+        bool operator!=(const POSITION &p) const
+        {
+            return pNode != p.pNode;
+        }
+
+    private:
+        const Node *pNode;
+
+        POSITION(Node *p) : pNode(p)
+        {
+        }
+    };
+
+protected:
+    Node    m_anchor;  // Anchor node for the linked list.
+    DWORD   m_count;   // Number of items in the list.
+
+    Node* Front() const
+    {
+        return m_anchor.next;
+    }
+
+    Node* Back() const
+    {
+        return m_anchor.prev;
+    }
+
+    virtual HRESULT InsertAfter(T item, Node *pBefore)
+    {
+        if (pBefore == nullptr)
+        {
+            return E_POINTER;
+        }
+
+        Node *pNode = new Node(item);
+        if (pNode == nullptr)
+        {
+            return E_OUTOFMEMORY;
+        }
+
+        Node *pAfter = pBefore->next;
+
+        pBefore->next = pNode;
+        pAfter->prev = pNode;
+
+        pNode->prev = pBefore;
+        pNode->next = pAfter;
+
+        m_count++;
+
+        return S_OK;
+    }
+
+    virtual HRESULT GetItem(const Node *pNode, T* ppItem)
+    {
+        if (pNode == nullptr || ppItem == nullptr)
+        {
+            return E_POINTER;
+        }
+
+        *ppItem = pNode->item;
+        return S_OK;
+    }
+
+    // RemoveItem:
+    // Removes a node and optionally returns the item.
+    // ppItem can be nullptr.
+    virtual HRESULT RemoveItem(Node *pNode, T *ppItem)
+    {
+        if (pNode == nullptr)
+        {
+            return E_POINTER;
+        }
+
+        assert(pNode != &m_anchor); // We should never try to remove the anchor node.
+        if (pNode == &m_anchor)
+        {
+            return E_INVALIDARG;
+        }
+
+
+        T item;
+
+        // The next node's previous is this node's previous.
+        pNode->next->prev = pNode->prev;
+
+        // The previous node's next is this node's next.
+        pNode->prev->next = pNode->next;
+
+        item = pNode->item;
+        delete pNode;
+
+        m_count--;
+
+        if (ppItem)
+        {
+            *ppItem = item;
+        }
+
+        return S_OK;
+    }
+
+public:
+
+    List()
+    {
+        m_anchor.next = &m_anchor;
+        m_anchor.prev = &m_anchor;
+
+        m_count = 0;
+    }
+
+    virtual ~List()
+    {
+        Clear();
+    }
+
+    // Insertion functions
+    HRESULT InsertBack(T item)
+    {
+        return InsertAfter(item, m_anchor.prev);
+    }
+
+
+    HRESULT InsertFront(T item)
+    {
+        return InsertAfter(item, &m_anchor);
+    }
+
+    HRESULT InsertPos(POSITION pos, T item)
+    {
+        if (pos.pNode == nullptr)
+        {
+            return InsertBack(item);
+        }
+
+        return InsertAfter(item, pos.pNode->prev);
+    }
+
+    // RemoveBack: Removes the tail of the list and returns the value.
+    // ppItem can be nullptr if you don't want the item back. (But the method does not release the item.)
+    HRESULT RemoveBack(T *ppItem)
+    {
+        if (IsEmpty())
+        {
+            return E_FAIL;
+        }
+        else
+        {
+            return RemoveItem(Back(), ppItem);
+        }
+    }
+
+    // RemoveFront: Removes the head of the list and returns the value.
+    // ppItem can be nullptr if you don't want the item back. (But the method does not release the item.)
+    HRESULT RemoveFront(T *ppItem)
+    {
+        if (IsEmpty())
+        {
+            return E_FAIL;
+        }
+        else
+        {
+            return RemoveItem(Front(), ppItem);
+        }
+    }
+
+    // GetBack: Gets the tail item.
+    HRESULT GetBack(T *ppItem)
+    {
+        if (IsEmpty())
+        {
+            return E_FAIL;
+        }
+        else
+        {
+            return GetItem(Back(), ppItem);
+        }
+    }
+
+    // GetFront: Gets the front item.
+    HRESULT GetFront(T *ppItem)
+    {
+        if (IsEmpty())
+        {
+            return E_FAIL;
+        }
+        else
+        {
+            return GetItem(Front(), ppItem);
+        }
+    }
+
+
+    // GetCount: Returns the number of items in the list.
+    DWORD GetCount() const { return m_count; }
+
+    bool IsEmpty() const
+    {
+        return (GetCount() == 0);
+    }
+
+    // Clear: Takes a functor object whose operator()
+    // frees the object on the list.
+    template <class FN>
+    void Clear(FN& clear_fn)
+    {
+        Node *n = m_anchor.next;
+
+        // Delete the nodes
+        while (n != &m_anchor)
+        {
+            clear_fn(n->item);
+
+            Node *tmp = n->next;
+            delete n;
+            n = tmp;
+        }
+
+        // Reset the anchor to point at itself
+        m_anchor.next = &m_anchor;
+        m_anchor.prev = &m_anchor;
+
+        m_count = 0;
+    }
+
+    // Clear: Clears the list. (Does not delete or release the list items.)
+    virtual void Clear()
+    {
+        NoOp<T> clearOp;
+        Clear<>(clearOp);
+    }
+
+
+    // Enumerator functions
+
+    POSITION FrontPosition()
+    {
+        if (IsEmpty())
+        {
+            return POSITION(nullptr);
+        }
+        else
+        {
+            return POSITION(Front());
+        }
+    }
+
+    POSITION EndPosition() const
+    {
+        return POSITION();
+    }
+
+    HRESULT GetItemPos(POSITION pos, T *ppItem)
+    {
+        if (pos.pNode)
+        {
+            return GetItem(pos.pNode, ppItem);
+        }
+        else
+        {
+            return E_FAIL;
+        }
+    }
+
+    POSITION Next(const POSITION pos)
+    {
+        if (pos.pNode && (pos.pNode->next != &m_anchor))
+        {
+            return POSITION(pos.pNode->next);
+        }
+        else
+        {
+            return POSITION(nullptr);
+        }
+    }
+
+    // Remove an item at a position.
+    // The item is returns in ppItem, unless ppItem is nullptr.
+    // NOTE: This method invalidates the POSITION object.
+    HRESULT Remove(POSITION& pos, T *ppItem)
+    {
+        if (pos.pNode)
+        {
+            // Remove const-ness temporarily...
+            Node *pNode = const_cast<Node*>(pos.pNode);
+
+            pos = POSITION();
+
+            return RemoveItem(pNode, ppItem);
+        }
+        else
+        {
+            return E_INVALIDARG;
+        }
+    }
+
+};
+
+
+
+// Typical functors for Clear method.
+
+// ComAutoRelease: Releases COM pointers.
+// MemDelete: Deletes pointers to new'd memory.
+
+class ComAutoRelease
+{
+public:
+    void operator()(IUnknown *p)
+    {
+        if (p)
+        {
+            p->Release();
+        }
+    }
+};
+
+class MemDelete
+{
+public:
+    void operator()(void *p)
+    {
+        if (p)
+        {
+            delete p;
+        }
+    }
+};
+
+
+// ComPtrList class
+// Derived class that makes it safer to store COM pointers in the List<> class.
+// It automatically AddRef's the pointers that are inserted onto the list
+// (unless the insertion method fails).
+//
+// T must be a COM interface type.
+// example: ComPtrList<IUnknown>
+//
+// NULLABLE: If true, client can insert nullptr pointers. This means GetItem can
+// succeed but return a nullptr pointer. By default, the list does not allow nullptr
+// pointers.
+
+template <class T, bool NULLABLE = FALSE>
+class ComPtrList : public List<T*>
+{
+public:
+
+    typedef T* Ptr;
+
+    void Clear()
+    {
+        ComAutoRelease car;
+        List<Ptr>::Clear(car);
+    }
+
+    ~ComPtrList()
+    {
+        Clear();
+    }
+
+protected:
+    HRESULT InsertAfter(Ptr item, Node *pBefore)
+    {
+        // Do not allow nullptr item pointers unless NULLABLE is true.
+        if (item == nullptr && !NULLABLE)
+        {
+            return E_POINTER;
+        }
+
+        if (item)
+        {
+            item->AddRef();
+        }
+
+        HRESULT hr = List<Ptr>::InsertAfter(item, pBefore);
+        if (FAILED(hr) && item != nullptr)
+        {
+            item->Release();
+        }
+        return hr;
+    }
+
+    HRESULT GetItem(const Node *pNode, Ptr* ppItem)
+    {
+        Ptr pItem = nullptr;
+
+        // The base class gives us the pointer without AddRef'ing it.
+        // If we return the pointer to the caller, we must AddRef().
+        HRESULT hr = List<Ptr>::GetItem(pNode, &pItem);
+        if (SUCCEEDED(hr))
+        {
+            assert(pItem || NULLABLE);
+            if (pItem)
+            {
+                *ppItem = pItem;
+                (*ppItem)->AddRef();
+            }
+        }
+        return hr;
+    }
+
+    HRESULT RemoveItem(Node *pNode, Ptr *ppItem)
+    {
+        // ppItem can be nullptr, but we need to get the
+        // item so that we can release it.
+
+        // If ppItem is not nullptr, we will AddRef it on the way out.
+
+        Ptr pItem = nullptr;
+
+        HRESULT hr = List<Ptr>::RemoveItem(pNode, &pItem);
+
+        if (SUCCEEDED(hr))
+        {
+            assert(pItem || NULLABLE);
+            if (ppItem && pItem)
+            {
+                *ppItem = pItem;
+                (*ppItem)->AddRef();
+            }
+
+            if (pItem)
+            {
+                pItem->Release();
+                pItem = nullptr;
+            }
+        }
+
+        return hr;
+    }
+};
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/Common/OpQueue.h b/samples/winrt/ImageManipulations/MediaExtensions/Common/OpQueue.h
new file mode 100644
index 000000000..73f3042ce
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/Common/OpQueue.h
@@ -0,0 +1,222 @@
+//////////////////////////////////////////////////////////////////////////
+//
+// OpQueue.h
+// Async operation queue.
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+//////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#pragma warning( push )
+#pragma warning( disable : 4355 )  // 'this' used in base member initializer list
+
+/*
+    This header file defines an object to help queue and serialize
+    asynchronous operations.
+
+    Background:
+
+    To perform an operation asynchronously in Media Foundation, an object
+    does one of the following:
+
+        1. Calls MFPutWorkItem(Ex), using either a standard work queue
+           identifier or a caller-allocated work queue. The work-queue
+           thread invokes the object's callback.
+
+        2. Creates an async result object (IMFAsyncResult) and calls
+           MFInvokeCallback to invoke the object's callback.
+
+    Ultimately, either of these cause the object's callback to be invoked
+    from a work-queue thread. The object can then complete the operation
+    inside the callback.
+
+    However, the Media Foundation platform may dispatch async callbacks in
+    parallel on several threads. Putting an item on a work queue does NOT
+    guarantee that one operation will complete before the next one starts,
+    or even that work items will be dispatched in the same order they were
+    called.
+
+    To serialize async operations that should not overlap, an object should
+    use a queue. While one operation is pending, subsequent operations are
+    put on the queue, and only dispatched after the previous operation is
+    complete.
+
+    The granularity of a single "operation" depends on the requirements of
+    that particular object. A single operation might involve several
+    asynchronous calls before the object dispatches the next operation on
+    the queue.
+
+
+*/
+
+
+
+//-------------------------------------------------------------------
+// OpQueue class template
+//
+// Base class for an async operation queue.
+//
+// TOperation: The class used to describe operations. This class must
+//          implement IUnknown.
+//
+// The OpQueue class is an abstract class. The derived class must
+// implement the following pure-virtual methods:
+//
+// - IUnknown methods (AddRef, Release, QI)
+//
+// - DispatchOperation:
+//
+//      Performs the asynchronous operation specified by pOp.
+//
+//      At the end of each operation, the derived class must call
+//      ProcessQueue to process the next operation in the queue.
+//
+//      NOTE: An operation is not required to complete inside the
+//      DispatchOperation method. A single operation might consist
+//      of several asynchronous method calls.
+//
+// - ValidateOperation:
+//
+//      Checks whether the object can perform the operation specified
+//      by pOp at this time.
+//
+//      If the object cannot perform the operation now (e.g., because
+//      another operation is still in progress) the method should
+//      return MF_E_NOTACCEPTING.
+//
+//-------------------------------------------------------------------
+#include "linklist.h"
+#include "AsyncCB.h"
+
+template <class T, class TOperation>
+class OpQueue //: public IUnknown
+{
+public:
+
+    typedef ComPtrList<TOperation>   OpList;
+
+    HRESULT QueueOperation(TOperation *pOp);
+
+protected:
+
+    HRESULT ProcessQueue();
+    HRESULT ProcessQueueAsync(IMFAsyncResult *pResult);
+
+    virtual HRESULT DispatchOperation(TOperation *pOp) = 0;
+    virtual HRESULT ValidateOperation(TOperation *pOp) = 0;
+
+    OpQueue(CRITICAL_SECTION& critsec)
+        : m_OnProcessQueue(static_cast<T *>(this), &OpQueue::ProcessQueueAsync),
+          m_critsec(critsec)
+    {
+    }
+
+    virtual ~OpQueue()
+    {
+    }
+
+protected:
+    OpList                  m_OpQueue;         // Queue of operations.
+    CRITICAL_SECTION&       m_critsec;         // Protects the queue state.
+    AsyncCallback<T>  m_OnProcessQueue;  // ProcessQueueAsync callback.
+};
+
+
+
+//-------------------------------------------------------------------
+// Place an operation on the queue.
+// Public method.
+//-------------------------------------------------------------------
+
+template <class T, class TOperation>
+HRESULT OpQueue<T, TOperation>::QueueOperation(TOperation *pOp)
+{
+    HRESULT hr = S_OK;
+
+    EnterCriticalSection(&m_critsec);
+
+    hr = m_OpQueue.InsertBack(pOp);
+    if (SUCCEEDED(hr))
+    {
+        hr = ProcessQueue();
+    }
+
+    LeaveCriticalSection(&m_critsec);
+    return hr;
+}
+
+
+//-------------------------------------------------------------------
+// Process the next operation on the queue.
+// Protected method.
+//
+// Note: This method dispatches the operation to a work queue.
+//-------------------------------------------------------------------
+
+template <class T, class TOperation>
+HRESULT OpQueue<T, TOperation>::ProcessQueue()
+{
+    HRESULT hr = S_OK;
+    if (m_OpQueue.GetCount() > 0)
+    {
+        hr = MFPutWorkItem2(
+            MFASYNC_CALLBACK_QUEUE_STANDARD,    // Use the standard work queue.
+            0,                                  // Default priority
+            &m_OnProcessQueue,                  // Callback method.
+            nullptr                             // State object.
+            );
+    }
+    return hr;
+}
+
+
+//-------------------------------------------------------------------
+// Process the next operation on the queue.
+// Protected method.
+//
+// Note: This method is called from a work-queue thread.
+//-------------------------------------------------------------------
+
+template <class T, class TOperation>
+HRESULT OpQueue<T, TOperation>::ProcessQueueAsync(IMFAsyncResult *pResult)
+{
+    HRESULT hr = S_OK;
+    TOperation *pOp = nullptr;
+
+    EnterCriticalSection(&m_critsec);
+
+    if (m_OpQueue.GetCount() > 0)
+    {
+        hr = m_OpQueue.GetFront(&pOp);
+
+        if (SUCCEEDED(hr))
+        {
+            hr = ValidateOperation(pOp);
+        }
+        if (SUCCEEDED(hr))
+        {
+            hr = m_OpQueue.RemoveFront(nullptr);
+        }
+        if (SUCCEEDED(hr))
+        {
+            (void)DispatchOperation(pOp);
+        }
+    }
+
+    if (pOp != nullptr)
+    {
+        pOp->Release();
+    }
+
+    LeaveCriticalSection(&m_critsec);
+    return hr;
+}
+
+#pragma warning( pop )
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvImageManipulations.idl b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvImageManipulations.idl
new file mode 100644
index 000000000..37235a092
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvImageManipulations.idl
@@ -0,0 +1,11 @@
+import "Windows.Media.idl";
+
+#include <sdkddkver.h>
+
+namespace OcvTransform
+{
+    [version(NTDDI_WIN8)]
+    runtimeclass OcvImageManipulations
+    {
+    }
+}
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.cpp b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.cpp
new file mode 100644
index 000000000..538f353f5
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.cpp
@@ -0,0 +1,1485 @@
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+
+#include "OcvTransform.h"
+#include "bufferlock.h"
+
+#include "opencv2\core\core.hpp"
+#include "opencv2\imgproc\imgproc.hpp"
+
+using namespace Microsoft::WRL;
+
+/*
+
+This sample implements a video effect as a Media Foundation transform (MFT).
+
+NOTES ON THE MFT IMPLEMENTATION
+
+1. The MFT has fixed streams: One input stream and one output stream.
+
+2. The MFT supports NV12 format only.
+
+3. If the MFT is holding an input sample, SetInputType and SetOutputType both fail.
+
+4. The input and output types must be identical.
+
+5. If both types are set, no type can be set until the current type is cleared.
+
+6. Preferred input types:
+
+     (a) If the output type is set, that's the preferred type.
+     (b) Otherwise, the preferred types are partial types, constructed from the
+         list of supported subtypes.
+
+7. Preferred output types: As above.
+
+8. Streaming:
+
+    The private BeingStreaming() method is called in response to the
+    MFT_MESSAGE_NOTIFY_BEGIN_STREAMING message.
+
+    If the client does not send MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, the MFT calls
+    BeginStreaming inside the first call to ProcessInput or ProcessOutput.
+
+    This is a good approach for allocating resources that your MFT requires for
+    streaming.
+
+9. The configuration attributes are applied in the BeginStreaming method. If the
+   client changes the attributes during streaming, the change is ignored until
+   streaming is stopped (either by changing the media types or by sending the
+   MFT_MESSAGE_NOTIFY_END_STREAMING message) and then restarted.
+
+*/
+
+
+// Static array of media types (preferred and accepted).
+const GUID g_MediaSubtypes[] =
+{
+    MFVideoFormat_NV12
+};
+
+HRESULT GetDefaultStride(IMFMediaType *pType, LONG *plStride);
+
+template <typename T>
+inline T clamp(const T& val, const T& minVal, const T& maxVal)
+{
+    return (val < minVal ? minVal : (val > maxVal ? maxVal : val));
+}
+
+OcvImageManipulations::OcvImageManipulations() :
+    m_pSample(NULL), m_pInputType(NULL), m_pOutputType(NULL),
+    m_imageWidthInPixels(0), m_imageHeightInPixels(0), m_cbImageSize(0),
+    m_TransformType(Preview), m_bStreamingInitialized(false),
+    m_pAttributes(NULL)
+{
+    InitializeCriticalSectionEx(&m_critSec, 3000, 0);
+}
+
+OcvImageManipulations::~OcvImageManipulations()
+{
+    SafeRelease(&m_pInputType);
+    SafeRelease(&m_pOutputType);
+    SafeRelease(&m_pSample);
+    SafeRelease(&m_pAttributes);
+    DeleteCriticalSection(&m_critSec);
+}
+
+// Initialize the instance.
+STDMETHODIMP OcvImageManipulations::RuntimeClassInitialize()
+{
+    // Create the attribute store.
+    return MFCreateAttributes(&m_pAttributes, 3);
+}
+
+// IMediaExtension methods
+
+//-------------------------------------------------------------------
+// SetProperties
+// Sets the configuration of the effect
+//-------------------------------------------------------------------
+HRESULT OcvImageManipulations::SetProperties(ABI::Windows::Foundation::Collections::IPropertySet *pConfiguration)
+{
+    HRESULT hr = S_OK;
+
+    if (!pConfiguration)
+        return hr;
+
+    HSTRING key;
+    WindowsCreateString(L"{698649BE-8EAE-4551-A4CB-3EC98FBD3D86}", 38, &key);
+    Microsoft::WRL::ComPtr<ABI::Windows::Foundation::Collections::IMap<HSTRING, IInspectable *>> spSetting;
+    pConfiguration->QueryInterface(IID_PPV_ARGS(&spSetting));
+    boolean found;
+    spSetting->HasKey(key, &found);
+
+    if (found)
+    {
+        IInspectable* value;
+        spSetting->Lookup(key, &value);
+
+        Microsoft::WRL::ComPtr<ABI::Windows::Foundation::IReference<int>> ref;
+        hr = value->QueryInterface(IID_PPV_ARGS(&ref));
+        int effect = InvalidEffect;
+        hr = ref->get_Value(&effect);
+        if ((effect >= 0) && (effect < InvalidEffect))
+        {
+            m_TransformType = (ProcessingType)effect;
+        }
+    }
+
+    return hr;
+}
+
+// IMFTransform methods. Refer to the Media Foundation SDK documentation for details.
+
+//-------------------------------------------------------------------
+// GetStreamLimits
+// Returns the minimum and maximum number of streams.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetStreamLimits(
+    DWORD   *pdwInputMinimum,
+    DWORD   *pdwInputMaximum,
+    DWORD   *pdwOutputMinimum,
+    DWORD   *pdwOutputMaximum
+)
+{
+    if ((pdwInputMinimum == NULL) ||
+        (pdwInputMaximum == NULL) ||
+        (pdwOutputMinimum == NULL) ||
+        (pdwOutputMaximum == NULL))
+    {
+        return E_POINTER;
+    }
+
+    // This MFT has a fixed number of streams.
+    *pdwInputMinimum = 1;
+    *pdwInputMaximum = 1;
+    *pdwOutputMinimum = 1;
+    *pdwOutputMaximum = 1;
+    return S_OK;
+}
+
+
+//-------------------------------------------------------------------
+// GetStreamCount
+// Returns the actual number of streams.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetStreamCount(
+    DWORD   *pcInputStreams,
+    DWORD   *pcOutputStreams
+)
+{
+    if ((pcInputStreams == NULL) || (pcOutputStreams == NULL))
+
+    {
+        return E_POINTER;
+    }
+
+    // This MFT has a fixed number of streams.
+    *pcInputStreams = 1;
+    *pcOutputStreams = 1;
+    return S_OK;
+}
+
+
+
+//-------------------------------------------------------------------
+// GetStreamIDs
+// Returns stream IDs for the input and output streams.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetStreamIDs(
+    DWORD   dwInputIDArraySize,
+    DWORD   *pdwInputIDs,
+    DWORD   dwOutputIDArraySize,
+    DWORD   *pdwOutputIDs
+)
+{
+    // It is not required to implement this method if the MFT has a fixed number of
+    // streams AND the stream IDs are numbered sequentially from zero (that is, the
+    // stream IDs match the stream indexes).
+
+    // In that case, it is OK to return E_NOTIMPL.
+    return E_NOTIMPL;
+}
+
+
+//-------------------------------------------------------------------
+// GetInputStreamInfo
+// Returns information about an input stream.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetInputStreamInfo(
+    DWORD                     dwInputStreamID,
+    MFT_INPUT_STREAM_INFO *   pStreamInfo
+)
+{
+    if (pStreamInfo == NULL)
+    {
+        return E_POINTER;
+    }
+
+    EnterCriticalSection(&m_critSec);
+
+    if (!IsValidInputStream(dwInputStreamID))
+    {
+        LeaveCriticalSection(&m_critSec);
+        return MF_E_INVALIDSTREAMNUMBER;
+    }
+
+    // NOTE: This method should succeed even when there is no media type on the
+    //       stream. If there is no media type, we only need to fill in the dwFlags
+    //       member of MFT_INPUT_STREAM_INFO. The other members depend on having a
+    //       a valid media type.
+
+    pStreamInfo->hnsMaxLatency = 0;
+    pStreamInfo->dwFlags = MFT_INPUT_STREAM_WHOLE_SAMPLES | MFT_INPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER;
+
+    if (m_pInputType == NULL)
+    {
+        pStreamInfo->cbSize = 0;
+    }
+    else
+    {
+        pStreamInfo->cbSize = m_cbImageSize;
+    }
+
+    pStreamInfo->cbMaxLookahead = 0;
+    pStreamInfo->cbAlignment = 0;
+
+    LeaveCriticalSection(&m_critSec);
+    return S_OK;
+}
+
+//-------------------------------------------------------------------
+// GetOutputStreamInfo
+// Returns information about an output stream.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetOutputStreamInfo(
+    DWORD                     dwOutputStreamID,
+    MFT_OUTPUT_STREAM_INFO *  pStreamInfo
+)
+{
+    if (pStreamInfo == NULL)
+    {
+        return E_POINTER;
+    }
+
+    EnterCriticalSection(&m_critSec);
+
+    if (!IsValidOutputStream(dwOutputStreamID))
+    {
+        LeaveCriticalSection(&m_critSec);
+        return MF_E_INVALIDSTREAMNUMBER;
+    }
+
+    // NOTE: This method should succeed even when there is no media type on the
+    //       stream. If there is no media type, we only need to fill in the dwFlags
+    //       member of MFT_OUTPUT_STREAM_INFO. The other members depend on having a
+    //       a valid media type.
+
+    pStreamInfo->dwFlags =
+        MFT_OUTPUT_STREAM_WHOLE_SAMPLES |
+        MFT_OUTPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER |
+        MFT_OUTPUT_STREAM_FIXED_SAMPLE_SIZE ;
+
+    if (m_pOutputType == NULL)
+    {
+        pStreamInfo->cbSize = 0;
+    }
+    else
+    {
+        pStreamInfo->cbSize = m_cbImageSize;
+    }
+
+    pStreamInfo->cbAlignment = 0;
+
+    LeaveCriticalSection(&m_critSec);
+    return S_OK;
+}
+
+
+//-------------------------------------------------------------------
+// GetAttributes
+// Returns the attributes for the MFT.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetAttributes(IMFAttributes** ppAttributes)
+{
+    if (ppAttributes == NULL)
+    {
+        return E_POINTER;
+    }
+
+    EnterCriticalSection(&m_critSec);
+
+    *ppAttributes = m_pAttributes;
+    (*ppAttributes)->AddRef();
+
+    LeaveCriticalSection(&m_critSec);
+    return S_OK;
+}
+
+
+//-------------------------------------------------------------------
+// GetInputStreamAttributes
+// Returns stream-level attributes for an input stream.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetInputStreamAttributes(
+    DWORD           dwInputStreamID,
+    IMFAttributes   **ppAttributes
+)
+{
+    // This MFT does not support any stream-level attributes, so the method is not implemented.
+    return E_NOTIMPL;
+}
+
+
+//-------------------------------------------------------------------
+// GetOutputStreamAttributes
+// Returns stream-level attributes for an output stream.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetOutputStreamAttributes(
+    DWORD           dwOutputStreamID,
+    IMFAttributes   **ppAttributes
+)
+{
+    // This MFT does not support any stream-level attributes, so the method is not implemented.
+    return E_NOTIMPL;
+}
+
+
+//-------------------------------------------------------------------
+// DeleteInputStream
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::DeleteInputStream(DWORD dwStreamID)
+{
+    // This MFT has a fixed number of input streams, so the method is not supported.
+    return E_NOTIMPL;
+}
+
+
+//-------------------------------------------------------------------
+// AddInputStreams
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::AddInputStreams(
+    DWORD   cStreams,
+    DWORD   *adwStreamIDs
+)
+{
+    // This MFT has a fixed number of output streams, so the method is not supported.
+    return E_NOTIMPL;
+}
+
+
+//-------------------------------------------------------------------
+// GetInputAvailableType
+// Returns a preferred input type.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetInputAvailableType(
+    DWORD           dwInputStreamID,
+    DWORD           dwTypeIndex, // 0-based
+    IMFMediaType    **ppType
+)
+{
+    if (ppType == NULL)
+    {
+        return E_INVALIDARG;
+    }
+
+    EnterCriticalSection(&m_critSec);
+
+    if (!IsValidInputStream(dwInputStreamID))
+    {
+        LeaveCriticalSection(&m_critSec);
+        return MF_E_INVALIDSTREAMNUMBER;
+    }
+
+    HRESULT hr = S_OK;
+
+    // If the output type is set, return that type as our preferred input type.
+    if (m_pOutputType == NULL)
+    {
+        // The output type is not set. Create a partial media type.
+        hr = OnGetPartialType(dwTypeIndex, ppType);
+    }
+    else if (dwTypeIndex > 0)
+    {
+        hr = MF_E_NO_MORE_TYPES;
+    }
+    else
+    {
+        *ppType = m_pOutputType;
+        (*ppType)->AddRef();
+    }
+
+    LeaveCriticalSection(&m_critSec);
+    return hr;
+}
+
+
+
+//-------------------------------------------------------------------
+// GetOutputAvailableType
+// Returns a preferred output type.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetOutputAvailableType(
+    DWORD           dwOutputStreamID,
+    DWORD           dwTypeIndex, // 0-based
+    IMFMediaType    **ppType
+)
+{
+    if (ppType == NULL)
+    {
+        return E_INVALIDARG;
+    }
+
+    EnterCriticalSection(&m_critSec);
+
+    if (!IsValidOutputStream(dwOutputStreamID))
+    {
+        LeaveCriticalSection(&m_critSec);
+        return MF_E_INVALIDSTREAMNUMBER;
+    }
+
+    HRESULT hr = S_OK;
+
+    if (m_pInputType == NULL)
+    {
+        // The input type is not set. Create a partial media type.
+        hr = OnGetPartialType(dwTypeIndex, ppType);
+    }
+    else if (dwTypeIndex > 0)
+    {
+        hr = MF_E_NO_MORE_TYPES;
+    }
+    else
+    {
+        *ppType = m_pInputType;
+        (*ppType)->AddRef();
+    }
+
+    LeaveCriticalSection(&m_critSec);
+    return hr;
+}
+
+
+//-------------------------------------------------------------------
+// SetInputType
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::SetInputType(
+    DWORD           dwInputStreamID,
+    IMFMediaType    *pType, // Can be NULL to clear the input type.
+    DWORD           dwFlags
+)
+{
+    // Validate flags.
+    if (dwFlags & ~MFT_SET_TYPE_TEST_ONLY)
+    {
+        return E_INVALIDARG;
+    }
+
+    EnterCriticalSection(&m_critSec);
+
+    if (!IsValidInputStream(dwInputStreamID))
+    {
+        LeaveCriticalSection(&m_critSec);
+        return MF_E_INVALIDSTREAMNUMBER;
+    }
+
+    HRESULT hr = S_OK;
+
+    // Does the caller want us to set the type, or just test it?
+    BOOL bReallySet = ((dwFlags & MFT_SET_TYPE_TEST_ONLY) == 0);
+
+    // If we have an input sample, the client cannot change the type now.
+    if (HasPendingOutput())
+    {
+        hr = MF_E_TRANSFORM_CANNOT_CHANGE_MEDIATYPE_WHILE_PROCESSING;
+        goto done;
+    }
+
+    // Validate the type, if non-NULL.
+    if (pType)
+    {
+        hr = OnCheckInputType(pType);
+        if (FAILED(hr))
+        {
+            goto done;
+        }
+    }
+
+    // The type is OK. Set the type, unless the caller was just testing.
+    if (bReallySet)
+    {
+        OnSetInputType(pType);
+
+        // When the type changes, end streaming.
+        hr = EndStreaming();
+    }
+
+done:
+    LeaveCriticalSection(&m_critSec);
+    return hr;
+}
+
+
+
+//-------------------------------------------------------------------
+// SetOutputType
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::SetOutputType(
+    DWORD           dwOutputStreamID,
+    IMFMediaType    *pType, // Can be NULL to clear the output type.
+    DWORD           dwFlags
+)
+{
+    // Validate flags.
+    if (dwFlags & ~MFT_SET_TYPE_TEST_ONLY)
+    {
+        return E_INVALIDARG;
+    }
+
+    EnterCriticalSection(&m_critSec);
+
+    if (!IsValidOutputStream(dwOutputStreamID))
+    {
+        LeaveCriticalSection(&m_critSec);
+        return MF_E_INVALIDSTREAMNUMBER;
+    }
+
+    HRESULT hr = S_OK;
+
+    // Does the caller want us to set the type, or just test it?
+    BOOL bReallySet = ((dwFlags & MFT_SET_TYPE_TEST_ONLY) == 0);
+
+    // If we have an input sample, the client cannot change the type now.
+    if (HasPendingOutput())
+    {
+        hr = MF_E_TRANSFORM_CANNOT_CHANGE_MEDIATYPE_WHILE_PROCESSING;
+        goto done;
+    }
+
+    // Validate the type, if non-NULL.
+    if (pType)
+    {
+        hr = OnCheckOutputType(pType);
+        if (FAILED(hr))
+        {
+            goto done;
+        }
+    }
+
+    // The type is OK. Set the type, unless the caller was just testing.
+    if (bReallySet)
+    {
+        OnSetOutputType(pType);
+
+        // When the type changes, end streaming.
+        hr = EndStreaming();
+    }
+
+done:
+    LeaveCriticalSection(&m_critSec);
+    return hr;
+}
+
+
+//-------------------------------------------------------------------
+// GetInputCurrentType
+// Returns the current input type.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetInputCurrentType(
+    DWORD           dwInputStreamID,
+    IMFMediaType    **ppType
+)
+{
+    if (ppType == NULL)
+    {
+        return E_POINTER;
+    }
+
+    HRESULT hr = S_OK;
+
+    EnterCriticalSection(&m_critSec);
+
+    if (!IsValidInputStream(dwInputStreamID))
+    {
+        hr = MF_E_INVALIDSTREAMNUMBER;
+    }
+    else if (!m_pInputType)
+    {
+        hr = MF_E_TRANSFORM_TYPE_NOT_SET;
+    }
+    else
+    {
+        *ppType = m_pInputType;
+        (*ppType)->AddRef();
+    }
+    LeaveCriticalSection(&m_critSec);
+    return hr;
+}
+
+
+//-------------------------------------------------------------------
+// GetOutputCurrentType
+// Returns the current output type.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetOutputCurrentType(
+    DWORD           dwOutputStreamID,
+    IMFMediaType    **ppType
+)
+{
+    if (ppType == NULL)
+    {
+        return E_POINTER;
+    }
+
+    HRESULT hr = S_OK;
+
+    EnterCriticalSection(&m_critSec);
+
+    if (!IsValidOutputStream(dwOutputStreamID))
+    {
+        hr = MF_E_INVALIDSTREAMNUMBER;
+    }
+    else if (!m_pOutputType)
+    {
+        hr = MF_E_TRANSFORM_TYPE_NOT_SET;
+    }
+    else
+    {
+        *ppType = m_pOutputType;
+        (*ppType)->AddRef();
+    }
+
+    LeaveCriticalSection(&m_critSec);
+    return hr;
+}
+
+
+//-------------------------------------------------------------------
+// GetInputStatus
+// Query if the MFT is accepting more input.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetInputStatus(
+    DWORD           dwInputStreamID,
+    DWORD           *pdwFlags
+)
+{
+    if (pdwFlags == NULL)
+    {
+        return E_POINTER;
+    }
+
+    EnterCriticalSection(&m_critSec);
+
+    if (!IsValidInputStream(dwInputStreamID))
+    {
+        LeaveCriticalSection(&m_critSec);
+        return MF_E_INVALIDSTREAMNUMBER;
+    }
+
+    // If an input sample is already queued, do not accept another sample until the
+    // client calls ProcessOutput or Flush.
+
+    // NOTE: It is possible for an MFT to accept more than one input sample. For
+    // example, this might be required in a video decoder if the frames do not
+    // arrive in temporal order. In the case, the decoder must hold a queue of
+    // samples. For the video effect, each sample is transformed independently, so
+    // there is no reason to queue multiple input samples.
+
+    if (m_pSample == NULL)
+    {
+        *pdwFlags = MFT_INPUT_STATUS_ACCEPT_DATA;
+    }
+    else
+    {
+        *pdwFlags = 0;
+    }
+
+    LeaveCriticalSection(&m_critSec);
+    return S_OK;
+}
+
+
+
+//-------------------------------------------------------------------
+// GetOutputStatus
+// Query if the MFT can produce output.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::GetOutputStatus(DWORD *pdwFlags)
+{
+    if (pdwFlags == NULL)
+    {
+        return E_POINTER;
+    }
+
+    EnterCriticalSection(&m_critSec);
+
+    // The MFT can produce an output sample if (and only if) there an input sample.
+    if (m_pSample != NULL)
+    {
+        *pdwFlags = MFT_OUTPUT_STATUS_SAMPLE_READY;
+    }
+    else
+    {
+        *pdwFlags = 0;
+    }
+
+    LeaveCriticalSection(&m_critSec);
+    return S_OK;
+}
+
+
+//-------------------------------------------------------------------
+// SetOutputBounds
+// Sets the range of time stamps that the MFT will output.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::SetOutputBounds(
+    LONGLONG        hnsLowerBound,
+    LONGLONG        hnsUpperBound
+)
+{
+    // Implementation of this method is optional.
+    return E_NOTIMPL;
+}
+
+
+//-------------------------------------------------------------------
+// ProcessEvent
+// Sends an event to an input stream.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::ProcessEvent(
+    DWORD              dwInputStreamID,
+    IMFMediaEvent      *pEvent
+)
+{
+    // This MFT does not handle any stream events, so the method can
+    // return E_NOTIMPL. This tells the pipeline that it can stop
+    // sending any more events to this MFT.
+    return E_NOTIMPL;
+}
+
+
+//-------------------------------------------------------------------
+// ProcessMessage
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::ProcessMessage(
+    MFT_MESSAGE_TYPE    eMessage,
+    ULONG_PTR           ulParam
+)
+{
+    EnterCriticalSection(&m_critSec);
+
+    HRESULT hr = S_OK;
+
+    switch (eMessage)
+    {
+    case MFT_MESSAGE_COMMAND_FLUSH:
+        // Flush the MFT.
+        hr = OnFlush();
+        break;
+
+    case MFT_MESSAGE_COMMAND_DRAIN:
+        // Drain: Tells the MFT to reject further input until all pending samples are
+        // processed. That is our default behavior already, so there is nothing to do.
+        //
+        // For a decoder that accepts a queue of samples, the MFT might need to drain
+        // the queue in response to this command.
+    break;
+
+    case MFT_MESSAGE_SET_D3D_MANAGER:
+        // Sets a pointer to the IDirect3DDeviceManager9 interface.
+
+        // The pipeline should never send this message unless the MFT sets the MF_SA_D3D_AWARE
+        // attribute set to TRUE. Because this MFT does not set MF_SA_D3D_AWARE, it is an error
+        // to send the MFT_MESSAGE_SET_D3D_MANAGER message to the MFT. Return an error code in
+        // this case.
+
+        // NOTE: If this MFT were D3D-enabled, it would cache the IDirect3DDeviceManager9
+        // pointer for use during streaming.
+
+        hr = E_NOTIMPL;
+        break;
+
+    case MFT_MESSAGE_NOTIFY_BEGIN_STREAMING:
+        hr = BeginStreaming();
+        break;
+
+    case MFT_MESSAGE_NOTIFY_END_STREAMING:
+        hr = EndStreaming();
+        break;
+
+    // The next two messages do not require any action from this MFT.
+
+    case MFT_MESSAGE_NOTIFY_END_OF_STREAM:
+        break;
+
+    case MFT_MESSAGE_NOTIFY_START_OF_STREAM:
+        break;
+    }
+
+    LeaveCriticalSection(&m_critSec);
+    return hr;
+}
+
+
+//-------------------------------------------------------------------
+// ProcessInput
+// Process an input sample.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::ProcessInput(
+    DWORD               dwInputStreamID,
+    IMFSample           *pSample,
+    DWORD               dwFlags
+)
+{
+    // Check input parameters.
+    if (pSample == NULL)
+    {
+        return E_POINTER;
+    }
+
+    if (dwFlags != 0)
+    {
+        return E_INVALIDARG; // dwFlags is reserved and must be zero.
+    }
+
+    HRESULT hr = S_OK;
+
+    EnterCriticalSection(&m_critSec);
+
+    // Validate the input stream number.
+    if (!IsValidInputStream(dwInputStreamID))
+    {
+        hr = MF_E_INVALIDSTREAMNUMBER;
+        goto done;
+    }
+
+    // Check for valid media types.
+    // The client must set input and output types before calling ProcessInput.
+    if (!m_pInputType || !m_pOutputType)
+    {
+        hr = MF_E_NOTACCEPTING;
+        goto done;
+    }
+
+    // Check if an input sample is already queued.
+    if (m_pSample != NULL)
+    {
+        hr = MF_E_NOTACCEPTING;   // We already have an input sample.
+        goto done;
+    }
+
+    // Initialize streaming.
+    hr = BeginStreaming();
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    // Cache the sample. We do the actual work in ProcessOutput.
+    m_pSample = pSample;
+    pSample->AddRef();  // Hold a reference count on the sample.
+
+done:
+    LeaveCriticalSection(&m_critSec);
+    return hr;
+}
+
+
+//-------------------------------------------------------------------
+// ProcessOutput
+// Process an output sample.
+//-------------------------------------------------------------------
+
+HRESULT OcvImageManipulations::ProcessOutput(
+    DWORD                   dwFlags,
+    DWORD                   cOutputBufferCount,
+    MFT_OUTPUT_DATA_BUFFER  *pOutputSamples, // one per stream
+    DWORD                   *pdwStatus
+)
+{
+    // Check input parameters...
+
+    // This MFT does not accept any flags for the dwFlags parameter.
+
+    // The only defined flag is MFT_PROCESS_OUTPUT_DISCARD_WHEN_NO_BUFFER. This flag
+    // applies only when the MFT marks an output stream as lazy or optional. But this
+    // MFT has no lazy or optional streams, so the flag is not valid.
+
+    if (dwFlags != 0)
+    {
+        return E_INVALIDARG;
+    }
+
+    if (pOutputSamples == NULL || pdwStatus == NULL)
+    {
+        return E_POINTER;
+    }
+
+    // There must be exactly one output buffer.
+    if (cOutputBufferCount != 1)
+    {
+        return E_INVALIDARG;
+    }
+
+    // It must contain a sample.
+    if (pOutputSamples[0].pSample == NULL)
+    {
+        return E_INVALIDARG;
+    }
+
+    HRESULT hr = S_OK;
+
+    IMFMediaBuffer *pInput = NULL;
+    IMFMediaBuffer *pOutput = NULL;
+
+    EnterCriticalSection(&m_critSec);
+
+    // There must be an input sample available for processing.
+    if (m_pSample == NULL)
+    {
+        hr = MF_E_TRANSFORM_NEED_MORE_INPUT;
+        goto done;
+    }
+
+    // Initialize streaming.
+
+    hr = BeginStreaming();
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    // Get the input buffer.
+    hr = m_pSample->ConvertToContiguousBuffer(&pInput);
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    // Get the output buffer.
+    hr = pOutputSamples[0].pSample->ConvertToContiguousBuffer(&pOutput);
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    hr = OnProcessOutput(pInput, pOutput);
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    // Set status flags.
+    pOutputSamples[0].dwStatus = 0;
+    *pdwStatus = 0;
+
+
+    // Copy the duration and time stamp from the input sample, if present.
+
+    LONGLONG hnsDuration = 0;
+    LONGLONG hnsTime = 0;
+
+    if (SUCCEEDED(m_pSample->GetSampleDuration(&hnsDuration)))
+    {
+        hr = pOutputSamples[0].pSample->SetSampleDuration(hnsDuration);
+        if (FAILED(hr))
+        {
+            goto done;
+        }
+    }
+
+    if (SUCCEEDED(m_pSample->GetSampleTime(&hnsTime)))
+    {
+        hr = pOutputSamples[0].pSample->SetSampleTime(hnsTime);
+    }
+
+done:
+    SafeRelease(&m_pSample);   // Release our input sample.
+    SafeRelease(&pInput);
+    SafeRelease(&pOutput);
+    LeaveCriticalSection(&m_critSec);
+    return hr;
+}
+
+// PRIVATE METHODS
+
+// All methods that follow are private to this MFT and are not part of the IMFTransform interface.
+
+// Create a partial media type from our list.
+//
+// dwTypeIndex: Index into the list of peferred media types.
+// ppmt:        Receives a pointer to the media type.
+
+HRESULT OcvImageManipulations::OnGetPartialType(DWORD dwTypeIndex, IMFMediaType **ppmt)
+{
+    if (dwTypeIndex >= ARRAYSIZE(g_MediaSubtypes))
+    {
+        return MF_E_NO_MORE_TYPES;
+    }
+
+    IMFMediaType *pmt = NULL;
+
+    HRESULT hr = MFCreateMediaType(&pmt);
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    hr = pmt->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    hr = pmt->SetGUID(MF_MT_SUBTYPE, g_MediaSubtypes[dwTypeIndex]);
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    *ppmt = pmt;
+    (*ppmt)->AddRef();
+
+done:
+    SafeRelease(&pmt);
+    return hr;
+}
+
+
+// Validate an input media type.
+
+HRESULT OcvImageManipulations::OnCheckInputType(IMFMediaType *pmt)
+{
+    assert(pmt != NULL);
+
+    HRESULT hr = S_OK;
+
+    // If the output type is set, see if they match.
+    if (m_pOutputType != NULL)
+    {
+        DWORD flags = 0;
+        hr = pmt->IsEqual(m_pOutputType, &flags);
+
+        // IsEqual can return S_FALSE. Treat this as failure.
+        if (hr != S_OK)
+        {
+            hr = MF_E_INVALIDMEDIATYPE;
+        }
+    }
+    else
+    {
+        // Output type is not set. Just check this type.
+        hr = OnCheckMediaType(pmt);
+    }
+    return hr;
+}
+
+
+// Validate an output media type.
+
+HRESULT OcvImageManipulations::OnCheckOutputType(IMFMediaType *pmt)
+{
+    assert(pmt != NULL);
+
+    HRESULT hr = S_OK;
+
+    // If the input type is set, see if they match.
+    if (m_pInputType != NULL)
+    {
+        DWORD flags = 0;
+        hr = pmt->IsEqual(m_pInputType, &flags);
+
+        // IsEqual can return S_FALSE. Treat this as failure.
+        if (hr != S_OK)
+        {
+            hr = MF_E_INVALIDMEDIATYPE;
+        }
+
+    }
+    else
+    {
+        // Input type is not set. Just check this type.
+        hr = OnCheckMediaType(pmt);
+    }
+    return hr;
+}
+
+
+// Validate a media type (input or output)
+
+HRESULT OcvImageManipulations::OnCheckMediaType(IMFMediaType *pmt)
+{
+    BOOL bFoundMatchingSubtype = FALSE;
+
+    // Major type must be video.
+    GUID major_type;
+    HRESULT hr = pmt->GetGUID(MF_MT_MAJOR_TYPE, &major_type);
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    if (major_type != MFMediaType_Video)
+    {
+        hr = MF_E_INVALIDMEDIATYPE;
+        goto done;
+    }
+
+    // Subtype must be one of the subtypes in our global list.
+
+    // Get the subtype GUID.
+    GUID subtype;
+    hr = pmt->GetGUID(MF_MT_SUBTYPE, &subtype);
+    if (FAILED(hr))
+    {
+        goto done;
+    }
+
+    // Look for the subtype in our list of accepted types.
+    for (DWORD i = 0; i < ARRAYSIZE(g_MediaSubtypes); i++)
+    {
+        if (subtype == g_MediaSubtypes[i])
+        {
+            bFoundMatchingSubtype = TRUE;
+            break;
+        }
+    }
+
+    if (!bFoundMatchingSubtype)
+    {
+        hr = MF_E_INVALIDMEDIATYPE; // The MFT does not support this subtype.
+        goto done;
+    }
+
+    // Reject single-field media types.
+    UINT32 interlace = MFGetAttributeUINT32(pmt, MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
+    if (interlace == MFVideoInterlace_FieldSingleUpper  || interlace == MFVideoInterlace_FieldSingleLower)
+    {
+        hr = MF_E_INVALIDMEDIATYPE;
+    }
+
+done:
+    return hr;
+}
+
+
+// Set or clear the input media type.
+//
+// Prerequisite: The input type was already validated.
+
+void OcvImageManipulations::OnSetInputType(IMFMediaType *pmt)
+{
+    // if pmt is NULL, clear the type.
+    // if pmt is non-NULL, set the type.
+
+    SafeRelease(&m_pInputType);
+    m_pInputType = pmt;
+    if (m_pInputType)
+    {
+        m_pInputType->AddRef();
+    }
+
+    // Update the format information.
+    UpdateFormatInfo();
+}
+
+
+// Set or clears the output media type.
+//
+// Prerequisite: The output type was already validated.
+
+void OcvImageManipulations::OnSetOutputType(IMFMediaType *pmt)
+{
+    // If pmt is NULL, clear the type. Otherwise, set the type.
+
+    SafeRelease(&m_pOutputType);
+    m_pOutputType = pmt;
+    if (m_pOutputType)
+    {
+        m_pOutputType->AddRef();
+    }
+}
+
+
+// Initialize streaming parameters.
+//
+// This method is called if the client sends the MFT_MESSAGE_NOTIFY_BEGIN_STREAMING
+// message, or when the client processes a sample, whichever happens first.
+
+HRESULT OcvImageManipulations::BeginStreaming()
+{
+    HRESULT hr = S_OK;
+
+    if (!m_bStreamingInitialized)
+    {
+        m_bStreamingInitialized = true;
+        hr = S_OK;
+    }
+
+    return hr;
+}
+
+
+// End streaming.
+
+// This method is called if the client sends an MFT_MESSAGE_NOTIFY_END_STREAMING
+// message, or when the media type changes. In general, it should be called whenever
+// the streaming parameters need to be reset.
+
+HRESULT OcvImageManipulations::EndStreaming()
+{
+    m_bStreamingInitialized = false;
+    return S_OK;
+}
+
+
+
+// Generate output data.
+
+HRESULT OcvImageManipulations::OnProcessOutput(IMFMediaBuffer *pIn, IMFMediaBuffer *pOut)
+{
+    BYTE *pDest = NULL;         // Destination buffer.
+    LONG lDestStride = 0;       // Destination stride.
+
+    BYTE *pSrc = NULL;          // Source buffer.
+    LONG lSrcStride = 0;        // Source stride.
+
+    // Helper objects to lock the buffers.
+    VideoBufferLock inputLock(pIn);
+    VideoBufferLock outputLock(pOut);
+
+    // Stride if the buffer does not support IMF2DBuffer
+    LONG lDefaultStride = 0;
+
+    HRESULT hr = GetDefaultStride(m_pInputType, &lDefaultStride);
+    if (FAILED(hr))
+    {
+        return hr;
+    }
+
+    // Lock the input buffer.
+    hr = inputLock.LockBuffer(lDefaultStride, m_imageHeightInPixels, &pSrc, &lSrcStride);
+    if (FAILED(hr))
+    {
+        return hr;
+    }
+
+    // Lock the output buffer.
+    hr = outputLock.LockBuffer(lDefaultStride, m_imageHeightInPixels, &pDest, &lDestStride);
+    if (FAILED(hr))
+    {
+        return hr;
+    }
+
+    cv::Mat InputFrame(m_imageHeightInPixels + m_imageHeightInPixels/2, m_imageWidthInPixels, CV_8UC1, pSrc, lSrcStride);
+    cv::Mat InputGreyScale(InputFrame, cv::Range(0, m_imageHeightInPixels), cv::Range(0, m_imageWidthInPixels));
+    cv::Mat OutputFrame(m_imageHeightInPixels + m_imageHeightInPixels/2, m_imageWidthInPixels, CV_8UC1, pDest, lDestStride);
+
+    switch (m_TransformType)
+    {
+    case Preview:
+        {
+            InputFrame.copyTo(OutputFrame);
+        } break;
+    case GrayScale:
+        {
+            OutputFrame.setTo(cv::Scalar(128));
+            cv::Mat OutputGreyScale(OutputFrame, cv::Range(0, m_imageHeightInPixels), cv::Range(0, m_imageWidthInPixels));
+            InputGreyScale.copyTo(OutputGreyScale);
+        } break;
+    case Canny:
+        {
+            OutputFrame.setTo(cv::Scalar(128));
+            cv::Mat OutputGreyScale(OutputFrame, cv::Range(0, m_imageHeightInPixels), cv::Range(0, m_imageWidthInPixels));
+            cv::Canny(InputGreyScale, OutputGreyScale, 80, 90);
+
+        } break;
+    case Sobel:
+        {
+            OutputFrame.setTo(cv::Scalar(128));
+            cv::Mat OutputGreyScale(OutputFrame, cv::Range(0, m_imageHeightInPixels), cv::Range(0, m_imageWidthInPixels));
+            cv::Sobel(InputGreyScale, OutputGreyScale, CV_8U, 1, 1);
+        } break;
+    case Histogram:
+        {
+            const int mHistSizeNum = 25;
+            const int channels[3][1] = {{0}, {1}, {2}};
+            const int mHistSize[] = {25};
+            const float baseRabge[] = {0.f,256.f};
+            const float* ranges[] = {baseRabge};
+
+            const cv::Scalar mColorsY[] = { cv::Scalar(76), cv::Scalar(149), cv::Scalar(29) };
+            const cv::Scalar mColorsUV[] = { cv::Scalar(84, 255), cv::Scalar(43, 21), cv::Scalar(255, 107) };
+
+            cv::Mat OutputY(m_imageHeightInPixels, m_imageWidthInPixels, CV_8UC1, pDest, lDestStride);
+            cv::Mat OutputUV(m_imageHeightInPixels/2, m_imageWidthInPixels/2,
+                             CV_8UC2, pDest+m_imageHeightInPixels*lDestStride, lDestStride);
+            cv::Mat BgrFrame;
+
+            InputFrame.copyTo(OutputFrame);
+
+            cv::cvtColor(InputFrame, BgrFrame, cv::COLOR_YUV420sp2BGR);
+            int thikness = (int) (BgrFrame.cols / (mHistSizeNum + 10) / 5);
+            if(thikness > 5) thikness = 5;
+            int offset = (int) ((BgrFrame.cols - (5*mHistSizeNum + 4*10)*thikness)/2);
+
+            // RGB
+            for (int c=0; c<3; c++)
+            {
+                cv::Mat hist;
+                cv::calcHist(&BgrFrame, 1, channels[c], cv::Mat(), hist, 1, mHistSize, ranges);
+                cv::normalize(hist, hist, BgrFrame.rows/2, 0, cv::NORM_INF);
+                for(int h=0; h<mHistSizeNum; h++) {
+                    cv::Point mP1, mP2;
+                    // Draw on Y plane
+                    mP1.x = mP2.x = offset + (c * (mHistSizeNum + 10) + h) * thikness;
+                    mP1.y = BgrFrame.rows-1;
+                    mP2.y = mP1.y - 2 - (int)hist.at<float>(h);
+                    cv::line(OutputY, mP1, mP2, mColorsY[c], thikness);
+
+                    // Draw on UV planes
+                    mP1.x /= 2;
+                    mP1.y /= 2;
+                    mP2.x /= 2;
+                    mP2.y /= 2;
+                    cv::line(OutputUV, mP1, mP2, mColorsUV[c], thikness/2);
+                }
+            }
+        } break;
+    default:
+        break;
+    }
+
+    // Set the data size on the output buffer.
+    hr = pOut->SetCurrentLength(m_cbImageSize);
+
+    return hr;
+}
+
+
+// Flush the MFT.
+
+HRESULT OcvImageManipulations::OnFlush()
+{
+    // For this MFT, flushing just means releasing the input sample.
+    SafeRelease(&m_pSample);
+    return S_OK;
+}
+
+
+// Update the format information. This method is called whenever the
+// input type is set.
+
+HRESULT OcvImageManipulations::UpdateFormatInfo()
+{
+    HRESULT hr = S_OK;
+
+    GUID subtype = GUID_NULL;
+
+    m_imageWidthInPixels = 0;
+    m_imageHeightInPixels = 0;
+    m_cbImageSize = 0;
+
+    if (m_pInputType != NULL)
+    {
+        hr = m_pInputType->GetGUID(MF_MT_SUBTYPE, &subtype);
+        if (FAILED(hr))
+        {
+            goto done;
+        }
+        if (subtype != MFVideoFormat_NV12)
+        {
+            hr = E_UNEXPECTED;
+            goto done;
+        }
+
+        hr = MFGetAttributeSize(m_pInputType, MF_MT_FRAME_SIZE, &m_imageWidthInPixels, &m_imageHeightInPixels);
+        if (FAILED(hr))
+        {
+            goto done;
+        }
+
+        // Calculate the image size for YUV NV12 image(not including padding)
+        m_cbImageSize = (m_imageHeightInPixels + m_imageHeightInPixels/2)*m_imageWidthInPixels;
+    }
+
+done:
+    return hr;
+}
+
+
+// Get the default stride for a video format.
+HRESULT GetDefaultStride(IMFMediaType *pType, LONG *plStride)
+{
+    LONG lStride = 0;
+
+    // Try to get the default stride from the media type.
+    HRESULT hr = pType->GetUINT32(MF_MT_DEFAULT_STRIDE, (UINT32*)&lStride);
+    if (FAILED(hr))
+    {
+        // Attribute not set. Try to calculate the default stride.
+        GUID subtype = GUID_NULL;
+
+        UINT32 width = 0;
+        UINT32 height = 0;
+
+        // Get the subtype and the image size.
+        hr = pType->GetGUID(MF_MT_SUBTYPE, &subtype);
+        if (SUCCEEDED(hr))
+        {
+            hr = MFGetAttributeSize(pType, MF_MT_FRAME_SIZE, &width, &height);
+        }
+        if (SUCCEEDED(hr))
+        {
+            if (subtype == MFVideoFormat_NV12)
+            {
+                lStride = width;
+            }
+            else if (subtype == MFVideoFormat_YUY2 || subtype == MFVideoFormat_UYVY)
+            {
+                lStride = ((width * 2) + 3) & ~3;
+            }
+            else
+            {
+                hr = E_INVALIDARG;
+            }
+        }
+
+        // Set the attribute for later reference.
+        if (SUCCEEDED(hr))
+        {
+            (void)pType->SetUINT32(MF_MT_DEFAULT_STRIDE, UINT32(lStride));
+        }
+    }
+    if (SUCCEEDED(hr))
+    {
+        *plStride = lStride;
+    }
+    return hr;
+}
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.def b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.def
new file mode 100644
index 000000000..453349adf
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.def
@@ -0,0 +1,4 @@
+EXPORTS
+    DllCanUnloadNow                     PRIVATE
+    DllGetActivationFactory             PRIVATE
+    DllGetClassObject                   PRIVATE
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.h b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.h
new file mode 100644
index 000000000..9162593fc
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.h
@@ -0,0 +1,247 @@
+// Defines the transform class.
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+
+#ifndef GRAYSCALE_H
+#define GRAYSCALE_H
+
+#include <new>
+#include <mfapi.h>
+#include <mftransform.h>
+#include <mfidl.h>
+#include <mferror.h>
+#include <strsafe.h>
+#include <assert.h>
+
+#include <wrl\implements.h>
+#include <wrl\module.h>
+#include <windows.media.h>
+
+#include "OcvImageManipulations.h"
+
+// CLSID of the MFT.
+DEFINE_GUID(CLSID_GrayscaleMFT,
+0x2f3dbc05, 0xc011, 0x4a8f, 0xb2, 0x64, 0xe4, 0x2e, 0x35, 0xc6, 0x7b, 0xf4);
+
+//
+// * IMPORTANT: If you implement your own MFT, create a new GUID for the CLSID. *
+//
+
+
+// Configuration attributes
+// {698649BE-8EAE-4551-A4CB-3EC98FBD3D86}
+DEFINE_GUID(OCV_IMAGE_EFFECT,
+0x698649be, 0x8eae, 0x4551, 0xa4, 0xcb, 0x3e, 0xc9, 0x8f, 0xbd, 0x3d, 0x86);
+
+
+enum ProcessingType
+{
+    Preview,
+    GrayScale,
+    Canny,
+    Sobel,
+    Histogram,
+    InvalidEffect
+};
+
+template <class T> void SafeRelease(T **ppT)
+{
+    if (*ppT)
+    {
+        (*ppT)->Release();
+        *ppT = NULL;
+    }
+}
+
+// OcvImageManipulations class:
+// Implements a grayscale video effect.
+
+class OcvImageManipulations
+    : public Microsoft::WRL::RuntimeClass<
+           Microsoft::WRL::RuntimeClassFlags< Microsoft::WRL::RuntimeClassType::WinRtClassicComMix >,
+           ABI::Windows::Media::IMediaExtension,
+           IMFTransform >
+{
+    InspectableClass(RuntimeClass_OcvTransform_OcvImageManipulations, BaseTrust)
+
+public:
+    OcvImageManipulations();
+
+    ~OcvImageManipulations();
+
+    STDMETHOD(RuntimeClassInitialize)();
+
+    // IMediaExtension
+    STDMETHODIMP SetProperties(ABI::Windows::Foundation::Collections::IPropertySet *pConfiguration);
+
+    // IMFTransform
+    STDMETHODIMP GetStreamLimits(
+        DWORD   *pdwInputMinimum,
+        DWORD   *pdwInputMaximum,
+        DWORD   *pdwOutputMinimum,
+        DWORD   *pdwOutputMaximum
+    );
+
+    STDMETHODIMP GetStreamCount(
+        DWORD   *pcInputStreams,
+        DWORD   *pcOutputStreams
+    );
+
+    STDMETHODIMP GetStreamIDs(
+        DWORD   dwInputIDArraySize,
+        DWORD   *pdwInputIDs,
+        DWORD   dwOutputIDArraySize,
+        DWORD   *pdwOutputIDs
+    );
+
+    STDMETHODIMP GetInputStreamInfo(
+        DWORD                     dwInputStreamID,
+        MFT_INPUT_STREAM_INFO *   pStreamInfo
+    );
+
+    STDMETHODIMP GetOutputStreamInfo(
+        DWORD                     dwOutputStreamID,
+        MFT_OUTPUT_STREAM_INFO *  pStreamInfo
+    );
+
+    STDMETHODIMP GetAttributes(IMFAttributes** pAttributes);
+
+    STDMETHODIMP GetInputStreamAttributes(
+        DWORD           dwInputStreamID,
+        IMFAttributes   **ppAttributes
+    );
+
+    STDMETHODIMP GetOutputStreamAttributes(
+        DWORD           dwOutputStreamID,
+        IMFAttributes   **ppAttributes
+    );
+
+    STDMETHODIMP DeleteInputStream(DWORD dwStreamID);
+
+    STDMETHODIMP AddInputStreams(
+        DWORD   cStreams,
+        DWORD   *adwStreamIDs
+    );
+
+    STDMETHODIMP GetInputAvailableType(
+        DWORD           dwInputStreamID,
+        DWORD           dwTypeIndex, // 0-based
+        IMFMediaType    **ppType
+    );
+
+    STDMETHODIMP GetOutputAvailableType(
+        DWORD           dwOutputStreamID,
+        DWORD           dwTypeIndex, // 0-based
+        IMFMediaType    **ppType
+    );
+
+    STDMETHODIMP SetInputType(
+        DWORD           dwInputStreamID,
+        IMFMediaType    *pType,
+        DWORD           dwFlags
+    );
+
+    STDMETHODIMP SetOutputType(
+        DWORD           dwOutputStreamID,
+        IMFMediaType    *pType,
+        DWORD           dwFlags
+    );
+
+    STDMETHODIMP GetInputCurrentType(
+        DWORD           dwInputStreamID,
+        IMFMediaType    **ppType
+    );
+
+    STDMETHODIMP GetOutputCurrentType(
+        DWORD           dwOutputStreamID,
+        IMFMediaType    **ppType
+    );
+
+    STDMETHODIMP GetInputStatus(
+        DWORD           dwInputStreamID,
+        DWORD           *pdwFlags
+    );
+
+    STDMETHODIMP GetOutputStatus(DWORD *pdwFlags);
+
+    STDMETHODIMP SetOutputBounds(
+        LONGLONG        hnsLowerBound,
+        LONGLONG        hnsUpperBound
+    );
+
+    STDMETHODIMP ProcessEvent(
+        DWORD              dwInputStreamID,
+        IMFMediaEvent      *pEvent
+    );
+
+    STDMETHODIMP ProcessMessage(
+        MFT_MESSAGE_TYPE    eMessage,
+        ULONG_PTR           ulParam
+    );
+
+    STDMETHODIMP ProcessInput(
+        DWORD               dwInputStreamID,
+        IMFSample           *pSample,
+        DWORD               dwFlags
+    );
+
+    STDMETHODIMP ProcessOutput(
+        DWORD                   dwFlags,
+        DWORD                   cOutputBufferCount,
+        MFT_OUTPUT_DATA_BUFFER  *pOutputSamples, // one per stream
+        DWORD                   *pdwStatus
+    );
+
+
+private:
+    // HasPendingOutput: Returns TRUE if the MFT is holding an input sample.
+    BOOL HasPendingOutput() const { return m_pSample != NULL; }
+
+    // IsValidInputStream: Returns TRUE if dwInputStreamID is a valid input stream identifier.
+    BOOL IsValidInputStream(DWORD dwInputStreamID) const
+    {
+        return dwInputStreamID == 0;
+    }
+
+    // IsValidOutputStream: Returns TRUE if dwOutputStreamID is a valid output stream identifier.
+    BOOL IsValidOutputStream(DWORD dwOutputStreamID) const
+    {
+        return dwOutputStreamID == 0;
+    }
+
+    HRESULT OnGetPartialType(DWORD dwTypeIndex, IMFMediaType **ppmt);
+    HRESULT OnCheckInputType(IMFMediaType *pmt);
+    HRESULT OnCheckOutputType(IMFMediaType *pmt);
+    HRESULT OnCheckMediaType(IMFMediaType *pmt);
+    void    OnSetInputType(IMFMediaType *pmt);
+    void    OnSetOutputType(IMFMediaType *pmt);
+    HRESULT BeginStreaming();
+    HRESULT EndStreaming();
+    HRESULT OnProcessOutput(IMFMediaBuffer *pIn, IMFMediaBuffer *pOut);
+    HRESULT OnFlush();
+    HRESULT UpdateFormatInfo();
+
+    CRITICAL_SECTION            m_critSec;
+
+    // Transformation parameters
+    ProcessingType              m_TransformType;
+
+    // Streaming
+    bool                        m_bStreamingInitialized;
+    IMFSample                   *m_pSample;                 // Input sample.
+    IMFMediaType                *m_pInputType;              // Input media type.
+    IMFMediaType                *m_pOutputType;             // Output media type.
+
+    // Fomat information
+    UINT32                      m_imageWidthInPixels;
+    UINT32                      m_imageHeightInPixels;
+    DWORD                       m_cbImageSize;              // Image size, in bytes.
+
+    IMFAttributes               *m_pAttributes;
+};
+#endif
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.vcxproj b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.vcxproj
new file mode 100644
index 000000000..54c0ce40a
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.vcxproj
@@ -0,0 +1,319 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|ARM">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM">
+      <Configuration>Release</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCTargetsPath Condition="'$(VCTargetsPath11)' != '' and '$(VSVersion)' == '' and '$(VisualStudioVersion)' == ''">$(VCTargetsPath11)</VCTargetsPath>
+    <ProjectGuid>{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>OcvTransform</RootNamespace>
+    <ProjectName>OcvTransform</ProjectName>
+    <MinimumVisualStudioVersion>11.0</MinimumVisualStudioVersion>
+    <AppContainerApplication>true</AppContainerApplication>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <PropertyGroup>
+    <DefaultLanguage>en-US</DefaultLanguage>
+  </PropertyGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <GenerateManifest>false</GenerateManifest>
+    <OutDir>$(Configuration)\$(MSBuildProjectName)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
+    <GenerateManifest>false</GenerateManifest>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <GenerateManifest>false</GenerateManifest>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <GenerateManifest>false</GenerateManifest>
+    <OutDir>$(Configuration)\$(MSBuildProjectName)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
+    <GenerateManifest>false</GenerateManifest>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <GenerateManifest>false</GenerateManifest>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PreprocessorDefinitions>_WINRT_DLL;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AdditionalUsingDirectories>$(WindowsSDK_WindowsMetadata);$(AdditionalUsingDirectories)</AdditionalUsingDirectories>
+      <CompileAsWinRT>false</CompileAsWinRT>
+      <AdditionalIncludeDirectories>$(OPENCV_DIR)\include;$(ProjectDir);$(IntermediateOutputPath);%(AdditionalIncludeDirectories);$(ProjectDir)\..\Common</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>runtimeobject.lib;%(AdditionalDependencies);mf.lib;mfuuid.lib;mfplat.lib;opencv_core245.lib;opencv_imgproc245.lib</AdditionalDependencies>
+      <IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
+      <ModuleDefinitionFile>OcvTransform.def</ModuleDefinitionFile>
+      <AdditionalLibraryDirectories>$(OPENCV_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+    <CustomBuildStep>
+      <Command>mdmerge -metadata_dir "$(WindowsSDK_MetadataPath)" -o "$(ProjectDir)$(Configuration)\$(MSBuildProjectName)" -i "$(MSBuildProjectDirectory)" -v -partial</Command>
+      <Outputs>$(ProjectDir)$(Configuration)\$(MSBuildProjectName)\$(ProjectName).winmd</Outputs>
+    </CustomBuildStep>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PreprocessorDefinitions>_WINRT_DLL;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AdditionalUsingDirectories>$(WindowsSDK_WindowsMetadata);$(AdditionalUsingDirectories)</AdditionalUsingDirectories>
+      <CompileAsWinRT>false</CompileAsWinRT>
+      <AdditionalIncludeDirectories>$(OPENCV_DIR)\include;$(ProjectDir);$(IntermediateOutputPath);%(AdditionalIncludeDirectories);$(ProjectDir)\..\Common</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>runtimeobject.lib;%(AdditionalDependencies);mf.lib;mfuuid.lib;mfplat.lib</AdditionalDependencies>
+      <IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
+      <ModuleDefinitionFile>OcvTransform.def</ModuleDefinitionFile>
+      <AdditionalLibraryDirectories>$(OPENCV_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+    <CustomBuildStep>
+      <Command>mdmerge -metadata_dir "$(WindowsSDK_MetadataPath)" -o "$(SolutionDir)$(Platform)\$(Configuration)\$(MSBuildProjectName)" -i "$(MSBuildProjectDirectory)" -v -partial</Command>
+      <Outputs>$(SolutionDir)$(Platform)\$(Configuration)\$(MSBuildProjectName)\$(ProjectName).winmd</Outputs>
+    </CustomBuildStep>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PreprocessorDefinitions>_WINRT_DLL;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AdditionalUsingDirectories>$(WindowsSDK_WindowsMetadata);$(AdditionalUsingDirectories)</AdditionalUsingDirectories>
+      <CompileAsWinRT>false</CompileAsWinRT>
+      <AdditionalIncludeDirectories>$(OPENCV_DIR)\include;$(ProjectDir);$(IntermediateOutputPath);%(AdditionalIncludeDirectories);$(ProjectDir)\..\Common</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>runtimeobject.lib;%(AdditionalDependencies);mf.lib;mfuuid.lib;mfplat.lib</AdditionalDependencies>
+      <IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
+      <ModuleDefinitionFile>OcvTransform.def</ModuleDefinitionFile>
+      <AdditionalLibraryDirectories>$(OPENCV_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+    <CustomBuildStep>
+      <Command>mdmerge -metadata_dir "$(WindowsSDK_MetadataPath)" -o "$(SolutionDir)$(Platform)\$(Configuration)\$(MSBuildProjectName)" -i "$(MSBuildProjectDirectory)" -v -partial</Command>
+      <Outputs>$(SolutionDir)$(Platform)\$(Configuration)\$(MSBuildProjectName)\$(ProjectName).winmd</Outputs>
+    </CustomBuildStep>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PreprocessorDefinitions>_WINRT_DLL;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AdditionalUsingDirectories>$(WindowsSDK_WindowsMetadata);$(AdditionalUsingDirectories)</AdditionalUsingDirectories>
+      <CompileAsWinRT>false</CompileAsWinRT>
+      <AdditionalIncludeDirectories>$(OPENCV_DIR)\include;$(ProjectDir);$(IntermediateOutputPath);%(AdditionalIncludeDirectories);$(ProjectDir)\..\Common</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>runtimeobject.lib;%(AdditionalDependencies);mf.lib;mfuuid.lib;mfplat.lib</AdditionalDependencies>
+      <IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
+      <ModuleDefinitionFile>OcvTransform.def</ModuleDefinitionFile>
+      <AdditionalLibraryDirectories>$(OPENCV_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+    <CustomBuildStep>
+      <Command>mdmerge -metadata_dir "$(WindowsSDK_MetadataPath)" -o "$(ProjectDir)$(Configuration)\$(MSBuildProjectName)" -i "$(MSBuildProjectDirectory)" -v -partial</Command>
+      <Outputs>$(ProjectDir)$(Configuration)\$(MSBuildProjectName)\$(ProjectName).winmd</Outputs>
+    </CustomBuildStep>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PreprocessorDefinitions>_WINRT_DLL;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AdditionalUsingDirectories>$(WindowsSDK_WindowsMetadata);$(AdditionalUsingDirectories)</AdditionalUsingDirectories>
+      <CompileAsWinRT>false</CompileAsWinRT>
+      <AdditionalIncludeDirectories>$(OPENCV_DIR)\include;$(ProjectDir);$(IntermediateOutputPath);%(AdditionalIncludeDirectories);$(ProjectDir)\..\Common</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>runtimeobject.lib;%(AdditionalDependencies);mf.lib;mfuuid.lib;mfplat.lib</AdditionalDependencies>
+      <IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
+      <ModuleDefinitionFile>OcvTransform.def</ModuleDefinitionFile>
+      <AdditionalLibraryDirectories>$(OPENCV_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+    <CustomBuildStep>
+      <Command>mdmerge -metadata_dir "$(WindowsSDK_MetadataPath)" -o "$(SolutionDir)$(Platform)\$(Configuration)\$(MSBuildProjectName)" -i "$(MSBuildProjectDirectory)" -v -partial</Command>
+      <Outputs>$(SolutionDir)$(Platform)\$(Configuration)\$(MSBuildProjectName)\$(ProjectName).winmd</Outputs>
+    </CustomBuildStep>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PreprocessorDefinitions>_WINRT_DLL;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AdditionalUsingDirectories>$(WindowsSDK_WindowsMetadata);$(AdditionalUsingDirectories)</AdditionalUsingDirectories>
+      <CompileAsWinRT>false</CompileAsWinRT>
+      <AdditionalIncludeDirectories>$(OPENCV_DIR)\include;$(ProjectDir);$(IntermediateOutputPath);%(AdditionalIncludeDirectories);$(ProjectDir)\..\Common</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>runtimeobject.lib;%(AdditionalDependencies);mf.lib;mfuuid.lib;mfplat.lib</AdditionalDependencies>
+      <IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
+      <ModuleDefinitionFile>OcvTransform.def</ModuleDefinitionFile>
+      <AdditionalLibraryDirectories>$(OPENCV_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+    <CustomBuildStep>
+      <Command>mdmerge -metadata_dir "$(WindowsSDK_MetadataPath)" -o "$(SolutionDir)$(Platform)\$(Configuration)\$(MSBuildProjectName)" -i "$(MSBuildProjectDirectory)" -v -partial</Command>
+      <Outputs>$(SolutionDir)$(Platform)\$(Configuration)\$(MSBuildProjectName)\$(ProjectName).winmd</Outputs>
+    </CustomBuildStep>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="OcvTransform.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="dllmain.cpp" />
+    <ClCompile Include="OcvTransform.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="OcvTransform.def" />
+  </ItemGroup>
+  <ItemGroup>
+    <Midl Include="OcvImageManipulations.idl">
+      <MetadataFileName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+      </MetadataFileName>
+      <MetadataFileName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+      </MetadataFileName>
+      <MetadataFileName Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
+      </MetadataFileName>
+      <MetadataFileName Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
+      </MetadataFileName>
+      <MetadataFileName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </MetadataFileName>
+      <MetadataFileName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+      </MetadataFileName>
+      <AdditionalMetadataDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(WindowsSDK_MetadataPath)</AdditionalMetadataDirectories>
+      <AdditionalMetadataDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(WindowsSDK_MetadataPath)</AdditionalMetadataDirectories>
+      <AdditionalMetadataDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">$(WindowsSDK_MetadataPath)</AdditionalMetadataDirectories>
+      <AdditionalMetadataDirectories Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">$(WindowsSDK_MetadataPath)</AdditionalMetadataDirectories>
+      <AdditionalMetadataDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(WindowsSDK_MetadataPath)</AdditionalMetadataDirectories>
+      <AdditionalMetadataDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(WindowsSDK_MetadataPath)</AdditionalMetadataDirectories>
+      <EnableWindowsRuntime Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</EnableWindowsRuntime>
+      <EnableWindowsRuntime Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</EnableWindowsRuntime>
+      <EnableWindowsRuntime Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</EnableWindowsRuntime>
+      <EnableWindowsRuntime Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</EnableWindowsRuntime>
+      <EnableWindowsRuntime Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</EnableWindowsRuntime>
+      <EnableWindowsRuntime Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</EnableWindowsRuntime>
+      <HeaderFileName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).h</HeaderFileName>
+      <HeaderFileName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).h</HeaderFileName>
+      <HeaderFileName Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">%(Filename).h</HeaderFileName>
+      <HeaderFileName Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">%(Filename).h</HeaderFileName>
+      <HeaderFileName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).h</HeaderFileName>
+      <HeaderFileName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).h</HeaderFileName>
+    </Midl>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ItemGroup>
+    <_MdMergeOutput Condition="'$(Platform)' == 'Win32'" Include="$(ProjectDir)$(Configuration)\$(MSBuildProjectName)\$(ProjectName).winmd" />
+    <_MdMergeOutput Condition="'$(Platform)' != 'Win32'" Include="$(SolutionDir)$(Platform)\$(Configuration)\$(MSBuildProjectName)\$(ProjectName).winmd" />
+  </ItemGroup>
+  <Target Name="CopyWinmdArtifactsOutputGroup" Returns="@(CopyWinmdArtifactsOutputGroupOutputs)">
+    <ItemGroup>
+      <CopyWinmdArtifactsOutputGroupOutputs Include="@(_MdMergeOutput)">
+        <TargetPath>$(ProjectName).winmd</TargetPath>
+        <Implementation>$(TargetName)$(TargetExt)</Implementation>
+      </CopyWinmdArtifactsOutputGroupOutputs>
+    </ItemGroup>
+  </Target>
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/dllmain.cpp b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/dllmain.cpp
new file mode 100644
index 000000000..701e7d416
--- /dev/null
+++ b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/dllmain.cpp
@@ -0,0 +1,58 @@
+//////////////////////////////////////////////////////////////////////////
+//
+// dllmain.cpp
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+//////////////////////////////////////////////////////////////////////////
+
+#include <initguid.h>
+#include "OcvTransform.h"
+
+using namespace Microsoft::WRL;
+
+namespace Microsoft { namespace Samples {
+    ActivatableClass(OcvImageManipulations);
+}}
+
+BOOL WINAPI DllMain( _In_ HINSTANCE hInstance, _In_ DWORD dwReason, _In_opt_ LPVOID lpReserved )
+{
+    if( DLL_PROCESS_ATTACH == dwReason )
+    {
+        //
+        //  Don't need per-thread callbacks
+        //
+        DisableThreadLibraryCalls( hInstance );
+
+        Module<InProc>::GetModule().Create();
+    }
+    else if( DLL_PROCESS_DETACH == dwReason )
+    {
+        Module<InProc>::GetModule().Terminate();
+    }
+
+    return TRUE;
+}
+
+HRESULT WINAPI DllGetActivationFactory( _In_ HSTRING activatibleClassId, _Outptr_ IActivationFactory** factory )
+{
+    auto &module = Microsoft::WRL::Module< Microsoft::WRL::InProc >::GetModule();
+    return module.GetActivationFactory( activatibleClassId, factory );
+}
+
+HRESULT WINAPI DllCanUnloadNow()
+{
+    auto &module = Microsoft::WRL::Module<Microsoft::WRL::InProc>::GetModule();
+    return (module.Terminate()) ? S_OK : S_FALSE;
+}
+
+STDAPI DllGetClassObject( _In_ REFCLSID rclsid, _In_ REFIID riid, _Outptr_ LPVOID FAR* ppv )
+{
+    auto &module = Microsoft::WRL::Module<Microsoft::WRL::InProc>::GetModule();
+    return module.GetClassObject( rclsid, riid, ppv );
+}
diff --git a/samples/winrt/ImageManipulations/Package.appxmanifest b/samples/winrt/ImageManipulations/Package.appxmanifest
new file mode 100644
index 000000000..b66e01465
--- /dev/null
+++ b/samples/winrt/ImageManipulations/Package.appxmanifest
@@ -0,0 +1,36 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Package xmlns="http://schemas.microsoft.com/appx/2010/manifest">
+  <Identity Name="Microsoft.SDKSamples.MediaCapture.CPP" Publisher="CN=Microsoft Corporation, O=Microsoft Corporation, L=Redmond, S=Washington, C=US" Version="1.0.0.0" />
+  <Properties>
+    <DisplayName>MediaCapture CPP sample</DisplayName>
+    <PublisherDisplayName>Microsoft Corporation</PublisherDisplayName>
+    <Logo>Assets\storeLogo-sdk.png</Logo>
+  </Properties>
+  <Prerequisites>
+    <OSMinVersion>6.2.1</OSMinVersion>
+    <OSMaxVersionTested>6.2.1</OSMaxVersionTested>
+  </Prerequisites>
+  <Resources>
+    <Resource Language="x-generate" />
+  </Resources>
+  <Applications>
+    <Application Id="MediaCapture.App" Executable="$targetnametoken$.exe" EntryPoint="MediaCapture.App">
+      <VisualElements DisplayName="OCV Image Manipulations" Logo="assets\opencv-logo-150.png" SmallLogo="assets\opencv-logo-30.png" Description="OpenCV Image Manipulations sample" ForegroundText="light" BackgroundColor="#00b2f0">
+        <DefaultTile ShortName="Ocv ImageManipulations" ShowName="allLogos" />
+        <SplashScreen Image="Assets\splash-sdk.png" BackgroundColor="#00b2f0" />
+      </VisualElements>
+    </Application>
+  </Applications>
+  <Capabilities>
+    <DeviceCapability Name="webcam" />
+    <DeviceCapability Name="microphone" />
+  </Capabilities>
+  <Extensions>
+    <Extension Category="windows.activatableClass.inProcessServer">
+      <InProcessServer>
+        <Path>OcvTransform.dll</Path>
+        <ActivatableClass ActivatableClassId="OcvTransform.OcvImageManipulations" ThreadingModel="both" />
+      </InProcessServer>
+    </Extension>
+  </Extensions>
+</Package>
diff --git a/samples/winrt/ImageManipulations/assets/opencv-logo-150.png b/samples/winrt/ImageManipulations/assets/opencv-logo-150.png
new file mode 100644
index 000000000..ea685d651
Binary files /dev/null and b/samples/winrt/ImageManipulations/assets/opencv-logo-150.png differ
diff --git a/samples/winrt/ImageManipulations/assets/opencv-logo-30.png b/samples/winrt/ImageManipulations/assets/opencv-logo-30.png
new file mode 100644
index 000000000..efaf5468a
Binary files /dev/null and b/samples/winrt/ImageManipulations/assets/opencv-logo-30.png differ
diff --git a/samples/winrt/ImageManipulations/assets/splash-sdk.png b/samples/winrt/ImageManipulations/assets/splash-sdk.png
new file mode 100644
index 000000000..901c3b085
Binary files /dev/null and b/samples/winrt/ImageManipulations/assets/splash-sdk.png differ
diff --git a/samples/winrt/ImageManipulations/assets/windows-sdk.png b/samples/winrt/ImageManipulations/assets/windows-sdk.png
new file mode 100644
index 000000000..7f753f7b6
Binary files /dev/null and b/samples/winrt/ImageManipulations/assets/windows-sdk.png differ
diff --git a/samples/winrt/ImageManipulations/common/LayoutAwarePage.cpp b/samples/winrt/ImageManipulations/common/LayoutAwarePage.cpp
new file mode 100644
index 000000000..07092bb74
--- /dev/null
+++ b/samples/winrt/ImageManipulations/common/LayoutAwarePage.cpp
@@ -0,0 +1,452 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+#include "pch.h"
+#include "LayoutAwarePage.h"
+#include "SuspensionManager.h"
+
+using namespace SDKSample::Common;
+
+using namespace Platform;
+using namespace Platform::Collections;
+using namespace Windows::Foundation;
+using namespace Windows::Foundation::Collections;
+using namespace Windows::System;
+using namespace Windows::UI::Core;
+using namespace Windows::UI::ViewManagement;
+using namespace Windows::UI::Xaml;
+using namespace Windows::UI::Xaml::Controls;
+using namespace Windows::UI::Xaml::Interop;
+using namespace Windows::UI::Xaml::Navigation;
+
+/// <summary>
+/// Initializes a new instance of the <see cref="LayoutAwarePage"/> class.
+/// </summary>
+LayoutAwarePage::LayoutAwarePage()
+{
+    if (Windows::ApplicationModel::DesignMode::DesignModeEnabled)
+    {
+        return;
+    }
+
+    // Create an empty default view model
+    DefaultViewModel = ref new Map<String^, Object^>(std::less<String^>());
+
+    // When this page is part of the visual tree make two changes:
+    // 1) Map application view state to visual state for the page
+    // 2) Handle keyboard and mouse navigation requests
+    Loaded += ref new RoutedEventHandler(this, &LayoutAwarePage::OnLoaded);
+
+    // Undo the same changes when the page is no longer visible
+    Unloaded += ref new RoutedEventHandler(this, &LayoutAwarePage::OnUnloaded);
+}
+
+static DependencyProperty^ _defaultViewModelProperty =
+    DependencyProperty::Register("DefaultViewModel",
+    TypeName(IObservableMap<String^, Object^>::typeid), TypeName(LayoutAwarePage::typeid), nullptr);
+
+/// <summary>
+/// Identifies the <see cref="DefaultViewModel"/> dependency property.
+/// </summary>
+DependencyProperty^ LayoutAwarePage::DefaultViewModelProperty::get()
+{
+    return _defaultViewModelProperty;
+}
+
+/// <summary>
+/// Gets an implementation of <see cref="IObservableMap&lt;String, Object&gt;"/> designed to be
+/// used as a trivial view model.
+/// </summary>
+IObservableMap<String^, Object^>^ LayoutAwarePage::DefaultViewModel::get()
+{
+    return safe_cast<IObservableMap<String^, Object^>^>(GetValue(DefaultViewModelProperty));
+}
+
+/// <summary>
+/// Sets an implementation of <see cref="IObservableMap&lt;String, Object&gt;"/> designed to be
+/// used as a trivial view model.
+/// </summary>
+void LayoutAwarePage::DefaultViewModel::set(IObservableMap<String^, Object^>^ value)
+{
+    SetValue(DefaultViewModelProperty, value);
+}
+
+/// <summary>
+/// Invoked when the page is part of the visual tree
+/// </summary>
+/// <param name="sender">Instance that triggered the event.</param>
+/// <param name="e">Event data describing the conditions that led to the event.</param>
+void LayoutAwarePage::OnLoaded(Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e)
+{
+    this->StartLayoutUpdates(sender, e);
+
+    // Keyboard and mouse navigation only apply when occupying the entire window
+    if (this->ActualHeight == Window::Current->Bounds.Height &&
+        this->ActualWidth == Window::Current->Bounds.Width)
+    {
+        // Listen to the window directly so focus isn't required
+        _acceleratorKeyEventToken = Window::Current->CoreWindow->Dispatcher->AcceleratorKeyActivated +=
+            ref new TypedEventHandler<CoreDispatcher^, AcceleratorKeyEventArgs^>(this,
+            &LayoutAwarePage::CoreDispatcher_AcceleratorKeyActivated);
+        _pointerPressedEventToken = Window::Current->CoreWindow->PointerPressed +=
+            ref new TypedEventHandler<CoreWindow^, PointerEventArgs^>(this,
+            &LayoutAwarePage::CoreWindow_PointerPressed);
+        _navigationShortcutsRegistered = true;
+    }
+}
+
+/// <summary>
+/// Invoked when the page is removed from visual tree
+/// </summary>
+/// <param name="sender">Instance that triggered the event.</param>
+/// <param name="e">Event data describing the conditions that led to the event.</param>
+void LayoutAwarePage::OnUnloaded(Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e)
+{
+    if (_navigationShortcutsRegistered)
+    {
+        Window::Current->CoreWindow->Dispatcher->AcceleratorKeyActivated -= _acceleratorKeyEventToken;
+        Window::Current->CoreWindow->PointerPressed -= _pointerPressedEventToken;
+        _navigationShortcutsRegistered = false;
+    }
+    StopLayoutUpdates(sender, e);
+}
+
+#pragma region Navigation support
+
+/// <summary>
+/// Invoked as an event handler to navigate backward in the page's associated <see cref="Frame"/>
+/// until it reaches the top of the navigation stack.
+/// </summary>
+/// <param name="sender">Instance that triggered the event.</param>
+/// <param name="e">Event data describing the conditions that led to the event.</param>
+void LayoutAwarePage::GoHome(Object^ sender, RoutedEventArgs^ e)
+{
+    (void) sender;	// Unused parameter
+    (void) e;	// Unused parameter
+
+    // Use the navigation frame to return to the topmost page
+    if (Frame != nullptr)
+    {
+        while (Frame->CanGoBack)
+        {
+            Frame->GoBack();
+        }
+    }
+}
+
+/// <summary>
+/// Invoked as an event handler to navigate backward in the navigation stack
+/// associated with this page's <see cref="Frame"/>.
+/// </summary>
+/// <param name="sender">Instance that triggered the event.</param>
+/// <param name="e">Event data describing the conditions that led to the event.</param>
+void LayoutAwarePage::GoBack(Object^ sender, RoutedEventArgs^ e)
+{
+    (void) sender;	// Unused parameter
+    (void) e;	// Unused parameter
+
+    // Use the navigation frame to return to the previous page
+    if (Frame != nullptr && Frame->CanGoBack)
+    {
+        Frame->GoBack();
+    }
+}
+
+/// <summary>
+/// Invoked as an event handler to navigate forward in the navigation stack
+/// associated with this page's <see cref="Frame"/>.
+/// </summary>
+/// <param name="sender">Instance that triggered the event.</param>
+/// <param name="e">Event data describing the conditions that led to the event.</param>
+void LayoutAwarePage::GoForward(Object^ sender, RoutedEventArgs^ e)
+{
+    (void) sender;	// Unused parameter
+    (void) e;	// Unused parameter
+
+    // Use the navigation frame to advance to the next page
+    if (Frame != nullptr && Frame->CanGoForward)
+    {
+        Frame->GoForward();
+    }
+}
+
+/// <summary>
+/// Invoked on every keystroke, including system keys such as Alt key combinations, when
+/// this page is active and occupies the entire window.  Used to detect keyboard navigation
+/// between pages even when the page itself doesn't have focus.
+/// </summary>
+/// <param name="sender">Instance that triggered the event.</param>
+/// <param name="args">Event data describing the conditions that led to the event.</param>
+void LayoutAwarePage::CoreDispatcher_AcceleratorKeyActivated(CoreDispatcher^ sender, AcceleratorKeyEventArgs^ args)
+{
+    auto virtualKey = args->VirtualKey;
+
+    // Only investigate further when Left, Right, or the dedicated Previous or Next keys
+    // are pressed
+    if ((args->EventType == CoreAcceleratorKeyEventType::SystemKeyDown ||
+        args->EventType == CoreAcceleratorKeyEventType::KeyDown) &&
+        (virtualKey == VirtualKey::Left || virtualKey == VirtualKey::Right ||
+        (int)virtualKey == 166 || (int)virtualKey == 167))
+    {
+        auto coreWindow = Window::Current->CoreWindow;
+        auto downState = Windows::UI::Core::CoreVirtualKeyStates::Down;
+        bool menuKey = (coreWindow->GetKeyState(VirtualKey::Menu) & downState) == downState;
+        bool controlKey = (coreWindow->GetKeyState(VirtualKey::Control) & downState) == downState;
+        bool shiftKey = (coreWindow->GetKeyState(VirtualKey::Shift) & downState) == downState;
+        bool noModifiers = !menuKey && !controlKey && !shiftKey;
+        bool onlyAlt = menuKey && !controlKey && !shiftKey;
+
+        if (((int)virtualKey == 166 && noModifiers) ||
+            (virtualKey == VirtualKey::Left && onlyAlt))
+        {
+            // When the previous key or Alt+Left are pressed navigate back
+            args->Handled = true;
+            GoBack(this, ref new RoutedEventArgs());
+        }
+        else if (((int)virtualKey == 167 && noModifiers) ||
+            (virtualKey == VirtualKey::Right && onlyAlt))
+        {
+            // When the next key or Alt+Right are pressed navigate forward
+            args->Handled = true;
+            GoForward(this, ref new RoutedEventArgs());
+        }
+    }
+}
+
+/// <summary>
+/// Invoked on every mouse click, touch screen tap, or equivalent interaction when this
+/// page is active and occupies the entire window.  Used to detect browser-style next and
+/// previous mouse button clicks to navigate between pages.
+/// </summary>
+/// <param name="sender">Instance that triggered the event.</param>
+/// <param name="args">Event data describing the conditions that led to the event.</param>
+void LayoutAwarePage::CoreWindow_PointerPressed(CoreWindow^ sender, PointerEventArgs^ args)
+{
+    auto properties = args->CurrentPoint->Properties;
+
+    // Ignore button chords with the left, right, and middle buttons
+    if (properties->IsLeftButtonPressed || properties->IsRightButtonPressed ||
+        properties->IsMiddleButtonPressed) return;
+
+    // If back or foward are pressed (but not both) navigate appropriately
+    bool backPressed = properties->IsXButton1Pressed;
+    bool forwardPressed = properties->IsXButton2Pressed;
+    if (backPressed ^ forwardPressed)
+    {
+        args->Handled = true;
+        if (backPressed) GoBack(this, ref new RoutedEventArgs());
+        if (forwardPressed) GoForward(this, ref new RoutedEventArgs());
+    }
+}
+
+#pragma endregion
+
+#pragma region Visual state switching
+
+/// <summary>
+/// Invoked as an event handler, typically on the <see cref="Loaded"/> event of a
+/// <see cref="Control"/> within the page, to indicate that the sender should start receiving
+/// visual state management changes that correspond to application view state changes.
+/// </summary>
+/// <param name="sender">Instance of <see cref="Control"/> that supports visual state management
+/// corresponding to view states.</param>
+/// <param name="e">Event data that describes how the request was made.</param>
+/// <remarks>The current view state will immediately be used to set the corresponding visual state
+/// when layout updates are requested.  A corresponding <see cref="Unloaded"/> event handler
+/// connected to <see cref="StopLayoutUpdates"/> is strongly encouraged.  Instances of
+/// <see cref="LayoutAwarePage"/> automatically invoke these handlers in their Loaded and Unloaded
+/// events.</remarks>
+/// <seealso cref="DetermineVisualState"/>
+/// <seealso cref="InvalidateVisualState"/>
+void LayoutAwarePage::StartLayoutUpdates(Object^ sender, RoutedEventArgs^ e)
+{
+    (void) e;	// Unused parameter
+
+    auto control = safe_cast<Control^>(sender);
+    if (_layoutAwareControls == nullptr)
+    {
+        // Start listening to view state changes when there are controls interested in updates
+        _layoutAwareControls = ref new Vector<Control^>();
+        _windowSizeEventToken = Window::Current->SizeChanged += ref new WindowSizeChangedEventHandler(this, &LayoutAwarePage::WindowSizeChanged);
+
+        // Page receives notifications for children. Protect the page until we stopped layout updates for all controls.
+        _this = this;
+    }
+    _layoutAwareControls->Append(control);
+
+    // Set the initial visual state of the control
+    VisualStateManager::GoToState(control, DetermineVisualState(ApplicationView::Value), false);
+}
+
+void LayoutAwarePage::WindowSizeChanged(Object^ sender, Windows::UI::Core::WindowSizeChangedEventArgs^ e)
+{
+    (void) sender;	// Unused parameter
+    (void) e;	// Unused parameter
+
+    InvalidateVisualState();
+}
+
+/// <summary>
+/// Invoked as an event handler, typically on the <see cref="Unloaded"/> event of a
+/// <see cref="Control"/>, to indicate that the sender should start receiving visual state
+/// management changes that correspond to application view state changes.
+/// </summary>
+/// <param name="sender">Instance of <see cref="Control"/> that supports visual state management
+/// corresponding to view states.</param>
+/// <param name="e">Event data that describes how the request was made.</param>
+/// <remarks>The current view state will immediately be used to set the corresponding visual state
+/// when layout updates are requested.</remarks>
+/// <seealso cref="StartLayoutUpdates"/>
+void LayoutAwarePage::StopLayoutUpdates(Object^ sender, RoutedEventArgs^ e)
+{
+    (void) e;	// Unused parameter
+
+    auto control = safe_cast<Control^>(sender);
+    unsigned int index;
+    if (_layoutAwareControls != nullptr && _layoutAwareControls->IndexOf(control, &index))
+    {
+        _layoutAwareControls->RemoveAt(index);
+        if (_layoutAwareControls->Size == 0)
+        {
+            // Stop listening to view state changes when no controls are interested in updates
+            Window::Current->SizeChanged -= _windowSizeEventToken;
+            _layoutAwareControls = nullptr;
+            // Last control has received the Unload notification.
+            _this = nullptr;
+        }
+    }
+}
+
+/// <summary>
+/// Translates <see cref="ApplicationViewState"/> values into strings for visual state management
+/// within the page.  The default implementation uses the names of enum values.  Subclasses may
+/// override this method to control the mapping scheme used.
+/// </summary>
+/// <param name="viewState">View state for which a visual state is desired.</param>
+/// <returns>Visual state name used to drive the <see cref="VisualStateManager"/></returns>
+/// <seealso cref="InvalidateVisualState"/>
+String^ LayoutAwarePage::DetermineVisualState(ApplicationViewState viewState)
+{
+    switch (viewState)
+    {
+    case ApplicationViewState::Filled:
+        return "Filled";
+    case ApplicationViewState::Snapped:
+        return "Snapped";
+    case ApplicationViewState::FullScreenPortrait:
+        return "FullScreenPortrait";
+    case ApplicationViewState::FullScreenLandscape:
+    default:
+        return "FullScreenLandscape";
+    }
+}
+
+/// <summary>
+/// Updates all controls that are listening for visual state changes with the correct visual
+/// state.
+/// </summary>
+/// <remarks>
+/// Typically used in conjunction with overriding <see cref="DetermineVisualState"/> to
+/// signal that a different value may be returned even though the view state has not changed.
+/// </remarks>
+void LayoutAwarePage::InvalidateVisualState()
+{
+    if (_layoutAwareControls != nullptr)
+    {
+        String^ visualState = DetermineVisualState(ApplicationView::Value);
+        auto controlIterator = _layoutAwareControls->First();
+        while (controlIterator->HasCurrent)
+        {
+            auto control = controlIterator->Current;
+            VisualStateManager::GoToState(control, visualState, false);
+            controlIterator->MoveNext();
+        }
+    }
+}
+
+#pragma endregion
+
+#pragma region Process lifetime management
+
+/// <summary>
+/// Invoked when this page is about to be displayed in a Frame.
+/// </summary>
+/// <param name="e">Event data that describes how this page was reached.  The Parameter
+/// property provides the group to be displayed.</param>
+void LayoutAwarePage::OnNavigatedTo(NavigationEventArgs^ e)
+{
+    // Returning to a cached page through navigation shouldn't trigger state loading
+    if (_pageKey != nullptr) return;
+
+    auto frameState = SuspensionManager::SessionStateForFrame(Frame);
+    _pageKey = "Page-" + Frame->BackStackDepth;
+
+    if (e->NavigationMode == NavigationMode::New)
+    {
+        // Clear existing state for forward navigation when adding a new page to the
+        // navigation stack
+        auto nextPageKey = _pageKey;
+        int nextPageIndex = Frame->BackStackDepth;
+        while (frameState->HasKey(nextPageKey))
+        {
+            frameState->Remove(nextPageKey);
+            nextPageIndex++;
+            nextPageKey = "Page-" + nextPageIndex;
+        }
+
+        // Pass the navigation parameter to the new page
+        LoadState(e->Parameter, nullptr);
+    }
+    else
+    {
+        // Pass the navigation parameter and preserved page state to the page, using
+        // the same strategy for loading suspended state and recreating pages discarded
+        // from cache
+        LoadState(e->Parameter, safe_cast<IMap<String^, Object^>^>(frameState->Lookup(_pageKey)));
+    }
+}
+
+/// <summary>
+/// Invoked when this page will no longer be displayed in a Frame.
+/// </summary>
+/// <param name="e">Event data that describes how this page was reached.  The Parameter
+/// property provides the group to be displayed.</param>
+void LayoutAwarePage::OnNavigatedFrom(NavigationEventArgs^ e)
+{
+    auto frameState = SuspensionManager::SessionStateForFrame(Frame);
+    auto pageState = ref new Map<String^, Object^>();
+    SaveState(pageState);
+    frameState->Insert(_pageKey, pageState);
+}
+
+/// <summary>
+/// Populates the page with content passed during navigation.  Any saved state is also
+/// provided when recreating a page from a prior session.
+/// </summary>
+/// <param name="navigationParameter">The parameter value passed to
+/// <see cref="Frame.Navigate(Type, Object)"/> when this page was initially requested.
+/// </param>
+/// <param name="pageState">A map of state preserved by this page during an earlier
+/// session.  This will be null the first time a page is visited.</param>
+void LayoutAwarePage::LoadState(Object^ navigationParameter, IMap<String^, Object^>^ pageState)
+{
+}
+
+/// <summary>
+/// Preserves state associated with this page in case the application is suspended or the
+/// page is discarded from the navigation cache.  Values must conform to the serialization
+/// requirements of <see cref="SuspensionManager.SessionState"/>.
+/// </summary>
+/// <param name="pageState">An empty map to be populated with serializable state.</param>
+void LayoutAwarePage::SaveState(IMap<String^, Object^>^ pageState)
+{
+}
+
+#pragma endregion
diff --git a/samples/winrt/ImageManipulations/common/LayoutAwarePage.h b/samples/winrt/ImageManipulations/common/LayoutAwarePage.h
new file mode 100644
index 000000000..bd71062fe
--- /dev/null
+++ b/samples/winrt/ImageManipulations/common/LayoutAwarePage.h
@@ -0,0 +1,88 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+#pragma once
+
+#include <collection.h>
+
+namespace SDKSample
+{
+    namespace Common
+    {
+        /// <summary>
+        /// Typical implementation of Page that provides several important conveniences:
+        /// <list type="bullet">
+        /// <item>
+        /// <description>Application view state to visual state mapping</description>
+        /// </item>
+        /// <item>
+        /// <description>GoBack, GoForward, and GoHome event handlers</description>
+        /// </item>
+        /// <item>
+        /// <description>Mouse and keyboard shortcuts for navigation</description>
+        /// </item>
+        /// <item>
+        /// <description>State management for navigation and process lifetime management</description>
+        /// </item>
+        /// <item>
+        /// <description>A default view model</description>
+        /// </item>
+        /// </list>
+        /// </summary>
+        [Windows::Foundation::Metadata::WebHostHidden]
+        public ref class LayoutAwarePage : Windows::UI::Xaml::Controls::Page
+        {
+        internal:
+            LayoutAwarePage();
+
+        public:
+            void StartLayoutUpdates(Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+            void StopLayoutUpdates(Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+            void InvalidateVisualState();
+            static property Windows::UI::Xaml::DependencyProperty^ DefaultViewModelProperty
+            {
+                Windows::UI::Xaml::DependencyProperty^ get();
+            };
+            property Windows::Foundation::Collections::IObservableMap<Platform::String^, Platform::Object^>^ DefaultViewModel
+            {
+                Windows::Foundation::Collections::IObservableMap<Platform::String^, Platform::Object^>^ get();
+                void set(Windows::Foundation::Collections::IObservableMap<Platform::String^, Platform::Object^>^ value);
+            }
+
+        protected:
+            virtual void GoHome(Platform::Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+            virtual void GoBack(Platform::Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+            virtual void GoForward(Platform::Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+            virtual Platform::String^ DetermineVisualState(Windows::UI::ViewManagement::ApplicationViewState viewState);
+            virtual void OnNavigatedTo(Windows::UI::Xaml::Navigation::NavigationEventArgs^ e) override;
+            virtual void OnNavigatedFrom(Windows::UI::Xaml::Navigation::NavigationEventArgs^ e) override;
+            virtual void LoadState(Platform::Object^ navigationParameter,
+                Windows::Foundation::Collections::IMap<Platform::String^, Platform::Object^>^ pageState);
+            virtual void SaveState(Windows::Foundation::Collections::IMap<Platform::String^, Platform::Object^>^ pageState);
+
+        private:
+            Platform::String^ _pageKey;
+            bool _navigationShortcutsRegistered;
+            Platform::Collections::Map<Platform::String^, Platform::Object^>^ _defaultViewModel;
+            Windows::Foundation::EventRegistrationToken _windowSizeEventToken,
+                _acceleratorKeyEventToken, _pointerPressedEventToken;
+            Platform::Collections::Vector<Windows::UI::Xaml::Controls::Control^>^ _layoutAwareControls;
+            void WindowSizeChanged(Platform::Object^ sender, Windows::UI::Core::WindowSizeChangedEventArgs^ e);
+            void OnLoaded(Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+            void OnUnloaded(Object^ sender, Windows::UI::Xaml::RoutedEventArgs^ e);
+
+            void CoreDispatcher_AcceleratorKeyActivated(Windows::UI::Core::CoreDispatcher^ sender,
+                Windows::UI::Core::AcceleratorKeyEventArgs^ args);
+            void CoreWindow_PointerPressed(Windows::UI::Core::CoreWindow^ sender,
+                Windows::UI::Core::PointerEventArgs^ args);
+            LayoutAwarePage^ _this; // Strong reference to self, cleaned up in OnUnload
+        };
+    }
+}
diff --git a/samples/winrt/ImageManipulations/common/StandardStyles.xaml b/samples/winrt/ImageManipulations/common/StandardStyles.xaml
new file mode 100644
index 000000000..b4edc81f4
--- /dev/null
+++ b/samples/winrt/ImageManipulations/common/StandardStyles.xaml
@@ -0,0 +1,978 @@
+﻿<!--
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+-->
+
+<ResourceDictionary
+    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
+    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml">
+
+    <!-- Non-brush values that vary across themes -->
+
+    <ResourceDictionary.ThemeDictionaries>
+        <ResourceDictionary x:Key="Default">
+            <x:String x:Key="BackButtonGlyph">&#xE071;</x:String>
+            <x:String x:Key="BackButtonSnappedGlyph">&#xE0BA;</x:String>
+        </ResourceDictionary>
+
+        <ResourceDictionary x:Key="HighContrast">
+            <x:String x:Key="BackButtonGlyph">&#xE0A6;</x:String>
+            <x:String x:Key="BackButtonSnappedGlyph">&#xE0C4;</x:String>
+        </ResourceDictionary>
+    </ResourceDictionary.ThemeDictionaries>
+
+    <!-- RichTextBlock styles -->
+
+    <Style x:Key="BasicRichTextStyle" TargetType="RichTextBlock">
+        <Setter Property="Foreground" Value="{StaticResource ApplicationForegroundThemeBrush}"/>
+        <Setter Property="FontSize" Value="{StaticResource ControlContentThemeFontSize}"/>
+        <Setter Property="FontFamily" Value="{StaticResource ContentControlThemeFontFamily}"/>
+        <Setter Property="TextTrimming" Value="WordEllipsis"/>
+        <Setter Property="TextWrapping" Value="Wrap"/>
+        <Setter Property="Typography.StylisticSet20" Value="True"/>
+        <Setter Property="Typography.DiscretionaryLigatures" Value="True"/>
+    </Style>
+
+    <Style x:Key="BaselineRichTextStyle" TargetType="RichTextBlock" BasedOn="{StaticResource BasicRichTextStyle}">
+        <Setter Property="LineHeight" Value="20"/>
+        <Setter Property="LineStackingStrategy" Value="BlockLineHeight"/>
+        <!-- Properly align text along its baseline -->
+        <Setter Property="RenderTransform">
+            <Setter.Value>
+                <TranslateTransform X="-1" Y="4"/>
+            </Setter.Value>
+        </Setter>
+    </Style>
+
+    <Style x:Key="ItemRichTextStyle" TargetType="RichTextBlock" BasedOn="{StaticResource BaselineRichTextStyle}"/>
+
+    <Style x:Key="BodyRichTextStyle" TargetType="RichTextBlock" BasedOn="{StaticResource BaselineRichTextStyle}">
+        <Setter Property="FontWeight" Value="SemiLight"/>
+    </Style>
+
+    <!-- TextBlock styles -->
+
+    <Style x:Key="BasicTextStyle" TargetType="TextBlock">
+        <Setter Property="Foreground" Value="{StaticResource ApplicationForegroundThemeBrush}"/>
+        <Setter Property="FontSize" Value="{StaticResource ControlContentThemeFontSize}"/>
+        <Setter Property="FontFamily" Value="{StaticResource ContentControlThemeFontFamily}"/>
+        <Setter Property="TextTrimming" Value="WordEllipsis"/>
+        <Setter Property="TextWrapping" Value="Wrap"/>
+        <Setter Property="Typography.StylisticSet20" Value="True"/>
+        <Setter Property="Typography.DiscretionaryLigatures" Value="True"/>
+    </Style>
+
+    <Style x:Key="BaselineTextStyle" TargetType="TextBlock" BasedOn="{StaticResource BasicTextStyle}">
+        <Setter Property="LineHeight" Value="20"/>
+        <Setter Property="LineStackingStrategy" Value="BlockLineHeight"/>
+        <!-- Properly align text along its baseline -->
+        <Setter Property="RenderTransform">
+            <Setter.Value>
+                <TranslateTransform X="-1" Y="4"/>
+            </Setter.Value>
+        </Setter>
+    </Style>
+
+    <Style x:Key="HeaderTextStyle" TargetType="TextBlock" BasedOn="{StaticResource BaselineTextStyle}">
+        <Setter Property="FontSize" Value="56"/>
+        <Setter Property="FontWeight" Value="Light"/>
+        <Setter Property="LineHeight" Value="40"/>
+        <Setter Property="RenderTransform">
+            <Setter.Value>
+                <TranslateTransform X="-2" Y="8"/>
+            </Setter.Value>
+        </Setter>
+    </Style>
+
+    <Style x:Key="SubheaderTextStyle" TargetType="TextBlock" BasedOn="{StaticResource BaselineTextStyle}">
+        <Setter Property="FontSize" Value="26.667"/>
+        <Setter Property="FontWeight" Value="Light"/>
+        <Setter Property="LineHeight" Value="30"/>
+        <Setter Property="RenderTransform">
+            <Setter.Value>
+                <TranslateTransform X="-1" Y="6"/>
+            </Setter.Value>
+        </Setter>
+    </Style>
+
+    <Style x:Key="TitleTextStyle" TargetType="TextBlock" BasedOn="{StaticResource BaselineTextStyle}">
+        <Setter Property="FontWeight" Value="SemiBold"/>
+    </Style>
+
+    <Style x:Key="ItemTextStyle" TargetType="TextBlock" BasedOn="{StaticResource BaselineTextStyle}"/>
+
+    <Style x:Key="BodyTextStyle" TargetType="TextBlock" BasedOn="{StaticResource BaselineTextStyle}">
+        <Setter Property="FontWeight" Value="SemiLight"/>
+    </Style>
+
+    <Style x:Key="CaptionTextStyle" TargetType="TextBlock" BasedOn="{StaticResource BaselineTextStyle}">
+        <Setter Property="FontSize" Value="12"/>
+        <Setter Property="Foreground" Value="{StaticResource ApplicationSecondaryForegroundThemeBrush}"/>
+    </Style>
+
+    <!-- Button styles -->
+
+    <!--
+        TextButtonStyle is used to style a Button using subheader-styled text with no other adornment.  This
+        style is used in the GroupedItemsPage as a group header and in the FileOpenPickerPage for triggering
+        commands.
+    -->
+    <Style x:Key="TextButtonStyle" TargetType="Button">
+        <Setter Property="MinWidth" Value="0"/>
+        <Setter Property="MinHeight" Value="0"/>
+        <Setter Property="Template">
+            <Setter.Value>
+                <ControlTemplate TargetType="Button">
+                    <Grid Background="Transparent">
+                        <TextBlock
+                            x:Name="Text"
+                            Text="{TemplateBinding Content}"
+                            Margin="3,-7,3,10"
+                            TextWrapping="NoWrap"
+                            Style="{StaticResource SubheaderTextStyle}"/>
+                        <Rectangle
+                            x:Name="FocusVisualWhite"
+                            IsHitTestVisible="False"
+                            Stroke="{StaticResource FocusVisualWhiteStrokeThemeBrush}"
+                            StrokeEndLineCap="Square"
+                            StrokeDashArray="1,1"
+                            Opacity="0"
+                            StrokeDashOffset="1.5"/>
+                        <Rectangle
+                            x:Name="FocusVisualBlack"
+                            IsHitTestVisible="False"
+                            Stroke="{StaticResource FocusVisualBlackStrokeThemeBrush}"
+                            StrokeEndLineCap="Square"
+                            StrokeDashArray="1,1"
+                            Opacity="0"
+                            StrokeDashOffset="0.5"/>
+
+                        <VisualStateManager.VisualStateGroups>
+                            <VisualStateGroup x:Name="CommonStates">
+                                <VisualState x:Name="Normal"/>
+                                <VisualState x:Name="PointerOver">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Text" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource ApplicationPointerOverForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Pressed">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Text" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource ApplicationPressedForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Disabled">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Text" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource ButtonDisabledForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                            </VisualStateGroup>
+                            <VisualStateGroup x:Name="FocusStates">
+                                <VisualState x:Name="Focused">
+                                    <Storyboard>
+                                        <DoubleAnimation Duration="0" To="1" Storyboard.TargetName="FocusVisualWhite" Storyboard.TargetProperty="Opacity"/>
+                                        <DoubleAnimation Duration="0" To="1" Storyboard.TargetName="FocusVisualBlack" Storyboard.TargetProperty="Opacity"/>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Unfocused"/>
+                            </VisualStateGroup>
+                        </VisualStateManager.VisualStateGroups>
+                    </Grid>
+                </ControlTemplate>
+            </Setter.Value>
+        </Setter>
+    </Style>
+
+    <!--
+        TextRadioButtonStyle is used to style a RadioButton using subheader-styled text with no other adornment.
+        This style is used in the SearchResultsPage to allow selection among filters.
+    -->
+    <Style x:Key="TextRadioButtonStyle" TargetType="RadioButton">
+        <Setter Property="MinWidth" Value="0"/>
+        <Setter Property="MinHeight" Value="0"/>
+        <Setter Property="Template">
+            <Setter.Value>
+                <ControlTemplate TargetType="RadioButton">
+                    <Grid Background="Transparent">
+                        <TextBlock
+                            x:Name="Text"
+                            Text="{TemplateBinding Content}"
+                            Margin="3,-7,3,10"
+                            TextWrapping="NoWrap"
+                            Style="{StaticResource SubheaderTextStyle}"/>
+                        <Rectangle
+                            x:Name="FocusVisualWhite"
+                            IsHitTestVisible="False"
+                            Stroke="{StaticResource FocusVisualWhiteStrokeThemeBrush}"
+                            StrokeEndLineCap="Square"
+                            StrokeDashArray="1,1"
+                            Opacity="0"
+                            StrokeDashOffset="1.5"/>
+                        <Rectangle
+                            x:Name="FocusVisualBlack"
+                            IsHitTestVisible="False"
+                            Stroke="{StaticResource FocusVisualBlackStrokeThemeBrush}"
+                            StrokeEndLineCap="Square"
+                            StrokeDashArray="1,1"
+                            Opacity="0"
+                            StrokeDashOffset="0.5"/>
+
+                        <VisualStateManager.VisualStateGroups>
+                            <VisualStateGroup x:Name="CommonStates">
+                                <VisualState x:Name="Normal"/>
+                                <VisualState x:Name="PointerOver">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Text" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource ApplicationPointerOverForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Pressed">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Text" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource ApplicationPressedForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Disabled">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Text" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource ButtonDisabledForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                            </VisualStateGroup>
+                            <VisualStateGroup x:Name="FocusStates">
+                                <VisualState x:Name="Focused">
+                                    <Storyboard>
+                                        <DoubleAnimation Duration="0" To="1" Storyboard.TargetName="FocusVisualWhite" Storyboard.TargetProperty="Opacity"/>
+                                        <DoubleAnimation Duration="0" To="1" Storyboard.TargetName="FocusVisualBlack" Storyboard.TargetProperty="Opacity"/>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Unfocused"/>
+                            </VisualStateGroup>
+                            <VisualStateGroup x:Name="CheckStates">
+                                <VisualState x:Name="Checked"/>
+                                <VisualState x:Name="Unchecked">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Text" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource ApplicationSecondaryForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Indeterminate"/>
+                            </VisualStateGroup>
+                        </VisualStateManager.VisualStateGroups>
+                    </Grid>
+                </ControlTemplate>
+            </Setter.Value>
+        </Setter>
+    </Style>
+
+    <!--
+        AppBarButtonStyle is used to style a Button for use in an App Bar.  Content will be centered and should fit within
+        the 40-pixel radius glyph provided.  16-point Segoe UI Symbol is used for content text to simplify the use of glyphs
+        from that font.  AutomationProperties.Name is used for the text below the glyph.
+    -->
+    <Style x:Key="AppBarButtonStyle" TargetType="Button">
+        <Setter Property="Foreground" Value="{StaticResource AppBarItemForegroundThemeBrush}"/>
+        <Setter Property="VerticalAlignment" Value="Stretch"/>
+        <Setter Property="FontFamily" Value="Segoe UI Symbol"/>
+        <Setter Property="FontWeight" Value="Normal"/>
+        <Setter Property="FontSize" Value="20"/>
+        <Setter Property="AutomationProperties.ItemType" Value="App Bar Button"/>
+        <Setter Property="Template">
+            <Setter.Value>
+                <ControlTemplate TargetType="Button">
+                    <Grid Width="100" Background="Transparent">
+                        <StackPanel VerticalAlignment="Top" Margin="0,14,0,13">
+                            <Grid Width="40" Height="40" Margin="0,0,0,5" HorizontalAlignment="Center">
+                                <TextBlock x:Name="BackgroundGlyph" Text="&#xE0A8;" FontFamily="Segoe UI Symbol" FontSize="53.333" Margin="-4,-19,0,0" Foreground="{StaticResource AppBarItemBackgroundThemeBrush}"/>
+                                <TextBlock x:Name="OutlineGlyph" Text="&#xE0A7;" FontFamily="Segoe UI Symbol" FontSize="53.333" Margin="-4,-19,0,0"/>
+                                <ContentPresenter x:Name="Content" HorizontalAlignment="Center" Margin="-1,-1,0,0" VerticalAlignment="Center"/>
+                            </Grid>
+                            <TextBlock
+                                x:Name="TextLabel"
+                                Text="{TemplateBinding AutomationProperties.Name}"
+                                Margin="0,0,2,0"
+                                FontSize="12"
+                                TextAlignment="Center"
+                                Width="88"
+                                MaxHeight="32"
+                                TextTrimming="WordEllipsis"
+                                Style="{StaticResource BasicTextStyle}"/>
+                        </StackPanel>
+                        <Rectangle
+                                x:Name="FocusVisualWhite"
+                                IsHitTestVisible="False"
+                                Stroke="{StaticResource FocusVisualWhiteStrokeThemeBrush}"
+                                StrokeEndLineCap="Square"
+                                StrokeDashArray="1,1"
+                                Opacity="0"
+                                StrokeDashOffset="1.5"/>
+                        <Rectangle
+                                x:Name="FocusVisualBlack"
+                                IsHitTestVisible="False"
+                                Stroke="{StaticResource FocusVisualBlackStrokeThemeBrush}"
+                                StrokeEndLineCap="Square"
+                                StrokeDashArray="1,1"
+                                Opacity="0"
+                                StrokeDashOffset="0.5"/>
+
+                        <VisualStateManager.VisualStateGroups>
+                            <VisualStateGroup x:Name="CommonStates">
+                                <VisualState x:Name="Normal"/>
+                                <VisualState x:Name="PointerOver">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="BackgroundGlyph" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource AppBarItemPointerOverBackgroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Content" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource AppBarItemPointerOverForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Pressed">
+                                    <Storyboard>
+                                        <DoubleAnimation
+                                            Storyboard.TargetName="OutlineGlyph"
+                                            Storyboard.TargetProperty="Opacity"
+                                            To="0"
+                                            Duration="0"/>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="BackgroundGlyph" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource AppBarItemForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Content" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource AppBarItemPressedForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Disabled">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="OutlineGlyph" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource AppBarItemDisabledForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="Content" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource AppBarItemDisabledForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="TextLabel" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource AppBarItemDisabledForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                            </VisualStateGroup>
+                            <VisualStateGroup x:Name="FocusStates">
+                                <VisualState x:Name="Focused">
+                                    <Storyboard>
+                                        <DoubleAnimation
+                                                Storyboard.TargetName="FocusVisualWhite"
+                                                Storyboard.TargetProperty="Opacity"
+                                                To="1"
+                                                Duration="0"/>
+                                        <DoubleAnimation
+                                                Storyboard.TargetName="FocusVisualBlack"
+                                                Storyboard.TargetProperty="Opacity"
+                                                To="1"
+                                                Duration="0"/>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Unfocused" />
+                                <VisualState x:Name="PointerFocused" />
+                            </VisualStateGroup>
+                        </VisualStateManager.VisualStateGroups>
+                    </Grid>
+                </ControlTemplate>
+            </Setter.Value>
+        </Setter>
+    </Style>
+
+    <!-- Standard App Bar buttons -->
+
+    <Style x:Key="SkipBackAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="SkipBackAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Skip Back"/>
+        <Setter Property="Content" Value="&#xE100;"/>
+    </Style>
+    <Style x:Key="SkipAheadAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="SkipAheadAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Skip Ahead"/>
+        <Setter Property="Content" Value="&#xE101;"/>
+    </Style>
+    <Style x:Key="PlayAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="PlayAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Play"/>
+        <Setter Property="Content" Value="&#xE102;"/>
+    </Style>
+    <Style x:Key="PauseAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="PauseAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Pause"/>
+        <Setter Property="Content" Value="&#xE103;"/>
+    </Style>
+    <Style x:Key="EditAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="EditAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Edit"/>
+        <Setter Property="Content" Value="&#xE104;"/>
+    </Style>
+    <Style x:Key="SaveAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="SaveAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Save"/>
+        <Setter Property="Content" Value="&#xE105;"/>
+    </Style>
+    <Style x:Key="DeleteAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="DeleteAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Delete"/>
+        <Setter Property="Content" Value="&#xE106;"/>
+    </Style>
+    <Style x:Key="DiscardAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="DiscardAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Discard"/>
+        <Setter Property="Content" Value="&#xE107;"/>
+    </Style>
+    <Style x:Key="RemoveAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="RemoveAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Remove"/>
+        <Setter Property="Content" Value="&#xE108;"/>
+    </Style>
+    <Style x:Key="AddAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="AddAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Add"/>
+        <Setter Property="Content" Value="&#xE109;"/>
+    </Style>
+    <Style x:Key="NoAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="NoAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="No"/>
+        <Setter Property="Content" Value="&#xE10A;"/>
+    </Style>
+    <Style x:Key="YesAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="YesAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Yes"/>
+        <Setter Property="Content" Value="&#xE10B;"/>
+    </Style>
+    <Style x:Key="MoreAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="MoreAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="More"/>
+        <Setter Property="Content" Value="&#xE10C;"/>
+    </Style>
+    <Style x:Key="RedoAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="RedoAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Redo"/>
+        <Setter Property="Content" Value="&#xE10D;"/>
+    </Style>
+    <Style x:Key="UndoAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="UndoAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Undo"/>
+        <Setter Property="Content" Value="&#xE10E;"/>
+    </Style>
+    <Style x:Key="HomeAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="HomeAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Home"/>
+        <Setter Property="Content" Value="&#xE10F;"/>
+    </Style>
+    <Style x:Key="OutAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="OutAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Out"/>
+        <Setter Property="Content" Value="&#xE110;"/>
+    </Style>
+    <Style x:Key="NextAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="NextAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Next"/>
+        <Setter Property="Content" Value="&#xE111;"/>
+    </Style>
+    <Style x:Key="PreviousAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="PreviousAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Previous"/>
+        <Setter Property="Content" Value="&#xE112;"/>
+    </Style>
+    <Style x:Key="FavoriteAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="FavoriteAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Favorite"/>
+        <Setter Property="Content" Value="&#xE113;"/>
+    </Style>
+    <Style x:Key="PhotoAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="PhotoAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Photo"/>
+        <Setter Property="Content" Value="&#xE114;"/>
+    </Style>
+    <Style x:Key="SettingsAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="SettingsAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Settings"/>
+        <Setter Property="Content" Value="&#xE115;"/>
+    </Style>
+    <Style x:Key="VideoAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="VideoAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Video"/>
+        <Setter Property="Content" Value="&#xE116;"/>
+    </Style>
+    <Style x:Key="RefreshAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="RefreshAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Refresh"/>
+        <Setter Property="Content" Value="&#xE117;"/>
+    </Style>
+    <Style x:Key="DownloadAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="DownloadAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Download"/>
+        <Setter Property="Content" Value="&#xE118;"/>
+    </Style>
+    <Style x:Key="MailAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="MailAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Mail"/>
+        <Setter Property="Content" Value="&#xE119;"/>
+    </Style>
+    <Style x:Key="SearchAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="SearchAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Search"/>
+        <Setter Property="Content" Value="&#xE11A;"/>
+    </Style>
+    <Style x:Key="HelpAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="HelpAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Help"/>
+        <Setter Property="Content" Value="&#xE11B;"/>
+    </Style>
+    <Style x:Key="UploadAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="UploadAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Upload"/>
+        <Setter Property="Content" Value="&#xE11C;"/>
+    </Style>
+    <Style x:Key="PinAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="PinAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Pin"/>
+        <Setter Property="Content" Value="&#xE141;"/>
+    </Style>
+    <Style x:Key="UnpinAppBarButtonStyle" TargetType="Button" BasedOn="{StaticResource AppBarButtonStyle}">
+        <Setter Property="AutomationProperties.AutomationId" Value="UnpinAppBarButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Unpin"/>
+        <Setter Property="Content" Value="&#xE196;"/>
+    </Style>
+
+    <!-- Title area styles -->
+
+    <Style x:Key="PageHeaderTextStyle" TargetType="TextBlock" BasedOn="{StaticResource HeaderTextStyle}">
+        <Setter Property="TextWrapping" Value="NoWrap"/>
+        <Setter Property="VerticalAlignment" Value="Bottom"/>
+        <Setter Property="Margin" Value="0,0,40,40"/>
+    </Style>
+
+    <Style x:Key="PageSubheaderTextStyle" TargetType="TextBlock" BasedOn="{StaticResource SubheaderTextStyle}">
+        <Setter Property="TextWrapping" Value="NoWrap"/>
+        <Setter Property="VerticalAlignment" Value="Bottom"/>
+        <Setter Property="Margin" Value="0,0,0,40"/>
+    </Style>
+
+    <Style x:Key="SnappedPageHeaderTextStyle" TargetType="TextBlock" BasedOn="{StaticResource PageSubheaderTextStyle}">
+        <Setter Property="Margin" Value="0,0,18,40"/>
+    </Style>
+
+    <!--
+        BackButtonStyle is used to style a Button for use in the title area of a page.  Margins appropriate for
+        the conventional page layout are included as part of the style.
+    -->
+    <Style x:Key="BackButtonStyle" TargetType="Button">
+        <Setter Property="MinWidth" Value="0"/>
+        <Setter Property="Width" Value="48"/>
+        <Setter Property="Height" Value="48"/>
+        <Setter Property="Margin" Value="36,0,36,36"/>
+        <Setter Property="VerticalAlignment" Value="Bottom"/>
+        <Setter Property="FontFamily" Value="Segoe UI Symbol"/>
+        <Setter Property="FontWeight" Value="Normal"/>
+        <Setter Property="FontSize" Value="56"/>
+        <Setter Property="AutomationProperties.AutomationId" Value="BackButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Back"/>
+        <Setter Property="AutomationProperties.ItemType" Value="Navigation Button"/>
+        <Setter Property="Template">
+            <Setter.Value>
+                <ControlTemplate TargetType="Button">
+                    <Grid x:Name="RootGrid">
+                        <Grid Margin="-1,-16,0,0">
+                            <TextBlock x:Name="BackgroundGlyph" Text="&#xE0A8;" Foreground="{StaticResource BackButtonBackgroundThemeBrush}"/>
+                            <TextBlock x:Name="NormalGlyph" Text="{StaticResource BackButtonGlyph}" Foreground="{StaticResource BackButtonForegroundThemeBrush}"/>
+                            <TextBlock x:Name="ArrowGlyph" Text="&#xE0A6;" Foreground="{StaticResource BackButtonPressedForegroundThemeBrush}" Opacity="0"/>
+                        </Grid>
+                        <Rectangle
+                            x:Name="FocusVisualWhite"
+                            IsHitTestVisible="False"
+                            Stroke="{StaticResource FocusVisualWhiteStrokeThemeBrush}"
+                            StrokeEndLineCap="Square"
+                            StrokeDashArray="1,1"
+                            Opacity="0"
+                            StrokeDashOffset="1.5"/>
+                        <Rectangle
+                            x:Name="FocusVisualBlack"
+                            IsHitTestVisible="False"
+                            Stroke="{StaticResource FocusVisualBlackStrokeThemeBrush}"
+                            StrokeEndLineCap="Square"
+                            StrokeDashArray="1,1"
+                            Opacity="0"
+                            StrokeDashOffset="0.5"/>
+
+                        <VisualStateManager.VisualStateGroups>
+                            <VisualStateGroup x:Name="CommonStates">
+                                <VisualState x:Name="Normal" />
+                                <VisualState x:Name="PointerOver">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="BackgroundGlyph" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource BackButtonPointerOverBackgroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="NormalGlyph" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource BackButtonPointerOverForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Pressed">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="BackgroundGlyph" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource BackButtonForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                        <DoubleAnimation
+                                            Storyboard.TargetName="ArrowGlyph"
+                                            Storyboard.TargetProperty="Opacity"
+                                            To="1"
+                                            Duration="0"/>
+                                        <DoubleAnimation
+                                            Storyboard.TargetName="NormalGlyph"
+                                            Storyboard.TargetProperty="Opacity"
+                                            To="0"
+                                            Duration="0"/>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Disabled">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="RootGrid" Storyboard.TargetProperty="Visibility">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="Collapsed"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                            </VisualStateGroup>
+                            <VisualStateGroup x:Name="FocusStates">
+                                <VisualState x:Name="Focused">
+                                    <Storyboard>
+                                        <DoubleAnimation
+                                            Storyboard.TargetName="FocusVisualWhite"
+                                            Storyboard.TargetProperty="Opacity"
+                                            To="1"
+                                            Duration="0"/>
+                                        <DoubleAnimation
+                                            Storyboard.TargetName="FocusVisualBlack"
+                                            Storyboard.TargetProperty="Opacity"
+                                            To="1"
+                                            Duration="0"/>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Unfocused" />
+                                <VisualState x:Name="PointerFocused" />
+                            </VisualStateGroup>
+                        </VisualStateManager.VisualStateGroups>
+                    </Grid>
+                </ControlTemplate>
+            </Setter.Value>
+        </Setter>
+    </Style>
+
+    <!--
+        PortraitBackButtonStyle is used to style a Button for use in the title area of a portrait page.  Margins appropriate
+        for the conventional page layout are included as part of the style.
+    -->
+    <Style x:Key="PortraitBackButtonStyle" TargetType="Button" BasedOn="{StaticResource BackButtonStyle}">
+        <Setter Property="Margin" Value="26,0,26,36"/>
+    </Style>
+
+    <!--
+        SnappedBackButtonStyle is used to style a Button for use in the title area of a snapped page.  Margins appropriate
+        for the conventional page layout are included as part of the style.
+
+        The obvious duplication here is necessary as the glyphs used in snapped are not merely smaller versions of the same
+        glyph but are actually distinct.
+    -->
+    <Style x:Key="SnappedBackButtonStyle" TargetType="Button">
+        <Setter Property="MinWidth" Value="0"/>
+        <Setter Property="Margin" Value="20,0,0,0"/>
+        <Setter Property="VerticalAlignment" Value="Bottom"/>
+        <Setter Property="FontFamily" Value="Segoe UI Symbol"/>
+        <Setter Property="FontWeight" Value="Normal"/>
+        <Setter Property="FontSize" Value="26.66667"/>
+        <Setter Property="AutomationProperties.AutomationId" Value="BackButton"/>
+        <Setter Property="AutomationProperties.Name" Value="Back"/>
+        <Setter Property="AutomationProperties.ItemType" Value="Navigation Button"/>
+        <Setter Property="Template">
+            <Setter.Value>
+                <ControlTemplate TargetType="Button">
+                    <Grid x:Name="RootGrid" Width="36" Height="36" Margin="-3,0,7,33">
+                        <Grid Margin="-1,-1,0,0">
+                            <TextBlock x:Name="BackgroundGlyph" Text="&#xE0D4;" Foreground="{StaticResource BackButtonBackgroundThemeBrush}"/>
+                            <TextBlock x:Name="NormalGlyph" Text="{StaticResource BackButtonSnappedGlyph}" Foreground="{StaticResource BackButtonForegroundThemeBrush}"/>
+                            <TextBlock x:Name="ArrowGlyph" Text="&#xE0C4;" Foreground="{StaticResource BackButtonPressedForegroundThemeBrush}" Opacity="0"/>
+                        </Grid>
+                        <Rectangle
+                            x:Name="FocusVisualWhite"
+                            IsHitTestVisible="False"
+                            Stroke="{StaticResource FocusVisualWhiteStrokeThemeBrush}"
+                            StrokeEndLineCap="Square"
+                            StrokeDashArray="1,1"
+                            Opacity="0"
+                            StrokeDashOffset="1.5"/>
+                        <Rectangle
+                            x:Name="FocusVisualBlack"
+                            IsHitTestVisible="False"
+                            Stroke="{StaticResource FocusVisualBlackStrokeThemeBrush}"
+                            StrokeEndLineCap="Square"
+                            StrokeDashArray="1,1"
+                            Opacity="0"
+                            StrokeDashOffset="0.5"/>
+
+                        <VisualStateManager.VisualStateGroups>
+                            <VisualStateGroup x:Name="CommonStates">
+                                <VisualState x:Name="Normal" />
+                                <VisualState x:Name="PointerOver">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="BackgroundGlyph" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource BackButtonPointerOverBackgroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="NormalGlyph" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource BackButtonPointerOverForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Pressed">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="BackgroundGlyph" Storyboard.TargetProperty="Foreground">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="{StaticResource BackButtonForegroundThemeBrush}"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                        <DoubleAnimation
+                                            Storyboard.TargetName="ArrowGlyph"
+                                            Storyboard.TargetProperty="Opacity"
+                                            To="1"
+                                            Duration="0"/>
+                                        <DoubleAnimation
+                                            Storyboard.TargetName="NormalGlyph"
+                                            Storyboard.TargetProperty="Opacity"
+                                            To="0"
+                                            Duration="0"/>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Disabled">
+                                    <Storyboard>
+                                        <ObjectAnimationUsingKeyFrames Storyboard.TargetName="RootGrid" Storyboard.TargetProperty="Visibility">
+                                            <DiscreteObjectKeyFrame KeyTime="0" Value="Collapsed"/>
+                                        </ObjectAnimationUsingKeyFrames>
+                                    </Storyboard>
+                                </VisualState>
+                            </VisualStateGroup>
+                            <VisualStateGroup x:Name="FocusStates">
+                                <VisualState x:Name="Focused">
+                                    <Storyboard>
+                                        <DoubleAnimation
+                                            Storyboard.TargetName="FocusVisualWhite"
+                                            Storyboard.TargetProperty="Opacity"
+                                            To="1"
+                                            Duration="0"/>
+                                        <DoubleAnimation
+                                            Storyboard.TargetName="FocusVisualBlack"
+                                            Storyboard.TargetProperty="Opacity"
+                                            To="1"
+                                            Duration="0"/>
+                                    </Storyboard>
+                                </VisualState>
+                                <VisualState x:Name="Unfocused" />
+                                <VisualState x:Name="PointerFocused" />
+                            </VisualStateGroup>
+                        </VisualStateManager.VisualStateGroups>
+                    </Grid>
+                </ControlTemplate>
+            </Setter.Value>
+        </Setter>
+    </Style>
+
+    <!-- Item templates -->
+
+    <!-- Grid-appropriate 250 pixel square item template as seen in the GroupedItemsPage and ItemsPage -->
+    <DataTemplate x:Key="Standard250x250ItemTemplate">
+        <Grid HorizontalAlignment="Left" Width="250" Height="250">
+            <Border Background="{StaticResource ListViewItemPlaceholderBackgroundThemeBrush}">
+                <Image Source="{Binding Image}" Stretch="UniformToFill"/>
+            </Border>
+            <StackPanel VerticalAlignment="Bottom" Background="{StaticResource ListViewItemOverlayBackgroundThemeBrush}">
+                <TextBlock Text="{Binding Title}" Foreground="{StaticResource ListViewItemOverlayForegroundThemeBrush}" Style="{StaticResource TitleTextStyle}" Height="60" Margin="15,0,15,0"/>
+                <TextBlock Text="{Binding Subtitle}" Foreground="{StaticResource ListViewItemOverlaySecondaryForegroundThemeBrush}" Style="{StaticResource CaptionTextStyle}" TextWrapping="NoWrap" Margin="15,0,15,10"/>
+            </StackPanel>
+        </Grid>
+    </DataTemplate>
+
+    <!-- Grid-appropriate 500 by 130 pixel item template as seen in the GroupDetailPage -->
+    <DataTemplate x:Key="Standard500x130ItemTemplate">
+        <Grid Height="110" Width="480" Margin="10">
+            <Grid.ColumnDefinitions>
+                <ColumnDefinition Width="Auto"/>
+                <ColumnDefinition Width="*"/>
+            </Grid.ColumnDefinitions>
+            <Border Background="{StaticResource ListViewItemPlaceholderBackgroundThemeBrush}" Width="110" Height="110">
+                <Image Source="{Binding Image}" Stretch="UniformToFill"/>
+            </Border>
+            <StackPanel Grid.Column="1" VerticalAlignment="Top" Margin="10,0,0,0">
+                <TextBlock Text="{Binding Title}" Style="{StaticResource TitleTextStyle}" TextWrapping="NoWrap"/>
+                <TextBlock Text="{Binding Subtitle}" Style="{StaticResource CaptionTextStyle}" TextWrapping="NoWrap"/>
+                <TextBlock Text="{Binding Description}" Style="{StaticResource BodyTextStyle}" MaxHeight="60"/>
+            </StackPanel>
+        </Grid>
+    </DataTemplate>
+
+    <!-- List-appropriate 130 pixel high item template as seen in the SplitPage -->
+    <DataTemplate x:Key="Standard130ItemTemplate">
+        <Grid Height="110" Margin="6">
+            <Grid.ColumnDefinitions>
+                <ColumnDefinition Width="Auto"/>
+                <ColumnDefinition Width="*"/>
+            </Grid.ColumnDefinitions>
+            <Border Background="{StaticResource ListViewItemPlaceholderBackgroundThemeBrush}" Width="110" Height="110">
+                <Image Source="{Binding Image}" Stretch="UniformToFill"/>
+            </Border>
+            <StackPanel Grid.Column="1" VerticalAlignment="Top" Margin="10,0,0,0">
+                <TextBlock Text="{Binding Title}" Style="{StaticResource TitleTextStyle}" TextWrapping="NoWrap"/>
+                <TextBlock Text="{Binding Subtitle}" Style="{StaticResource CaptionTextStyle}" TextWrapping="NoWrap"/>
+                <TextBlock Text="{Binding Description}" Style="{StaticResource BodyTextStyle}" MaxHeight="60"/>
+            </StackPanel>
+        </Grid>
+    </DataTemplate>
+
+    <!--
+        List-appropriate 80 pixel high item template as seen in the SplitPage when Filled, and
+        the following pages when snapped: GroupedItemsPage, GroupDetailPage, and ItemsPage
+    -->
+    <DataTemplate x:Key="Standard80ItemTemplate">
+        <Grid Margin="6">
+            <Grid.ColumnDefinitions>
+                <ColumnDefinition Width="Auto"/>
+                <ColumnDefinition Width="*"/>
+            </Grid.ColumnDefinitions>
+            <Border Background="{StaticResource ListViewItemPlaceholderBackgroundThemeBrush}" Width="60" Height="60">
+                <Image Source="{Binding Image}" Stretch="UniformToFill"/>
+            </Border>
+            <StackPanel Grid.Column="1" Margin="10,0,0,0">
+                <TextBlock Text="{Binding Title}" Style="{StaticResource ItemTextStyle}" MaxHeight="40"/>
+                <TextBlock Text="{Binding Subtitle}" Style="{StaticResource CaptionTextStyle}" TextWrapping="NoWrap"/>
+            </StackPanel>
+        </Grid>
+    </DataTemplate>
+
+    <!-- Grid-appropriate 300 by 70 pixel item template as seen in the SearchResultsPage -->
+    <DataTemplate x:Key="StandardSmallIcon300x70ItemTemplate">
+        <Grid Width="300">
+            <Grid.ColumnDefinitions>
+                <ColumnDefinition Width="Auto"/>
+                <ColumnDefinition Width="*"/>
+            </Grid.ColumnDefinitions>
+            <Border Background="{StaticResource ListViewItemPlaceholderBackgroundThemeBrush}" Margin="10,10,0,20" Width="40" Height="40">
+                <Image Source="{Binding Image}" Stretch="UniformToFill"/>
+            </Border>
+            <StackPanel Grid.Column="1" Margin="10,0,10,10">
+                <TextBlock Text="{Binding Title}" Style="{StaticResource BodyTextStyle}" TextWrapping="NoWrap"/>
+                <TextBlock Text="{Binding Subtitle}" Style="{StaticResource BodyTextStyle}" Foreground="{StaticResource ApplicationSecondaryForegroundThemeBrush}" Height="40"/>
+            </StackPanel>
+        </Grid>
+    </DataTemplate>
+
+    <!-- List-appropriate 70 pixel high item template as seen in the SearchResultsPage when Snapped -->
+    <DataTemplate x:Key="StandardSmallIcon70ItemTemplate">
+        <Grid Margin="6">
+            <Grid.ColumnDefinitions>
+                <ColumnDefinition Width="Auto"/>
+                <ColumnDefinition Width="*"/>
+            </Grid.ColumnDefinitions>
+            <Border Background="{StaticResource ListViewItemPlaceholderBackgroundThemeBrush}" Margin="0,0,0,10" Width="40" Height="40">
+                <Image Source="{Binding Image}" Stretch="UniformToFill"/>
+            </Border>
+            <StackPanel Grid.Column="1" Margin="10,-10,0,0">
+                <TextBlock Text="{Binding Title}" Style="{StaticResource BodyTextStyle}" TextWrapping="NoWrap"/>
+                <TextBlock Text="{Binding Subtitle}" Style="{StaticResource BodyTextStyle}" Foreground="{StaticResource ApplicationSecondaryForegroundThemeBrush}" Height="40"/>
+            </StackPanel>
+        </Grid>
+    </DataTemplate>
+
+  <!--
+      190x130 pixel item template for displaying file previews as seen in the FileOpenPickerPage
+      Includes an elaborate tooltip to display title and description text
+  -->
+  <DataTemplate x:Key="StandardFileWithTooltip190x130ItemTemplate">
+        <Grid>
+            <Grid Background="{StaticResource ListViewItemPlaceholderBackgroundThemeBrush}">
+                <Image
+                    Source="{Binding Image}"
+                    Width="190"
+                    Height="130"
+                    HorizontalAlignment="Center"
+                    VerticalAlignment="Center"
+                    Stretch="Uniform"/>
+            </Grid>
+            <ToolTipService.Placement>Mouse</ToolTipService.Placement>
+            <ToolTipService.ToolTip>
+                <Grid Background="{StaticResource ApplicationPageBackgroundThemeBrush}">
+                    <Grid.ColumnDefinitions>
+                        <ColumnDefinition Width="Auto"/>
+                        <ColumnDefinition Width="*"/>
+                    </Grid.ColumnDefinitions>
+
+                    <Grid Background="{StaticResource ListViewItemPlaceholderBackgroundThemeBrush}" Margin="20">
+                        <Image
+                            Source="{Binding Image}"
+                            Width="160"
+                            Height="160"
+                            HorizontalAlignment="Center"
+                            VerticalAlignment="Center"
+                            Stretch="Uniform"/>
+                    </Grid>
+                    <StackPanel Width="200" Grid.Column="1" Margin="0,20,20,20">
+                        <TextBlock Text="{Binding Title}" TextWrapping="NoWrap" Style="{StaticResource BodyTextStyle}"/>
+                        <TextBlock Text="{Binding Description}" MaxHeight="140" Foreground="{StaticResource ApplicationSecondaryForegroundThemeBrush}" Style="{StaticResource BodyTextStyle}"/>
+                    </StackPanel>
+                </Grid>
+            </ToolTipService.ToolTip>
+        </Grid>
+    </DataTemplate>
+
+    <!-- Default to 10-pixel spacing between grid items (after accounting for 4-pixel insets for focus) -->
+
+    <Style TargetType="GridViewItem">
+        <Setter Property="Margin" Value="0,0,2,2" />
+    </Style>
+
+    <!-- ScrollViewer styles -->
+
+    <Style x:Key="HorizontalScrollViewerStyle" TargetType="ScrollViewer">
+        <Setter Property="HorizontalScrollBarVisibility" Value="Auto"/>
+        <Setter Property="VerticalScrollBarVisibility" Value="Disabled"/>
+        <Setter Property="ScrollViewer.HorizontalScrollMode" Value="Enabled" />
+        <Setter Property="ScrollViewer.VerticalScrollMode" Value="Disabled" />
+        <Setter Property="ScrollViewer.ZoomMode" Value="Disabled" />
+    </Style>
+
+    <Style x:Key="VerticalScrollViewerStyle" TargetType="ScrollViewer">
+        <Setter Property="HorizontalScrollBarVisibility" Value="Disabled"/>
+        <Setter Property="VerticalScrollBarVisibility" Value="Auto"/>
+        <Setter Property="ScrollViewer.HorizontalScrollMode" Value="Disabled" />
+        <Setter Property="ScrollViewer.VerticalScrollMode" Value="Enabled" />
+        <Setter Property="ScrollViewer.ZoomMode" Value="Disabled" />
+    </Style>
+
+    <!-- Page layout roots typically use entrance animations and a theme-appropriate background color -->
+
+    <Style x:Key="LayoutRootStyle" TargetType="Panel">
+        <Setter Property="Background" Value="{StaticResource ApplicationPageBackgroundThemeBrush}"/>
+        <Setter Property="ChildrenTransitions">
+            <Setter.Value>
+                <TransitionCollection>
+                    <EntranceThemeTransition/>
+                </TransitionCollection>
+            </Setter.Value>
+        </Setter>
+    </Style>
+</ResourceDictionary>
diff --git a/samples/winrt/ImageManipulations/common/suspensionmanager.cpp b/samples/winrt/ImageManipulations/common/suspensionmanager.cpp
new file mode 100644
index 000000000..c1ecf11cf
--- /dev/null
+++ b/samples/winrt/ImageManipulations/common/suspensionmanager.cpp
@@ -0,0 +1,481 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// SuspensionManager.cpp
+// Implementation of the SuspensionManager class
+//
+
+#include "pch.h"
+#include "SuspensionManager.h"
+
+#include <collection.h>
+#include <algorithm>
+
+using namespace SDKSample::Common;
+
+using namespace Concurrency;
+using namespace Platform;
+using namespace Platform::Collections;
+using namespace Windows::Foundation;
+using namespace Windows::Foundation::Collections;
+using namespace Windows::Storage;
+using namespace Windows::Storage::FileProperties;
+using namespace Windows::Storage::Streams;
+using namespace Windows::UI::Xaml;
+using namespace Windows::UI::Xaml::Controls;
+using namespace Windows::UI::Xaml::Interop;
+
+namespace
+{
+    Map<String^, Object^>^ _sessionState = ref new Map<String^, Object^>();
+    String^ sessionStateFilename = "_sessionState.dat";
+
+    // Forward declarations for object object read / write support
+    void WriteObject(Windows::Storage::Streams::DataWriter^ writer, Platform::Object^ object);
+    Platform::Object^ ReadObject(Windows::Storage::Streams::DataReader^ reader);
+}
+
+/// <summary>
+/// Provides access to global session state for the current session.  This state is serialized by
+/// <see cref="SaveAsync"/> and restored by <see cref="RestoreAsync"/> which require values to be
+/// one of the following: boxed values including integers, floating-point singles and doubles,
+/// wide characters, boolean, Strings and Guids, or Map<String^, Object^> where map values are
+/// subject to the same constraints.  Session state should be as compact as possible.
+/// </summary>
+IMap<String^, Object^>^ SuspensionManager::SessionState::get(void)
+{
+    return _sessionState;
+}
+
+/// <summary>
+/// Wrap a WeakReference as a reference object for use in a collection.
+/// </summary>
+private ref class WeakFrame sealed
+{
+private:
+    WeakReference _frameReference;
+
+internal:
+    WeakFrame(Frame^ frame) { _frameReference = frame; }
+    property Frame^ ResolvedFrame
+    {
+        Frame^ get(void) { return _frameReference.Resolve<Frame>(); }
+    };
+};
+
+namespace
+{
+    std::vector<WeakFrame^> _registeredFrames;
+    DependencyProperty^ FrameSessionStateKeyProperty =
+        DependencyProperty::RegisterAttached("_FrameSessionStateKeyProperty",
+        TypeName(String::typeid), TypeName(SuspensionManager::typeid), nullptr);
+    DependencyProperty^ FrameSessionStateProperty =
+        DependencyProperty::RegisterAttached("_FrameSessionStateProperty",
+        TypeName(IMap<String^, Object^>::typeid), TypeName(SuspensionManager::typeid), nullptr);
+}
+
+/// <summary>
+/// Registers a <see cref="Frame"/> instance to allow its navigation history to be saved to
+/// and restored from <see cref="SessionState"/>.  Frames should be registered once
+/// immediately after creation if they will participate in session state management.  Upon
+/// registration if state has already been restored for the specified key
+/// the navigation history will immediately be restored.  Subsequent invocations of
+/// <see cref="RestoreAsync(String)"/> will also restore navigation history.
+/// </summary>
+/// <param name="frame">An instance whose navigation history should be managed by
+/// <see cref="SuspensionManager"/></param>
+/// <param name="sessionStateKey">A unique key into <see cref="SessionState"/> used to
+/// store navigation-related information.</param>
+void SuspensionManager::RegisterFrame(Frame^ frame, String^ sessionStateKey)
+{
+    if (frame->GetValue(FrameSessionStateKeyProperty) != nullptr)
+    {
+        throw ref new FailureException("Frames can only be registered to one session state key");
+    }
+
+    if (frame->GetValue(FrameSessionStateProperty) != nullptr)
+    {
+        throw ref new FailureException("Frames must be either be registered before accessing frame session state, or not registered at all");
+    }
+
+    // Use a dependency property to associate the session key with a frame, and keep a list of frames whose
+    // navigation state should be managed
+    frame->SetValue(FrameSessionStateKeyProperty, sessionStateKey);
+    _registeredFrames.insert(_registeredFrames.begin(), ref new WeakFrame(frame));
+
+    // Check to see if navigation state can be restored
+    RestoreFrameNavigationState(frame);
+}
+
+/// <summary>
+/// Disassociates a <see cref="Frame"/> previously registered by <see cref="RegisterFrame"/>
+/// from <see cref="SessionState"/>.  Any navigation state previously captured will be
+/// removed.
+/// </summary>
+/// <param name="frame">An instance whose navigation history should no longer be
+/// managed.</param>
+void SuspensionManager::UnregisterFrame(Frame^ frame)
+{
+    // Remove session state and remove the frame from the list of frames whose navigation
+    // state will be saved (along with any weak references that are no longer reachable)
+    auto key = safe_cast<String^>(frame->GetValue(FrameSessionStateKeyProperty));
+    if (SessionState->HasKey(key)) SessionState->Remove(key);
+    _registeredFrames.erase(
+        std::remove_if(_registeredFrames.begin(), _registeredFrames.end(), [=](WeakFrame^& e)
+        {
+            auto testFrame = e->ResolvedFrame;
+            return testFrame == nullptr || testFrame == frame;
+        }),
+        _registeredFrames.end()
+    );
+}
+
+/// <summary>
+/// Provides storage for session state associated with the specified <see cref="Frame"/>.
+/// Frames that have been previously registered with <see cref="RegisterFrame"/> have
+/// their session state saved and restored automatically as a part of the global
+/// <see cref="SessionState"/>.  Frames that are not registered have transient state
+/// that can still be useful when restoring pages that have been discarded from the
+/// navigation cache.
+/// </summary>
+/// <remarks>Apps may choose to rely on <see cref="LayoutAwarePage"/> to manage
+/// page-specific state instead of working with frame session state directly.</remarks>
+/// <param name="frame">The instance for which session state is desired.</param>
+/// <returns>A collection of state subject to the same serialization mechanism as
+/// <see cref="SessionState"/>.</returns>
+IMap<String^, Object^>^ SuspensionManager::SessionStateForFrame(Frame^ frame)
+{
+    auto frameState = safe_cast<IMap<String^, Object^>^>(frame->GetValue(FrameSessionStateProperty));
+
+    if (frameState == nullptr)
+    {
+        auto frameSessionKey = safe_cast<String^>(frame->GetValue(FrameSessionStateKeyProperty));
+        if (frameSessionKey != nullptr)
+        {
+            // Registered frames reflect the corresponding session state
+            if (!_sessionState->HasKey(frameSessionKey))
+            {
+                _sessionState->Insert(frameSessionKey, ref new Map<String^, Object^>());
+            }
+            frameState = safe_cast<IMap<String^, Object^>^>(_sessionState->Lookup(frameSessionKey));
+        }
+        else
+        {
+            // Frames that aren't registered have transient state
+            frameState = ref new Map<String^, Object^>();
+        }
+        frame->SetValue(FrameSessionStateProperty, frameState);
+    }
+    return frameState;
+}
+
+void SuspensionManager::RestoreFrameNavigationState(Frame^ frame)
+{
+    auto frameState = SessionStateForFrame(frame);
+    if (frameState->HasKey("Navigation"))
+    {
+        frame->SetNavigationState(safe_cast<String^>(frameState->Lookup("Navigation")));
+    }
+}
+
+void SuspensionManager::SaveFrameNavigationState(Frame^ frame)
+{
+    auto frameState = SessionStateForFrame(frame);
+    frameState->Insert("Navigation", frame->GetNavigationState());
+}
+
+/// <summary>
+/// Save the current <see cref="SessionState"/>.  Any <see cref="Frame"/> instances
+/// registered with <see cref="RegisterFrame"/> will also preserve their current
+/// navigation stack, which in turn gives their active <see cref="Page"/> an opportunity
+/// to save its state.
+/// </summary>
+/// <returns>An asynchronous task that reflects when session state has been saved.</returns>
+task<void> SuspensionManager::SaveAsync(void)
+{
+    // Save the navigation state for all registered frames
+    for (auto&& weakFrame : _registeredFrames)
+    {
+        auto frame = weakFrame->ResolvedFrame;
+        if (frame != nullptr) SaveFrameNavigationState(frame);
+    }
+
+    // Serialize the session state synchronously to avoid asynchronous access to shared
+    // state
+    auto sessionData = ref new InMemoryRandomAccessStream();
+    auto sessionDataWriter = ref new DataWriter(sessionData->GetOutputStreamAt(0));
+    WriteObject(sessionDataWriter, _sessionState);
+
+    // Once session state has been captured synchronously, begin the asynchronous process
+    // of writing the result to disk
+    return task<unsigned int>(sessionDataWriter->StoreAsync()).then([=](unsigned int)
+    {
+        return sessionDataWriter->FlushAsync();
+    }).then([=](bool flushSucceeded)
+    {
+        (void)flushSucceeded; // Unused parameter
+        return ApplicationData::Current->LocalFolder->CreateFileAsync(sessionStateFilename,
+            CreationCollisionOption::ReplaceExisting);
+    }).then([=](StorageFile^ createdFile)
+    {
+        return createdFile->OpenAsync(FileAccessMode::ReadWrite);
+    }).then([=](IRandomAccessStream^ newStream)
+    {
+        return RandomAccessStream::CopyAndCloseAsync(
+            sessionData->GetInputStreamAt(0), newStream->GetOutputStreamAt(0));
+    }).then([=](UINT64 copiedBytes)
+    {
+        (void)copiedBytes; // Unused parameter
+        return;
+    });
+}
+
+/// <summary>
+/// Restores previously saved <see cref="SessionState"/>.  Any <see cref="Frame"/> instances
+/// registered with <see cref="RegisterFrame"/> will also restore their prior navigation
+/// state, which in turn gives their active <see cref="Page"/> an opportunity restore its
+/// state.
+/// </summary>
+/// <param name="version">A version identifer compared to the session state to prevent
+/// incompatible versions of session state from reaching app code.  Saved state with a
+/// different version will be ignored, resulting in an empty <see cref="SessionState"/>
+/// dictionary.</param>
+/// <returns>An asynchronous task that reflects when session state has been read.  The
+/// content of <see cref="SessionState"/> should not be relied upon until this task
+/// completes.</returns>
+task<void> SuspensionManager::RestoreAsync(void)
+{
+    _sessionState->Clear();
+
+    task<StorageFile^> getFileTask(ApplicationData::Current->LocalFolder->GetFileAsync(sessionStateFilename));
+    return getFileTask.then([=](StorageFile^ stateFile)
+    {
+        task<BasicProperties^> getBasicPropertiesTask(stateFile->GetBasicPropertiesAsync());
+        return getBasicPropertiesTask.then([=](BasicProperties^ stateFileProperties)
+        {
+            auto size = unsigned int(stateFileProperties->Size);
+            if (size != stateFileProperties->Size) throw ref new FailureException("Session state larger than 4GB");
+            task<IRandomAccessStreamWithContentType^> openReadTask(stateFile->OpenReadAsync());
+            return openReadTask.then([=](IRandomAccessStreamWithContentType^ stateFileStream)
+            {
+                auto stateReader = ref new DataReader(stateFileStream);
+                return task<unsigned int>(stateReader->LoadAsync(size)).then([=](unsigned int bytesRead)
+                {
+                    (void)bytesRead; // Unused parameter
+                    // Deserialize the Session State
+                    Object^ content = ReadObject(stateReader);
+                    _sessionState = (Map<String^, Object^>^)content;
+
+                    // Restore any registered frames to their saved state
+                    for (auto&& weakFrame : _registeredFrames)
+                    {
+                        auto frame = weakFrame->ResolvedFrame;
+                        if (frame != nullptr)
+                        {
+                            frame->ClearValue(FrameSessionStateProperty);
+                            RestoreFrameNavigationState(frame);
+                        }
+                    }
+                }, task_continuation_context::use_current());
+            });
+        });
+    });
+}
+
+#pragma region Object serialization for a known set of types
+
+namespace
+{
+    // Codes used for identifying serialized types
+    enum StreamTypes {
+        NullPtrType = 0,
+
+        // Supported IPropertyValue types
+        UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int16Type, Int32Type, Int64Type,
+        SingleType, DoubleType, BooleanType, Char16Type, GuidType, StringType,
+
+        // Additional supported types
+        StringToObjectMapType,
+
+        // Marker values used to ensure stream integrity
+        MapEndMarker
+    };
+
+    void WriteString(DataWriter^ writer, String^ string)
+    {
+        writer->WriteByte(StringType);
+        writer->WriteUInt32(writer->MeasureString(string));
+        writer->WriteString(string);
+    }
+
+    void WriteProperty(DataWriter^ writer, IPropertyValue^ propertyValue)
+    {
+        switch (propertyValue->Type)
+        {
+        case PropertyType::UInt8:
+            writer->WriteByte(UInt8Type);
+            writer->WriteByte(propertyValue->GetUInt8());
+            return;
+        case PropertyType::UInt16:
+            writer->WriteByte(UInt16Type);
+            writer->WriteUInt16(propertyValue->GetUInt16());
+            return;
+        case PropertyType::UInt32:
+            writer->WriteByte(UInt32Type);
+            writer->WriteUInt32(propertyValue->GetUInt32());
+            return;
+        case PropertyType::UInt64:
+            writer->WriteByte(UInt64Type);
+            writer->WriteUInt64(propertyValue->GetUInt64());
+            return;
+        case PropertyType::Int16:
+            writer->WriteByte(Int16Type);
+            writer->WriteUInt16(propertyValue->GetInt16());
+            return;
+        case PropertyType::Int32:
+            writer->WriteByte(Int32Type);
+            writer->WriteUInt32(propertyValue->GetInt32());
+            return;
+        case PropertyType::Int64:
+            writer->WriteByte(Int64Type);
+            writer->WriteUInt64(propertyValue->GetInt64());
+            return;
+        case PropertyType::Single:
+            writer->WriteByte(SingleType);
+            writer->WriteSingle(propertyValue->GetSingle());
+            return;
+        case PropertyType::Double:
+            writer->WriteByte(DoubleType);
+            writer->WriteDouble(propertyValue->GetDouble());
+            return;
+        case PropertyType::Boolean:
+            writer->WriteByte(BooleanType);
+            writer->WriteBoolean(propertyValue->GetBoolean());
+            return;
+        case PropertyType::Char16:
+            writer->WriteByte(Char16Type);
+            writer->WriteUInt16(propertyValue->GetChar16());
+            return;
+        case PropertyType::Guid:
+            writer->WriteByte(GuidType);
+            writer->WriteGuid(propertyValue->GetGuid());
+            return;
+        case PropertyType::String:
+            WriteString(writer, propertyValue->GetString());
+            return;
+        default:
+            throw ref new InvalidArgumentException("Unsupported property type");
+        }
+    }
+
+    void WriteStringToObjectMap(DataWriter^ writer, IMap<String^, Object^>^ map)
+    {
+        writer->WriteByte(StringToObjectMapType);
+        writer->WriteUInt32(map->Size);
+        for (auto&& pair : map)
+        {
+            WriteObject(writer, pair->Key);
+            WriteObject(writer, pair->Value);
+        }
+        writer->WriteByte(MapEndMarker);
+    }
+
+    void WriteObject(DataWriter^ writer, Object^ object)
+    {
+        if (object == nullptr)
+        {
+            writer->WriteByte(NullPtrType);
+            return;
+        }
+
+        auto propertyObject = dynamic_cast<IPropertyValue^>(object);
+        if (propertyObject != nullptr)
+        {
+            WriteProperty(writer, propertyObject);
+            return;
+        }
+
+        auto mapObject = dynamic_cast<IMap<String^, Object^>^>(object);
+        if (mapObject != nullptr)
+        {
+            WriteStringToObjectMap(writer, mapObject);
+            return;
+        }
+
+        throw ref new InvalidArgumentException("Unsupported data type");
+    }
+
+    String^ ReadString(DataReader^ reader)
+    {
+        int length = reader->ReadUInt32();
+        String^ string = reader->ReadString(length);
+        return string;
+    }
+
+    IMap<String^, Object^>^ ReadStringToObjectMap(DataReader^ reader)
+    {
+        auto map = ref new Map<String^, Object^>();
+        auto size = reader->ReadUInt32();
+        for (unsigned int index = 0; index < size; index++)
+        {
+            auto key = safe_cast<String^>(ReadObject(reader));
+            auto value = ReadObject(reader);
+            map->Insert(key, value);
+        }
+        if (reader->ReadByte() != MapEndMarker)
+        {
+            throw ref new InvalidArgumentException("Invalid stream");
+        }
+        return map;
+    }
+
+    Object^ ReadObject(DataReader^ reader)
+    {
+        auto type = reader->ReadByte();
+        switch (type)
+        {
+        case NullPtrType:
+            return nullptr;
+        case UInt8Type:
+            return reader->ReadByte();
+        case UInt16Type:
+            return reader->ReadUInt16();
+        case UInt32Type:
+            return reader->ReadUInt32();
+        case UInt64Type:
+            return reader->ReadUInt64();
+        case Int16Type:
+            return reader->ReadInt16();
+        case Int32Type:
+            return reader->ReadInt32();
+        case Int64Type:
+            return reader->ReadInt64();
+        case SingleType:
+            return reader->ReadSingle();
+        case DoubleType:
+            return reader->ReadDouble();
+        case BooleanType:
+            return reader->ReadBoolean();
+        case Char16Type:
+            return (char16_t)reader->ReadUInt16();
+        case GuidType:
+            return reader->ReadGuid();
+        case StringType:
+            return ReadString(reader);
+        case StringToObjectMapType:
+            return ReadStringToObjectMap(reader);
+        default:
+            throw ref new InvalidArgumentException("Unsupported property type");
+        }
+    }
+}
+
+#pragma endregion
diff --git a/samples/winrt/ImageManipulations/common/suspensionmanager.h b/samples/winrt/ImageManipulations/common/suspensionmanager.h
new file mode 100644
index 000000000..65e1180a0
--- /dev/null
+++ b/samples/winrt/ImageManipulations/common/suspensionmanager.h
@@ -0,0 +1,50 @@
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// SuspensionManager.h
+// Declaration of the SuspensionManager class
+//
+
+#pragma once
+
+#include <ppltasks.h>
+
+namespace SDKSample
+{
+    namespace Common
+    {
+        /// <summary>
+        /// SuspensionManager captures global session state to simplify process lifetime management
+        /// for an application.  Note that session state will be automatically cleared under a variety
+        /// of conditions and should only be used to store information that would be convenient to
+        /// carry across sessions, but that should be disacarded when an application crashes or is
+        /// upgraded.
+        /// </summary>
+        ref class SuspensionManager sealed
+        {
+        internal:
+            static void RegisterFrame(Windows::UI::Xaml::Controls::Frame^ frame, Platform::String^ sessionStateKey);
+            static void UnregisterFrame(Windows::UI::Xaml::Controls::Frame^ frame);
+            static Concurrency::task<void> SaveAsync(void);
+            static Concurrency::task<void> RestoreAsync(void);
+            static property Windows::Foundation::Collections::IMap<Platform::String^, Platform::Object^>^ SessionState
+            {
+                Windows::Foundation::Collections::IMap<Platform::String^, Platform::Object^>^ get(void);
+            };
+            static Windows::Foundation::Collections::IMap<Platform::String^, Platform::Object^>^ SessionStateForFrame(
+                Windows::UI::Xaml::Controls::Frame^ frame);
+
+        private:
+            static void RestoreFrameNavigationState(Windows::UI::Xaml::Controls::Frame^ frame);
+            static void SaveFrameNavigationState(Windows::UI::Xaml::Controls::Frame^ frame);
+        };
+    }
+}
diff --git a/samples/winrt/ImageManipulations/pch.cpp b/samples/winrt/ImageManipulations/pch.cpp
new file mode 100644
index 000000000..97389d94c
--- /dev/null
+++ b/samples/winrt/ImageManipulations/pch.cpp
@@ -0,0 +1,16 @@
+﻿//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// pch.cpp
+// Include the standard header and generate the precompiled header.
+//
+
+#include "pch.h"
diff --git a/samples/winrt/ImageManipulations/pch.h b/samples/winrt/ImageManipulations/pch.h
new file mode 100644
index 000000000..13f9bc34c
--- /dev/null
+++ b/samples/winrt/ImageManipulations/pch.h
@@ -0,0 +1,23 @@
+﻿//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+
+//
+// pch.h
+// Header for standard system include files.
+//
+
+#pragma once
+
+#include <collection.h>
+#include <ppltasks.h>
+#include <agile.h>
+#include "Common\LayoutAwarePage.h"
+#include "Common\SuspensionManager.h"
+#include "App.xaml.h"
diff --git a/samples/winrt/ImageManipulations/sample-utils/SampleTemplateStyles.xaml b/samples/winrt/ImageManipulations/sample-utils/SampleTemplateStyles.xaml
new file mode 100644
index 000000000..cddb26401
--- /dev/null
+++ b/samples/winrt/ImageManipulations/sample-utils/SampleTemplateStyles.xaml
@@ -0,0 +1,51 @@
+﻿<!--
+//*********************************************************
+//
+// Copyright (c) Microsoft. All rights reserved.
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+//
+//*********************************************************
+-->
+<ResourceDictionary
+    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
+    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml">
+
+    <Style x:Key="TitleTextStyle" TargetType="TextBlock">
+        <Setter Property="FontFamily" Value="Segoe UI Light" />
+        <Setter Property="FontSize" Value="16" />
+    </Style>
+    <Style x:Key="HeaderTextStyle" TargetType="TextBlock">
+        <Setter Property="FontFamily" Value="Segoe UI Semilight" />
+        <Setter Property="FontSize" Value="26.667" />
+        <Setter Property="Margin" Value="0,0,0,25" />
+    </Style>
+    <Style x:Key="H2Style" TargetType="TextBlock">
+        <Setter Property="FontFamily" Value="Segoe UI" />
+        <Setter Property="FontSize" Value="14.667" />
+        <Setter Property="Margin" Value="0,0,0,0" />
+    </Style>
+    <Style x:Key="SubheaderTextStyle" TargetType="TextBlock">
+        <Setter Property="FontFamily" Value="Segoe UI Semilight" />
+        <Setter Property="FontSize" Value="14.667" />
+        <Setter Property="Margin" Value="0,0,0,5" />
+    </Style>
+    <Style x:Key="BasicTextStyle" TargetType="TextBlock">
+        <Setter Property="FontFamily" Value="Segoe UI Light" />
+        <Setter Property="FontSize" Value="16" />
+    </Style>
+    <Style x:Key="SeparatorStyle" TargetType="TextBlock">
+        <Setter Property="FontFamily" Value="Segoe UI" />
+        <Setter Property="FontSize" Value="9" />
+    </Style>
+    <Style x:Key="FooterStyle" TargetType="TextBlock">
+        <Setter Property="FontFamily" Value="Segoe UI" />
+        <Setter Property="FontSize" Value="12" />
+        <Setter Property="Margin" Value="0,8,0,0" />
+    </Style>
+    <Style x:Key="HyperlinkStyle" TargetType="HyperlinkButton">
+        <Setter Property="Padding" Value="5"/>
+    </Style>
+</ResourceDictionary>