diff --git a/CMakeLists.txt b/CMakeLists.txt index 41ec315a0..e89db9cd1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -554,8 +554,20 @@ if(BUILD_NEW_PYTHON_SUPPORT) else() set(PYTHON_USE_NUMPY 0) endif() - - + + execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import sphinx; print sphinx.__version__" + RESULT_VARIABLE SPHINX_PROCESS + OUTPUT_VARIABLE SPHINX_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE) + + set(HAVE_SPHINX 0) + if(SPHINX_PROCESS EQUAL 0) + find_program(SPHINX_BUILD sphinx-build) + if(SPHINX_BUILD) + set(HAVE_SPHINX 1) + message(STATUS " Found Sphinx ${SPHINX_VERSION}: ${SPHINX_BUILD}") + endif() + endif() endif() #YV @@ -778,7 +790,7 @@ set(HAVE_OPENNI FALSE) set(HAVE_OPENNI_PRIME_SENSOR_MODULE FALSE) if(WITH_OPENNI) - include(OpenCVFindOpenNI.cmake) + include(OpenCVFindOpenNI.cmake) endif() ############################## Eigen2 ############################## @@ -798,40 +810,40 @@ endif() ################## Extra HighGUI libs on Windows ################### if(WIN32) - set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} comctl32 gdi32 ole32) - - if (MSVC) - set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} vfw32) - endif() + set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} comctl32 gdi32 ole32) + + if (MSVC) + set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} vfw32) + endif() - if(MINGW) - if(MINGW64) + if(MINGW) + if(MINGW64) set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} msvfw32 avifil32 avicap32 winmm) - if(HAVE_VIDEOINPUT) - set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} videoInput64 strmiids) - endif() - else() + if(HAVE_VIDEOINPUT) + set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} videoInput64 strmiids) + endif() + else() set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} vfw32 winmm) - if(HAVE_VIDEOINPUT) + if(HAVE_VIDEOINPUT) set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} videoInput strmiids) - endif() - endif() + endif() + endif() endif() endif() -################## LATEX ################## -set(BUILD_LATEX_DOCS OFF CACHE BOOL "Build LaTeX OpenCV Documentation") +################## Reference Manual ################## +set(BUILD_REFMAN ON CACHE BOOL "Build OpenCV Reference Manual") ################### DOXYGEN ############### -find_package(Doxygen) - -if(DOXYGEN_FOUND) - set(BUILD_DOXYGEN_DOCS ON CACHE BOOL "Generate HTML docs using Doxygen") -endif() +#find_package(Doxygen) +# +#if(DOXYGEN_FOUND) +# set(BUILD_DOXYGEN_DOCS ON CACHE BOOL "Generate HTML docs using Doxygen") +#endif() # ---------------------------------------------------------------------------- @@ -1246,9 +1258,9 @@ if(BUILD_PACKAGE) install(DIRECTORY data samples DESTINATION "." COMPONENT main PATTERN ".svn" EXCLUDE) install(DIRECTORY 3rdparty android modules tests DESTINATION "." 
COMPONENT src PATTERN ".svn" EXCLUDE) - install(DIRECTORY doc/ DESTINATION doc COMPONENT src FILES_MATCHING PATTERN "*.tex") - install(DIRECTORY doc/pics DESTINATION doc COMPONENT src PATTERN ".svn" EXCLUDE) - install(DIRECTORY doc/latex2sphinx DESTINATION doc COMPONENT src PATTERN ".svn" EXCLUDE) + #install(DIRECTORY doc/ DESTINATION doc COMPONENT src FILES_MATCHING PATTERN "*.tex") + #install(DIRECTORY doc/pics DESTINATION doc COMPONENT src PATTERN ".svn" EXCLUDE) + #install(DIRECTORY doc/latex2sphinx DESTINATION doc COMPONENT src PATTERN ".svn" EXCLUDE) endif() endif() @@ -1260,7 +1272,7 @@ endif() add_subdirectory(include) add_subdirectory(modules) -if(BUILD_LATEX_DOCS) +if(BUILD_REFMAN) include(OpenCVFindLATEX.cmake REQUIRED) if(PDFLATEX_COMPILER) @@ -1268,7 +1280,7 @@ if(BUILD_LATEX_DOCS) endif() endif() -add_subdirectory(doc) +add_subdirectory(docroot) add_subdirectory(data) if(BUILD_EXAMPLES OR INSTALL_PYTHON_EXAMPLES) @@ -1289,16 +1301,16 @@ add_subdirectory(3rdparty) message(STATUS "") message(STATUS "General configuration for opencv ${OPENCV_VERSION} =====================================") message(STATUS "") -message(STATUS " Built as dynamic libs?: ${BUILD_SHARED_LIBS}") -message(STATUS " Compiler: ${CMAKE_COMPILER}") -message(STATUS " C++ flags (Release): ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}") -message(STATUS " C++ flags (Debug): ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}") +message(STATUS " Built as dynamic libs?: ${BUILD_SHARED_LIBS}") +message(STATUS " Compiler: ${CMAKE_COMPILER}") +message(STATUS " C++ flags (Release): ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}") +message(STATUS " C++ flags (Debug): ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}") if(WIN32) -message(STATUS " Linker flags (Release): ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_RELEASE}") -message(STATUS " Linker flags (Debug): ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_DEBUG}") +message(STATUS " Linker flags (Release): ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_RELEASE}") +message(STATUS " Linker flags (Debug): ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_DEBUG}") else() -message(STATUS " Linker flags (Release): ${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS_RELEASE}") -message(STATUS " Linker flags (Debug): ${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS_DEBUG}") +message(STATUS " Linker flags (Release): ${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS_RELEASE}") +message(STATUS " Linker flags (Debug): ${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS_DEBUG}") endif() #YV @@ -1306,21 +1318,21 @@ message(STATUS "") message(STATUS " GUI: ") if (HAVE_QT) - message(STATUS " QT 4.x: ${HAVE_QT}") - message(STATUS " QT OpenGL support: ${HAVE_QT_OPENGL}") + message(STATUS " QT 4.x: ${HAVE_QT}") + message(STATUS " QT OpenGL support: ${HAVE_QT_OPENGL}") else() if(WIN32) - message(STATUS " Win32 UI: 1") + message(STATUS " Win32 UI: YES") else() if(APPLE) if(WITH_CARBON) - message(STATUS " Carbon: 1") + message(STATUS " Carbon: YES") else() - message(STATUS " Cocoa: 1") + message(STATUS " Cocoa: YES") endif() else() - message(STATUS " GTK+ 2.x: ${HAVE_GTK}") - message(STATUS " GThread: ${HAVE_GTHREAD}") + message(STATUS " GTK+ 2.x: ${HAVE_GTK}") + message(STATUS " GThread: ${HAVE_GTHREAD}") endif() endif() endif() @@ -1328,69 +1340,69 @@ endif() message(STATUS "") message(STATUS " Image I/O: ") if(NOT WITH_JPEG OR JPEG_FOUND) -message(STATUS " JPEG: ${JPEG_FOUND}") +message(STATUS " JPEG: ${JPEG_FOUND}") else() -message(STATUS " JPEG: build") 
+message(STATUS " JPEG: build") endif() if(NOT WITH_PNG OR PNG_FOUND) -message(STATUS " PNG: ${PNG_FOUND}") +message(STATUS " PNG: ${PNG_FOUND}") else() -message(STATUS " PNG: build") +message(STATUS " PNG: build") endif() if(NOT WITH_TIFF OR TIFF_FOUND) -message(STATUS " TIFF: ${TIFF_FOUND}") +message(STATUS " TIFF: ${TIFF_FOUND}") else() -message(STATUS " TIFF: build") +message(STATUS " TIFF: build") endif() if(NOT WITH_JASPER OR JASPER_FOUND) -message(STATUS " JPEG 2000: ${JASPER_FOUND}") +message(STATUS " JPEG 2000: ${JASPER_FOUND}") else() -message(STATUS " JPEG 2000: build") +message(STATUS " JPEG 2000: build") endif() if(WITH_OPENEXR AND OPENEXR_FOUND) -message(STATUS " OpenEXR: YES") +message(STATUS " OpenEXR: YES") else() -message(STATUS " OpenEXR: NO") +message(STATUS " OpenEXR: NO") endif() if(NOT HAVE_OPENNI OR HAVE_OPENNI_PRIME_SENSOR_MODULE) -message(STATUS " OpenNI: ${HAVE_OPENNI}") +message(STATUS " OpenNI: ${HAVE_OPENNI}") else() -message(STATUS " OpenNI: ${HAVE_OPENNI} (WARNING: PrimeSensor Modules for OpenNI are not installed (not found in OPENNI_PRIME_SENSOR_MODULE_BIN_DIR).)") +message(STATUS " OpenNI: ${HAVE_OPENNI} (WARNING: PrimeSensor Modules for OpenNI are not installed (not found in OPENNI_PRIME_SENSOR_MODULE_BIN_DIR).)") endif() if(UNIX AND NOT APPLE) message(STATUS "") message(STATUS " Video I/O: ") -message(STATUS " DC1394 1.x: ${HAVE_DC1394}") -message(STATUS " DC1394 2.x: ${HAVE_DC1394_2}") -message(STATUS " FFMPEG: ${HAVE_FFMPEG}") -message(STATUS " codec: ${HAVE_FFMPEG_CODEC}") -message(STATUS " format: ${HAVE_FFMPEG_FORMAT}") -message(STATUS " util: ${HAVE_FFMPEG_UTIL}") -message(STATUS " swscale: ${HAVE_FFMPEG_SWSCALE}") -message(STATUS " gentoo-style: ${HAVE_GENTOO_FFMPEG}") -message(STATUS " GStreamer: ${HAVE_GSTREAMER}") -message(STATUS " UniCap: ${HAVE_UNICAP}") -message(STATUS " PvAPI: ${HAVE_PVAPI}") +message(STATUS " DC1394 1.x: ${HAVE_DC1394}") +message(STATUS " DC1394 2.x: ${HAVE_DC1394_2}") +message(STATUS " FFMPEG: ${HAVE_FFMPEG}") +message(STATUS " codec: ${HAVE_FFMPEG_CODEC}") +message(STATUS " format: ${HAVE_FFMPEG_FORMAT}") +message(STATUS " util: ${HAVE_FFMPEG_UTIL}") +message(STATUS " swscale: ${HAVE_FFMPEG_SWSCALE}") +message(STATUS " gentoo-style: ${HAVE_GENTOO_FFMPEG}") +message(STATUS " GStreamer: ${HAVE_GSTREAMER}") +message(STATUS " UniCap: ${HAVE_UNICAP}") +message(STATUS " PvAPI: ${HAVE_PVAPI}") if(HAVE_LIBV4L) -message(STATUS " V4L/V4L2: Using libv4l") +message(STATUS " V4L/V4L2: Using libv4l") else() -message(STATUS " V4L/V4L2: ${HAVE_CAMV4L}/${HAVE_CAMV4L2}") +message(STATUS " V4L/V4L2: ${HAVE_CAMV4L}/${HAVE_CAMV4L2}") endif() -message(STATUS " Xine: ${HAVE_XINE}") +message(STATUS " Xine: ${HAVE_XINE}") endif() #if(UNIX AND NOT APPLE) if(APPLE) message(STATUS "") if(WITH_QUICKTIME) -message(STATUS " Video I/O: QuickTime") +message(STATUS " Video I/O: QuickTime") else() -message(STATUS " Video I/O: QTKit") +message(STATUS " Video I/O: QTKit") endif() endif() @@ -1398,68 +1410,68 @@ if(WIN32) message(STATUS "") message(STATUS " Video I/O: ") if(HAVE_VIDEOINPUT) -message(STATUS " VideoInput: 1") +message(STATUS " VideoInput: YES") else() -message(STATUS " VideoInput: 0") +message(STATUS " VideoInput: NO") endif() endif() message(STATUS "") message(STATUS " Interfaces: ") -message(STATUS " Python: ${BUILD_NEW_PYTHON_SUPPORT}") -message(STATUS " Python interpreter: ${PYTHON_EXECUTABLE}") +message(STATUS " Python: ${BUILD_NEW_PYTHON_SUPPORT}") +message(STATUS " Python interpreter: ${PYTHON_EXECUTABLE}") if (PYTHON_USE_NUMPY) 
-message(STATUS " Python numpy: YES") +message(STATUS " Python numpy: YES") else() -message(STATUS " Python numpy: NO (Python interface will not cover OpenCV 2.x API)") +message(STATUS " Python numpy: NO (Python interface will not cover OpenCV 2.x API)") endif() if(WITH_IPP AND IPP_FOUND) -message(STATUS " Use IPP: ${IPP_LATEST_VERSION_STR} [${IPP_LATEST_VERSION_MAJOR}.${IPP_LATEST_VERSION_MINOR}.${IPP_LATEST_VERSION_BUILD}]") -message(STATUS " at: ${IPP_ROOT_DIR}") +message(STATUS " Use IPP: ${IPP_LATEST_VERSION_STR} [${IPP_LATEST_VERSION_MAJOR}.${IPP_LATEST_VERSION_MINOR}.${IPP_LATEST_VERSION_BUILD}]") +message(STATUS " at: ${IPP_ROOT_DIR}") elseif(WITH_IPP AND NOT IPP_FOUND) -message(STATUS " Use IPP: IPP not found") +message(STATUS " Use IPP: IPP not found") else() -message(STATUS " Use IPP: NO") +message(STATUS " Use IPP: NO") endif() if(HAVE_TBB) -message(STATUS " Use TBB: YES") +message(STATUS " Use TBB: YES") else() -message(STATUS " Use TBB: NO") +message(STATUS " Use TBB: NO") endif() if (HAVE_CUDA) -message(STATUS " Use Cuda: YES") +message(STATUS " Use Cuda: YES") else() -message(STATUS " Use Cuda: NO") +message(STATUS " Use Cuda: NO") endif() if(HAVE_EIGEN2) -message(STATUS " Use Eigen2: YES") +message(STATUS " Use Eigen2: YES") else() -message(STATUS " Use Eigen2: NO") +message(STATUS " Use Eigen2: NO") endif() message(STATUS "") message(STATUS " Documentation: ") -if(BUILD_LATEX_DOCS AND PDFLATEX_COMPILER) -message(STATUS " Build PDF YES") +if(BUILD_REFMAN AND PDFLATEX_COMPILER AND HAVE_SPHINX) +message(STATUS " Build Reference Manual: YES") else() -message(STATUS " Build PDF NO") +message(STATUS " Build Reference Manual: NO") endif() -if(BUILD_DOXYGEN_DOCS AND DOXYGEN_FOUND) -message(STATUS " Doxygen HTMLs YES") -else() -message(STATUS " Doxygen HTMLs NO") -endif() +#if(BUILD_DOXYGEN_DOCS AND DOXYGEN_FOUND) +#message(STATUS " Doxygen HTMLs YES") +#else() +#message(STATUS " Doxygen HTMLs NO") +#endif() message(STATUS "") -message(STATUS " Install path: ${CMAKE_INSTALL_PREFIX}") +message(STATUS " Install path: ${CMAKE_INSTALL_PREFIX}") message(STATUS "") -message(STATUS " cvconfig.h is in: ${OPENCV_CONFIG_FILE_INCLUDE_DIR}") +message(STATUS " cvconfig.h is in: ${OPENCV_CONFIG_FILE_INCLUDE_DIR}") message(STATUS "-----------------------------------------------------------------") message(STATUS "") diff --git a/docroot/CMakeLists.txt b/docroot/CMakeLists.txt new file mode 100644 index 000000000..71149ff8d --- /dev/null +++ b/docroot/CMakeLists.txt @@ -0,0 +1,19 @@ +if(BUILD_REFMAN AND PDFLATEX_COMPILER AND HAVE_SPHINX) + +project(opencv_refman) + +file(GLOB_RECURSE FILES_PICT ../modules/*.png ../modules/*.jpg) +file(GLOB_RECURSE FILES_RST *.rst ../modules/*.rst) + +add_custom_target(refman ${SPHINX_BUILD} -b latex -c ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../modules . 
+              COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/doc/pics ${CMAKE_CURRENT_BINARY_DIR}/pics
+              COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/doc/latex2sphinx/mymath.sty ${CMAKE_CURRENT_BINARY_DIR}
+              COMMAND ${PDFLATEX_COMPILER} opencv
+              COMMAND ${PDFLATEX_COMPILER} opencv
+              DEPENDS conf.py ${FILES_RST} ${FILES_PICT}
+              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+              COMMENT "Generating the Reference Manual")
+
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/opencv.pdf DESTINATION "${OPENCV_DOC_INSTALL_PATH}" COMPONENT main)
+
+endif()
diff --git a/docroot/conf.py b/docroot/conf.py
new file mode 100644
index 000000000..15a262e60
--- /dev/null
+++ b/docroot/conf.py
@@ -0,0 +1,220 @@
+# -*- coding: utf-8 -*-
+#
+# opencvstd documentation build configuration file, created by
+# sphinx-quickstart on Mon Feb 14 00:30:43 2011.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.pngmath', 'sphinx.ext.ifconfig', 'sphinx.ext.todo']
+doctest_test_doctest_blocks = 'block'
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'opencvrefman'
+copyright = u'2011, opencv dev team'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '2.3'
+# The full version, including alpha/beta/rc tags.
+release = '2.3'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+todo_include_todos=True
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'default'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'opencv'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+  ('index', 'opencv.tex', u'The OpenCV Reference Manual',
+   u'', 'manual'),
+]
+
+latex_elements = {'preamble': r'\usepackage{mymath}\usepackage{amssymb}\usepackage{amsmath}\usepackage{bbm}'}
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output --------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    ('index', 'opencv', u'The OpenCV Reference Manual',
+     [u'opencv-dev@itseez.com'], 1)
+]
diff --git a/modules/calib3d/doc/calib3d.rst b/modules/calib3d/doc/calib3d.rst
new file mode 100644
index 000000000..7a43baaab
--- /dev/null
+++ b/modules/calib3d/doc/calib3d.rst
@@ -0,0 +1,9 @@
+****************************************
+Camera Calibration and 3D Reconstruction
+****************************************
+
+.. toctree::
+    :maxdepth: 2
+
+    camera_calibration_and_3d_reconstruction
+
diff --git a/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst b/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
new file mode 100644
index 000000000..70ea83d18
--- /dev/null
+++ b/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
@@ -0,0 +1,2413 @@
+Camera Calibration and 3D Reconstruction
+========================================
+
+.. highlight:: cpp
+
+The functions in this section use the so-called pinhole camera model. That
+is, a scene view is formed by projecting 3D points into the image plane
+using a perspective transformation:
+
+.. math::
+
+    s \; m' = A [R|t] M'
+
+or
+
+.. math::
+
+    s \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1} \begin{bmatrix} r_{11} & r_{12} & r_{13} & t_1 \\ r_{21} & r_{22} & r_{23} & t_2 \\ r_{31} & r_{32} & r_{33} & t_3 \end{bmatrix} \begin{bmatrix} X \\ Y \\ Z \\ 1 \end{bmatrix}
+
+where :math:`(X, Y, Z)` are the coordinates of a 3D point in the world
+coordinate space and :math:`(u, v)` are the coordinates of the projection
+point in pixels. :math:`A` is called a camera matrix, or a matrix of
+intrinsic parameters. :math:`(c_x, c_y)` is the principal point (usually at
+the image center), and :math:`f_x, f_y` are the focal lengths expressed in
+pixel-related units. Thus, if an image from the camera is scaled by some
+factor, all of these parameters should be scaled (multiplied/divided,
+respectively) by the same factor. The matrix of intrinsic parameters does
+not depend on the scene viewed and, once estimated, can be re-used (as long
+as the focal length is fixed, as in the case of a zoom lens). The joint
+rotation-translation matrix :math:`[R|t]` is called a matrix of extrinsic
+parameters. It is used to describe the camera motion around a static scene,
+or vice versa, the rigid motion of an object in front of a still camera.
+That is, :math:`[R|t]` translates the coordinates of a point
+:math:`(X, Y, Z)` to a coordinate system fixed with respect to the camera.
+The transformation above is equivalent to the following (when
+:math:`z \ne 0`):
+
+.. math::
+
+    \begin{array}{l} \vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\ x' = x/z \\ y' = y/z \\ u = f_x*x' + c_x \\ v = f_y*y' + c_y \end{array}
+
+Real lenses usually have some distortion, mostly radial distortion and
+slight tangential distortion. So, the above model is extended as:
+
+.. math::
+
+    \begin{array}{l} \vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\ x' = x/z \\ y' = y/z \\ x'' = x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + 2 p_1 x' y' + p_2(r^2 + 2 x'^2) \\ y'' = y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' \\ \text{where} \quad r^2 = x'^2 + y'^2 \\ u = f_x*x'' + c_x \\ v = f_y*y'' + c_y \end{array}
+
+:math:`k_1`, :math:`k_2`, :math:`k_3`, :math:`k_4`, :math:`k_5` and
+:math:`k_6` are radial distortion coefficients, and :math:`p_1`,
+:math:`p_2` are tangential distortion coefficients. Higher-order
+coefficients are not considered in OpenCV. In the functions below the
+coefficients are passed or returned as the
+
+.. math::
+
+    (k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])
+
+vector. That is, if the vector contains 4 elements, it means that
+:math:`k_3=0`. The distortion coefficients do not depend on the scene
+viewed; thus they also belong to the intrinsic camera parameters.
+*And they remain the same regardless of the captured image resolution.*
+That is, if, for example, a camera has been calibrated on images of
+:math:`320 \times 240` resolution, absolutely the same distortion
+coefficients can be used for images of :math:`640 \times 480` resolution
+from the same camera (while :math:`f_x`, :math:`f_y`, :math:`c_x` and
+:math:`c_y` need to be scaled appropriately).
+
+The functions below use the above model to:
+
+* Project 3D points to the image plane given intrinsic and extrinsic
+  parameters.
+
+* Compute extrinsic parameters given intrinsic parameters, a few 3D points
+  and their projections.
+
+* Estimate intrinsic and extrinsic camera parameters from several views of
+  a known calibration pattern (i.e. every view is described by several
+  3D-2D point correspondences).
+
+* Estimate the relative position and orientation of the stereo camera
+  "heads" and compute the *rectification* transformation that makes the
+  camera optical axes parallel.
+
+.. index:: calibrateCamera
+
+cv::calibrateCamera
+-------------------
+
+.. cfunction:: double calibrateCamera( const vector<vector<Point3f> >\& objectPoints, const vector<vector<Point2f> >\& imagePoints, Size imageSize, Mat\& cameraMatrix, Mat\& distCoeffs, vector<Mat>\& rvecs, vector<Mat>\& tvecs, int flags=0 )
+
+    Finds the camera intrinsic and extrinsic parameters from several views of a calibration pattern.
+
+    :param objectPoints: The vector of vectors of points on the calibration pattern in its coordinate system, one vector per view. If the same calibration pattern is shown in each view and it is fully visible, all the vectors will be the same, although it is possible to use partially occluded patterns, or even different patterns in different views; then the vectors will be different.
+        The points are 3D, but since they are in the pattern coordinate
+        system, if the rig is planar it may make sense to put the model on
+        the XY coordinate plane, so that the Z-coordinate of each input
+        object point is 0.
+
+    :param imagePoints: The vector of vectors of the object point projections on the calibration pattern views, one vector per view. The projections must be in the same order as the corresponding object points.
+
+    :param imageSize: Size of the image, used only to initialize the intrinsic camera matrix
+
+    :param cameraMatrix: The output 3x3 floating-point camera matrix :math:`A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}` . If ``CV_CALIB_USE_INTRINSIC_GUESS`` and/or ``CV_CALIB_FIX_ASPECT_RATIO`` are specified, some or all of ``fx, fy, cx, cy`` must be initialized before calling the function
+
+    :param distCoeffs: The output vector of distortion coefficients :math:`(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])` of 4, 5 or 8 elements
+
+    :param rvecs: The output vector of rotation vectors (see :ref:`Rodrigues2` ), estimated for each pattern view. That is, each k-th rotation vector together with the corresponding k-th translation vector (see the next output parameter description) brings the calibration pattern from the model coordinate space (in which object points are specified) to the world coordinate space, i.e. the real position of the calibration pattern in the k-th pattern view (k=0.. *M* -1)
+
+    :param tvecs: The output vector of translation vectors, estimated for each pattern view.
+
+    :param flags: Different flags, may be 0 or a combination of the following values:
+
+        * **CV_CALIB_USE_INTRINSIC_GUESS** ``cameraMatrix`` contains the valid initial values of ``fx, fy, cx, cy`` that are optimized further. Otherwise, ``(cx, cy)`` is initially set to the image center ( ``imageSize`` is used here), and the focal distances are computed in some least-squares fashion. Note that if the intrinsic parameters are known, there is no need to use this function just to estimate the extrinsic parameters. Use :ref:`FindExtrinsicCameraParams2` instead.
+
+        * **CV_CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global optimization; it stays at the center or at the other location specified when ``CV_CALIB_USE_INTRINSIC_GUESS`` is set too.
+
+        * **CV_CALIB_FIX_ASPECT_RATIO** The function considers only ``fy`` as a free parameter; the ratio ``fx/fy`` stays the same as in the input ``cameraMatrix`` . When ``CV_CALIB_USE_INTRINSIC_GUESS`` is not set, the actual input values of ``fx`` and ``fy`` are ignored, only their ratio is computed and used further.
+
+        * **CV_CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients :math:`(p_1, p_2)` will be set to zeros and stay zero.
+
+        * **CV_CALIB_FIX_K1,...,CV_CALIB_FIX_K6** Do not change the corresponding radial distortion coefficient during the optimization. If ``CV_CALIB_USE_INTRINSIC_GUESS`` is set, the coefficient from the supplied ``distCoeffs`` matrix is used, otherwise it is set to 0.
+
+        * **CV_CALIB_RATIONAL_MODEL** Enable coefficients k4, k5 and k6. For backward compatibility, this extra flag should be explicitly specified to make the calibration function use the rational model and return 8 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients.
+
+The function estimates the intrinsic camera parameters and extrinsic
+parameters for each of the views.
+The coordinates of 3D object points and their corresponding 2D projections
+in each view must be specified. That may be achieved by using an object
+with known geometry and easily detectable feature points. Such an object is
+called a calibration rig or calibration pattern, and OpenCV has built-in
+support for a chessboard as a calibration rig (see
+:ref:`FindChessboardCorners`). Currently, initialization of intrinsic
+parameters (when ``CV_CALIB_USE_INTRINSIC_GUESS`` is not set) is only
+implemented for planar calibration patterns (where the z-coordinates of the
+object points must all be 0). 3D calibration rigs can also be used as long
+as an initial ``cameraMatrix`` is provided.
+
+The algorithm does the following:
+
+#. First, it computes the initial intrinsic parameters (an option only
+   available for planar calibration patterns) or reads them from the input
+   parameters. The distortion coefficients are all set to zeros initially
+   (unless some of ``CV_CALIB_FIX_K?`` are specified).
+
+#. The initial camera pose is estimated as if the intrinsic parameters were
+   already known. This is done using :ref:`FindExtrinsicCameraParams2`.
+
+#. After that, the global Levenberg-Marquardt optimization algorithm is run
+   to minimize the reprojection error, i.e. the total sum of squared
+   distances between the observed feature points ``imagePoints`` and the
+   projected (using the current estimates of the camera parameters and the
+   poses) object points ``objectPoints``; see :ref:`ProjectPoints2`.
+
+The function returns the final re-projection error.
+Note: if you are using a non-square (i.e. non-NxN) grid and
+:func:`findChessboardCorners` for calibration, and ``calibrateCamera``
+returns bad values (i.e. zero distortion coefficients, an image center very
+far from :math:`(w/2-0.5,h/2-0.5)`, and/or large differences between
+:math:`f_x` and :math:`f_y` (ratios of 10:1 or more)), then you have
+probably used ``patternSize=cvSize(rows,cols)``, but should use
+``patternSize=cvSize(cols,rows)`` in :ref:`FindChessboardCorners`.
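+A minimal calibration loop might look as follows (the board size, the
+square size and the way the views are acquired are illustrative assumptions
+of this sketch, not part of the API):
+
+::
+
+    // collect one vector<Point2f> of corners per view of a 9x6 chessboard
+    Size boardSize(9, 6);
+    float squareSize = 1.f;              // set this to your real square size
+    vector<vector<Point3f> > objectPoints;
+    vector<vector<Point2f> > imagePoints;
+    Size imageSize;
+
+    for( ;; )                            // for each captured view ...
+    {
+        Mat view = ....;                 // 8-bit view of the board
+        imageSize = view.size();
+        vector<Point2f> corners;
+        if( !findChessboardCorners(view, boardSize, corners) )
+            continue;
+        imagePoints.push_back(corners);
+
+        // the same planar model (z=0) is used for every view
+        vector<Point3f> objp;
+        for( int i = 0; i < boardSize.height; i++ )
+            for( int j = 0; j < boardSize.width; j++ )
+                objp.push_back(Point3f(j*squareSize, i*squareSize, 0));
+        objectPoints.push_back(objp);
+    }
+
+    Mat cameraMatrix, distCoeffs;
+    vector<Mat> rvecs, tvecs;
+    double rms = calibrateCamera(objectPoints, imagePoints, imageSize,
+                                 cameraMatrix, distCoeffs, rvecs, tvecs);
+
+..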
+See also: :ref:`FindChessboardCorners`, :ref:`FindExtrinsicCameraParams2`,
+:func:`initCameraMatrix2D`, :ref:`StereoCalibrate`, :ref:`Undistort2`
+
+.. index:: calibrationMatrixValues
+
+cv::calibrationMatrixValues
+---------------------------
+
+.. cfunction:: void calibrationMatrixValues( const Mat\& cameraMatrix, Size imageSize, double apertureWidth, double apertureHeight, double\& fovx, double\& fovy, double\& focalLength, Point2d\& principalPoint, double\& aspectRatio )
+
+    Computes some useful camera characteristics from the camera matrix.
+
+    :param cameraMatrix: The input camera matrix that can be estimated by :func:`calibrateCamera` or :func:`stereoCalibrate`
+
+    :param imageSize: The input image size in pixels
+
+    :param apertureWidth: Physical width of the sensor
+
+    :param apertureHeight: Physical height of the sensor
+
+    :param fovx: The output field of view in degrees along the horizontal sensor axis
+
+    :param fovy: The output field of view in degrees along the vertical sensor axis
+
+    :param focalLength: The focal length of the lens in mm
+
+    :param principalPoint: The principal point in pixels
+
+    :param aspectRatio: :math:`f_y/f_x`
+
+The function computes various useful camera characteristics from the
+previously estimated camera matrix.
+
+.. index:: composeRT
+
+cv::composeRT
+-------------
+
+.. cfunction:: void composeRT( const Mat\& rvec1, const Mat\& tvec1, const Mat\& rvec2, const Mat\& tvec2, Mat\& rvec3, Mat\& tvec3 )
+
+.. cfunction:: void composeRT( const Mat\& rvec1, const Mat\& tvec1, const Mat\& rvec2, const Mat\& tvec2, Mat\& rvec3, Mat\& tvec3, Mat\& dr3dr1, Mat\& dr3dt1, Mat\& dr3dr2, Mat\& dr3dt2, Mat\& dt3dr1, Mat\& dt3dt1, Mat\& dt3dr2, Mat\& dt3dt2 )
+
+    Combines two rotation-and-shift transformations.
+
+    :param rvec1: The first rotation vector
+
+    :param tvec1: The first translation vector
+
+    :param rvec2: The second rotation vector
+
+    :param tvec2: The second translation vector
+
+    :param rvec3: The output rotation vector of the superposition
+
+    :param tvec3: The output translation vector of the superposition
+
+    :param d??d??: The optional output derivatives of ``rvec3`` or ``tvec3`` w.r.t. ``rvec?`` or ``tvec?``
+
+The functions compute:
+
+.. math::
+
+    \begin{array}{l} \texttt{rvec3} = \mathrm{rodrigues} ^{-1} \left ( \mathrm{rodrigues} ( \texttt{rvec2} ) \cdot \mathrm{rodrigues} ( \texttt{rvec1} ) \right ) \\ \texttt{tvec3} = \mathrm{rodrigues} ( \texttt{rvec2} ) \cdot \texttt{tvec1} + \texttt{tvec2} \end{array} ,
+
+where :math:`\mathrm{rodrigues}` denotes a rotation vector to rotation
+matrix transformation, and :math:`\mathrm{rodrigues}^{-1}` denotes the
+inverse transformation; see :func:`Rodrigues`.
+
+Also, the functions can compute the derivatives of the output vectors
+w.r.t. the input vectors (see :func:`matMulDeriv`). The functions are used
+inside :func:`stereoCalibrate` but can also be used in your own code where
+Levenberg-Marquardt or another gradient-based solver is used to optimize a
+function that contains matrix multiplication.
+
+.. index:: computeCorrespondEpilines
+
+cv::computeCorrespondEpilines
+-----------------------------
+
+.. cfunction:: void computeCorrespondEpilines( const Mat\& points, int whichImage, const Mat\& F, vector<Vec3f>\& lines )
+
+    For points in one image of a stereo pair, computes the corresponding epilines in the other image.
+
+    :param points: The input points: an :math:`N \times 1` or :math:`1 \times N` matrix of type ``CV_32FC2``, or a ``vector<Point2f>``
+
+    :param whichImage: Index of the image (1 or 2) that contains the ``points``
+
+    :param F: The fundamental matrix that can be estimated using :ref:`FindFundamentalMat` or :ref:`StereoRectify` .
+
+    :param lines: The output vector of epipolar lines in the other image, corresponding to the input points. Each line :math:`ax + by + c=0` is encoded by the 3 numbers :math:`(a, b, c)`
+
+For every point in one of the two images of a stereo pair, the function
+finds the equation of the corresponding epipolar line in the other image.
+
+From the fundamental matrix definition (see :ref:`FindFundamentalMat`),
+line :math:`l^{(2)}_i` in the second image for the point :math:`p^{(1)}_i`
+in the first image (i.e. when ``whichImage=1``) is computed as:
+
+.. math::
+
+    l^{(2)}_i = F p^{(1)}_i
+
+and, vice versa, when ``whichImage=2``, :math:`l^{(1)}_i` is computed from
+:math:`p^{(2)}_i` as:
+
+.. math::
+
+    l^{(1)}_i = F^T p^{(2)}_i
+
+Line coefficients are defined up to a scale. They are normalized such that
+:math:`a_i^2+b_i^2=1`.
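+A short illustrative fragment (``points1`` and ``points2`` are assumed to
+be matched points from the two views, as in the :ref:`FindFundamentalMat`
+example):
+
+::
+
+    Mat F = findFundamentalMat(Mat(points1), Mat(points2), FM_RANSAC, 3, 0.99);
+
+    // epipolar lines in the second image, one per point of the first image
+    vector<Vec3f> lines2;
+    computeCorrespondEpilines(Mat(points1), 1, F, lines2);
+    // each lines2[i] = (a, b, c) encodes the line a*x + b*y + c = 0,
+    // normalized so that a*a + b*b = 1
+
+..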
+.. index:: convertPointsHomogeneous
+
+cv::convertPointsHomogeneous
+----------------------------
+
+.. cfunction:: void convertPointsHomogeneous( const Mat\& src, vector<Point3f>\& dst )
+
+.. cfunction:: void convertPointsHomogeneous( const Mat\& src, vector<Point2f>\& dst )
+
+    Convert points to/from homogeneous coordinates.
+
+    :param src: The input array or vector of 2D, 3D or 4D points
+
+    :param dst: The output vector of 2D or 3D points
+
+The functions convert 2D or 3D points from/to homogeneous coordinates, or
+simply copy or transpose the array. If the input array dimensionality is
+larger than the output, each coordinate is divided by the last coordinate:
+
+.. math::
+
+    \begin{array}{l} (x,y[,z],w) \rightarrow (x',y'[,z']) \\ \text{where} \\ x' = x/w \\ y' = y/w \\ z' = z/w \quad \text{(if the output is 3D)} \end{array}
+
+If the output array dimensionality is larger, an extra 1 is appended to
+each point. Otherwise, the input array is simply copied (with optional
+transposition) to the output.
+
+.. index:: decomposeProjectionMatrix
+
+cv::decomposeProjectionMatrix
+-----------------------------
+
+.. cfunction:: void decomposeProjectionMatrix( const Mat\& projMatrix, Mat\& cameraMatrix, Mat\& rotMatrix, Mat\& transVect )
+
+.. cfunction:: void decomposeProjectionMatrix( const Mat\& projMatrix, Mat\& cameraMatrix, Mat\& rotMatrix, Mat\& transVect, Mat\& rotMatrixX, Mat\& rotMatrixY, Mat\& rotMatrixZ, Vec3d\& eulerAngles )
+
+    Decomposes the projection matrix into a rotation matrix and a camera matrix.
+
+    :param projMatrix: The 3x4 input projection matrix P
+
+    :param cameraMatrix: The output 3x3 camera matrix K
+
+    :param rotMatrix: The output 3x3 external rotation matrix R
+
+    :param transVect: The output 4x1 translation vector T
+
+    :param rotMatrixX: Optional 3x3 rotation matrix around the x-axis
+
+    :param rotMatrixY: Optional 3x3 rotation matrix around the y-axis
+
+    :param rotMatrixZ: Optional 3x3 rotation matrix around the z-axis
+
+    :param eulerAngles: Optional three-element vector containing the three Euler angles of rotation
+
+The function computes a decomposition of a projection matrix into a
+calibration and a rotation matrix, and the position of the camera.
+
+It optionally returns three rotation matrices, one for each axis, and the
+three Euler angles that could be used in OpenGL.
+
+The function is based on :ref:`RQDecomp3x3`.
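+A small usage sketch (``P`` here stands for any 3x4 projection matrix, for
+example ``P1`` or ``P2`` computed by :ref:`StereoRectify`):
+
+::
+
+    Mat P = ....;                     // 3x4 projection matrix
+    Mat K, R, t;
+    decomposeProjectionMatrix(P, K, R, t);
+    // K is the 3x3 camera matrix, R the 3x3 rotation,
+    // t the 4x1 translation vector in homogeneous coordinates
+
+..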
+
+.. index:: drawChessboardCorners
+
+cv::drawChessboardCorners
+-------------------------
+
+.. cfunction:: void drawChessboardCorners( Mat\& image, Size patternSize, const Mat\& corners, bool patternWasFound )
+
+    Renders the detected chessboard corners.
+
+    :param image: The destination image; it must be an 8-bit color image
+
+    :param patternSize: The number of inner corners per chessboard row and column (patternSize = cv::Size(points_per_row, points_per_column) = cv::Size(columns, rows))
+
+    :param corners: The array of detected corners; this should be the output of findChessboardCorners wrapped in a cv::Mat().
+
+    :param patternWasFound: Indicates whether the complete board was found or not. One may just pass the return value of :ref:`FindChessboardCorners` here
+
+The function draws the individual chessboard corners detected as red
+circles if the board was not found, or as colored corners connected with
+lines if the board was found.
+
+.. index:: findChessboardCorners
+
+cv::findChessboardCorners
+-------------------------
+
+.. cfunction:: bool findChessboardCorners( const Mat\& image, Size patternSize, vector<Point2f>\& corners, int flags=CV_CALIB_CB_ADAPTIVE_THRESH+CV_CALIB_CB_NORMALIZE_IMAGE )
+
+    Finds the positions of the internal corners of the chessboard.
+
+    :param image: Source chessboard view; it must be an 8-bit grayscale or color image
+
+    :param patternSize: The number of inner corners per chessboard row and column (patternSize = cvSize(points_per_row, points_per_column) = cvSize(columns, rows))
+
+    :param corners: The output array of detected corners
+
+    :param flags: Various operation flags, can be 0 or a combination of the following values:
+
+        * **CV_CALIB_CB_ADAPTIVE_THRESH** use adaptive thresholding to convert the image to black and white, rather than a fixed threshold level (computed from the average image brightness).
+
+        * **CV_CALIB_CB_NORMALIZE_IMAGE** normalize the image gamma with :ref:`EqualizeHist` before applying fixed or adaptive thresholding.
+
+        * **CV_CALIB_CB_FILTER_QUADS** use additional criteria (like contour area, perimeter, square-like shape) to filter out false quads that are extracted at the contour retrieval stage.
+
+        * **CALIB_CB_FAST_CHECK** Run a fast check on the image that looks for chessboard corners, and shortcut the call if none are found. This can drastically speed up the call in the degenerate condition when no chessboard is observed.
+
+The function attempts to determine whether the input image is a view of the
+chessboard pattern and to locate the internal chessboard corners. The
+function returns a non-zero value if all of the corners have been found and
+placed in a certain order (row by row, left to right in every row);
+otherwise, if the function fails to find all the corners or to reorder
+them, it returns 0. For example, a regular chessboard has 8 x 8 squares and
+7 x 7 internal corners, that is, points where the black squares touch each
+other. The detected coordinates are approximate; to determine their
+positions more accurately, the user may use the function
+:ref:`FindCornerSubPix`.
+
+Sample usage of detecting and drawing chessboard corners:
+
+::
+
+    Size patternsize(8,6); //interior number of corners
+    Mat gray = ....; //source image
+    vector<Point2f> corners; //this will be filled by the detected corners
+
+    //CALIB_CB_FAST_CHECK saves a lot of time on images
+    //that don't contain any chessboard corners
+    bool patternfound = findChessboardCorners(gray, patternsize, corners,
+            CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE
+            + CALIB_CB_FAST_CHECK);
+
+    if(patternfound)
+      cornerSubPix(gray, corners, Size(11, 11), Size(-1, -1),
+        TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 30, 0.1));
+
+    drawChessboardCorners(img, patternsize, Mat(corners), patternfound);
+
+..
+
+**Note:** the function requires some white space (like a square-thick
+border, the wider the better) around the board to make the detection more
+robust in various environments (otherwise, if there is no border and the
+background is dark, the outer black squares cannot be segmented properly,
+and so the square grouping and ordering algorithm fails).
+
+.. index:: findCirclesGrid
+
+cv::findCirclesGrid
+-------------------
+.. cfunction:: bool findCirclesGrid( const Mat\& image, Size patternSize, vector<Point2f>\& centers, int flags=CALIB_CB_SYMMETRIC_GRID )
+
+    Finds the centers of the circles' grid.
+
+    :param image: Source circles' grid view; it must be an 8-bit grayscale or color image
+
+    :param patternSize: The number of circles per grid row and column (patternSize = Size(points_per_row, points_per_column) = Size(columns, rows))
+
+    :param centers: The output array of detected centers
+
+    :param flags: Various operation flags, can be one of the following values:
+
+        * **CALIB_CB_SYMMETRIC_GRID** use a symmetric pattern of circles.
+
+        * **CALIB_CB_ASYMMETRIC_GRID** use an asymmetric pattern of circles.
+
+The function attempts to determine whether the input image is a view of the
+circles' grid pattern and to locate the circles' centers. The function
+returns a non-zero value if all of the centers have been found and placed
+in a certain order (row by row, left to right in every row); otherwise, if
+the function fails to find all the centers or to reorder them, it
+returns 0.
+
+Sample usage of detecting and drawing the circles' centers:
+
+::
+
+    Size patternsize(7,7); //number of centers
+    Mat gray = ....; //source image
+    vector<Point2f> centers; //this will be filled by the detected centers
+
+    bool patternfound = findCirclesGrid(gray, patternsize, centers);
+
+    drawChessboardCorners(img, patternsize, Mat(centers), patternfound);
+
+..
+
+**Note:** the function requires some white space (like a square-thick
+border, the wider the better) around the board to make the detection more
+robust in various environments.
+
+.. index:: solvePnP
+
+cv::solvePnP
+------------
+
+.. cfunction:: void solvePnP( const Mat\& objectPoints, const Mat\& imagePoints, const Mat\& cameraMatrix, const Mat\& distCoeffs, Mat\& rvec, Mat\& tvec, bool useExtrinsicGuess=false )
+
+    Finds the object pose from the 3D-2D point correspondences.
+
+    :param objectPoints: The array of object points in the object coordinate space, 3xN or Nx3 1-channel, or 1xN or Nx1 3-channel, where N is the number of points. Can also pass ``vector<Point3f>`` here.
+
+    :param imagePoints: The array of corresponding image points, 2xN or Nx2 1-channel or 1xN or Nx1 2-channel, where N is the number of points. Can also pass ``vector<Point2f>`` here.
+
+    :param cameraMatrix: The input camera matrix :math:`A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}`
+
+    :param distCoeffs: The input vector of distortion coefficients :math:`(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])` of 4, 5 or 8 elements. If the vector is NULL/empty, zero distortion coefficients are assumed.
+
+    :param rvec: The output rotation vector (see :ref:`Rodrigues2` ) that (together with ``tvec`` ) brings points from the model coordinate system to the camera coordinate system
+
+    :param tvec: The output translation vector
+
+    :param useExtrinsicGuess: If true (1), the function will use the provided ``rvec`` and ``tvec`` as the initial approximations of the rotation and translation vectors, respectively, and will further optimize them.
+
+The function estimates the object pose given a set of object points, their
+corresponding image projections, as well as the camera matrix and the
+distortion coefficients. This function finds such a pose that minimizes the
+reprojection error, i.e. the sum of squared distances between the observed
+projections ``imagePoints`` and the projected (using :ref:`ProjectPoints2`)
+``objectPoints``.
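+A minimal sketch, assuming the 3D model points and their detected
+projections are already available (``cameraMatrix`` and ``distCoeffs``
+would normally come from :func:`calibrateCamera`):
+
+::
+
+    vector<Point3f> objectPoints = ....; // points in the model coordinate space
+    vector<Point2f> imagePoints = ....;  // their observed projections
+
+    Mat rvec, tvec;
+    solvePnP(Mat(objectPoints), Mat(imagePoints), cameraMatrix, distCoeffs,
+             rvec, tvec);
+
+    // rvec and tvec map model coordinates to camera coordinates;
+    // the full rotation matrix, if needed, is given by Rodrigues(rvec, R)
+
+..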
+.. index:: findFundamentalMat
+
+cv::findFundamentalMat
+----------------------
+
+.. cfunction:: Mat findFundamentalMat( const Mat\& points1, const Mat\& points2, vector<uchar>\& status, int method=FM_RANSAC, double param1=3., double param2=0.99 )
+
+.. cfunction:: Mat findFundamentalMat( const Mat\& points1, const Mat\& points2, int method=FM_RANSAC, double param1=3., double param2=0.99 )
+
+    Calculates the fundamental matrix from the corresponding points in two images.
+
+    :param points1: Array of ``N`` points from the first image. The point coordinates should be floating-point (single or double precision)
+
+    :param points2: Array of the second image points of the same size and format as ``points1``
+
+    :param method: Method for computing the fundamental matrix
+
+        * **CV_FM_7POINT** for a 7-point algorithm. :math:`N = 7`
+
+        * **CV_FM_8POINT** for an 8-point algorithm. :math:`N \ge 8`
+
+        * **CV_FM_RANSAC** for the RANSAC algorithm. :math:`N \ge 8`
+
+        * **CV_FM_LMEDS** for the LMedS algorithm. :math:`N \ge 8`
+
+    :param param1: The parameter is used for RANSAC. It is the maximum distance from a point to the epipolar line in pixels, beyond which the point is considered an outlier and is not used for computing the final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the point localization, the image resolution and the image noise
+
+    :param param2: The parameter is used for the RANSAC or LMedS methods only. It specifies the desirable level of confidence (probability) that the estimated matrix is correct
+
+    :param status: The output array of N elements, every element of which is set to 0 for outliers and to 1 for the other points. The array is computed only in the RANSAC and LMedS methods. For other methods it is set to all 1's
+
+The epipolar geometry is described by the following equation:
+
+.. math::
+
+    [p_2; 1]^T F [p_1; 1] = 0
+
+where :math:`F` is the fundamental matrix, and :math:`p_1` and :math:`p_2`
+are corresponding points in the first and the second images, respectively.
+
+The function calculates the fundamental matrix using one of the four
+methods listed above and returns the found fundamental matrix. Normally
+just 1 matrix is found, but in the case of the 7-point algorithm the
+function may return up to 3 solutions (a :math:`9 \times 3` matrix that
+stores all 3 matrices sequentially).
+
+The calculated fundamental matrix may be passed further to
+:ref:`ComputeCorrespondEpilines`, which finds the epipolar lines
+corresponding to the specified points. It can also be passed to
+:ref:`StereoRectifyUncalibrated` to compute the rectification
+transformation.
+
+::
+
+    // Example. Estimation of fundamental matrix using RANSAC algorithm
+    int point_count = 100;
+    vector<Point2f> points1(point_count);
+    vector<Point2f> points2(point_count);
+
+    // initialize the points here ...
+    for( int i = 0; i < point_count; i++ )
+    {
+        points1[i] = ...;
+        points2[i] = ...;
+    }
+
+    Mat fundamental_matrix =
+        findFundamentalMat(points1, points2, FM_RANSAC, 3, 0.99);
+
+..
+
+.. index:: findHomography
+
+cv::findHomography
+------------------
+.. cfunction:: Mat findHomography( const Mat\& srcPoints, const Mat\& dstPoints, Mat\& status, int method=0, double ransacReprojThreshold=3 )
+
+.. cfunction:: Mat findHomography( const Mat\& srcPoints, const Mat\& dstPoints, vector<uchar>\& status, int method=0, double ransacReprojThreshold=3 )
+
+.. cfunction:: Mat findHomography( const Mat\& srcPoints, const Mat\& dstPoints, int method=0, double ransacReprojThreshold=3 )
+
+    Finds the perspective transformation between two planes.
+
+    :param srcPoints: Coordinates of the points in the original plane, a matrix of type ``CV_32FC2`` or a ``vector<Point2f>`` .
+
+    :param dstPoints: Coordinates of the points in the target plane, a matrix of type ``CV_32FC2`` or a ``vector<Point2f>`` .
+
+    :param method: The method used to compute the homography matrix; one of the following:
+
+        * **0** a regular method using all the points
+
+        * **CV_RANSAC** RANSAC-based robust method
+
+        * **CV_LMEDS** Least-Median robust method
+
+    :param ransacReprojThreshold: The maximum allowed reprojection error to treat a point pair as an inlier (used in the RANSAC method only). That is, if
+
+        .. math::
+
+            \| \texttt{dstPoints} _i - \texttt{convertPointsHomogeneous} ( \texttt{H} \texttt{srcPoints} _i) \| > \texttt{ransacReprojThreshold}
+
+        then the point :math:`i` is considered an outlier. If ``srcPoints`` and ``dstPoints`` are measured in pixels, it usually makes sense to set this parameter somewhere in the range 1 to 10.
+
+    :param status: The optional output mask set by a robust method ( ``CV_RANSAC`` or ``CV_LMEDS`` ). *Note that the input mask values are ignored.*
+
+The functions find and return the perspective transformation :math:`H`
+between the source and the destination planes:
+
+.. math::
+
+    s_i \vecthree{x'_i}{y'_i}{1} \sim H \vecthree{x_i}{y_i}{1}
+
+so that the back-projection error
+
+.. math::
+
+    \sum _i \left ( x'_i- \frac{h_{11} x_i + h_{12} y_i + h_{13}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2+ \left ( y'_i- \frac{h_{21} x_i + h_{22} y_i + h_{23}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2
+
+is minimized. If the parameter ``method`` is set to the default value 0,
+the function uses all the point pairs to compute the initial homography
+estimate with a simple least-squares scheme.
+
+However, if not all of the point pairs (:math:`srcPoints_i`,
+:math:`dstPoints_i`) fit the perspective transformation (i.e. there are
+some outliers), this initial estimate will be poor. In this case one can
+use one of the two robust methods. Both methods, ``RANSAC`` and ``LMeDS``,
+try many different random subsets of the corresponding point pairs (of 4
+pairs each), estimate the homography matrix using this subset and a simple
+least-squares algorithm, and then compute the quality/goodness of the
+computed homography (which is the number of inliers for RANSAC or the
+median re-projection error for LMeDS). The best subset is then used to
+produce the initial estimate of the homography matrix and the mask of
+inliers/outliers.
+
+Regardless of the method, robust or not, the computed homography matrix is
+refined further (using inliers only in the case of a robust method) with
+the Levenberg-Marquardt method in order to reduce the re-projection error
+even more.
+
+The method ``RANSAC`` can handle practically any ratio of outliers, but it
+needs a threshold to distinguish inliers from outliers. The method
+``LMeDS`` does not need any threshold, but it works correctly only when
+there are more than 50% of inliers. Finally, if you are sure that the
+computed features contain only some small noise but no outliers, the
+default method may be the best choice.
+
+The function is used to find initial intrinsic and extrinsic matrices. The
+homography matrix is determined up to a scale; thus it is normalized so
+that :math:`h_{33}=1`.
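+An illustrative fragment (``img1``, ``img2`` and the matched point vectors
+are assumptions of this sketch, not part of the API):
+
+::
+
+    vector<Point2f> srcPoints = ....;   // e.g. from feature matching
+    vector<Point2f> dstPoints = ....;
+
+    vector<uchar> inliers;
+    Mat H = findHomography(Mat(srcPoints), Mat(dstPoints), inliers,
+                           CV_RANSAC, 3);
+
+    // warp the first image onto the second image plane
+    Mat warped;
+    warpPerspective(img1, warped, H, img2.size());
+
+..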
+See also: :ref:`GetAffineTransform`, :ref:`GetPerspectiveTransform`,
+:ref:`EstimateRigidMotion`, :ref:`WarpPerspective`,
+:ref:`PerspectiveTransform`
+
+.. index:: getDefaultNewCameraMatrix
+
+cv::getDefaultNewCameraMatrix
+-----------------------------
+
+.. cfunction:: Mat getDefaultNewCameraMatrix( const Mat\& cameraMatrix, Size imgSize=Size(), bool centerPrincipalPoint=false )
+
+    Returns the default new camera matrix.
+
+    :param cameraMatrix: The input camera matrix
+
+    :param imgSize: The camera view image size in pixels
+
+    :param centerPrincipalPoint: Indicates whether in the new camera matrix the principal point should be at the image center or not
+
+The function returns the camera matrix that is either an exact copy of the
+input ``cameraMatrix`` (when ``centerPrincipalPoint=false``), or the
+modified one (when ``centerPrincipalPoint=true``).
+
+In the latter case the new camera matrix will be:
+
+.. math::
+
+    \begin{bmatrix} f_x && 0 && ( \texttt{imgSize.width} -1)*0.5 \\ 0 && f_y && ( \texttt{imgSize.height} -1)*0.5 \\ 0 && 0 && 1 \end{bmatrix} ,
+
+where :math:`f_x` and :math:`f_y` are the :math:`(0,0)` and :math:`(1,1)`
+elements of ``cameraMatrix``, respectively.
+
+By default, the undistortion functions in OpenCV (see
+``initUndistortRectifyMap``, ``undistort``) do not move the principal
+point. However, when you work with stereo, it is important to move the
+principal points in both views to the same y-coordinate (which is required
+by most of the stereo correspondence algorithms), and maybe to the same
+x-coordinate too. So you can form the new camera matrix for each view,
+where the principal points will be at the center.
+
+.. index:: getOptimalNewCameraMatrix
+
+cv::getOptimalNewCameraMatrix
+-----------------------------
+
+.. cfunction:: Mat getOptimalNewCameraMatrix( const Mat\& cameraMatrix, const Mat\& distCoeffs, Size imageSize, double alpha, Size newImageSize=Size(), Rect* validPixROI=0)
+
+    Returns the new camera matrix based on the free scaling parameter.
+
+    :param cameraMatrix: The input camera matrix
+
+    :param distCoeffs: The input vector of distortion coefficients :math:`(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])` of 4, 5 or 8 elements. If the vector is NULL/empty, zero distortion coefficients are assumed.
+
+    :param imageSize: The original image size
+
+    :param alpha: The free scaling parameter between 0 (when all the pixels in the undistorted image are valid) and 1 (when all the source image pixels are retained in the undistorted image); see :ref:`StereoRectify`
+
+    :param newImageSize: The image size after rectification. By default it is set to ``imageSize`` .
+
+    :param validPixROI: The optional output rectangle that outlines the all-good-pixels region in the undistorted image. See the ``roi1, roi2`` description in :ref:`StereoRectify`
+
+The function computes and returns the optimal new camera matrix based on
+the free scaling parameter. By varying this parameter the user may retrieve
+only sensible pixels (``alpha=0``), keep all the original image pixels if
+there is valuable information in the corners (``alpha=1``), or get
+something in between. When ``alpha>0``, the undistortion result will likely
+have some black pixels corresponding to "virtual" pixels outside of the
+captured distorted image. The original camera matrix, the distortion
+coefficients, the computed new camera matrix and ``newImageSize`` should be
+passed to :ref:`InitUndistortRectifyMap` to produce the maps for
+:ref:`Remap`.
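+For example, undistortion with all source pixels retained could look like
+this (``distorted``, ``imageSize`` and the calibration results are assumed
+to exist already):
+
+::
+
+    Rect roi;
+    Mat newK = getOptimalNewCameraMatrix(cameraMatrix, distCoeffs, imageSize,
+                                         1, imageSize, &roi); // alpha=1
+
+    Mat undistorted;
+    undistort(distorted, undistorted, cameraMatrix, distCoeffs, newK);
+    undistorted = undistorted(roi);    // crop to the all-good-pixels region
+
+..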
By varying this parameter the user may retrieve only sensible pixels +``alpha=0`` +, keep all the original image pixels if there is valuable information in the corners +``alpha=1`` +, or get something in between. When +``alpha>0`` +, the undistortion result will likely have some black pixels corresponding to "virtual" pixels outside of the captured distorted image. The original camera matrix, distortion coefficients, the computed new camera matrix and the +``newImageSize`` +should be passed to +:ref:`InitUndistortRectifyMap` +to produce the maps for +:ref:`Remap` +. + + +.. index:: initCameraMatrix2D + + +cv::initCameraMatrix2D +---------------------- + +`id=0.551661399909 Comments from the Wiki `__ + + + + +.. cfunction:: Mat initCameraMatrix2D( const vector >\& objectPoints, const vector >\& imagePoints, Size imageSize, double aspectRatio=1.) + + Finds the initial camera matrix from the 3D-2D point correspondences + + + + + + + :param objectPoints: The vector of vectors of the object points. See :func:`calibrateCamera` + + + :param imagePoints: The vector of vectors of the corresponding image points. See :func:`calibrateCamera` + + + :param imageSize: The image size in pixels; used to initialize the principal point + + + :param aspectRatio: If it is zero or negative, both :math:`f_x` and :math:`f_y` are estimated independently. Otherwise :math:`f_x = f_y * \texttt{aspectRatio}` + + + +The function estimates and returns the initial camera matrix for camera calibration process. +Currently, the function only supports planar calibration patterns, i.e. patterns where each object point has z-coordinate =0. + + +.. index:: initUndistortRectifyMap + + +cv::initUndistortRectifyMap +--------------------------- + +`id=0.926109074312 Comments from the Wiki `__ + + + + +.. cfunction:: void initUndistortRectifyMap( const Mat\& cameraMatrix, const Mat\& distCoeffs, const Mat\& R, const Mat\& newCameraMatrix, Size size, int m1type, Mat\& map1, Mat\& map2 ) + + Computes the undistortion and rectification transformation map. + + + + + + + :param cameraMatrix: The input camera matrix :math:`A=\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}` + + + :param distCoeffs: The input vector of distortion coefficients :math:`(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])` of 4, 5 or 8 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. + + + :param R: The optional rectification transformation in object space (3x3 matrix). ``R1`` or ``R2`` , computed by :ref:`StereoRectify` can be passed here. If the matrix is empty , the identity transformation is assumed + + + :param newCameraMatrix: The new camera matrix :math:`A'=\vecthreethree{f_x'}{0}{c_x'}{0}{f_y'}{c_y'}{0}{0}{1}` + + + :param size: The undistorted image size + + :param m1type: The type of the first output map, can be ``CV_32FC1`` or ``CV_16SC2`` . See :func:`convertMaps` + + + :param map1: The first output map + + + :param map2: The second output map + + + +The function computes the joint undistortion+rectification transformation and represents the result in the form of maps for +:ref:`Remap` +. The undistorted image will look like the original, as if it was captured with a camera with camera matrix +``=newCameraMatrix`` +and zero distortion. In the case of monocular camera +``newCameraMatrix`` +is usually equal to +``cameraMatrix`` +, or it can be computed by +:ref:`GetOptimalNewCameraMatrix` +for a better control over scaling. 
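+
+For the monocular case just mentioned, a minimal sketch follows (a hedged illustration: ``cameraMatrix`` , ``distCoeffs`` and ``imgSize`` are assumed to come from a prior calibration, and the image variables are placeholders):
+
+::
+
+    Mat map1, map2;
+    Mat newCamMat = getOptimalNewCameraMatrix(cameraMatrix, distCoeffs,
+                                              imgSize, 1 /* alpha */);
+    // empty R: pure undistortion, no rectification
+    initUndistortRectifyMap(cameraMatrix, distCoeffs, Mat(), newCamMat,
+                            imgSize, CV_16SC2, map1, map2);
+    remap(distorted, undistorted, map1, map2, INTER_LINEAR);
+
+..
+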
In the case of a stereo camera ``newCameraMatrix`` is normally set to ``P1`` or ``P2`` computed by :ref:`StereoRectify` .
+
+Also, this new camera will be oriented differently in the coordinate space, according to ``R`` . That, for example, helps to align two heads of a stereo camera so that the epipolar lines on both images become horizontal and have the same y-coordinate (in the case of a horizontally aligned stereo camera).
+
+The function actually builds the maps for the inverse mapping algorithm that is used by :ref:`Remap` . That is, for each pixel :math:`(u, v)` in the destination (corrected and rectified) image the function computes the corresponding coordinates in the source image (i.e. in the original image from the camera). The process is the following:
+
+.. math::
+
+    \begin{array}{l} x \leftarrow (u - {c'}_x)/{f'}_x \\ y \leftarrow (v - {c'}_y)/{f'}_y \\{[X\,Y\,W]} ^T \leftarrow R^{-1}*[x \, y \, 1]^T \\ x' \leftarrow X/W \\ y' \leftarrow Y/W \\ x" \leftarrow x' (1 + k_1 r^2 + k_2 r^4 + k_3 r^6) + 2p_1 x' y' + p_2(r^2 + 2 x'^2) \\ y" \leftarrow y' (1 + k_1 r^2 + k_2 r^4 + k_3 r^6) + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' \\ map_x(u,v) \leftarrow x" f_x + c_x \\ map_y(u,v) \leftarrow y" f_y + c_y \end{array}
+
+where :math:`(k_1, k_2, p_1, p_2[, k_3])` are the distortion coefficients.
+
+In the case of a stereo camera this function is called twice, once for each camera head, after :ref:`StereoRectify` , which in its turn is called after :ref:`StereoCalibrate` . But if the stereo camera was not calibrated, it is still possible to compute the rectification transformations directly from the fundamental matrix using :ref:`StereoRectifyUncalibrated` . For each camera the function computes the homography ``H`` as the rectification transformation in the pixel domain, not a rotation matrix ``R`` in 3D space. The ``R`` can be computed from ``H`` as
+
+.. math::
+
+    \texttt{R} = \texttt{cameraMatrix} ^{-1} \cdot \texttt{H} \cdot \texttt{cameraMatrix}
+
+where the ``cameraMatrix`` can be chosen arbitrarily.
+
+.. index:: matMulDeriv
+
+cv::matMulDeriv
+---------------
+
+.. cfunction:: void matMulDeriv( const Mat\& A, const Mat\& B, Mat\& dABdA, Mat\& dABdB )
+
+    Computes partial derivatives of the matrix product w.r.t. each multiplied matrix.
+
+    :param A: The first multiplied matrix
+
+    :param B: The second multiplied matrix
+
+    :param dABdA: The first output derivative matrix ``d(A*B)/dA`` of size :math:`\texttt{A.rows*B.cols} \times \texttt{A.rows*A.cols}`
+
+    :param dABdB: The second output derivative matrix ``d(A*B)/dB`` of size :math:`\texttt{A.rows*B.cols} \times \texttt{B.rows*B.cols}`
+
+The function computes the partial derivatives of the elements of the matrix product :math:`A*B` w.r.t. the elements of each of the two input matrices. The function is used to compute Jacobian matrices in :func:`stereoCalibrate` , but can also be used in any other similar optimization function.
+
+.. index:: projectPoints
+
+cv::projectPoints
+-----------------
+
+.. cfunction:: void projectPoints( const Mat\& objectPoints, const Mat\& rvec, const Mat\& tvec, const Mat\& cameraMatrix, const Mat\& distCoeffs, vector<Point2f>\& imagePoints )
+
+.. cfunction:: void projectPoints( const Mat\& objectPoints, const Mat\& rvec, const Mat\& tvec, const Mat\& cameraMatrix, const Mat\& distCoeffs, vector<Point2f>\& imagePoints, Mat\& dpdrot, Mat\& dpdt, Mat\& dpdf, Mat\& dpdc, Mat\& dpddist, double aspectRatio=0 )
+
+    Projects 3D points onto an image plane.
+
+    :param objectPoints: The array of object points, 3xN or Nx3 1-channel or 1xN or Nx1 3-channel (or ``vector<Point3f>`` ), where N is the number of points in the view
+
+    :param rvec: The rotation vector, see :ref:`Rodrigues2`
+
+    :param tvec: The translation vector
+
+    :param cameraMatrix: The camera matrix :math:`A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}`
+
+    :param distCoeffs: The input vector of distortion coefficients :math:`(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])` of 4, 5 or 8 elements. If the vector is NULL/empty, zero distortion coefficients are assumed.
+
+    :param imagePoints: The output array of image points, 2xN or Nx2 1-channel or 1xN or Nx1 2-channel (or ``vector<Point2f>`` )
+
+    :param dpdrot: Optional 2Nx3 matrix of derivatives of image points with respect to components of the rotation vector
+
+    :param dpdt: Optional 2Nx3 matrix of derivatives of image points with respect to components of the translation vector
+
+    :param dpdf: Optional 2Nx2 matrix of derivatives of image points with respect to :math:`f_x` and :math:`f_y`
+
+    :param dpdc: Optional 2Nx2 matrix of derivatives of image points with respect to :math:`c_x` and :math:`c_y`
+
+    :param dpddist: Optional 2Nx4 matrix of derivatives of image points with respect to the distortion coefficients
+
+The function computes projections of 3D points to the image plane given intrinsic and extrinsic camera parameters. Optionally, the function computes Jacobians - matrices of partial derivatives of image point coordinates (as functions of all the input parameters) with respect to the particular parameters, intrinsic and/or extrinsic. The Jacobians are used during the global optimization in :ref:`CalibrateCamera2` , :ref:`FindExtrinsicCameraParams2` and :ref:`StereoCalibrate` . The function itself can also be used to compute the re-projection error given the current intrinsic and extrinsic parameters.
+
+Note that by setting ``rvec=tvec=(0,0,0)`` , or by setting ``cameraMatrix`` to a 3x3 identity matrix, or by passing zero distortion coefficients, you can get various useful partial cases of the function, i.e. you can compute the distorted coordinates for a sparse set of points, or apply a perspective transformation (and also compute the derivatives) in the ideal zero-distortion setup, etc.
+
+.. index:: reprojectImageTo3D
+
+cv::reprojectImageTo3D
+----------------------
+
+.. cfunction:: void reprojectImageTo3D( const Mat\& disparity, Mat\& _3dImage, const Mat\& Q, bool handleMissingValues=false )
+
+    Reprojects a disparity image to 3D space.
+
+    :param disparity: The input single-channel 16-bit signed or 32-bit floating-point disparity image
+
+    :param _3dImage: The output 3-channel floating-point image of the same size as ``disparity`` . Each element of ``_3dImage(x,y)`` will contain the 3D coordinates of the point ``(x,y)`` , computed from the disparity map.
+ + + :param Q: The :math:`4 \times 4` perspective transformation matrix that can be obtained with :ref:`StereoRectify` + + + :param handleMissingValues: If true, when the pixels with the minimal disparity (that corresponds to the outliers; see :ref:`FindStereoCorrespondenceBM` ) will be transformed to 3D points with some very large Z value (currently set to 10000) + + + +The function transforms 1-channel disparity map to 3-channel image representing a 3D surface. That is, for each pixel +``(x,y)`` +and the corresponding disparity +``d=disparity(x,y)`` +it computes: + + + +.. math:: + + \begin{array}{l} [X \; Y \; Z \; W]^T = \texttt{Q} *[x \; y \; \texttt{disparity} (x,y) \; 1]^T \\ \texttt{\_3dImage} (x,y) = (X/W, \; Y/W, \; Z/W) \end{array} + + +The matrix +``Q`` +can be arbitrary +:math:`4 \times 4` +matrix, e.g. the one computed by +:ref:`StereoRectify` +. To reproject a sparse set of points {(x,y,d),...} to 3D space, use +:ref:`PerspectiveTransform` +. + + +.. index:: RQDecomp3x3 + + +cv::RQDecomp3x3 +--------------- + +`id=0.923623781564 Comments from the Wiki `__ + + + + +.. cfunction:: void RQDecomp3x3( const Mat\& M, Mat\& R, Mat\& Q ) + + + +.. cfunction:: Vec3d RQDecomp3x3( const Mat\& M, Mat\& R, Mat\& Q, Mat\& Qx, Mat\& Qy, Mat\& Qz ) + + Computes the 'RQ' decomposition of 3x3 matrices. + + + + + + + :param M: The 3x3 input matrix + + + :param R: The output 3x3 upper-triangular matrix + + + :param Q: The output 3x3 orthogonal matrix + + + :param Qx: Optional 3x3 rotation matrix around x-axis + + + :param Qy: Optional 3x3 rotation matrix around y-axis + + + :param Qz: Optional 3x3 rotation matrix around z-axis + + + +The function computes a RQ decomposition using the given rotations. This function is used in +:ref:`DecomposeProjectionMatrix` +to decompose the left 3x3 submatrix of a projection matrix into a camera and a rotation matrix. + +It optionally returns three rotation matrices, one for each axis, and the three Euler angles +(as the return value) +that could be used in OpenGL. + + +.. index:: Rodrigues + + +cv::Rodrigues +------------- + +`id=0.910118279746 Comments from the Wiki `__ + + + + +.. cfunction:: void Rodrigues(const Mat\& src, Mat\& dst) + + + +.. cfunction:: void Rodrigues(const Mat\& src, Mat\& dst, Mat\& jacobian) + + Converts a rotation matrix to a rotation vector or vice versa. + + + + + + + :param src: The input rotation vector (3x1 or 1x3) or rotation matrix (3x3) + + + :param dst: The output rotation matrix (3x3) or rotation vector (3x1 or 1x3), respectively + + + :param jacobian: Optional output Jacobian matrix, 3x9 or 9x3 - partial derivatives of the output array components with respect to the input array components + + + + + +.. math:: + + \begin{array}{l} \theta \leftarrow norm(r) \\ r \leftarrow r/ \theta \\ R = \cos{\theta} I + (1- \cos{\theta} ) r r^T + \sin{\theta} \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array} + + +Inverse transformation can also be done easily, since + + + +.. math:: + + \sin ( \theta ) \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} = \frac{R - R^T}{2} + + +A rotation vector is a convenient and most-compact representation of a rotation matrix +(since any rotation matrix has just 3 degrees of freedom). The representation is +used in the global 3D geometry optimization procedures like +:ref:`CalibrateCamera2` +, +:ref:`StereoCalibrate` +or +:ref:`FindExtrinsicCameraParams2` +. + + + +.. index:: StereoBM + +.. 
_StereoBM: + +StereoBM +-------- + +`id=0.214478829518 Comments from the Wiki `__ + +.. ctype:: StereoBM + + + +The class for computing stereo correspondence using block matching algorithm. + + + + +:: + + + + // Block matching stereo correspondence algorithmclass StereoBM + { + enum { NORMALIZED_RESPONSE = CV_STEREO_BM_NORMALIZED_RESPONSE, + BASIC_PRESET=CV_STEREO_BM_BASIC, + FISH_EYE_PRESET=CV_STEREO_BM_FISH_EYE, + NARROW_PRESET=CV_STEREO_BM_NARROW }; + + StereoBM(); + // the preset is one of ..._PRESET above. + // ndisparities is the size of disparity range, + // in which the optimal disparity at each pixel is searched for. + // SADWindowSize is the size of averaging window used to match pixel blocks + // (larger values mean better robustness to noise, but yield blurry disparity maps) + StereoBM(int preset, int ndisparities=0, int SADWindowSize=21); + // separate initialization function + void init(int preset, int ndisparities=0, int SADWindowSize=21); + // computes the disparity for the two rectified 8-bit single-channel images. + // the disparity will be 16-bit signed (fixed-point) or 32-bit floating-point image of the same size as left. + void operator()( const Mat& left, const Mat& right, Mat& disparity, int disptype=CV_16S ); + + Ptr state; + }; + + +.. + +The class is a C++ wrapper for +and the associated functions. In particular, +``StereoBM::operator ()`` +is the wrapper for +:ref:`FindStereoCorrespondceBM` +. See the respective descriptions. + + + +.. index:: StereoSGBM + +.. _StereoSGBM: + +StereoSGBM +---------- + +`id=0.410794906842 Comments from the Wiki `__ + +.. ctype:: StereoSGBM + + + +The class for computing stereo correspondence using semi-global block matching algorithm. + + + + +:: + + + + class StereoSGBM + { + StereoSGBM(); + StereoSGBM(int minDisparity, int numDisparities, int SADWindowSize, + int P1=0, int P2=0, int disp12MaxDiff=0, + int preFilterCap=0, int uniquenessRatio=0, + int speckleWindowSize=0, int speckleRange=0, + bool fullDP=false); + virtual ~StereoSGBM(); + + virtual void operator()(const Mat& left, const Mat& right, Mat& disp); + + int minDisparity; + int numberOfDisparities; + int SADWindowSize; + int preFilterCap; + int uniquenessRatio; + int P1, P2; + int speckleWindowSize; + int speckleRange; + int disp12MaxDiff; + bool fullDP; + + ... + }; + + +.. + +The class implements modified H. Hirschmuller algorithm +HH08 +. The main differences between the implemented algorithm and the original one are: + + + + + +* + by default the algorithm is single-pass, i.e. instead of 8 directions we only consider 5. Set + ``fullDP=true`` + to run the full variant of the algorithm (which could consume + *a lot* + of memory) + + + +* + the algorithm matches blocks, not individual pixels (though, by setting + ``SADWindowSize=1`` + the blocks are reduced to single pixels) + + + +* + mutual information cost function is not implemented. Instead, we use a simpler Birchfield-Tomasi sub-pixel metric from + BT96 + , though the color images are supported as well. + + + +* + we include some pre- and post- processing steps from K. Konolige algorithm + :ref:`FindStereoCorrespondceBM` + , such as pre-filtering ( + ``CV_STEREO_BM_XSOBEL`` + type) and post-filtering (uniqueness check, quadratic interpolation and speckle filtering) + + + +.. index:: StereoSGBM::StereoSGBM + + +cv::StereoSGBM::StereoSGBM +-------------------------- + +`id=0.516208833784 Comments from the Wiki `__ + + + + +.. cfunction:: StereoSGBM::StereoSGBM() + + + +.. 
cfunction:: StereoSGBM::StereoSGBM( int minDisparity, int numDisparities, int SADWindowSize, int P1=0, int P2=0, int disp12MaxDiff=0, int preFilterCap=0, int uniquenessRatio=0, int speckleWindowSize=0, int speckleRange=0, bool fullDP=false)
+
+    StereoSGBM constructors
+
+    :param minDisparity: The minimum possible disparity value. Normally it is 0, but sometimes rectification algorithms can shift images, so this parameter needs to be adjusted accordingly
+
+    :param numDisparities: This is the maximum disparity minus the minimum disparity; always greater than 0. In the current implementation this parameter must be divisible by 16.
+
+    :param SADWindowSize: The matched block size. Must be an odd number ``>=1`` . Normally, it should be somewhere in the ``3..11`` range.
+
+    :param P1, P2: Parameters that control disparity smoothness. The larger the values, the smoother the disparity. ``P1`` is the penalty on the disparity change by plus or minus 1 between neighbor pixels. ``P2`` is the penalty on the disparity change by more than 1 between neighbor pixels. The algorithm requires ``P2 > P1`` . See the ``stereo_match.cpp`` sample where some reasonably good ``P1`` and ``P2`` values are shown (like ``8*number_of_image_channels*SADWindowSize*SADWindowSize`` and ``32*number_of_image_channels*SADWindowSize*SADWindowSize`` , respectively).
+
+    :param disp12MaxDiff: Maximum allowed difference (in integer pixel units) in the left-right disparity check. Set it to a non-positive value to disable the check.
+
+    :param preFilterCap: Truncation value for the prefiltered image pixels. The algorithm first computes the x-derivative at each pixel and clips its value to the ``[-preFilterCap, preFilterCap]`` interval. The result values are passed to the Birchfield-Tomasi pixel cost function.
+
+    :param uniquenessRatio: The margin in percent by which the best (minimum) computed cost function value should "win" the second best value to consider the found match correct. Normally, some value within the 5-15 range is good enough
+
+    :param speckleWindowSize: Maximum size of smooth disparity regions to consider them noise speckles and invalidate them. Set it to 0 to disable speckle filtering. Otherwise, set it somewhere in the 50-200 range.
+
+    :param speckleRange: Maximum disparity variation within each connected component. If you do speckle filtering, set it to some positive value, multiple of 16. Normally, 16 or 32 is good enough.
+
+    :param fullDP: Set it to ``true`` to run the full-scale 2-pass dynamic programming algorithm. It will consume O(W*H*numDisparities) bytes, which is large for 640x480 stereo and huge for HD-size pictures. By default this is ``false``
+
+The first constructor initializes ``StereoSGBM`` with all the default parameters (so actually one only has to set ``StereoSGBM::numberOfDisparities`` at minimum). The second constructor allows you to set each parameter to a custom value.
+
+.. index:: StereoSGBM::operator ()
+
+cv::StereoSGBM::operator ()
+---------------------------
+
+.. cfunction:: void StereoSGBM::operator()(const Mat\& left, const Mat\& right, Mat\& disp)
+
+    Computes disparity using the SGBM algorithm for a rectified stereo pair.
+
+    :param left: The left image, 8-bit single-channel or 3-channel.
+
+    :param right: The right image of the same size and the same type as the left one.
+
+    :param disp: The output disparity map. It will be a 16-bit signed single-channel image of the same size as the input images.
It will contain disparity values scaled by 16, so to get the floating-point disparity map you will need to divide each ``disp`` element by 16.
+
+The method executes the SGBM algorithm on a rectified stereo pair. See the ``stereo_match.cpp`` OpenCV sample for how to prepare the images and call the method. Note that the method is not constant, so you should not use the same ``StereoSGBM`` instance from within different threads simultaneously.
+
+.. index:: stereoCalibrate
+
+cv::stereoCalibrate
+-------------------
+
+.. cfunction:: double stereoCalibrate( const vector<vector<Point3f> >\& objectPoints, const vector<vector<Point2f> >\& imagePoints1, const vector<vector<Point2f> >\& imagePoints2, Mat\& cameraMatrix1, Mat\& distCoeffs1, Mat\& cameraMatrix2, Mat\& distCoeffs2, Size imageSize, Mat\& R, Mat\& T, Mat\& E, Mat\& F, TermCriteria term_crit = TermCriteria(TermCriteria::COUNT+ TermCriteria::EPS, 30, 1e-6), int flags=CALIB_FIX_INTRINSIC )
+
+    Calibrates a stereo camera.
+
+    :param objectPoints: The vector of vectors of points on the calibration pattern in its coordinate system, one vector per view. If the same calibration pattern is shown in each view and it is fully visible, then all the vectors will be the same, although it is possible to use partially occluded patterns, or even different patterns in different views - then the vectors will be different. The points are 3D, but since they are in the pattern coordinate system, then, if the rig is planar, it may make sense to put the model on the XY coordinate plane, so that the Z-coordinate of each input object point is 0
+
+    :param imagePoints1: The vector of vectors of the object point projections on the calibration pattern views from the 1st camera, one vector per view. The projections must be in the same order as the corresponding object points.
+
+    :param imagePoints2: The vector of vectors of the object point projections on the calibration pattern views from the 2nd camera, one vector per view. The projections must be in the same order as the corresponding object points.
+
+    :param cameraMatrix1: The input/output first camera matrix: :math:`\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}` , :math:`j = 0,\, 1` . If any of ``CV_CALIB_USE_INTRINSIC_GUESS`` , ``CV_CALIB_FIX_ASPECT_RATIO`` , ``CV_CALIB_FIX_INTRINSIC`` or ``CV_CALIB_FIX_FOCAL_LENGTH`` are specified, some or all of the matrices' components must be initialized; see the flags description
+
+    :param distCoeffs1: The input/output vector of distortion coefficients :math:`(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])` of 4, 5 or 8 elements. The output vector length depends on the flags.
+
+    :param cameraMatrix2: The input/output second camera matrix, as ``cameraMatrix1`` .
+
+    :param distCoeffs2: The input/output lens distortion coefficients for the second camera, as ``distCoeffs1`` .
+
+    :param imageSize: Size of the image, used only to initialize the intrinsic camera matrices.
+
+    :param R: The output rotation matrix between the 1st and the 2nd cameras' coordinate systems.
+
+    :param T: The output translation vector between the cameras' coordinate systems.
+
+    :param E: The output essential matrix.
+
+    :param F: The output fundamental matrix.
+
+    :param term_crit: The termination criteria for the iterative optimization algorithm.
+
+    :param flags: Different flags; may be 0 or a combination of the following values:
+
+        * **CV_CALIB_FIX_INTRINSIC** If it is set, ``cameraMatrix?`` , as well as ``distCoeffs?`` are fixed, so that only ``R, T, E`` and ``F`` are estimated.
+
+        * **CV_CALIB_USE_INTRINSIC_GUESS** The flag allows the function to optimize some or all of the intrinsic parameters, depending on the other flags, but the initial values are provided by the user.
+
+        * **CV_CALIB_FIX_PRINCIPAL_POINT** The principal points are fixed during the optimization.
+
+        * **CV_CALIB_FIX_FOCAL_LENGTH** :math:`f^{(j)}_x` and :math:`f^{(j)}_y` are fixed.
+
+        * **CV_CALIB_FIX_ASPECT_RATIO** :math:`f^{(j)}_y` is optimized, but the ratio :math:`f^{(j)}_x/f^{(j)}_y` is fixed.
+
+        * **CV_CALIB_SAME_FOCAL_LENGTH** Enforces :math:`f^{(0)}_x=f^{(1)}_x` and :math:`f^{(0)}_y=f^{(1)}_y`
+
+        * **CV_CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients for each camera are set to zeros and fixed there.
+
+        * **CV_CALIB_FIX_K1,...,CV_CALIB_FIX_K6** Do not change the corresponding radial distortion coefficient during the optimization. If ``CV_CALIB_USE_INTRINSIC_GUESS`` is set, the coefficient from the supplied ``distCoeffs`` matrix is used, otherwise it is set to 0.
+
+        * **CV_CALIB_RATIONAL_MODEL** Enable coefficients k4, k5 and k6. To provide backward compatibility, this extra flag should be explicitly specified to make the calibration function use the rational model and return 8 coefficients. If the flag is not set, the function will compute and return only 5 distortion coefficients.
+
+The function estimates the transformation between the two cameras making a stereo pair. If we have a stereo camera, where the relative position and orientation of the two cameras is fixed, and if we computed poses of an object relative to the first camera and to the second camera, (R1, T1) and (R2, T2), respectively (that can be done with :ref:`FindExtrinsicCameraParams2` ), then obviously those poses relate to each other, i.e. given ( :math:`R_1` , :math:`T_1` ) it should be possible to compute ( :math:`R_2` , :math:`T_2` ) - we only need to know the position and orientation of the 2nd camera relative to the 1st camera. That is what the described function does. It computes ( :math:`R` , :math:`T` ) such that:
+
+.. math::
+
+    R_2=R*R_1
+    T_2=R*T_1 + T,
+
+Optionally, it computes the essential matrix E:
+
+.. math::
+
+    E= \vecthreethree{0}{-T_2}{T_1}{T_2}{0}{-T_0}{-T_1}{T_0}{0} *R
+
+where :math:`T_i` are the components of the translation vector :math:`T` : :math:`T=[T_0, T_1, T_2]^T` . And the function can also compute the fundamental matrix F:
+
+.. math::
+
+    F = cameraMatrix2^{-T} E cameraMatrix1^{-1}
+
+Besides the stereo-related information, the function can also perform full calibration of each of the two cameras. However, because of the high dimensionality of the parameter space and noise in the input data, the function can diverge from the correct solution. Thus, if the intrinsic parameters can be estimated with high accuracy for each of the cameras individually (e.g. using :ref:`CalibrateCamera2` ), it is recommended to do so and then pass the ``CV_CALIB_FIX_INTRINSIC`` flag to the function along with the computed intrinsic parameters. Otherwise, if all the parameters are estimated at once, it makes sense to restrict some parameters, e.g. pass the ``CV_CALIB_SAME_FOCAL_LENGTH`` and ``CV_CALIB_ZERO_TANGENT_DIST`` flags, which are usually reasonable assumptions.
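+
+To make the calling convention concrete, here is a hedged minimal sketch (the point containers and intrinsics are placeholders assumed to be prepared beforehand, e.g. by per-camera calibration):
+
+::
+
+    vector<vector<Point3f> > objectPoints;               // pattern points, one vector per view
+    vector<vector<Point2f> > imagePoints1, imagePoints2; // projections in each camera
+    Mat R, T, E, F;
+    double rms = stereoCalibrate(objectPoints, imagePoints1, imagePoints2,
+                                 cameraMatrix1, distCoeffs1,
+                                 cameraMatrix2, distCoeffs2, imageSize,
+                                 R, T, E, F,
+                                 TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6),
+                                 CV_CALIB_FIX_INTRINSIC);
+    // rms is the final re-projection error returned by the function
+
+..
+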
+
+Similarly to :ref:`CalibrateCamera2` , the function minimizes the total re-projection error for all the points in all the available views from both cameras. The function returns the final value of the re-projection error.
+
+.. index:: stereoRectify
+
+cv::stereoRectify
+-----------------
+
+.. cfunction:: void stereoRectify( const Mat\& cameraMatrix1, const Mat\& distCoeffs1, const Mat\& cameraMatrix2, const Mat\& distCoeffs2, Size imageSize, const Mat\& R, const Mat\& T, Mat\& R1, Mat\& R2, Mat\& P1, Mat\& P2, Mat\& Q, int flags=CALIB_ZERO_DISPARITY )
+
+.. cfunction:: void stereoRectify( const Mat\& cameraMatrix1, const Mat\& distCoeffs1, const Mat\& cameraMatrix2, const Mat\& distCoeffs2, Size imageSize, const Mat\& R, const Mat\& T, Mat\& R1, Mat\& R2, Mat\& P1, Mat\& P2, Mat\& Q, double alpha, Size newImageSize=Size(), Rect* roi1=0, Rect* roi2=0, int flags=CALIB_ZERO_DISPARITY )
+
+    Computes rectification transforms for each head of a calibrated stereo camera.
+
+    :param cameraMatrix1, cameraMatrix2: The camera matrices :math:`\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}` .
+
+    :param distCoeffs1, distCoeffs2: The input vectors of distortion coefficients :math:`(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])` of 4, 5 or 8 elements each. If the vectors are NULL/empty, zero distortion coefficients are assumed.
+
+    :param imageSize: Size of the image used for stereo calibration.
+
+    :param R: The rotation matrix between the 1st and the 2nd cameras' coordinate systems.
+
+    :param T: The translation vector between the cameras' coordinate systems.
+
+    :param R1, R2: The output :math:`3 \times 3` rectification transforms (rotation matrices) for the first and the second cameras, respectively.
+
+    :param P1, P2: The output :math:`3 \times 4` projection matrices in the new (rectified) coordinate systems.
+
+    :param Q: The output :math:`4 \times 4` disparity-to-depth mapping matrix, see :func:`reprojectImageTo3D` .
+
+    :param flags: The operation flags; may be 0 or ``CV_CALIB_ZERO_DISPARITY`` . If the flag is set, the function makes the principal points of each camera have the same pixel coordinates in the rectified views. And if the flag is not set, the function may still shift the images in the horizontal or vertical direction (depending on the orientation of epipolar lines) in order to maximize the useful image area.
+
+    :param alpha: The free scaling parameter. If it is -1 or absent, the function performs some default scaling. Otherwise the parameter should be between 0 and 1. ``alpha=0`` means that the rectified images will be zoomed and shifted so that only valid pixels are visible (i.e. there will be no black areas after rectification). ``alpha=1`` means that the rectified image will be decimated and shifted so that all the pixels from the original images from the cameras are retained in the rectified images, i.e. no source image pixels are lost. Obviously, any intermediate value yields some intermediate result between those two extreme cases.
+
+    :param newImageSize: The new image resolution after rectification. The same size should be passed to :ref:`InitUndistortRectifyMap` , see the ``stereo_calib.cpp`` sample in the OpenCV samples directory. By default, i.e. when (0,0) is passed, it is set to the original ``imageSize`` . Setting it to a larger value can help you to preserve details in the original image, especially when there is big radial distortion.
+
+    :param roi1, roi2: The optional output rectangles inside the rectified images where all the pixels are valid. If ``alpha=0`` , the ROIs will cover the whole images; otherwise they will likely be smaller, see the picture below
+
+The function computes the rotation matrices for each camera that (virtually) make both camera image planes the same plane. Consequently, that makes all the epipolar lines parallel and thus simplifies the dense stereo correspondence problem. The function takes as input the matrices computed by :func:`stereoCalibrate` and outputs 2 rotation matrices and also 2 projection matrices in the new coordinates. The function distinguishes between the following two cases:
+
+#.
+    Horizontal stereo, when the 1st and the 2nd camera views are shifted relative to each other mainly along the x axis (with a possible small vertical shift). Then in the rectified images the corresponding epipolar lines in the left and right cameras will be horizontal and have the same y-coordinate. P1 and P2 will look like:
+
+    .. math::
+
+        \texttt{P1} = \begin{bmatrix} f & 0 & cx_1 & 0 \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}
+
+    .. math::
+
+        \texttt{P2} = \begin{bmatrix} f & 0 & cx_2 & T_x*f \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix} ,
+
+    where :math:`T_x` is the horizontal shift between the cameras and :math:`cx_1=cx_2` if ``CV_CALIB_ZERO_DISPARITY`` is set.
+
+#.
+    Vertical stereo, when the 1st and the 2nd camera views are shifted relative to each other mainly in the vertical direction (and probably a bit in the horizontal direction too). Then the epipolar lines in the rectified images will be vertical and have the same x-coordinate. P1 and P2 will look like:
+
+    .. math::
+
+        \texttt{P1} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_1 & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}
+
+    .. math::
+
+        \texttt{P2} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_2 & T_y*f \\ 0 & 0 & 1 & 0 \end{bmatrix} ,
+
+    where :math:`T_y` is the vertical shift between the cameras and :math:`cy_1=cy_2` if ``CALIB_ZERO_DISPARITY`` is set.
+
+As you can see, the first 3 columns of ``P1`` and ``P2`` will effectively be the new "rectified" camera matrices. The matrices, together with ``R1`` and ``R2`` , can then be passed to :ref:`InitUndistortRectifyMap` to initialize the rectification map for each camera.
+
+Below is a screenshot from the ``stereo_calib.cpp`` sample. Some red horizontal lines, as you can see, pass through the corresponding image regions, i.e. the images are well rectified (which is what most stereo correspondence algorithms rely on). The green rectangles are ``roi1`` and ``roi2`` - indeed, their interiors contain only valid pixels.
+
+.. image:: ../../pics/stereo_undistort.jpg
+
+.. index:: stereoRectifyUncalibrated
+
+cv::stereoRectifyUncalibrated
+-----------------------------
+
+.. cfunction:: bool stereoRectifyUncalibrated( const Mat\& points1, const Mat\& points2, const Mat\& F, Size imgSize, Mat\& H1, Mat\& H2, double threshold=5 )
+
+    Computes the rectification transform for an uncalibrated stereo camera.
+
+    :param points1, points2: The 2 arrays of corresponding 2D points. The same formats as in :ref:`FindFundamentalMat` are supported
+
+    :param F: The input fundamental matrix. It can be computed from the same set of point pairs using :ref:`FindFundamentalMat` .
+
+    :param imgSize: Size of the image.
+
+    :param H1, H2: The output rectification homography matrices for the first and the second images.
+
+    :param threshold: The optional threshold used to filter out the outliers. If the parameter is greater than zero, then all the point pairs that do not comply with the epipolar geometry well enough (that is, the points for which :math:`|\texttt{points2[i]}^T*\texttt{F}*\texttt{points1[i]}|>\texttt{threshold}` ) are rejected prior to computing the homographies. Otherwise all the points are considered inliers.
+
+The function computes the rectification transformations without knowing the intrinsic parameters of the cameras and their relative position in space, hence the suffix "Uncalibrated". Another related difference from :ref:`StereoRectify` is that the function outputs not the rectification transformations in the object (3D) space, but the planar perspective transformations, encoded by the homography matrices ``H1`` and ``H2`` . The function implements the algorithm of Hartley99.
+
+Note that while the algorithm does not need to know the intrinsic parameters of the cameras, it heavily depends on the epipolar geometry. Therefore, if the camera lenses have significant distortion, it is better to correct it before computing the fundamental matrix and calling this function. For example, distortion coefficients can be estimated for each head of the stereo camera separately by using :ref:`CalibrateCamera2` , and then the images can be corrected using :ref:`Undistort2` , or just the point coordinates can be corrected with :ref:`UndistortPoints` .
+
+.. index:: undistort
+
+cv::undistort
+-------------
+
+.. cfunction:: void undistort( const Mat\& src, Mat\& dst, const Mat\& cameraMatrix, const Mat\& distCoeffs, const Mat\& newCameraMatrix=Mat() )
+
+    Transforms an image to compensate for lens distortion.
+
+    :param src: The input (distorted) image
+
+    :param dst: The output (corrected) image; will have the same size and the same type as ``src``
+
+    :param cameraMatrix: The input camera matrix :math:`A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}`
+
+    :param distCoeffs: The input vector of distortion coefficients :math:`(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])` of 4, 5 or 8 elements. If the vector is NULL/empty, zero distortion coefficients are assumed.
+
+    :param newCameraMatrix: Camera matrix of the distorted image. By default it is the same as ``cameraMatrix`` , but you may additionally scale and shift the result by using a different matrix
+
+The function transforms the image to compensate for radial and tangential lens distortion.
+
+The function is simply a combination of :ref:`InitUndistortRectifyMap` (with unity ``R`` ) and :ref:`Remap` (with bilinear interpolation). See the former function for details of the transformation being performed.
+
+Those pixels in the destination image for which there are no corresponding pixels in the source image are filled with zeros (black color).
+
+The particular subset of the source image that will be visible in the corrected image can be regulated by ``newCameraMatrix`` . You can use :ref:`GetOptimalNewCameraMatrix` to compute the appropriate ``newCameraMatrix`` , depending on your requirements.
+
+The camera matrix and the distortion parameters can be determined using :ref:`CalibrateCamera2` .
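+
+A minimal usage sketch (hedged: ``cameraMatrix`` and ``distCoeffs`` are assumed to come from such a calibration, and the file name is a placeholder):
+
+::
+
+    Mat distorted = imread("input.jpg"), corrected;
+    // one-call undistortion; for repeated use on a video stream,
+    // initUndistortRectifyMap + remap is more efficient
+    undistort(distorted, corrected, cameraMatrix, distCoeffs);
+
+..
+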
If the resolution of images is different from the used at the calibration stage, +:math:`f_x, f_y, c_x` +and +:math:`c_y` +need to be scaled accordingly, while the distortion coefficients remain the same. + + + +.. index:: undistortPoints + + +cv::undistortPoints +------------------- + +`id=0.38442231932 Comments from the Wiki `__ + + + + +.. cfunction:: void undistortPoints( const Mat\& src, vector\& dst, const Mat\& cameraMatrix, const Mat\& distCoeffs, const Mat\& R=Mat(), const Mat\& P=Mat()) + + + +.. cfunction:: void undistortPoints( const Mat\& src, Mat\& dst, const Mat\& cameraMatrix, const Mat\& distCoeffs, const Mat\& R=Mat(), const Mat\& P=Mat()) + + Computes the ideal point coordinates from the observed point coordinates. + + + + + + + :param src: The observed point coordinates, 1xN or Nx1 2-channel (CV _ 32FC2 or CV _ 64FC2). + + + :param dst: The output ideal point coordinates, after undistortion and reverse perspective transformation . + + + :param cameraMatrix: The camera matrix :math:`\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}` + + + :param distCoeffs: The input vector of distortion coefficients :math:`(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])` of 4, 5 or 8 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. + + + :param R: The rectification transformation in object space (3x3 matrix). ``R1`` or ``R2`` , computed by :func:`StereoRectify` can be passed here. If the matrix is empty, the identity transformation is used + + + :param P: The new camera matrix (3x3) or the new projection matrix (3x4). ``P1`` or ``P2`` , computed by :func:`StereoRectify` can be passed here. If the matrix is empty, the identity new camera matrix is used + + + +The function is similar to +:ref:`Undistort2` +and +:ref:`InitUndistortRectifyMap` +, but it operates on a sparse set of points instead of a raster image. Also the function does some kind of reverse transformation to +:ref:`ProjectPoints2` +(in the case of 3D object it will not reconstruct its 3D coordinates, of course; but for a planar object it will, up to a translation vector, if the proper +``R`` +is specified). + + + + +:: + + + + // (u,v) is the input point, (u', v') is the output point + // camera_matrix=[fx 0 cx; 0 fy cy; 0 0 1] + // P=[fx' 0 cx' tx; 0 fy' cy' ty; 0 0 1 tz] + x" = (u - cx)/fx + y" = (v - cy)/fy + (x',y') = undistort(x",y",dist_coeffs) + [X,Y,W]T = R*[x' y' 1]T + x = X/W, y = Y/W + u' = x*fx' + cx' + v' = y*fy' + cy', + + +.. + +where undistort() is approximate iterative algorithm that estimates the normalized original point coordinates out of the normalized distorted point coordinates ("normalized" means that the coordinates do not depend on the camera matrix). + +The function can be used both for a stereo camera head or for monocular camera (when R is +empty +). diff --git a/modules/core/doc/basic_structures.rst b/modules/core/doc/basic_structures.rst new file mode 100644 index 000000000..ba50ee88c --- /dev/null +++ b/modules/core/doc/basic_structures.rst @@ -0,0 +1,4533 @@ +Basic Structures +================ + +.. highlight:: cpp + + + +DataType +-------- + + +Template "traits" class for other OpenCV primitive data types + + + + +:: + + + + template class DataType + { + // value_type is always a synonym for _Tp. + typedef _Tp value_type; + + // intermediate type used for operations on _Tp. + // it is int for uchar, signed char, unsigned short, signed short and int, + // float for float, double for double, ... 
+ typedef <...> work_type; + // in the case of multi-channel data it is the data type of each channel + typedef <...> channel_type; + enum + { + // CV_8U ... CV_64F + depth = DataDepth::value, + // 1 ... + channels = <...>, + // '1u', '4i', '3f', '2d' etc. + fmt=<...>, + // CV_8UC3, CV_32FC2 ... + type = CV_MAKETYPE(depth, channels) + }; + }; + + +.. + +The template class +``DataType`` +is descriptive class for OpenCV primitive data types and other types that comply with the following definition. A primitive OpenCV data type is one of +``unsigned char, bool, signed char, unsigned short, signed short, int, float, double`` +or a tuple of values of one of these types, where all the values in the tuple have the same type. If you are familiar with OpenCV +:ref:`CvMat` +'s type notation, CV +_ +8U ... CV +_ +32FC3, CV +_ +64FC2 etc., then a primitive type can be defined as a type for which you can give a unique identifier in a form +``CV_{U|S|F}C`` +. A universal OpenCV structure able to store a single instance of such primitive data type is +:ref:`Vec` +. Multiple instances of such a type can be stored to a +``std::vector`` +, +``Mat`` +, +``Mat_`` +, +``SparseMat`` +, +``SparseMat_`` +or any other container that is able to store +:ref:`Vec` +instances. + +The class +``DataType`` +is basically used to provide some description of such primitive data types without adding any fields or methods to the corresponding classes (and it is actually impossible to add anything to primitive C/C++ data types). This technique is known in C++ as class traits. It's not +``DataType`` +itself that is used, but its specialized versions, such as: + + + + +:: + + + + template<> class DataType + { + typedef uchar value_type; + typedef int work_type; + typedef uchar channel_type; + enum { channel_type = CV_8U, channels = 1, fmt='u', type = CV_8U }; + }; + ... + template DataType > + { + typedef std::complex<_Tp> value_type; + typedef std::complex<_Tp> work_type; + typedef _Tp channel_type; + // DataDepth is another helper trait class + enum { depth = DataDepth<_Tp>::value, channels=2, + fmt=(channels-1)*256+DataDepth<_Tp>::fmt, + type=CV_MAKETYPE(depth, channels) }; + }; + ... + + +.. + +The main purpose of the classes is to convert compile-time type information to OpenCV-compatible data type identifier, for example: + + + + +:: + + + + // allocates 30x40 floating-point matrix + Mat A(30, 40, DataType::type); + + Mat B = Mat_ >(3, 3); + // the statement below will print 6, 2 /* i.e. depth == CV_64F, channels == 2 */ + cout << B.depth() << ", " << B.channels() << endl; + + +.. + +that is, such traits are used to tell OpenCV which data type you are working with, even if such a type is not native to OpenCV (the matrix +``B`` +intialization above compiles because OpenCV defines the proper specialized template class +``DataType >`` +). Also, this mechanism is useful (and used in OpenCV this way) for generic algorithms implementations. 
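+
+As a hedged illustration of that last point (the helper name here is this editor's, not from the library), a generic function can use the trait to pick the right array type for any supported element type:
+
+::
+
+    template<typename _Tp> Mat zerosOf(int rows, int cols)
+    {
+        // DataType<_Tp>::type maps the compile-time type to a CV_ type id
+        return Mat::zeros(rows, cols, DataType<_Tp>::type);
+    }
+
+    // usage:
+    // Mat a = zerosOf<float>(3, 4);                  // CV_32FC1
+    // Mat b = zerosOf<std::complex<double> >(3, 4);  // CV_64FC2
+
+..
+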
+
+Point\_
+-------
+
+Template class for 2D points
+
+::
+
+    template<typename _Tp> class Point_
+    {
+    public:
+        typedef _Tp value_type;
+
+        Point_();
+        Point_(_Tp _x, _Tp _y);
+        Point_(const Point_& pt);
+        Point_(const CvPoint& pt);
+        Point_(const CvPoint2D32f& pt);
+        Point_(const Size_<_Tp>& sz);
+        Point_(const Vec<_Tp, 2>& v);
+        Point_& operator = (const Point_& pt);
+        template<typename _Tp2> operator Point_<_Tp2>() const;
+        operator CvPoint() const;
+        operator CvPoint2D32f() const;
+        operator Vec<_Tp, 2>() const;
+
+        // computes dot-product (this->x*pt.x + this->y*pt.y)
+        _Tp dot(const Point_& pt) const;
+        // computes dot-product using double-precision arithmetics
+        double ddot(const Point_& pt) const;
+        // returns true if the point is inside the rectangle "r".
+        bool inside(const Rect_<_Tp>& r) const;
+
+        _Tp x, y;
+    };
+
+..
+
+The class represents a 2D point, specified by its coordinates :math:`x` and :math:`y` . An instance of the class is interchangeable with the C structures ``CvPoint`` and ``CvPoint2D32f`` . There is also a cast operator to convert point coordinates to the specified type. The conversion from floating-point coordinates to integer coordinates is done by rounding; in the general case the conversion is applied to each of the coordinates. Besides the class members listed in the declaration above, the following operations on points are implemented:
+
+::
+
+    pt1 = pt2 + pt3;
+    pt1 = pt2 - pt3;
+    pt1 = pt2 * a;
+    pt1 = a * pt2;
+    pt1 += pt2;
+    pt1 -= pt2;
+    pt1 *= a;
+    double value = norm(pt); // L2 norm
+    pt1 == pt2;
+    pt1 != pt2;
+
+..
+
+For user convenience, the following type aliases are defined:
+
+::
+
+    typedef Point_<int> Point2i;
+    typedef Point2i Point;
+    typedef Point_<float> Point2f;
+    typedef Point_<double> Point2d;
+
+..
+
+Here is a short example:
+
+::
+
+    Point2f a(0.3f, 0.f), b(0.f, 0.4f);
+    Point pt = (a + b)*10.f;
+    cout << pt.x << ", " << pt.y << endl;
+
+..
+
+Point3\_
+--------
+
+Template class for 3D points
+
+::
+
+    template<typename _Tp> class Point3_
+    {
+    public:
+        typedef _Tp value_type;
+
+        Point3_();
+        Point3_(_Tp _x, _Tp _y, _Tp _z);
+        Point3_(const Point3_& pt);
+        explicit Point3_(const Point_<_Tp>& pt);
+        Point3_(const CvPoint3D32f& pt);
+        Point3_(const Vec<_Tp, 3>& v);
+        Point3_& operator = (const Point3_& pt);
+        template<typename _Tp2> operator Point3_<_Tp2>() const;
+        operator CvPoint3D32f() const;
+        operator Vec<_Tp, 3>() const;
+
+        _Tp dot(const Point3_& pt) const;
+        double ddot(const Point3_& pt) const;
+
+        _Tp x, y, z;
+    };
+
+..
+
+The class represents a 3D point, specified by its coordinates :math:`x` , :math:`y` and :math:`z` . An instance of the class is interchangeable with the C structure ``CvPoint3D32f`` . Similarly to ``Point_`` , the 3D points' coordinates can be converted to another type, and the vector arithmetic and comparison operations are also supported.
+
+The following type aliases are available:
+
+::
+
+    typedef Point3_<int> Point3i;
+    typedef Point3_<float> Point3f;
+    typedef Point3_<double> Point3d;
+
+..
+
+Size\_
+------
+
+Template class for specifying an image or rectangle size.
+ + + + +:: + + + + template class Size_ + { + public: + typedef _Tp value_type; + + Size_(); + Size_(_Tp _width, _Tp _height); + Size_(const Size_& sz); + Size_(const CvSize& sz); + Size_(const CvSize2D32f& sz); + Size_(const Point_<_Tp>& pt); + Size_& operator = (const Size_& sz); + _Tp area() const; + + operator Size_() const; + operator Size_() const; + operator Size_() const; + operator CvSize() const; + operator CvSize2D32f() const; + + _Tp width, height; + }; + + +.. + +The class +``Size_`` +is similar to +``Point_`` +, except that the two members are called +``width`` +and +``height`` +instead of +``x`` +and +``y`` +. The structure can be converted to and from the old OpenCV structures +:ref:`CvSize` +and +:ref:`CvSize2D32f` +. The same set of arithmetic and comparison operations as for +``Point_`` +is available. + +OpenCV defines the following type aliases: + + + + +:: + + + + typedef Size_ Size2i; + typedef Size2i Size; + typedef Size_ Size2f; + + +.. + + +Rect\_ +------ + + +Template class for 2D rectangles + + + + +:: + + + + template class Rect_ + { + public: + typedef _Tp value_type; + + Rect_(); + Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height); + Rect_(const Rect_& r); + Rect_(const CvRect& r); + // (x, y) <- org, (width, height) <- sz + Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz); + // (x, y) <- min(pt1, pt2), (width, height) <- max(pt1, pt2) - (x, y) + Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2); + Rect_& operator = ( const Rect_& r ); + // returns Point_<_Tp>(x, y) + Point_<_Tp> tl() const; + // returns Point_<_Tp>(x+width, y+height) + Point_<_Tp> br() const; + + // returns Size_<_Tp>(width, height) + Size_<_Tp> size() const; + // returns width*height + _Tp area() const; + + operator Rect_() const; + operator Rect_() const; + operator Rect_() const; + operator CvRect() const; + + // x <= pt.x && pt.x < x + width && + // y <= pt.y && pt.y < y + height ? true : false + bool contains(const Point_<_Tp>& pt) const; + + _Tp x, y, width, height; + }; + + +.. + +The rectangle is described by the coordinates of the top-left corner (which is the default interpretation of +``Rect_::x`` +and +``Rect_::y`` +in OpenCV; though, in your algorithms you may count +``x`` +and +``y`` +from the bottom-left corner), the rectangle width and height. + +Another assumption OpenCV usually makes is that the top and left boundary of the rectangle are inclusive, while the right and bottom boundaries are not, for example, the method +``Rect_::contains`` +returns true if + + +.. math:: + + x \leq pt.x < x+width, + y \leq pt.y < y+height + + +And virtually every loop over an image +:ref:`ROI` +in OpenCV (where ROI is specified by +``Rect_`` +) is implemented as: + + + +:: + + + + for(int y = roi.y; y < roi.y + rect.height; y++) + for(int x = roi.x; x < roi.x + rect.width; x++) + { + // ... + } + + +.. 
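+
+A tiny sketch of that half-open convention in action (illustrative only):
+
+::
+
+    Rect r(0, 0, 10, 10);
+    r.contains(Point(0, 0));   // true: the top-left boundary is inclusive
+    r.contains(Point(9, 9));   // true: the last interior pixel
+    r.contains(Point(10, 10)); // false: the bottom-right boundary is exclusive
+
+..
+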
+ +In addition to the class members, the following operations on rectangles are implemented: + + + + +* + :math:`\texttt{rect} = \texttt{rect} \pm \texttt{point}` + (shifting rectangle by a certain offset) + + + +* + :math:`\texttt{rect} = \texttt{rect} \pm \texttt{size}` + (expanding or shrinking rectangle by a certain amount) + + + +* + ``rect += point, rect -= point, rect += size, rect -= size`` + (augmenting operations) + + + +* + ``rect = rect1 & rect2`` + (rectangle intersection) + + + +* + ``rect = rect1 | rect2`` + (minimum area rectangle containing + ``rect2`` + and + ``rect3`` + ) + + + +* + ``rect &= rect1, rect |= rect1`` + (and the corresponding augmenting operations) + + + +* + ``rect == rect1, rect != rect1`` + (rectangle comparison) + + +Example. Here is how the partial ordering on rectangles can be established (rect1 +:math:`\subseteq` +rect2): + + + +:: + + + + template inline bool + operator <= (const Rect_<_Tp>& r1, const Rect_<_Tp>& r2) + { + return (r1 & r2) == r1; + } + + +.. + +For user convenience, the following type alias is available: + + + +:: + + + + typedef Rect_ Rect; + + +.. + + +RotatedRect +----------- + + +Possibly rotated rectangle + + + + +:: + + + + class RotatedRect + { + public: + // constructors + RotatedRect(); + RotatedRect(const Point2f& _center, const Size2f& _size, float _angle); + RotatedRect(const CvBox2D& box); + + // returns minimal up-right rectangle that contains the rotated rectangle + Rect boundingRect() const; + // backward conversion to CvBox2D + operator CvBox2D() const; + + // mass center of the rectangle + Point2f center; + // size + Size2f size; + // rotation angle in degrees + float angle; + }; + + +.. + +The class +``RotatedRect`` +replaces the old +:ref:`CvBox2D` +and fully compatible with it. + + +TermCriteria +------------ + + +Termination criteria for iterative algorithms + + + + +:: + + + + class TermCriteria + { + public: + enum { COUNT=1, MAX_ITER=COUNT, EPS=2 }; + + // constructors + TermCriteria(); + // type can be MAX_ITER, EPS or MAX_ITER+EPS. + // type = MAX_ITER means that only the number of iterations does matter; + // type = EPS means that only the required precision (epsilon) does matter + // (though, most algorithms put some limit on the number of iterations anyway) + // type = MAX_ITER + EPS means that algorithm stops when + // either the specified number of iterations is made, + // or when the specified accuracy is achieved - whatever happens first. + TermCriteria(int _type, int _maxCount, double _epsilon); + TermCriteria(const CvTermCriteria& criteria); + operator CvTermCriteria() const; + + int type; + int maxCount; + double epsilon; + }; + + +.. + +The class +``TermCriteria`` +replaces the old +:ref:`CvTermCriteria` +and fully compatible with it. + + + +Matx +---- + + +Template class for small matrices + + + + +:: + + + + template class Matx + { + public: + typedef T value_type; + enum { depth = DataDepth::value, channels = m*n, + type = CV_MAKETYPE(depth, channels) }; + + // various methods + ... + + Tp val[m*n]; + }; + + typedef Matx Matx12f; + typedef Matx Matx12d; + ... + typedef Matx Matx16f; + typedef Matx Matx16d; + + typedef Matx Matx21f; + typedef Matx Matx21d; + ... + typedef Matx Matx61f; + typedef Matx Matx61d; + + typedef Matx Matx22f; + typedef Matx Matx22d; + ... + typedef Matx Matx66f; + typedef Matx Matx66d; + + +.. + +The class represents small matrices, which type and size are known at compile time. If you need more flexible type, use +:ref:`Mat` +. 
The elements of a matrix +``M`` +are accessible using +``M(i,j)`` +notation, and most of the common matrix operations (see also +:ref:`MatrixExpressions` +) are available. If you need to do some operation on +``Matx`` +that is not implemented, it is easy to convert the matrix to +:ref:`Mat` +and backwards. + + + + +:: + + + + Matx33f m(1, 2, 3, + 4, 5, 6, + 7, 8, 9); + cout << sum(Mat(m*m.t())) << endl; + + +.. + + +Vec +--- + + +Template class for short numerical vectors + + + + +:: + + + + template class Vec : public Matx + { + public: + typedef T value_type; + enum { depth = DataDepth::value, channels = cn, + type = CV_MAKETYPE(depth, channels) }; + + // various methods ... + }; + + typedef Vec Vec2b; + typedef Vec Vec3b; + typedef Vec Vec4b; + + typedef Vec Vec2s; + typedef Vec Vec3s; + typedef Vec Vec4s; + + typedef Vec Vec2i; + typedef Vec Vec3i; + typedef Vec Vec4i; + + typedef Vec Vec2f; + typedef Vec Vec3f; + typedef Vec Vec4f; + typedef Vec Vec6f; + + typedef Vec Vec2d; + typedef Vec Vec3d; + typedef Vec Vec4d; + typedef Vec Vec6d; + + +.. + +``Vec`` +is a partial case of +``Matx`` +. It is possible to convert +``Vec`` +to/from +``Point_`` +, +``Vec`` +to/from +``Point3_`` +, and +``Vec`` +to +:ref:`CvScalar` +or +:ref:`Scalar` +. The elements of +``Vec`` +are accessed using +``operator[]`` +. All the expected vector operations are implemented too: + + + + + +* + :math:`\texttt{v1} = \texttt{v2} \pm \texttt{v3}` + , + :math:`\texttt{v1} = \texttt{v2} * \alpha` + , + :math:`\texttt{v1} = \alpha * \texttt{v2}` + (plus the corresponding augmenting operations; note that these operations apply + to the each computed vector component) + + + +* + ``v1 == v2, v1 != v2`` + + +* + ``norm(v1)`` + ( + :math:`L_2` + -norm) + + +The class +``Vec`` +is commonly used to describe pixel types of multi-channel arrays, see +``Mat_`` +description. + + +Scalar\_ +-------- + + +4-element vector + + + + +:: + + + + template class Scalar_ : public Vec<_Tp, 4> + { + public: + Scalar_(); + Scalar_(_Tp v0, _Tp v1, _Tp v2=0, _Tp v3=0); + Scalar_(const CvScalar& s); + Scalar_(_Tp v0); + static Scalar_<_Tp> all(_Tp v0); + operator CvScalar() const; + + template operator Scalar_() const; + + Scalar_<_Tp> mul(const Scalar_<_Tp>& t, double scale=1 ) const; + template void convertTo(T2* buf, int channels, int unroll_to=0) const; + }; + + typedef Scalar_ Scalar; + + +.. + +The template class +``Scalar_`` +and it's double-precision instantiation +``Scalar`` +represent 4-element vector. Being derived from +``Vec<_Tp, 4>`` +, they can be used as typical 4-element vectors, but in addition they can be converted to/from +``CvScalar`` +. The type +``Scalar`` +is widely used in OpenCV for passing pixel values and it is a drop-in replacement for +:ref:`CvScalar` +that was used for the same purpose in the earlier versions of OpenCV. + + +Range +----- + + +Specifies a continuous subsequence (a.k.a. slice) of a sequence. + + + + +:: + + + + class Range + { + public: + Range(); + Range(int _start, int _end); + Range(const CvSlice& slice); + int size() const; + bool empty() const; + static Range all(); + operator CvSlice() const; + + int start, end; + }; + + +.. + +The class is used to specify a row or column span in a matrix ( +:ref:`Mat` +), and for many other purposes. +``Range(a,b)`` +is basically the same as +``a:b`` +in Matlab or +``a..b`` +in Python. As in Python, +``start`` +is inclusive left boundary of the range, and +``end`` +is exclusive right boundary of the range. 
Such a half-opened interval is usually denoted as +:math:`[start,end)` +. + +The static method +``Range::all()`` +returns some special variable that means "the whole sequence" or "the whole range", just like " +``:`` +" in Matlab or " +``...`` +" in Python. All the methods and functions in OpenCV that take +``Range`` +support this special +``Range::all()`` +value, but of course, in the case of your own custom processing you will probably have to check and handle it explicitly: + + + +:: + + + + void my_function(..., const Range& r, ....) + { + if(r == Range::all()) { + // process all the data + } + else { + // process [r.start, r.end) + } + } + + +.. + + +Ptr +--- + + +A template class for smart reference-counting pointers + + + + +:: + + + + template class Ptr + { + public: + // default constructor + Ptr(); + // constructor that wraps the object pointer + Ptr(_Tp* _obj); + // destructor: calls release() + ~Ptr(); + // copy constructor; increments ptr's reference counter + Ptr(const Ptr& ptr); + // assignment operator; decrements own reference counter + // (with release()) and increments ptr's reference counter + Ptr& operator = (const Ptr& ptr); + // increments reference counter + void addref(); + // decrements reference counter; when it becomes 0, + // delete_obj() is called + void release(); + // user-specified custom object deletion operation. + // by default, "delete obj;" is called + void delete_obj(); + // returns true if obj == 0; + bool empty() const; + + // provide access to the object fields and methods + _Tp* operator -> (); + const _Tp* operator -> () const; + + // return the underlying object pointer; + // thanks to the methods, the Ptr<_Tp> can be + // used instead of _Tp* + operator _Tp* (); + operator const _Tp*() const; + protected: + // the encapsulated object pointer + _Tp* obj; + // the associated reference counter + int* refcount; + }; + + +.. + +The class +``Ptr<_Tp>`` +is a template class that wraps pointers of the corresponding type. It is similar to +``shared_ptr`` +that is a part of Boost library ( +http://www.boost.org/doc/libs/1_40_0/libs/smart_ptr/shared_ptr.htm +) and also a part of the +`C++0x `_ +standard. + +By using this class you can get the following capabilities: + + + + + +* + default constructor, copy constructor and assignment operator for an arbitrary C++ class or a C structure. For some objects, like files, windows, mutexes, sockets etc, copy constructor or assignment operator are difficult to define. For some other objects, like complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally, some of complex OpenCV and your own data structures may have been written in C. However, copy constructors and default constructors can simplify programming a lot; besides, they are often required (e.g. by STL containers). By wrapping a pointer to such a complex object + ``TObj`` + to + ``Ptr`` + you will automatically get all of the necessary constructors and the assignment operator. + + + +* + all the above-mentioned operations running very fast, regardless of the data size, i.e. as "O(1)" operations. Indeed, while some structures, like + ``std::vector`` + provide a copy constructor and an assignment operator, the operations may take considerable time if the data structures are big. But if the structures are put into + ``Ptr<>`` + , the overhead becomes small and independent of the data size. + + + +* + automatic destruction, even for C structures. See the example below with + ``FILE*`` + . 
+
+* heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers can only store objects of the same type and the same size. The classical solution to store objects of different types in the same container is to store pointers to the base class ``base_class_t*`` instead, but then you lose the automatic memory management. Again, by using ``Ptr<base_class_t>()`` instead of the raw pointers, you can solve the problem.
+
+The class ``Ptr`` treats the wrapped object as a black box; the reference counter is allocated and managed separately. The only thing the pointer class needs to know about the object is how to deallocate it. This knowledge is encapsulated in the ``Ptr::delete_obj()`` method, which is called when the reference counter becomes 0. If the object is a C++ class instance, no additional coding is needed, because the default implementation of this method calls ``delete obj;`` . However, if the object is deallocated in a different way, a specialized method should be created. For example, if you want to wrap ``FILE`` , the ``delete_obj`` may be implemented as follows:
+
+::
+
+    template<> inline void Ptr<FILE>::delete_obj()
+    {
+        fclose(obj); // no need to clear the pointer afterwards,
+                     // it is done externally.
+    }
+    ...
+
+    // now use it:
+    Ptr<FILE> f(fopen("myfile.txt", "r"));
+    if(f.empty())
+        throw ...;
+    fprintf(f, ....);
+    ...
+    // the file will be closed automatically by the Ptr<FILE> destructor.
+
+..
+
+**Note** : The reference increment/decrement operations are implemented as atomic operations, and therefore it is normally safe to use the classes in multi-threaded applications. The same is true for :ref:`Mat` and other C++ OpenCV classes that operate on the reference counters.
+
+Mat
+---
+
+OpenCV C++ n-dimensional dense array class.
+
+::
+
+    class CV_EXPORTS Mat
+    {
+    public:
+        // ... a lot of methods ...
+        ...
+
+        /*! includes several bit-fields:
+             - the magic signature
+             - continuity flag
+             - depth
+             - number of channels
+         */
+        int flags;
+        //! the array dimensionality, >= 2
+        int dims;
+        //! the number of rows and columns or (-1, -1) when the array has more than 2 dimensions
+        int rows, cols;
+        //! pointer to the data
+        uchar* data;
+
+        //! pointer to the reference counter;
+        // when array points to user-allocated data, the pointer is NULL
+        int* refcount;
+
+        // other members
+        ...
+    };
+
+..
+
+The class ``Mat`` represents an n-dimensional dense numerical single-channel or multi-channel array. It can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel volumes, vector fields, point clouds, tensors, histograms (though, very high-dimensional histograms may be better stored in a ``SparseMat`` ). The data layout of array :math:`M` is defined by the array ``M.step[]`` , so that the address of element :math:`(i_0,...,i_{M.dims-1})` , where :math:`0 \leq i_k < M.size[k]` , is computed as:
+
+.. math::
+
+    addr(M_{i_0,...,i_{M.dims-1}}) = M.data + M.step[0]*i_0 + M.step[1]*i_1 + ... + M.step[M.dims-1]*i_{M.dims-1}
+
+In the case of a 2-dimensional array the above formula is reduced to:
+
+.. math::
+
+    addr(M_{i,j}) = M.data + M.step[0]*i + M.step[1]*j
+
+Note that ``M.step[i] >= M.step[i+1]`` (in fact, ``M.step[i] >= M.step[i+1]*M.size[i+1]`` ), that is, 2-dimensional matrices are stored row-by-row, 3-dimensional matrices are stored plane-by-plane etc. ``M.step[M.dims-1]`` is minimal and always equal to the element size ``M.elemSize()`` .
+
+That is, the data layout in ``Mat`` is fully compatible with ``CvMat`` , ``IplImage`` and ``CvMatND`` types from OpenCV 1.x, as well as with the majority of dense array types from the standard toolkits and SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps) etc., i.e. any other array that uses "steps", a.k.a. 
"strides", to compute position of a pixel. Because of such compatibility, it is possible to make a +``Mat`` +header for user-allocated data and process it in-place using OpenCV functions. + +There are many different ways to create +``Mat`` +object. Here are the some popular ones: + + + + +* + using + ``create(nrows, ncols, type)`` + method or + the similar constructor + ``Mat(nrows, ncols, type[, fillValue])`` + constructor. + A new array of the specified size and specifed type will be allocated. + + ``type`` + has the same meaning as in + :func:`cvCreateMat` + method, + e.g. + ``CV_8UC1`` + means 8-bit single-channel array, + + ``CV_32FC2`` + means 2-channel (i.e. complex) floating-point array etc: + + + + + :: + + + + // make 7x7 complex matrix filled with 1+3j. + cv::Mat M(7,7,CV_32FC2,Scalar(1,3)); + // and now turn M to 100x60 15-channel 8-bit matrix. + // The old content will be deallocated + M.create(100,60,CV_8UC(15)); + + + .. + + As noted in the introduction of this chapter, + ``create()`` + will only allocate a new array when the current array shape + or type are different from the specified. + + + +* + similarly to above, you can create a multi-dimensional array: + + + + + :: + + + + // create 100x100x100 8-bit array + int sz[] = {100, 100, 100}; + cv::Mat bigCube(3, sz, CV_8U, Scalar::all(0)); + + + .. + + note that it is pass number of dimensions =1 to the + ``Mat`` + constructor, but the created array will be 2-dimensional, with the number of columns set to 1. That's why + ``Mat::dims`` + is always >= 2 (can also be 0 when the array is empty) + + + +* + by using a copy constructor or assignment operator, where on the right side it can + be a array or expression, see below. Again, as noted in the introduction, + array assignment is O(1) operation because it only copies the header + and increases the reference counter. + ``Mat::clone()`` + method can be used to get a full + (a.k.a. deep) copy of the array when you need it. + + + +* + by constructing a header for a part of another array. It can be a single row, single column, + several rows, several columns, rectangular region in the array (called a minor in algebra) or + a diagonal. Such operations are also O(1), because the new header will reference the same data. + You can actually modify a part of the array using this feature, e.g. + + + + + :: + + + + // add 5-th row, multiplied by 3 to the 3rd row + M.row(3) = M.row(3) + M.row(5)*3; + + // now copy 7-th column to the 1-st column + // M.col(1) = M.col(7); // this will not work + Mat M1 = M.col(1); + M.col(7).copyTo(M1); + + // create new 320x240 image + cv::Mat img(Size(320,240),CV_8UC3); + // select a roi + cv::Mat roi(img, Rect(10,10,100,100)); + // fill the ROI with (0,255,0) (which is green in RGB space); + // the original 320x240 image will be modified + roi = Scalar(0,255,0); + + + .. + + Thanks to the additional + ``datastart`` + and + ``dataend`` + members, it is possible to + compute the relative sub-array position in the main + *"container"* + array using + ``locateROI()`` + : + + + + + :: + + + + Mat A = Mat::eye(10, 10, CV_32S); + // extracts A columns, 1 (inclusive) to 3 (exclusive). + Mat B = A(Range::all(), Range(1, 3)); + // extracts B rows, 5 (inclusive) to 9 (exclusive). + // that is, C ~ A(Range(5, 9), Range(1, 3)) + Mat C = B(Range(5, 9), Range::all()); + Size size; Point ofs; + C.locateROI(size, ofs); + // size will be (width=10,height=10) and the ofs will be (x=1, y=5) + + + .. 
+ + As in the case of whole matrices, if you need a deep copy, use + ``clone()`` + method + of the extracted sub-matrices. + + + +* + by making a header for user-allocated-data. It can be useful for + + + + + + #. + processing "foreign" data using OpenCV (e.g. when you implement + a DirectShow filter or a processing module for gstreamer etc.), e.g. + + + + + :: + + + + void process_video_frame(const unsigned char* pixels, + int width, int height, int step) + { + cv::Mat img(height, width, CV_8UC3, pixels, step); + cv::GaussianBlur(img, img, cv::Size(7,7), 1.5, 1.5); + } + + + .. + + + + #. + for quick initialization of small matrices and/or super-fast element access + + + + :: + + + + double m[3][3] = {{a, b, c}, {d, e, f}, {g, h, i}}; + cv::Mat M = cv::Mat(3, 3, CV_64F, m).inv(); + + + .. + + + + partial yet very common cases of this "user-allocated data" case are conversions + from + :ref:`CvMat` + and + :ref:`IplImage` + to + ``Mat`` + . For this purpose there are special constructors + taking pointers to + ``CvMat`` + or + ``IplImage`` + and the optional + flag indicating whether to copy the data or not. + + Backward conversion from + ``Mat`` + to + ``CvMat`` + or + ``IplImage`` + is provided via cast operators + + ``Mat::operator CvMat() const`` + an + ``Mat::operator IplImage()`` + . + The operators do + *not* + copy the data. + + + + + :: + + + + IplImage* img = cvLoadImage("greatwave.jpg", 1); + Mat mtx(img); // convert IplImage* -> cv::Mat + CvMat oldmat = mtx; // convert cv::Mat -> CvMat + CV_Assert(oldmat.cols == img->width && oldmat.rows == img->height && + oldmat.data.ptr == (uchar*)img->imageData && oldmat.step == img->widthStep); + + + .. + + + +* + by using MATLAB-style array initializers, + ``zeros(), ones(), eye()`` + , e.g.: + + + + + :: + + + + // create a double-precision identity martix and add it to M. + M += Mat::eye(M.rows, M.cols, CV_64F); + + + .. + + + +* + by using comma-separated initializer: + + + + :: + + + + // create 3x3 double-precision identity matrix + Mat M = (Mat_(3,3) << 1, 0, 0, 0, 1, 0, 0, 0, 1); + + + .. + + here we first call constructor of + ``Mat_`` + class (that we describe further) with the proper parameters, and then we just put + ``<<`` + operator followed by comma-separated values that can be constants, variables, expressions etc. Also, note the extra parentheses that are needed to avoid compiler errors. + + + +Once array is created, it will be automatically managed by using reference-counting mechanism (unless the array header is built on top of user-allocated data, in which case you should handle the data by yourself). +The array data will be deallocated when no one points to it; if you want to release the data pointed by a array header before the array destructor is called, use +``Mat::release()`` +. + +The next important thing to learn about the array class is element access. Earlier it was shown how to compute address of each array element. Normally, it's not needed to use the formula directly in your code. If you know the array element type (which can be retrieved using the method +``Mat::type()`` +), you can access element +:math:`M_{ij}` +of 2-dimensional array as: + + + + +:: + + + + M.at(i,j) += 1.f; + + +.. + +assuming that M is double-precision floating-point array. There are several variants of the method +``at`` +for different number of dimensions. 
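+
+For instance, here is a minimal sketch of a few ``at`` variants (the array names and sizes used here are illustrative only):
+
+::
+
+    Mat M(10, 10, CV_64F);
+    M.at<double>(4, 5) = 3.14;        // 2D access by row and column indices
+    M.at<double>(Point(5, 4)) = 3.14; // the same element, addressed as Point(x=j, y=i)
+
+    int sz[] = {3, 3, 3};
+    Mat A(3, sz, CV_32F, Scalar::all(0));
+    A.at<float>(0, 1, 2) += 1.f;      // the 3-dimensional variant
+
+..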
+ +If you need to process a whole row of a 2d array, the most efficient way is to get the pointer to the row first, and then just use plain C operator +``[]`` +: + + + + +:: + + + + // compute sum of positive matrix elements + // (assuming that M is double-precision matrix) + double sum=0; + for(int i = 0; i < M.rows; i++) + { + const double* Mi = M.ptr(i); + for(int j = 0; j < M.cols; j++) + sum += std::max(Mi[j], 0.); + } + + +.. + +Some operations, like the above one, do not actually depend on the array shape, they just process elements of an array one by one (or elements from multiple arrays that have the same coordinates, e.g. array addition). Such operations are called element-wise and it makes sense to check whether all the input/output arrays are continuous, i.e. have no gaps in the end of each row, and if yes, process them as a single long row: + + + + +:: + + + + // compute sum of positive matrix elements, optimized variant + double sum=0; + int cols = M.cols, rows = M.rows; + if(M.isContinuous()) + { + cols *= rows; + rows = 1; + } + for(int i = 0; i < rows; i++) + { + const double* Mi = M.ptr(i); + for(int j = 0; j < cols; j++) + sum += std::max(Mi[j], 0.); + } + + +.. + +in the case of continuous matrix the outer loop body will be executed just once, so the overhead will be smaller, which will be especially noticeable in the case of small matrices. + +Finally, there are STL-style iterators that are smart enough to skip gaps between successive rows: + + + +:: + + + + // compute sum of positive matrix elements, iterator-based variant + double sum=0; + MatConstIterator_ it = M.begin(), it_end = M.end(); + for(; it != it_end; ++it) + sum += std::max(*it, 0.); + + +.. + +The matrix iterators are random-access iterators, so they can be passed to any STL algorithm, including +``std::sort()`` +. + + +Matrix Expressions +------------------ + + +This is a list of implemented matrix operations that can be combined in arbitrary complex expressions +(here +*A* +, +*B* +stand for matrices ( +``Mat`` +), +*s* +for a scalar ( +``Scalar`` +), +:math:`\alpha` +for a real-valued scalar ( +``double`` +)): + + + + + +* + addition, subtraction, negation: + :math:`A \pm B,\;A \pm s,\;s \pm A,\;-A` + + +* + scaling: + :math:`A*\alpha` + , + :math:`A*\alpha` + + +* + per-element multiplication and division: + :math:`A.mul(B), A/B, \alpha/A` + + +* + matrix multiplication: + :math:`A*B` + + +* + transposition: + :math:`A.t() \sim A^t` + + +* + matrix inversion and pseudo-inversion, solving linear systems and least-squares problems: + + :math:`A.inv([method]) \sim A^{-1}, A.inv([method])*B \sim X:\,AX=B` + + +* + comparison: + :math:`A\gtreqqless B,\;A \ne B,\;A \gtreqqless \alpha,\;A \ne \alpha` + . + The result of comparison is 8-bit single channel mask, which elements are set to 255 + (if the particular element or pair of elements satisfy the condition) and 0 otherwise. + + + +* + bitwise logical operations: + ``A & B, A & s, A | B, A | s, A textasciicircum B, A textasciicircum s, ~ A`` + + +* + element-wise minimum and maximum: + :math:`min(A, B), min(A, \alpha), max(A, B), max(A, \alpha)` + + +* + element-wise absolute value: + :math:`abs(A)` + + +* + cross-product, dot-product: + :math:`A.cross(B), A.dot(B)` + + +* + any function of matrix or matrices and scalars that returns a matrix or a scalar, such as + + :func:`norm` + , + :func:`mean` + , + :func:`sum` + , + :func:`countNonZero` + , + :func:`trace` + , + + :func:`determinant` + , + :func:`repeat` + etc. 
+ + + +* + matrix initializers ( + ``eye(), zeros(), ones()`` + ), matrix comma-separated initializers, + matrix constructors and operators that extract sub-matrices (see + :ref:`Mat` + description). + + + +* + verb + "Mat_()" constructors to cast the result to the proper type. + + +Note, however, that comma-separated initializers and probably some other operations may require additional explicit +``Mat()`` +or +verb +"Mat_()" constuctor calls to resolve possible ambiguity. + +Below is the formal description of the +``Mat`` +methods. + + +.. index:: Mat::Mat + + +cv::Mat::Mat +------------ + +`id=0.205719205092 Comments from the Wiki `__ + + + + +.. cfunction:: (1) Mat::Mat() + + + +.. cfunction:: (2) Mat::Mat(int rows, int cols, int type) + + + +.. cfunction:: (3) Mat::Mat(Size size, int type) + + + +.. cfunction:: (4) Mat::Mat(int rows, int cols, int type, const Scalar\& s) + + + +.. cfunction:: (5) Mat::Mat(Size size, int type, const Scalar\& s) + + + +.. cfunction:: (6) Mat::Mat(const Mat\& m) + + + +.. cfunction:: (7) Mat::Mat(int rows, int cols, int type, void* data, size_t step=AUTO_STEP) + + + +.. cfunction:: (8) Mat::Mat(Size size, int type, void* data, size_t step=AUTO_STEP) + + + +.. cfunction:: (9) Mat::Mat(const Mat\& m, const Range\& rowRange, const Range\& colRange) + + + +.. cfunction:: (10) Mat::Mat(const Mat\& m, const Rect\& roi) + + + +.. cfunction:: (11) Mat::Mat(const CvMat* m, bool copyData=false) + + + +.. cfunction:: (12) Mat::Mat(const IplImage* img, bool copyData=false) + + + +.. cfunction:: (13) template explicit Mat::Mat(const Vec\& vec, bool copyData=true) + + + +.. cfunction:: (14) template explicit Mat::Mat(const Matx\& vec, bool copyData=true) + + + +.. cfunction:: (15) template explicit Mat::Mat(const vector\& vec, bool copyData=false) + + + +.. cfunction:: (16) Mat::Mat(const MatExpr\& expr) + + + +.. cfunction:: (17) Mat::Mat(int ndims, const int* sizes, int type) + + + +.. cfunction:: (18) Mat::Mat(int ndims, const int* sizes, int type, const Scalar\& s) + + + +.. cfunction:: (19) Mat::Mat(int ndims, const int* sizes, int type, void* data, const size_t* steps=0) + + + +.. cfunction:: (20) Mat::Mat(const Mat\& m, const Range* ranges) + + Various array constructors + + + + + + + :param ndims: The array dimensionality + + + :param rows: The number of rows in 2D array + + + :param cols: The number of columns in 2D array + + + :param size: The 2D array size: ``Size(cols, rows)`` . Note that in the ``Size()`` constructor the number of rows and the number of columns go in the reverse order. + + + :param sizes: The array of integers, specifying the n-dimensional array shape + + + :param type: The array type, use ``CV_8UC1, ..., CV_64FC4`` to create 1-4 channel matrices, or ``CV_8UC(n), ..., CV_64FC(n)`` to create multi-channel (up to ``CV_MAX_CN`` channels) matrices + + + :param s: The optional value to initialize each matrix element with. To set all the matrix elements to the particular value after the construction, use the assignment operator ``Mat::operator=(const Scalar& value)`` . + + + :param data: Pointer to the user data. Matrix constructors that take ``data`` and ``step`` parameters do not allocate matrix data. Instead, they just initialize the matrix header that points to the specified data, i.e. no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, user should take care of it. + + + :param step: The ``data`` buddy. 
This optional parameter specifies the number of bytes that each matrix row occupies. The value should include the padding bytes in the end of each row, if any. If the parameter is missing (set to ``cv::AUTO_STEP`` ), no padding is assumed and the actual step is calculated as ``cols*elemSize()`` , see :ref:`Mat::elemSize` (). + + + :param steps: The array of ``ndims-1`` steps in the case of multi-dimensional array (the last step is always set to the element size). If not specified, the matrix is assumed to be continuous. + + + :param m: The array that (in whole, a partly) is assigned to the constructed matrix. No data is copied by these constructors. Instead, the header pointing to ``m`` data, or its sub-array, is constructed and the associated with it reference counter, if any, is incremented. That is, when you modify the matrix formed using such a constructor, you will also modify the corresponding elements of ``m`` . If you want to have an independent copy of the sub-array, use ``Mat::clone()`` . + + + :param img: Pointer to the old-style ``IplImage`` image structure. By default, the data is shared between the original image and the new matrix, but when ``copyData`` is set, the full copy of the image data is created. + + + :param vec: STL vector, which elements will form the matrix. The matrix will have a single column and the number of rows equal to the number of vector elements. Type of the matrix will match the type of vector elements. The constructor can handle arbitrary types, for which there is properly declared :ref:`DataType` , i.e. the vector elements must be primitive numbers or uni-type numerical tuples of numbers. Mixed-type structures are not supported, of course. Note that the corresponding constructor is explicit, meaning that STL vectors are not automatically converted to ``Mat`` instances, you should write ``Mat(vec)`` explicitly. Another obvious note: unless you copied the data into the matrix ( ``copyData=true`` ), no new elements should be added to the vector, because it can potentially yield vector data reallocation, and thus the matrix data pointer will become invalid. + + + :param copyData: Specifies, whether the underlying data of the STL vector, or the old-style ``CvMat`` or ``IplImage`` should be copied to (true) or shared with (false) the newly constructed matrix. When the data is copied, the allocated buffer will be managed using ``Mat`` 's reference counting mechanism. While when the data is shared, the reference counter will be NULL, and you should not deallocate the data until the matrix is not destructed. + + + :param rowRange: The range of the ``m`` 's rows to take. As usual, the range start is inclusive and the range end is exclusive. Use ``Range::all()`` to take all the rows. + + + :param colRange: The range of the ``m`` 's columns to take. Use ``Range::all()`` to take all the columns. + + + :param ranges: The array of selected ranges of ``m`` along each dimensionality + + . + + :param expr: Matrix expression. See :ref:`Matrix Expressions` . + + + +These are various constructors that form a matrix. As noticed in the +, often the default constructor is enough, and the proper matrix will be allocated by an OpenCV function. The constructed matrix can further be assigned to another matrix or matrix expression, in which case the old content is dereferenced, or be allocated with +:ref:`Mat::create` +. + + +.. index:: Mat::Mat + + +cv::Mat::Mat +------------ + +`id=0.165814224424 Comments from the Wiki `__ + + + + +.. 
cfunction:: Mat::~Mat()
+
+    Matrix destructor
+
+The matrix destructor calls :ref:`Mat::release` .
+
+.. index:: Mat::operator =
+
+cv::Mat::operator =
+-------------------
+
+.. cfunction:: Mat\& Mat::operator = (const Mat\& m)
+
+.. cfunction:: Mat\& Mat::operator = (const MatExpr_Base\& expr)
+
+.. cfunction:: Mat\& Mat::operator = (const Scalar\& s)
+
+    Matrix assignment operators
+
+    :param m: The assigned, right-hand-side matrix. Matrix assignment is an O(1) operation, that is, no data is copied. Instead, the data is shared and the reference counter, if any, is incremented. Before assigning new data, the old data is dereferenced via :ref:`Mat::release` .
+
+    :param expr: The assigned matrix expression object. As opposed to the first form of the assignment operation, the second form can reuse an already allocated matrix if it has the right size and type to fit the matrix expression result. It is automatically handled by the real function that the matrix expression is expanded to. For example, ``C=A+B`` is expanded to ``cv::add(A, B, C)`` , and :func:`add` will take care of automatic ``C`` reallocation.
+
+    :param s: The scalar, assigned to each matrix element. The matrix size or type is not changed.
+
+These are the available assignment operators, and they all are very different, so, please, look at the operator parameters description.
+
+.. index:: Mat::operator MatExpr
+
+cv::Mat::operator MatExpr
+-------------------------
+
+.. cfunction:: Mat::operator MatExpr_() const
+
+    Mat-to-MatExpr cast operator
+
+The cast operator should not be called explicitly. It is used internally by the :ref:`Matrix Expressions` engine.
+
+.. index:: Mat::row
+
+cv::Mat::row
+------------
+
+.. cfunction:: Mat Mat::row(int i) const
+
+    Makes a matrix header for the specified matrix row
+
+    :param i: the 0-based row index
+
+The method makes a new header for the specified matrix row and returns it. This is an O(1) operation, regardless of the matrix size. The underlying data of the new matrix will be shared with the original matrix. Here is an example of one of the classical basic matrix processing operations, axpy, used by LU and many other algorithms:
+
+::
+
+    inline void matrix_axpy(Mat& A, int i, int j, double alpha)
+    {
+        A.row(i) += A.row(j)*alpha;
+    }
+
+..
+
+**Important note** . In the current implementation the following code will not work as expected:
+
+::
+
+    Mat A;
+    ...
+    A.row(i) = A.row(j); // will not work
+
+..
+
+This is because ``A.row(i)`` forms a temporary header, which is further assigned another header. Remember, each of these operations is O(1), i.e. no data is copied. Thus, the above assignment will have absolutely no effect, while you may have expected the j-th row to be copied to the i-th row. To achieve that, you should either turn this simple assignment into an expression, or use the :ref:`Mat::copyTo` method:
+
+::
+
+    Mat A;
+    ...
+    // works, but looks a bit obscure.
+    A.row(i) = A.row(j) + 0;
+
+    // this is a bit longer, but the recommended method.
+    Mat Ai = A.row(i); A.row(j).copyTo(Ai);
+
+..
+
+.. index:: Mat::col
+
+cv::Mat::col
+------------
+
+.. cfunction:: Mat Mat::col(int j) const
+
+    Makes a matrix header for the specified matrix column
+
+    :param j: the 0-based column index
+
+The method makes a new header for the specified matrix column and returns it. This is an O(1) operation, regardless of the matrix size. The underlying data of the new matrix will be shared with the original matrix. See also the :ref:`Mat::row` description.
+
+.. index:: Mat::rowRange
+
+cv::Mat::rowRange
+-----------------
+
+.. cfunction:: Mat Mat::rowRange(int startrow, int endrow) const
+
+.. cfunction:: Mat Mat::rowRange(const Range\& r) const
+
+    Makes a matrix header for the specified row span
+
+    :param startrow: the 0-based start index of the row span
+
+    :param endrow: the 0-based ending index of the row span
+
+    :param r: The :func:`Range` structure containing both the start and the end indices
+
+The method makes a new header for the specified row span of the matrix. Similarly to :func:`Mat::row` and :func:`Mat::col` , this is an O(1) operation.
+
+.. index:: Mat::colRange
+
+cv::Mat::colRange
+-----------------
+
+.. cfunction:: Mat Mat::colRange(int startcol, int endcol) const
+
+.. cfunction:: Mat Mat::colRange(const Range\& r) const
+
+    Makes a matrix header for the specified column span
+
+    :param startcol: the 0-based start index of the column span
+
+    :param endcol: the 0-based ending index of the column span
+
+    :param r: The :func:`Range` structure containing both the start and the end indices
+
+The method makes a new header for the specified column span of the matrix. Similarly to :func:`Mat::row` and :func:`Mat::col` , this is an O(1) operation.
+
+.. index:: Mat::diag
+
+cv::Mat::diag
+-------------
+
+.. cfunction:: Mat Mat::diag(int d) const
+
+.. cfunction:: static Mat Mat::diag(const Mat\& matD)
+
+    Extracts a diagonal from a matrix, or creates a diagonal matrix.
+
+    :param d: index of the diagonal, with the following meaning:
+
+        * **d=0** the main diagonal
+
+        * **d>0** a diagonal from the lower half, e.g. ``d=1`` means the diagonal immediately below the main one
+
+        * **d<0** a diagonal from the upper half, e.g. ``d=-1`` means the diagonal immediately above the main one
+
+    :param matD: single-column matrix that will form the diagonal matrix.
+
+The method makes a new header for the specified matrix diagonal. The new matrix will be represented as a single-column matrix. Similarly to :func:`Mat::row` and :func:`Mat::col` , this is an O(1) operation.
+
+.. index:: Mat::clone
+
+cv::Mat::clone
+--------------
+
+.. cfunction:: Mat Mat::clone() const
+
+    Creates a full copy of the array and the underlying data.
+
+The method creates a full copy of the array. The original ``step[]`` is not taken into account. That is, the array copy will be a continuous array occupying ``total()*elemSize()`` bytes.
+
+.. index:: Mat::copyTo
+
+cv::Mat::copyTo
+---------------
+
+.. cfunction:: void Mat::copyTo( Mat\& m ) const
+
+.. cfunction:: void Mat::copyTo( Mat\& m, const Mat\& mask ) const
+
+    Copies the matrix to another one.
+
+    :param m: The destination matrix. If it does not have a proper size or type before the operation, it will be reallocated
+
+    :param mask: The operation mask. Its non-zero elements indicate, which matrix elements need to be copied
+
+The method copies the matrix data to another matrix. Before copying the data, the method invokes
+
+::
+
+    m.create(this->size(), this->type());
+
+..
+
+so that the destination matrix is reallocated if needed. While ``m.copyTo(m);`` will work as expected, i.e. will have no effect, the function does not handle the case of a partial overlap between the source and the destination matrices.
+
+When the operation mask is specified, and the ``Mat::create`` call shown above reallocated the matrix, the newly allocated matrix is initialized with all 0's before copying the data.
+
+.. index:: Mat::convertTo
+
+cv::Mat::convertTo
+------------------
+
+.. cfunction:: void Mat::convertTo( Mat\& m, int rtype, double alpha=1, double beta=0 ) const
+
+    Converts array to another datatype with optional scaling.
+
+    :param m: The destination matrix. If it does not have a proper size or type before the operation, it will be reallocated
+
+    :param rtype: The desired destination matrix type, or rather, the depth (since the number of channels will be the same as in the source). If ``rtype`` is negative, the destination matrix will have the same type as the source.
+
+    :param alpha: The optional scale factor
+
+    :param beta: The optional delta, added to the scaled values.
+
+The method converts source pixel values to the target datatype. ``saturate_cast<>`` is applied in the end to avoid possible overflows:
+
+.. math::
+
+    m(x,y) = saturate\_cast<rtype>( \alpha (*this)(x,y) + \beta )
+
+.. index:: Mat::assignTo
+
+cv::Mat::assignTo
+-----------------
+
+.. cfunction:: void Mat::assignTo( Mat\& m, int type=-1 ) const
+
+    Functional form of convertTo
+
+    :param m: The destination array
+
+    :param type: The desired destination array depth (or -1 if it should be the same as the source one).
+
+This is an internal-use method called by the :ref:`Matrix Expressions` engine.
+
+.. index:: Mat::setTo
+
+cv::Mat::setTo
+--------------
+
+.. cfunction:: Mat\& Mat::setTo(const Scalar\& s, const Mat\& mask=Mat())
+
+    Sets all or some of the array elements to the specified value.
+
+    :param s: Assigned scalar, which is converted to the actual array type
+
+    :param mask: The operation mask of the same size as ``*this``
+
+This is the advanced variant of the ``Mat::operator=(const Scalar& s)`` operator.
+
+.. index:: Mat::reshape
+
+cv::Mat::reshape
+----------------
+
+.. cfunction:: Mat Mat::reshape(int cn, int rows=0) const
+
+    Changes the 2D matrix's shape and/or the number of channels without copying the data.
+
+    :param cn: The new number of channels. If the parameter is 0, the number of channels remains the same.
+
+    :param rows: The new number of rows. If the parameter is 0, the number of rows remains the same.
+
+The method makes a new matrix header for ``*this`` elements. The new matrix may have a different size and/or a different number of channels. Any combination is possible, as long as:
+
+#. No extra elements are included into the new matrix and no elements are excluded. Consequently, the product ``rows*cols*channels()`` must stay the same after the transformation.
+
+#. No data is copied, i.e. this is an O(1) operation. Consequently, if you change the number of rows, or the operation changes elements' row indices in some other way, the matrix must be continuous. See :func:`Mat::isContinuous` .
+
+Here is a small example. Assume there is a set of 3D points stored as an STL vector, and you want to represent the points as a ``3xN`` matrix. Here is how it can be done:
+
+::
+
+    std::vector<Point3f> vec;
+    ...
+
+    Mat pointMat = Mat(vec). // convert vector to Mat, O(1) operation
+                      reshape(1). // make Nx3 1-channel matrix out of Nx1 3-channel.
+                                  // Also, an O(1) operation
+                         t(); // finally, transpose the Nx3 matrix.
+                              // This involves copying of all the elements
+
+..
+
+.. index:: Mat::t
+
+cv::Mat::t
+----------
+
+.. cfunction:: MatExpr Mat::t() const
+
+    Transposes the matrix
+
+The method performs matrix transposition by means of matrix expressions. It does not perform the actual transposition, but returns a temporary "matrix transposition" object that can be further used as a part of a more complex matrix expression or be assigned to a matrix:
+
+::
+
+    Mat A1 = A + Mat::eye(A.size(), A.type())*lambda;
+    Mat C = A1.t()*A1; // compute (A + lambda*I)^t * (A + lambda*I)
+
+..
+
+.. index:: Mat::inv
+
+cv::Mat::inv
+------------
+
+.. cfunction:: MatExpr Mat::inv(int method=DECOMP_LU) const
+
+    Inverses the matrix
+
+    :param method: The matrix inversion method, one of
+
+        * **DECOMP_LU** LU decomposition. The matrix must be non-singular
+
+        * **DECOMP_CHOLESKY** Cholesky :math:`LL^T` decomposition, for symmetric positive-definite matrices only. About twice as fast as LU on big matrices.
+
+        * **DECOMP_SVD** SVD decomposition. The matrix can be singular or even non-square, then the pseudo-inverse is computed
+
+The method performs matrix inversion by means of matrix expressions, i.e. a temporary "matrix inversion" object is returned by the method, and can further be used as a part of a more complex matrix expression or be assigned to a matrix.
+
+.. index:: Mat::mul
+
+cv::Mat::mul
+------------
+
+.. cfunction:: MatExpr Mat::mul(const Mat\& m, double scale=1) const
+
+.. cfunction:: MatExpr Mat::mul(const MatExpr\& m, double scale=1) const
+
+    Performs element-wise multiplication or division of the two matrices
+
+    :param m: Another matrix, of the same type and the same size as ``*this`` , or a matrix expression
+
+    :param scale: The optional scale factor
+
+The method returns a temporary object encoding per-element array multiplication, with an optional scale. Note that this is not a matrix multiplication, which corresponds to the simpler "*" operator.
+
+Here is an example:
+
+::
+
+    Mat C = A.mul(5/B); // equivalent to divide(A, B, C, 5)
+
+..
+
+.. index:: Mat::cross
+
+cv::Mat::cross
+--------------
+
+.. cfunction:: Mat Mat::cross(const Mat\& m) const
+
+    Computes the cross-product of two 3-element vectors
+
+    :param m: Another cross-product operand
+
+The method computes the cross-product of the two 3-element vectors. The vectors must be 3-element floating-point vectors of the same shape and the same size. 
The result will be another 3-element vector of the same shape and the same type as operands. + + +.. index:: Mat::dot + + +cv::Mat::dot +------------ + +`id=0.0134044069178 Comments from the Wiki `__ + + + + +.. cfunction:: double Mat::dot(const Mat\& m) const + + Computes dot-product of two vectors + + + + + + + :param m: Another dot-product operand. + + + +The method computes dot-product of the two matrices. If the matrices are not single-column or single-row vectors, the top-to-bottom left-to-right scan ordering is used to treat them as 1D vectors. The vectors must have the same size and the same type. If the matrices have more than one channel, the dot products from all the channels are summed together. + + +.. index:: Mat::zeros + + +cv::Mat::zeros +-------------- + +`id=0.26899433256 Comments from the Wiki `__ + + + + +.. cfunction:: static MatExpr Mat::zeros(int rows, int cols, int type) static MatExpr Mat::zeros(Size size, int type) static MatExpr Mat::zeros(int ndims, const int* sizes, int type) + + Returns zero array of the specified size and type + + + + + + + :param ndims: The array dimensionality + + + :param rows: The number of rows + + + :param cols: The number of columns + + + :param size: Alternative matrix size specification: ``Size(cols, rows)`` + + + :param sizes: The array of integers, specifying the array shape + + + :param type: The created matrix type + + + +The method returns Matlab-style zero array initializer. It can be used to quickly form a constant array and use it as a function parameter, as a part of matrix expression, or as a matrix initializer. + + + + +:: + + + + Mat A; + A = Mat::zeros(3, 3, CV_32F); + + +.. + +Note that in the above sample a new matrix will be allocated only if +``A`` +is not 3x3 floating-point matrix, otherwise the existing matrix +``A`` +will be filled with 0's. + + + +.. index:: Mat::ones + + +cv::Mat::ones +------------- + +`id=0.885849571877 Comments from the Wiki `__ + + + + +.. cfunction:: static MatExpr Mat::ones(int rows, int cols, int type) static MatExpr Mat::ones(Size size, int type) static MatExpr Mat::ones(int ndims, const int* sizes, int type) + + Returns array of all 1's of the specified size and type + + + + + + + :param ndims: The array dimensionality + + + :param rows: The number of rows + + + :param cols: The number of columns + + + :param size: Alternative matrix size specification: ``Size(cols, rows)`` + + + :param sizes: The array of integers, specifying the array shape + + + :param type: The created matrix type + + + +The method returns Matlab-style ones' array initializer, similarly to +:func:`Mat::zeros` +. Note that using this method you can initialize an array with arbitrary value, using the following Matlab idiom: + + + + +:: + + + + Mat A = Mat::ones(100, 100, CV_8U)*3; // make 100x100 matrix filled with 3. + + +.. + +The above operation will not form 100x100 matrix of ones and then multiply it by 3. Instead, it will just remember the scale factor (3 in this case) and use it when actually invoking the matrix initializer. + + +.. index:: Mat::eye + + +cv::Mat::eye +------------ + +`id=0.587817231915 Comments from the Wiki `__ + + + + +.. 
cfunction:: static MatExpr Mat::eye(int rows, int cols, int type) static MatExpr Mat::eye(Size size, int type) + + Returns identity matrix of the specified size and type + + + + + + + :param rows: The number of rows + + + :param cols: The number of columns + + + :param size: Alternative matrix size specification: ``Size(cols, rows)`` + + + :param type: The created matrix type + + + +The method returns Matlab-style identity matrix initializer, similarly to +:func:`Mat::zeros` +. Similarly to +``Mat::ones`` +, you can use a scale operation to create a scaled identity matrix efficiently: + + + + +:: + + + + // make a 4x4 diagonal matrix with 0.1's on the diagonal. + Mat A = Mat::eye(4, 4, CV_32F)*0.1; + + +.. + + +.. index:: Mat::create + + +cv::Mat::create +--------------- + +`id=0.13977109272 Comments from the Wiki `__ + + + + +.. cfunction:: void Mat::create(int rows, int cols, int type) void Mat::create(Size size, int type) void Mat::create(int ndims, const int* sizes, int type) + + Allocates new array data if needed. + + + + + + + :param ndims: The new array dimensionality + + + :param rows: The new number of rows + + + :param cols: The new number of columns + + + :param size: Alternative new matrix size specification: ``Size(cols, rows)`` + + + :param sizes: The array of integers, specifying the new array shape + + + :param type: The new matrix type + + + +This is one of the key +``Mat`` +methods. Most new-style OpenCV functions and methods that produce arrays call this method for each output array. The method uses the following algorithm: + + + + + +#. + if the current array shape and the type match the new ones, return immediately. + + +#. + otherwise, dereference the previous data by calling + :func:`Mat::release` + + +#. + initialize the new header + + +#. + allocate the new data of + ``total()*elemSize()`` + bytes + + +#. + allocate the new, associated with the data, reference counter and set it to 1. + + +Such a scheme makes the memory management robust and efficient at the same time, and also saves quite a bit of typing for the user, i.e. usually there is no need to explicitly allocate output arrays. That is, instead of writing: + + + + +:: + + + + Mat color; + ... + Mat gray(color.rows, color.cols, color.depth()); + cvtColor(color, gray, CV_BGR2GRAY); + + +.. + +you can simply write: + + + +:: + + + + Mat color; + ... + Mat gray; + cvtColor(color, gray, CV_BGR2GRAY); + + +.. + +because +``cvtColor`` +, as well as most of OpenCV functions, calls Mat::create() for the output array internally. + + +.. index:: Mat::addref + + +cv::Mat::addref +--------------- + +`id=0.525485813853 Comments from the Wiki `__ + + + + +.. cfunction:: void Mat::addref() + + Increments the reference counter + + + +The method increments the reference counter, associated with the matrix data. If the matrix header points to an external data (see +:func:`Mat::Mat` +), the reference counter is NULL, and the method has no effect in this case. Normally, the method should not be called explicitly, to avoid memory leaks. It is called implicitly by the matrix assignment operator. The reference counter increment is the atomic operation on the platforms that support it, thus it is safe to operate on the same matrices asynchronously in different threads. + + + +.. index:: Mat::release + + +cv::Mat::release +---------------- + +`id=0.596756879366 Comments from the Wiki `__ + + + + +.. 
cfunction:: void Mat::release() + + Decrements the reference counter and deallocates the matrix if needed + + + +The method decrements the reference counter, associated with the matrix data. When the reference counter reaches 0, the matrix data is deallocated and the data and the reference counter pointers are set to NULL's. If the matrix header points to an external data (see +:func:`Mat::Mat` +), the reference counter is NULL, and the method has no effect in this case. + +This method can be called manually to force the matrix data deallocation. But since this method is automatically called in the destructor, or by any other method that changes the data pointer, it is usually not needed. The reference counter decrement and check for 0 is the atomic operation on the platforms that support it, thus it is safe to operate on the same matrices asynchronously in different threads. + + +.. index:: Mat::resize + + +cv::Mat::resize +--------------- + +`id=0.648802206934 Comments from the Wiki `__ + + + + +.. cfunction:: void Mat::resize( size_t sz ) const + + Changes the number of matrix rows + + + + + + + :param sz: The new number of rows + + + +The method changes the number of matrix rows. If the matrix is reallocated, the first +``min(Mat::rows, sz)`` +rows are preserved. The method emulates the corresponding method of STL vector class. + + +.. index:: Mat::push_back + +.. _Mat::push_back: + +Mat::push_back +-------------- + +`id=0.758344713588 Comments from the Wiki `__ + + + + +.. cfunction:: template void Mat::push_back(const T\& elem) template void Mat::push_back(const Mat_\& elem) + + Adds elements to the bottom of the matrix + + + + + + + :param elem: The added element(s). + + + +The methods add one or more elements to the bottom of the matrix. They emulate the corresponding method of STL vector class. When +``elem`` +is +``Mat`` +, its type and the number of columns must be the same as in the container matrix. + + +.. index:: Mat::pop_back + +.. _Mat::pop_back: + +Mat::pop_back +------------- + +`id=0.959344841629 Comments from the Wiki `__ + + + + +.. cfunction:: template void Mat::pop_back(size_t nelems=1) + + Removes elements from the bottom of the matrix. + + + + + + + :param nelems: The number of rows removed. If it is greater than the total number of rows, an exception is thrown. + + + +The method removes one or more rows from the bottom of the matrix. + + +.. index:: Mat::locateROI + + +cv::Mat::locateROI +------------------ + +`id=0.981770755379 Comments from the Wiki `__ + + + + +.. cfunction:: void Mat::locateROI( Size\& wholeSize, Point\& ofs ) const + + Locates matrix header within a parent matrix + + + + + + + :param wholeSize: The output parameter that will contain size of the whole matrix, which ``*this`` is a part of. + + + :param ofs: The output parameter that will contain offset of ``*this`` inside the whole matrix + + + +After you extracted a submatrix from a matrix using +:func:`Mat::row` +, +:func:`Mat::col` +, +:func:`Mat::rowRange` +, +:func:`Mat::colRange` +etc., the result submatrix will point just to the part of the original big matrix. However, each submatrix contains some information (represented by +``datastart`` +and +``dataend`` +fields), using which it is possible to reconstruct the original matrix size and the position of the extracted submatrix within the original matrix. The method +``locateROI`` +does exactly that. + + +.. index:: Mat::adjustROI + + +cv::Mat::adjustROI +------------------ + +`id=0.475624324276 Comments from the Wiki `__ + + + + +.. 
cfunction:: Mat\& Mat::adjustROI( int dtop, int dbottom, int dleft, int dright )
+
+    Adjusts the submatrix size and position within the parent matrix
+
+    :param dtop: The shift of the top submatrix boundary upwards
+
+    :param dbottom: The shift of the bottom submatrix boundary downwards
+
+    :param dleft: The shift of the left submatrix boundary to the left
+
+    :param dright: The shift of the right submatrix boundary to the right
+
+The method is complementary to :func:`Mat::locateROI` . Indeed, the typical use of these functions is to determine the submatrix position within the parent matrix and then shift the position somehow. Typically it can be needed for filtering operations, when pixels outside of the ROI should be taken into account. When all the method's parameters are positive, it means that the ROI needs to grow in all directions by the specified amount, i.e.
+
+::
+
+    A.adjustROI(2, 2, 2, 2);
+
+..
+
+increases the matrix size by 4 elements in each direction and shifts it by 2 elements to the left and 2 elements up, which brings in all the necessary pixels for filtering with a 5x5 kernel.
+
+It is the user's responsibility to make sure that ``adjustROI`` does not cross the parent matrix boundary. If it does, the function will signal an error.
+
+The function is used internally by the OpenCV filtering functions, like :func:`filter2D` , morphological operations etc.
+
+See also :func:`copyMakeBorder` .
+
+.. index:: Mat::operator()
+
+cv::Mat::operator()
+-------------------
+
+.. cfunction:: Mat Mat::operator()( Range rowRange, Range colRange ) const
+
+.. cfunction:: Mat Mat::operator()( const Rect\& roi ) const
+
+.. cfunction:: Mat Mat::operator()( const Range* ranges ) const
+
+    Extracts a rectangular submatrix
+
+    :param rowRange: The start and the end row of the extracted submatrix. The upper boundary is not included. To select all the rows, use ``Range::all()``
+
+    :param colRange: The start and the end column of the extracted submatrix. The upper boundary is not included. To select all the columns, use ``Range::all()``
+
+    :param roi: The extracted submatrix specified as a rectangle
+
+    :param ranges: The array of selected ranges along each array dimension
+
+The operators make a new header for the specified sub-array of ``*this`` . They are the most generalized forms of :func:`Mat::row` , :func:`Mat::col` , :func:`Mat::rowRange` and :func:`Mat::colRange` . For example, ``A(Range(0, 10), Range::all())`` is equivalent to ``A.rowRange(0, 10)`` . Similarly to all of the above, the operators are O(1) operations, i.e. no matrix data is copied.
+
+.. index:: Mat::operator CvMat
+
+cv::Mat::operator CvMat
+-----------------------
+
+.. cfunction:: Mat::operator CvMat() const
+
+    Creates the CvMat header for the matrix
+
+The operator makes the CvMat header for the matrix without copying the underlying data. The reference counter is not taken into account by this operation, thus you should make sure that the original matrix is not deallocated while the ``CvMat`` header is used. The operator is useful for intermixing the new and the old OpenCV API's, e.g:
+
+::
+
+    Mat img(Size(320, 240), CV_8UC3);
+    ...
+
+    CvMat cvimg = img;
+    mycvOldFunc( &cvimg, ...);
+
+..
+
+where ``mycvOldFunc`` is some function written to work with OpenCV 1.x data structures.
+
+.. index:: Mat::operator IplImage
+
+cv::Mat::operator IplImage
+--------------------------
+
+.. cfunction:: Mat::operator IplImage() const
+
+    Creates the IplImage header for the matrix
+
+The operator makes the IplImage header for the matrix without copying the underlying data. You should make sure that the original matrix is not deallocated while the ``IplImage`` header is used. Similarly to ``Mat::operator CvMat`` , the operator is useful for intermixing the new and the old OpenCV API's.
+
+.. index:: Mat::total
+
+cv::Mat::total
+--------------
+
+.. cfunction:: size_t Mat::total() const
+
+    Returns the total number of array elements.
+
+The method returns the number of array elements (e.g. the number of pixels if the array represents an image).
+
+.. index:: Mat::isContinuous
+
+cv::Mat::isContinuous
+---------------------
+
+.. cfunction:: bool Mat::isContinuous() const
+
+    Reports whether the matrix is continuous or not
+
+The method returns true if the matrix elements are stored continuously, i.e. without gaps in the end of each row, and false otherwise. Obviously, ``1x1`` or ``1xN`` matrices are always continuous. Matrices created with :func:`Mat::create` are always continuous, but if you extract a part of the matrix using :func:`Mat::col` , :func:`Mat::diag` etc., or constructed a matrix header for externally allocated data, such matrices may no longer have this property.
+
+The continuity flag is stored as a bit in the ``Mat::flags`` field, and is computed automatically when you construct a matrix header, thus the continuity check is a very fast operation, though it could be, in theory, done as follows:
+
+::
+
+    // alternative implementation of Mat::isContinuous()
+    bool myCheckMatContinuity(const Mat& m)
+    {
+        //return (m.flags & Mat::CONTINUOUS_FLAG) != 0;
+        return m.rows == 1 || m.step == m.cols*m.elemSize();
+    }
+
+..
+
+The method is used in quite a few OpenCV functions, and you are welcome to use it as well. The point is that element-wise operations (such as arithmetic and logical operations, math functions, alpha blending, color space transformations etc.) do not depend on the image geometry, and thus, if all the input and all the output arrays are continuous, the functions can process them as very long single-row vectors. Here is an example of how an alpha-blending function can be implemented.
+ + + + +:: + + + + template + void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst) + { + const float alpha_scale = (float)std::numeric_limits::max(), + inv_scale = 1.f/alpha_scale; + + CV_Assert( src1.type() == src2.type() && + src1.type() == CV_MAKETYPE(DataType::depth, 4) && + src1.size() == src2.size()); + Size size = src1.size(); + dst.create(size, src1.type()); + + // here is the idiom: check the arrays for continuity and, + // if this is the case, + // treat the arrays as 1D vectors + if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() ) + { + size.width *= size.height; + size.height = 1; + } + size.width *= 4; + + for( int i = 0; i < size.height; i++ ) + { + // when the arrays are continuous, + // the outer loop is executed only once + const T* ptr1 = src1.ptr(i); + const T* ptr2 = src2.ptr(i); + T* dptr = dst.ptr(i); + + for( int j = 0; j < size.width; j += 4 ) + { + float alpha = ptr1[j+3]*inv_scale, beta = ptr2[j+3]*inv_scale; + dptr[j] = saturate_cast(ptr1[j]*alpha + ptr2[j]*beta); + dptr[j+1] = saturate_cast(ptr1[j+1]*alpha + ptr2[j+1]*beta); + dptr[j+2] = saturate_cast(ptr1[j+2]*alpha + ptr2[j+2]*beta); + dptr[j+3] = saturate_cast((1 - (1-alpha)*(1-beta))*alpha_scale); + } + } + } + + +.. + +This trick, while being very simple, can boost performance of a simple element-operation by 10-20 percents, especially if the image is rather small and the operation is quite simple. + +Also, note that we use another OpenCV idiom in this function - we call +:func:`Mat::create` +for the destination array instead of checking that it already has the proper size and type. And while the newly allocated arrays are always continuous, we still check the destination array, because +:func:`create` +does not always allocate a new matrix. + + +.. index:: Mat::elemSize + + +cv::Mat::elemSize +----------------- + +`id=0.551344129694 Comments from the Wiki `__ + + + + +.. cfunction:: size_t Mat::elemSize() const + + Returns matrix element size in bytes + + + +The method returns the matrix element size in bytes. For example, if the matrix type is +``CV_16SC3`` +, the method will return +``3*sizeof(short)`` +or 6. + + +.. index:: Mat::elemSize1 + + +cv::Mat::elemSize1 +------------------ + +`id=0.431190336781 Comments from the Wiki `__ + + + + +.. cfunction:: size_t Mat::elemSize1() const + + Returns size of each matrix element channel in bytes + + + +The method returns the matrix element channel size in bytes, that is, it ignores the number of channels. For example, if the matrix type is +``CV_16SC3`` +, the method will return +``sizeof(short)`` +or 2. + + +.. index:: Mat::type + + +cv::Mat::type +------------- + +`id=0.0234323605141 Comments from the Wiki `__ + + + + +.. cfunction:: int Mat::type() const + + Returns matrix element type + + + +The method returns the matrix element type, an id, compatible with the +``CvMat`` +type system, like +``CV_16SC3`` +or 16-bit signed 3-channel array etc. + + +.. index:: Mat::depth + + +cv::Mat::depth +-------------- + +`id=0.690564194265 Comments from the Wiki `__ + + + + +.. cfunction:: int Mat::depth() const + + Returns matrix element depth + + + +The method returns the matrix element depth id, i.e. the type of each individual channel. For example, for 16-bit signed 3-channel array the method will return +``CV_16S`` +. 
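For example, here is a small sketch of how ``type()`` , ``depth()`` , ``channels()`` and ``elemSize()`` relate to each other (the array name and size used here are illustrative only):

+::
+
+    Mat img(240, 320, CV_16SC3);
+    CV_Assert( img.type() == CV_16SC3 );
+    CV_Assert( img.depth() == CV_16S );
+    CV_Assert( img.channels() == 3 );
+    CV_Assert( img.elemSize() == 3*sizeof(short) );
+
+..
+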
+The complete list of matrix types:
+
+* ``CV_8U`` - 8-bit unsigned integers ( ``0..255`` )
+* ``CV_8S`` - 8-bit signed integers ( ``-128..127`` )
+* ``CV_16U`` - 16-bit unsigned integers ( ``0..65535`` )
+* ``CV_16S`` - 16-bit signed integers ( ``-32768..32767`` )
+* ``CV_32S`` - 32-bit signed integers ( ``-2147483648..2147483647`` )
+* ``CV_32F`` - 32-bit floating-point numbers ( ``-FLT_MAX..FLT_MAX, INF, NAN`` )
+* ``CV_64F`` - 64-bit floating-point numbers ( ``-DBL_MAX..DBL_MAX, INF, NAN`` )
+
+.. index:: Mat::channels
+
+cv::Mat::channels
+-----------------
+
+.. cfunction:: int Mat::channels() const
+
+    Returns the number of matrix channels
+
+The method returns the number of matrix channels.
+
+.. index:: Mat::step1
+
+cv::Mat::step1
+--------------
+
+.. cfunction:: size_t Mat::step1() const
+
+    Returns the normalized step
+
+The method returns the matrix step, divided by :func:`Mat::elemSize1()`. It can be useful for fast access to an arbitrary matrix element.
+
+.. index:: Mat::size
+
+cv::Mat::size
+-------------
+
+.. cfunction:: Size Mat::size() const
+
+    Returns the matrix size
+
+The method returns the matrix size: ``Size(cols, rows)``.
+
+.. index:: Mat::empty
+
+cv::Mat::empty
+--------------
+
+.. cfunction:: bool Mat::empty() const
+
+    Returns true if the array has no elements
+
+The method returns true if ``Mat::total()`` is 0 or if ``Mat::data`` is NULL. Because of the ``pop_back()`` and ``resize()`` methods, ``M.total() == 0`` does not imply that ``M.data == NULL``.
+
+.. index:: Mat::ptr
+
+cv::Mat::ptr
+------------
+
+.. cfunction:: uchar* Mat::ptr(int i=0)
+
+.. cfunction:: const uchar* Mat::ptr(int i=0) const
+
+.. cfunction:: template<typename _Tp> _Tp* Mat::ptr(int i=0)
+
+.. cfunction:: template<typename _Tp> const _Tp* Mat::ptr(int i=0) const
+
+    Return a pointer to the specified matrix row
+
+    :param i: The 0-based row index
+
+The methods return a ``uchar*`` or typed pointer to the specified matrix row. See the sample in :func:`Mat::isContinuous` () on how to use these methods.
+
+.. index:: Mat::at
+
+cv::Mat::at
+-----------
+
+.. cfunction:: template<typename T> T\& Mat::at(int i)
+
+.. cfunction:: template<typename T> const T\& Mat::at(int i) const
+
+.. cfunction:: template<typename T> T\& Mat::at(int i, int j)
+
+.. cfunction:: template<typename T> const T\& Mat::at(int i, int j) const
+
+.. cfunction:: template<typename T> T\& Mat::at(Point pt)
+
+.. cfunction:: template<typename T> const T\& Mat::at(Point pt) const
+
+.. cfunction:: template<typename T> T\& Mat::at(int i, int j, int k)
+
+.. cfunction:: template<typename T> const T\& Mat::at(int i, int j, int k) const
+
+.. cfunction:: template<typename T> T\& Mat::at(const int* idx)
+
+.. cfunction:: template<typename T> const T\& Mat::at(const int* idx) const
+
+    Return a reference to the specified array element
+
+    :param i, j, k: Indices along the dimensions 0, 1 and 2, respectively
+
+    :param pt: The element position specified as ``Point(j,i)``
+
+    :param idx: The array of ``Mat::dims`` indices
+
+The template methods return a reference to the specified array element.
+For the sake of higher performance, the index range checks are only performed in the Debug configuration.
+
+Note that the variants with a single index (i) can be used to access elements of single-row or single-column 2-dimensional arrays. That is, if, for example, ``A`` is a ``1 x N`` floating-point matrix and ``B`` is an ``M x 1`` integer matrix, you can simply write ``A.at<float>(k+4)`` and ``B.at<int>(2*i+1)`` instead of ``A.at<float>(0,k+4)`` and ``B.at<int>(2*i+1,0)``, respectively.
+
+Here is an example of the initialization of a Hilbert matrix: ::
+
+    Mat H(100, 100, CV_64F);
+    for(int i = 0; i < H.rows; i++)
+        for(int j = 0; j < H.cols; j++)
+            H.at<double>(i,j)=1./(i+j+1);
+
+..
+
+.. index:: Mat::begin
+
+cv::Mat::begin
+--------------
+
+.. cfunction:: template<typename _Tp> MatIterator_<_Tp> Mat::begin()
+
+.. cfunction:: template<typename _Tp> MatConstIterator_<_Tp> Mat::begin() const
+
+    Return the matrix iterator, set to the first matrix element
+
+The methods return the matrix read-only or read-write iterators. The use of matrix iterators is very similar to the use of bi-directional STL iterators. Here is the alpha blending function rewritten using the matrix iterators: ::
+
+    template<typename T> void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst)
+    {
+        typedef Vec<T, 4> VT;
+
+        const float alpha_scale = (float)std::numeric_limits<T>::max(),
+                    inv_scale = 1.f/alpha_scale;
+
+        CV_Assert( src1.type() == src2.type() &&
+                   src1.type() == DataType<VT>::type &&
+                   src1.size() == src2.size());
+        Size size = src1.size();
+        dst.create(size, src1.type());
+
+        MatConstIterator_<VT> it1 = src1.begin<VT>(), it1_end = src1.end<VT>();
+        MatConstIterator_<VT> it2 = src2.begin<VT>();
+        MatIterator_<VT> dst_it = dst.begin<VT>();
+
+        for( ; it1 != it1_end; ++it1, ++it2, ++dst_it )
+        {
+            VT pix1 = *it1, pix2 = *it2;
+            float alpha = pix1[3]*inv_scale, beta = pix2[3]*inv_scale;
+            *dst_it = VT(saturate_cast<T>(pix1[0]*alpha + pix2[0]*beta),
+                         saturate_cast<T>(pix1[1]*alpha + pix2[1]*beta),
+                         saturate_cast<T>(pix1[2]*alpha + pix2[2]*beta),
+                         saturate_cast<T>((1 - (1-alpha)*(1-beta))*alpha_scale));
+        }
+    }
+
+..
+
+.. index:: Mat::end
+
+cv::Mat::end
+------------
+
+.. cfunction:: template<typename _Tp> MatIterator_<_Tp> Mat::end()
+
+.. cfunction:: template<typename _Tp> MatConstIterator_<_Tp> Mat::end() const
+
+    Return the matrix iterator, set to the after-last matrix element
+
+The methods return the matrix read-only or read-write iterators, set to the point following the last matrix element.
+
+Mat\_
+-----
+
+Template matrix class derived from :ref:`Mat` ::
+
+    template<typename _Tp> class Mat_ : public Mat
+    {
+    public:
+        // ... some specific methods
+        // and
+        // no new extra fields
+    };
+
+..
+
+The class ``Mat_<_Tp>`` is a "thin" template wrapper on top of the ``Mat`` class. It does not have any extra data fields, and neither it nor ``Mat`` has any virtual methods, thus references or pointers to these two classes can be freely converted one to another. But do it with care, e.g.: ::
+
+    // create a 100x100 8-bit matrix
+    Mat M(100,100,CV_8U);
+    // this will compile fine. no data conversion will be done.
+    Mat_<float>& M1 = (Mat_<float>&)M;
+    // the program will likely crash at the statement below
+    M1(99,99) = 1.f;
+
+..
+
+While ``Mat`` is sufficient in most cases, ``Mat_`` can be more convenient if you use a lot of element access operations and if you know the matrix type at compile time.
+Note that ``Mat::at<_Tp>(int y, int x)`` and ``Mat_<_Tp>::operator ()(int y, int x)`` do absolutely the same thing and run at the same speed, but the latter is certainly shorter: ::
+
+    Mat_<double> M(20,20);
+    for(int i = 0; i < M.rows; i++)
+        for(int j = 0; j < M.cols; j++)
+            M(i,j) = 1./(i+j+1);
+    Mat E, V;
+    eigen(M,E,V);
+    cout << E.at<double>(0,0)/E.at<double>(M.rows-1,0);
+
+..
+
+*How to use Mat_ for multi-channel images/matrices?* This is simple - just pass ``Vec`` as the ``Mat_`` parameter: ::
+
+    // allocate a 320x240 color image and fill it with green (in RGB space)
+    Mat_<Vec3b> img(240, 320, Vec3b(0,255,0));
+    // now draw a diagonal white line
+    for(int i = 0; i < 100; i++)
+        img(i,i)=Vec3b(255,255,255);
+    // and now scramble the 2nd (red) channel of each pixel
+    for(int i = 0; i < img.rows; i++)
+        for(int j = 0; j < img.cols; j++)
+            img(i,j)[2] ^= (uchar)(i ^ j);
+
+..
+
+NAryMatIterator
+---------------
+
+n-ary multi-dimensional array iterator ::
+
+    class CV_EXPORTS NAryMatIterator
+    {
+    public:
+        //! the default constructor
+        NAryMatIterator();
+        //! the full constructor taking an arbitrary number of n-dim matrices
+        NAryMatIterator(const Mat** arrays, Mat* planes, int narrays=-1);
+        //! the separate iterator initialization method
+        void init(const Mat** arrays, Mat* planes, int narrays=-1);
+
+        //! proceeds to the next plane of every iterated matrix
+        NAryMatIterator& operator ++();
+        //! proceeds to the next plane of every iterated matrix (postfix increment operator)
+        NAryMatIterator operator ++(int);
+
+        ...
+        int nplanes; // the total number of planes
+    };
+
+..
+
+The class is used for the implementation of unary, binary and, generally, n-ary element-wise operations on multi-dimensional arrays. Some of the arguments of an n-ary function may be continuous arrays, some may not. It is possible to use conventional :ref:`MatIterator` 's for each array, but it can be a big overhead to increment all of the iterators after each small operation. That's where ``NAryMatIterator`` can be used. Using it, you can iterate through several matrices simultaneously as long as they have the same geometry (dimensionality and all the dimension sizes are the same). On each iteration ``it.planes[0]``, ``it.planes[1]``, ... will be the slices of the corresponding matrices.
+
+Here is an example of how you can compute a normalized and thresholded 3D color histogram (the plane iterator is named ``itN`` to avoid clashing with the pixel iterator ``it``): ::
+
+    void computeNormalizedColorHist(const Mat& image, Mat& hist, int N, double minProb)
+    {
+        const int histSize[] = {N, N, N};
+
+        // make sure that the histogram has a proper size and type
+        hist.create(3, histSize, CV_32F);
+
+        // and clear it
+        hist = Scalar(0);
+
+        // the loop below assumes that the image
+        // is 8-bit 3-channel, so let's check it.
+        CV_Assert(image.type() == CV_8UC3);
+        MatConstIterator_<Vec3b> it = image.begin<Vec3b>(),
+                                 it_end = image.end<Vec3b>();
+        for( ; it != it_end; ++it )
+        {
+            const Vec3b& pix = *it;
+            hist.at<float>(pix[0]*N/256, pix[1]*N/256, pix[2]*N/256) += 1.f;
+        }
+
+        minProb *= image.rows*image.cols;
+        Mat plane;
+        NAryMatIterator itN(&hist, &plane, 1);
+        double s = 0;
+        // iterate through the matrix. on each iteration
+        // itN.planes[*] (of type Mat) will be set to the current plane.
+        for(int p = 0; p < itN.nplanes; p++, ++itN)
+        {
+            threshold(itN.planes[0], itN.planes[0], minProb, 0, THRESH_TOZERO);
+            s += sum(itN.planes[0])[0];
+        }
+
+        s = 1./s;
+        itN = NAryMatIterator(&hist, &plane, 1);
+        for(int p = 0; p < itN.nplanes; p++, ++itN)
+            itN.planes[0] *= s;
+    }
+
+..
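+
+As a usage sketch, the function above could be invoked like this (the file name is just a placeholder): ::
+
+    Mat image = imread("picture.jpg", 1); // load an 8-bit 3-channel image
+    Mat hist;
+    // build an 8x8x8 bin histogram and zero the bins with probability < 0.001
+    computeNormalizedColorHist(image, hist, 8, 0.001);
+
+..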
+ + +SparseMat +--------- + + +Sparse n-dimensional array. + + + + +:: + + + + class SparseMat + { + public: + typedef SparseMatIterator iterator; + typedef SparseMatConstIterator const_iterator; + + // internal structure - sparse matrix header + struct Hdr + { + ... + }; + + // sparse matrix node - element of a hash table + struct Node + { + size_t hashval; + size_t next; + int idx[CV_MAX_DIM]; + }; + + ////////// constructors and destructor ////////// + // default constructor + SparseMat(); + // creates matrix of the specified size and type + SparseMat(int dims, const int* _sizes, int _type); + // copy constructor + SparseMat(const SparseMat& m); + // converts dense array to the sparse form, + // if try1d is true and matrix is a single-column matrix (Nx1), + // then the sparse matrix will be 1-dimensional. + SparseMat(const Mat& m, bool try1d=false); + // converts old-style sparse matrix to the new-style. + // all the data is copied, so that "m" can be safely + // deleted after the conversion + SparseMat(const CvSparseMat* m); + // destructor + ~SparseMat(); + + ///////// assignment operations /////////// + + // this is O(1) operation; no data is copied + SparseMat& operator = (const SparseMat& m); + // (equivalent to the corresponding constructor with try1d=false) + SparseMat& operator = (const Mat& m); + + // creates full copy of the matrix + SparseMat clone() const; + + // copy all the data to the destination matrix. + // the destination will be reallocated if needed. + void copyTo( SparseMat& m ) const; + // converts 1D or 2D sparse matrix to dense 2D matrix. + // If the sparse matrix is 1D, then the result will + // be a single-column matrix. + void copyTo( Mat& m ) const; + // converts arbitrary sparse matrix to dense matrix. + // multiplies all the matrix elements by the specified scalar + void convertTo( SparseMat& m, int rtype, double alpha=1 ) const; + // converts sparse matrix to dense matrix with optional type conversion and scaling. + // When rtype=-1, the destination element type will be the same + // as the sparse matrix element type. + // Otherwise rtype will specify the depth and + // the number of channels will remain the same is in the sparse matrix + void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const; + + // not used now + void assignTo( SparseMat& m, int type=-1 ) const; + + // reallocates sparse matrix. If it was already of the proper size and type, + // it is simply cleared with clear(), otherwise, + // the old matrix is released (using release()) and the new one is allocated. + void create(int dims, const int* _sizes, int _type); + // sets all the matrix elements to 0, which means clearing the hash table. + void clear(); + // manually increases reference counter to the header. + void addref(); + // decreses the header reference counter, when it reaches 0, + // the header and all the underlying data are deallocated. + void release(); + + // converts sparse matrix to the old-style representation. + // all the elements are copied. + operator CvSparseMat*() const; + // size of each element in bytes + // (the matrix nodes will be bigger because of + // element indices and other SparseMat::Node elements). 
+        size_t elemSize() const;
+        // elemSize()/channels()
+        size_t elemSize1() const;
+
+        // the same as in Mat
+        int type() const;
+        int depth() const;
+        int channels() const;
+
+        // returns the array of sizes and 0 if the matrix is not allocated
+        const int* size() const;
+        // returns i-th size (or 0)
+        int size(int i) const;
+        // returns the matrix dimensionality
+        int dims() const;
+        // returns the number of non-zero elements
+        size_t nzcount() const;
+
+        // compute element hash value from the element indices:
+        // 1D case
+        size_t hash(int i0) const;
+        // 2D case
+        size_t hash(int i0, int i1) const;
+        // 3D case
+        size_t hash(int i0, int i1, int i2) const;
+        // n-D case
+        size_t hash(const int* idx) const;
+
+        // low-level element-access functions,
+        // special variants for 1D, 2D, 3D cases and the generic one for the n-D case.
+        //
+        // return pointer to the matrix element.
+        // if the element is there (it's non-zero), the pointer to it is returned
+        // if it's not there and createMissing=false, NULL pointer is returned
+        // if it's not there and createMissing=true, then the new element
+        // is created and initialized with 0. Pointer to it is returned
+        // If the optional hashval pointer is not NULL, the element hash value is
+        // not computed, but *hashval is taken instead.
+        uchar* ptr(int i0, bool createMissing, size_t* hashval=0);
+        uchar* ptr(int i0, int i1, bool createMissing, size_t* hashval=0);
+        uchar* ptr(int i0, int i1, int i2, bool createMissing, size_t* hashval=0);
+        uchar* ptr(const int* idx, bool createMissing, size_t* hashval=0);
+
+        // higher-level element access functions:
+        // ref<_Tp>(i0,...[,hashval]) - equivalent to *(_Tp*)ptr(i0,...true[,hashval]).
+        //    always returns a valid reference to the element.
+        //    If it did not exist, it is created.
+        // find<_Tp>(i0,...[,hashval]) - equivalent to (const _Tp*)ptr(i0,...false[,hashval]).
+        //    returns pointer to the element or NULL pointer if the element is not there.
+        // value<_Tp>(i0,...[,hashval]) - equivalent to
+        //    { const _Tp* p = find<_Tp>(i0,...[,hashval]); return p ? *p : _Tp(); }
+        //    that is, 0 is returned when the element is not there.
+        // note that _Tp must match the actual matrix type -
+        // the functions do not do any on-the-fly type conversion
+
+        // 1D case
+        template<typename _Tp> _Tp& ref(int i0, size_t* hashval=0);
+        template<typename _Tp> _Tp value(int i0, size_t* hashval=0) const;
+        template<typename _Tp> const _Tp* find(int i0, size_t* hashval=0) const;
+
+        // 2D case
+        template<typename _Tp> _Tp& ref(int i0, int i1, size_t* hashval=0);
+        template<typename _Tp> _Tp value(int i0, int i1, size_t* hashval=0) const;
+        template<typename _Tp> const _Tp* find(int i0, int i1, size_t* hashval=0) const;
+
+        // 3D case
+        template<typename _Tp> _Tp& ref(int i0, int i1, int i2, size_t* hashval=0);
+        template<typename _Tp> _Tp value(int i0, int i1, int i2, size_t* hashval=0) const;
+        template<typename _Tp> const _Tp* find(int i0, int i1, int i2, size_t* hashval=0) const;
+
+        // n-D case
+        template<typename _Tp> _Tp& ref(const int* idx, size_t* hashval=0);
+        template<typename _Tp> _Tp value(const int* idx, size_t* hashval=0) const;
+        template<typename _Tp> const _Tp* find(const int* idx, size_t* hashval=0) const;
+
+        // erase the specified matrix element.
+        // When there is no such element, the methods do nothing
+        void erase(int i0, int i1, size_t* hashval=0);
+        void erase(int i0, int i1, int i2, size_t* hashval=0);
+        void erase(const int* idx, size_t* hashval=0);
+
+        // return the matrix iterators,
+        // pointing to the first sparse matrix element,
+        SparseMatIterator begin();
+        SparseMatConstIterator begin() const;
+        // ...
+        // ... or to the point after the last sparse matrix element
+        SparseMatIterator end();
+        SparseMatConstIterator end() const;
+
+        // and the template forms of the above methods.
+        // _Tp must match the actual matrix type.
+        template<typename _Tp> SparseMatIterator_<_Tp> begin();
+        template<typename _Tp> SparseMatConstIterator_<_Tp> begin() const;
+        template<typename _Tp> SparseMatIterator_<_Tp> end();
+        template<typename _Tp> SparseMatConstIterator_<_Tp> end() const;
+
+        // return value stored in the sparse matrix node
+        template<typename _Tp> _Tp& value(Node* n);
+        template<typename _Tp> const _Tp& value(const Node* n) const;
+
+        ////////////// some internal-use methods ///////////////
+        ...
+
+        // pointer to the sparse matrix header
+        Hdr* hdr;
+    };
+
+..
+
+The class ``SparseMat`` represents multi-dimensional sparse numerical arrays. Such a sparse array can store elements of any type that :ref:`Mat` can store. "Sparse" means that only non-zero elements are stored (though, as a result of operations on a sparse matrix, some of its stored elements can actually become 0. It's up to the user to detect such elements and delete them using ``SparseMat::erase``). The non-zero elements are stored in a hash table that grows when it's filled enough, so that the search time is O(1) on average (regardless of whether the element is there or not). Elements can be accessed using the following methods:
+
+#.
+   query operations ( ``SparseMat::ptr`` and the higher-level ``SparseMat::ref``, ``SparseMat::value`` and ``SparseMat::find`` ), e.g.: ::
+
+       const int dims = 5;
+       int size[] = {10, 10, 10, 10, 10};
+       SparseMat sparse_mat(dims, size, CV_32F);
+       for(int i = 0; i < 1000; i++)
+       {
+           int idx[dims];
+           for(int k = 0; k < dims; k++)
+               idx[k] = rand()%size[k];
+           sparse_mat.ref<float>(idx) += 1.f;
+       }
+
+   ..
+
+#.
+   sparse matrix iterators. Like :ref:`Mat` iterators and unlike :ref:`MatND` iterators, the sparse matrix iterators are STL-style, that is, the iteration loop is familiar to C++ users: ::
+
+       // prints elements of a sparse floating-point matrix
+       // and the sum of elements.
+       SparseMatConstIterator_<float>
+           it = sparse_mat.begin<float>(),
+           it_end = sparse_mat.end<float>();
+       double s = 0;
+       int dims = sparse_mat.dims();
+       for(; it != it_end; ++it)
+       {
+           // print element indices and the element value
+           const Node* n = it.node();
+           printf("(");
+           for(int i = 0; i < dims; i++)
+               printf("%3d%c", n->idx[i], i < dims-1 ? ',' : ')');
+           printf(": %f\n", *it);
+           s += *it;
+       }
+       printf("Element sum is %g\n", s);
+
+   ..
+
+   If you run this loop, you will notice that elements are enumerated in no logical order (lexicographical etc.), they come in the same order as they are stored in the hash table, i.e. semi-randomly. You may collect pointers to the nodes and sort them to get the proper ordering. Note, however, that pointers to the nodes may become invalid when you add more elements to the matrix; this is because of possible buffer reallocation.
+
+#.
+   a combination of the above 2 methods when you need to process 2 or more sparse matrices simultaneously, e.g.
+   this is how you can compute the unnormalized cross-correlation of 2 floating-point sparse matrices: ::
+
+       double cross_corr(const SparseMat& a, const SparseMat& b)
+       {
+           const SparseMat *_a = &a, *_b = &b;
+           // if b contains fewer elements than a,
+           // it's faster to iterate through b
+           if(_a->nzcount() > _b->nzcount())
+               std::swap(_a, _b);
+           SparseMatConstIterator_<float> it = _a->begin<float>(),
+                                          it_end = _a->end<float>();
+           double ccorr = 0;
+           for(; it != it_end; ++it)
+           {
+               // take the next element from the first matrix
+               float avalue = *it;
+               const Node* anode = it.node();
+               // and try to find an element with the same index in the second matrix.
+               // since the hash value depends only on the element index,
+               // we reuse the hash value stored in the node
+               float bvalue = _b->value<float>(anode->idx,&anode->hashval);
+               ccorr += avalue*bvalue;
+           }
+           return ccorr;
+       }
+
+   ..
+
+SparseMat\_
+-----------
+
+Template sparse n-dimensional array class derived from :ref:`SparseMat` ::
+
+    template<typename _Tp> class SparseMat_ : public SparseMat
+    {
+    public:
+        typedef SparseMatIterator_<_Tp> iterator;
+        typedef SparseMatConstIterator_<_Tp> const_iterator;
+
+        // constructors;
+        // the created matrix will have data type = DataType<_Tp>::type
+        SparseMat_();
+        SparseMat_(int dims, const int* _sizes);
+        SparseMat_(const SparseMat& m);
+        SparseMat_(const SparseMat_& m);
+        SparseMat_(const Mat& m);
+        SparseMat_(const CvSparseMat* m);
+        // assignment operators; data type conversion is done when necessary
+        SparseMat_& operator = (const SparseMat& m);
+        SparseMat_& operator = (const SparseMat_& m);
+        SparseMat_& operator = (const Mat& m);
+        SparseMat_& operator = (const MatND& m);
+
+        // equivalent to the corresponding parent class methods
+        SparseMat_ clone() const;
+        void create(int dims, const int* _sizes);
+        operator CvSparseMat*() const;
+
+        // overridden methods that do extra checks for the data type
+        int type() const;
+        int depth() const;
+        int channels() const;
+
+        // more convenient element access operations.
+        // ref() is retained (but the <_Tp> specification is not needed anymore);
+        // operator () is equivalent to SparseMat::value<_Tp>
+        _Tp& ref(int i0, size_t* hashval=0);
+        _Tp operator()(int i0, size_t* hashval=0) const;
+        _Tp& ref(int i0, int i1, size_t* hashval=0);
+        _Tp operator()(int i0, int i1, size_t* hashval=0) const;
+        _Tp& ref(int i0, int i1, int i2, size_t* hashval=0);
+        _Tp operator()(int i0, int i1, int i2, size_t* hashval=0) const;
+        _Tp& ref(const int* idx, size_t* hashval=0);
+        _Tp operator()(const int* idx, size_t* hashval=0) const;
+
+        // iterators
+        SparseMatIterator_<_Tp> begin();
+        SparseMatConstIterator_<_Tp> begin() const;
+        SparseMatIterator_<_Tp> end();
+        SparseMatConstIterator_<_Tp> end() const;
+    };
+
+..
+
+``SparseMat_`` is a thin wrapper on top of :ref:`SparseMat`, made in the same way as ``Mat_``. It simplifies notation of some operations, and that's it. ::
+
+    int sz[] = {10, 20, 30};
+    SparseMat_<double> M(3, sz);
+    ...
+    M.ref(1, 2, 3) = M(4, 5, 6) + M(7, 8, 9);
+
+..
+
diff --git a/modules/core/doc/clustering.rst b/modules/core/doc/clustering.rst
new file mode 100644
index 000000000..0aa86a57b
--- /dev/null
+++ b/modules/core/doc/clustering.rst
@@ -0,0 +1,138 @@
+Clustering
+==========
+
+.. highlight:: cpp
+
+.. index:: kmeans
+
+cv::kmeans
+----------
+
+.. cfunction:: double kmeans( const Mat\& samples, int clusterCount, Mat\& labels, TermCriteria termcrit, int attempts, int flags, Mat* centers )
+
+    Finds the centers of clusters and groups the input samples around the clusters.
+
+    :param samples: Floating-point matrix of input samples, one row per sample
+
+    :param clusterCount: The number of clusters to split the set by
+
+    :param labels: The input/output integer array that will store the cluster indices for every sample
+
+    :param termcrit: Specifies the maximum number of iterations and/or accuracy (distance the centers can move by between subsequent iterations)
+
+    :param attempts: How many times the algorithm is executed using different initial labelings. The algorithm returns the labels that yield the best compactness (see the last function parameter)
+
+    :param flags: It can take the following values:
+
+        * **KMEANS_RANDOM_CENTERS** Random initial centers are selected in each attempt
+
+        * **KMEANS_PP_CENTERS** Use the kmeans++ center initialization by Arthur and Vassilvitskii
+
+        * **KMEANS_USE_INITIAL_LABELS** During the first (and possibly the only) attempt, the function uses the user-supplied labels instead of computing them from the initial centers. For the second and further attempts, the function will use the random or semi-random centers (use one of the ``KMEANS_*_CENTERS`` flags to specify the exact method)
+
+    :param centers: The output matrix of the cluster centers, one row per each cluster center
+
+The function ``kmeans`` implements a k-means algorithm that finds the centers of ``clusterCount`` clusters and groups the input samples around the clusters. On output, :math:`\texttt{labels}_i` contains a 0-based cluster index for the sample stored in the :math:`i^{th}` row of the ``samples`` matrix.
+
+The function returns the compactness measure, which is computed as
+
+.. math::
+
+    \sum _i  \| \texttt{samples} _i -  \texttt{centers} _{ \texttt{labels} _i} \| ^2
+
+after every attempt; the best (minimum) value is chosen and the corresponding labels and the compactness value are returned by the function. Basically, the user can use only the core of the function, set the number of attempts to 1, initialize labels each time using some custom algorithm, pass them with the ( ``flags`` = ``KMEANS_USE_INITIAL_LABELS`` ) flag, and then choose the best (most-compact) clustering.
+
+.. index:: partition
+
+cv::partition
+-------------
+
+.. cfunction:: template<typename _Tp, class _EqPredicate> int partition( const vector<_Tp>\& vec, vector<int>\& labels, _EqPredicate predicate=_EqPredicate())
+
+    Splits an element set into equivalency classes.
+
+    :param vec: The set of elements stored as a vector
+
+    :param labels: The output vector of labels; will contain as many elements as ``vec``. Each label ``labels[i]`` is a 0-based cluster index of ``vec[i]``
+
+    :param predicate: The equivalence predicate (i.e. a pointer to a boolean function of two arguments, or an instance of a class that has the method ``bool operator()(const _Tp& a, const _Tp& b)`` ). The predicate returns true when the elements are certainly in the same class, and false if they may or may not be in the same class
+
+The generic function ``partition`` implements an :math:`O(N^2)` algorithm for splitting a set of :math:`N` elements into one or more equivalency classes, as described in http://en.wikipedia.org/wiki/Disjoint-set_data_structure .
The function +returns the number of equivalency classes. + + diff --git a/modules/core/doc/core.rst b/modules/core/doc/core.rst new file mode 100644 index 000000000..a8e3ea97f --- /dev/null +++ b/modules/core/doc/core.rst @@ -0,0 +1,15 @@ +****************** +Core Functionality +****************** + +.. toctree:: + :maxdepth: 2 + + basic_structures + operations_on_arrays + dynamic_structures + drawing_functions + xml_yaml_persistence + clustering + utility_and_system_functions_and_macros + diff --git a/modules/core/doc/drawing_functions.rst b/modules/core/doc/drawing_functions.rst new file mode 100644 index 000000000..ee1057f10 --- /dev/null +++ b/modules/core/doc/drawing_functions.rst @@ -0,0 +1,770 @@ +Drawing Functions +================= + +.. highlight:: cpp + + +Drawing functions work with matrices/images of arbitrary depth. +The boundaries of the shapes can be rendered with antialiasing (implemented only for 8-bit images for now). +All the functions include the parameter color that uses a rgb value (that may be constructed +with +``CV_RGB`` +or the :ref:`Scalar` constructor +) for color +images and brightness for grayscale images. For color images the order channel +is normally +*Blue, Green, Red* +, this is what +:func:`imshow` +, +:func:`imread` +and +:func:`imwrite` +expect +, so if you form a color using +:ref:`Scalar` +constructor, it should look like: + + +.. math:: + + \texttt{Scalar} (blue \_ component, green \_ component, red \_ component[, alpha \_ component]) + + +If you are using your own image rendering and I/O functions, you can use any channel ordering, the drawing functions process each channel independently and do not depend on the channel order or even on the color space used. The whole image can be converted from BGR to RGB or to a different color space using +:func:`cvtColor` +. + +If a drawn figure is partially or completely outside the image, the drawing functions clip it. Also, many drawing functions can handle pixel coordinates specified with sub-pixel accuracy, that is, the coordinates can be passed as fixed-point numbers, encoded as integers. The number of fractional bits is specified by the +``shift`` +parameter and the real point coordinates are calculated as +:math:`\texttt{Point}(x,y)\rightarrow\texttt{Point2f}(x*2^{-shift},y*2^{-shift})` +. This feature is especially effective wehn rendering antialiased shapes. + +Also, note that the functions do not support alpha-transparency - when the target image is 4-channnel, then the +``color[3]`` +is simply copied to the repainted pixels. Thus, if you want to paint semi-transparent shapes, you can paint them in a separate buffer and then blend it with the main image. + + +.. index:: circle + + +cv::circle +---------- + +`id=0.143685141364 Comments from the Wiki `__ + + + + +.. cfunction:: void circle(Mat\& img, Point center, int radius, const Scalar\& color, int thickness=1, int lineType=8, int shift=0) + + Draws a circle + + + + + + + :param img: Image where the circle is drawn + + + :param center: Center of the circle + + + :param radius: Radius of the circle + + + :param color: Circle color + + + :param thickness: Thickness of the circle outline if positive; negative thickness means that a filled circle is to be drawn + + + :param lineType: Type of the circle boundary, see :func:`line` description + + + :param shift: Number of fractional bits in the center coordinates and radius value + + + +The function +``circle`` +draws a simple or filled circle with a +given center and radius. + + +.. 
index:: clipLine
+
+cv::clipLine
+------------
+
+.. cfunction:: bool clipLine(Size imgSize, Point\& pt1, Point\& pt2)
+
+.. cfunction:: bool clipLine(Rect imgRect, Point\& pt1, Point\& pt2)
+
+    Clips the line against the image rectangle
+
+    :param imgSize: The image size; the image rectangle will be ``Rect(0, 0, imgSize.width, imgSize.height)``
+
+    :param imgRect: The image rectangle
+
+    :param pt1: The first line point
+
+    :param pt2: The second line point
+
+The functions ``clipLine`` calculate a part of the line segment which is entirely within the specified rectangle. They return ``false`` if the line segment is completely outside the rectangle and ``true`` otherwise.
+
+.. index:: ellipse
+
+cv::ellipse
+-----------
+
+.. cfunction:: void ellipse(Mat\& img, Point center, Size axes, double angle, double startAngle, double endAngle, const Scalar\& color, int thickness=1, int lineType=8, int shift=0)
+
+.. cfunction:: void ellipse(Mat\& img, const RotatedRect\& box, const Scalar\& color, int thickness=1, int lineType=8)
+
+    Draws a simple or thick elliptic arc, or fills an ellipse sector.
+
+    :param img: The image
+
+    :param center: Center of the ellipse
+
+    :param axes: Length of the ellipse axes
+
+    :param angle: The ellipse rotation angle in degrees
+
+    :param startAngle: Starting angle of the elliptic arc in degrees
+
+    :param endAngle: Ending angle of the elliptic arc in degrees
+
+    :param box: Alternative ellipse representation via a :ref:`RotatedRect`, i.e. the function draws an ellipse inscribed in the rotated rectangle
+
+    :param color: Ellipse color
+
+    :param thickness: Thickness of the ellipse arc outline if positive, otherwise this indicates that a filled ellipse sector is to be drawn
+
+    :param lineType: Type of the ellipse boundary, see the :func:`line` description
+
+    :param shift: Number of fractional bits in the center coordinates and axes' values
+
+The functions ``ellipse`` with fewer parameters draw an ellipse outline, a filled ellipse, an elliptic arc or a filled ellipse sector. A piecewise-linear curve is used to approximate the elliptic arc boundary. If you need more control of the ellipse rendering, you can retrieve the curve using :func:`ellipse2Poly` and then render it with :func:`polylines` or fill it with :func:`fillPoly`. If you use the first variant of the function and want to draw the whole ellipse, not an arc, pass ``startAngle=0`` and ``endAngle=360``. The picture below explains the meaning of the parameters.
+
+Parameters of Elliptic Arc
+
+.. image:: ../../pics/ellipse.png
+
+.. index:: ellipse2Poly
+
+cv::ellipse2Poly
+----------------
+
+.. cfunction:: void ellipse2Poly( Point center, Size axes, int angle, int startAngle, int endAngle, int delta, vector<Point>\& pts )
+
+    Approximates an elliptic arc with a polyline
+
+    :param center: Center of the arc
+
+    :param axes: Half-sizes of the arc. See :func:`ellipse`
+
+    :param angle: Rotation angle of the ellipse in degrees. See :func:`ellipse`
+
+    :param startAngle: Starting angle of the elliptic arc in degrees
+
+    :param endAngle: Ending angle of the elliptic arc in degrees
+
+    :param delta: Angle between the subsequent polyline vertices. It defines the approximation accuracy.
+    :param pts: The output vector of polyline vertices
+
+The function ``ellipse2Poly`` computes the vertices of a polyline that approximates the specified elliptic arc. It is used by :func:`ellipse`.
+
+.. index:: fillConvexPoly
+
+cv::fillConvexPoly
+------------------
+
+.. cfunction:: void fillConvexPoly(Mat\& img, const Point* pts, int npts, const Scalar\& color, int lineType=8, int shift=0)
+
+    Fills a convex polygon.
+
+    :param img: Image
+
+    :param pts: The polygon vertices
+
+    :param npts: The number of polygon vertices
+
+    :param color: Polygon color
+
+    :param lineType: Type of the polygon boundaries, see the :func:`line` description
+
+    :param shift: The number of fractional bits in the vertex coordinates
+
+The function ``fillConvexPoly`` draws a filled convex polygon. This function is much faster than the function ``fillPoly`` and can fill not only convex polygons but any monotonic polygon without self-intersections, i.e., a polygon whose contour intersects every horizontal line (scan line) twice at the most (though, its top-most and/or its bottom edge could be horizontal).
+
+.. index:: fillPoly
+
+cv::fillPoly
+------------
+
+.. cfunction:: void fillPoly(Mat\& img, const Point** pts, const int* npts, int ncontours, const Scalar\& color, int lineType=8, int shift=0, Point offset=Point() )
+
+    Fills the area bounded by one or more polygons
+
+    :param img: Image
+
+    :param pts: Array of polygons, each represented as an array of points
+
+    :param npts: The array of polygon vertex counters
+
+    :param ncontours: The number of contours that bind the filled region
+
+    :param color: Polygon color
+
+    :param lineType: Type of the polygon boundaries, see the :func:`line` description
+
+    :param shift: The number of fractional bits in the vertex coordinates
+
+The function ``fillPoly`` fills an area bounded by several polygonal contours. The function can fill complex areas, for example, areas with holes, contours with self-intersections (some of their parts), and so forth.
+
+.. index:: getTextSize
+
+cv::getTextSize
+---------------
+
+.. cfunction:: Size getTextSize(const string\& text, int fontFace, double fontScale, int thickness, int* baseLine)
+
+    Calculates the width and height of a text string.
+
+    :param text: The input text string
+
+    :param fontFace: The font to use; see :func:`putText`
+
+    :param fontScale: The font scale; see :func:`putText`
+
+    :param thickness: The thickness of lines used to render the text; see :func:`putText`
+
+    :param baseLine: The output parameter - y-coordinate of the baseline relative to the bottom-most text point
+
+The function ``getTextSize`` calculates and returns the size of the box that contains the specified text.
+That is, the following code will render some text, the tight box surrounding it and the baseline: + + + + +:: + + + + // Use "y" to show that the baseLine is about + string text = "Funny text inside the box"; + int fontFace = FONT_HERSHEY_SCRIPT_SIMPLEX; + double fontScale = 2; + int thickness = 3; + + Mat img(600, 800, CV_8UC3, Scalar::all(0)); + + int baseline=0; + Size textSize = getTextSize(text, fontFace, + fontScale, thickness, &baseline); + baseline += thickness; + + // center the text + Point textOrg((img.cols - textSize.width)/2, + (img.rows + textSize.height)/2); + + // draw the box + rectangle(img, textOrg + Point(0, baseline), + textOrg + Point(textSize.width, -textSize.height), + Scalar(0,0,255)); + // ... and the baseline first + line(img, textOrg + Point(0, thickness), + textOrg + Point(textSize.width, thickness), + Scalar(0, 0, 255)); + + // then put the text itself + putText(img, text, textOrg, fontFace, fontScale, + Scalar::all(255), thickness, 8); + + +.. + + +.. index:: line + + +cv::line +-------- + +`id=0.645160739861 Comments from the Wiki `__ + + + + +.. cfunction:: void line(Mat\& img, Point pt1, Point pt2, const Scalar\& color, int thickness=1, int lineType=8, int shift=0) + + Draws a line segment connecting two points + + + + + + + :param img: The image + + + :param pt1: First point of the line segment + + + :param pt2: Second point of the line segment + + + :param color: Line color + + + :param thickness: Line thickness + + + :param lineType: Type of the line: + + + + * **8** (or omitted) 8-connected line. + + + * **4** 4-connected line. + + + * **CV_AA** antialiased line. + + + + + + :param shift: Number of fractional bits in the point coordinates + + + +The function +``line`` +draws the line segment between +``pt1`` +and +``pt2`` +points in the image. The line is +clipped by the image boundaries. For non-antialiased lines +with integer coordinates the 8-connected or 4-connected Bresenham +algorithm is used. Thick lines are drawn with rounding endings. +Antialiased lines are drawn using Gaussian filtering. To specify +the line color, the user may use the macro +``CV_RGB(r, g, b)`` +. + + + +.. index:: LineIterator + +.. _LineIterator: + +LineIterator +------------ + +`id=0.913176469223 Comments from the Wiki `__ + +.. ctype:: LineIterator + + + +Class for iterating pixels on a raster line + + + + +:: + + + + class LineIterator + { + public: + // creates iterators for the line connecting pt1 and pt2 + // the line will be clipped on the image boundaries + // the line is 8-connected or 4-connected + // If leftToRight=true, then the iteration is always done + // from the left-most point to the right most, + // not to depend on the ordering of pt1 and pt2 parameters + LineIterator(const Mat& img, Point pt1, Point pt2, + int connectivity=8, bool leftToRight=false); + // returns pointer to the current line pixel + uchar* operator *(); + // move the iterator to the next pixel + LineIterator& operator ++(); + LineIterator operator ++(int); + + // internal state of the iterator + uchar* ptr; + int err, count; + int minusDelta, plusDelta; + int minusStep, plusStep; + }; + + +.. + +The class +``LineIterator`` +is used to get each pixel of a raster line. It can be treated as versatile implementation of the Bresenham algorithm, where you can stop at each pixel and do some extra processing, for example, grab pixel values along the line, or draw a line with some effect (e.g. with XOR operation). + +The number of pixels along the line is store in +``LineIterator::count`` +. 
+
+::
+
+    // grabs pixels along the line (pt1, pt2)
+    // from an 8-bit 3-channel image to the buffer
+    LineIterator it(img, pt1, pt2, 8);
+    vector<Vec3b> buf(it.count);
+
+    for(int i = 0; i < it.count; i++, ++it)
+        buf[i] = *(const Vec3b*)*it;
+
+..
+
+.. index:: rectangle
+
+cv::rectangle
+-------------
+
+.. cfunction:: void rectangle(Mat\& img, Point pt1, Point pt2, const Scalar\& color, int thickness=1, int lineType=8, int shift=0)
+
+    Draws a simple, thick, or filled up-right rectangle.
+
+    :param img: Image
+
+    :param pt1: One of the rectangle's vertices
+
+    :param pt2: The rectangle vertex opposite to ``pt1``
+
+    :param color: Rectangle color or brightness (grayscale image)
+
+    :param thickness: Thickness of lines that make up the rectangle. Negative values, e.g. ``CV_FILLED``, mean that the function has to draw a filled rectangle.
+
+    :param lineType: Type of the line, see the :func:`line` description
+
+    :param shift: Number of fractional bits in the point coordinates
+
+The function ``rectangle`` draws a rectangle outline or a filled rectangle whose two opposite corners are ``pt1`` and ``pt2``.
+
+.. index:: polylines
+
+cv::polylines
+-------------
+
+.. cfunction:: void polylines(Mat\& img, const Point** pts, const int* npts, int ncontours, bool isClosed, const Scalar\& color, int thickness=1, int lineType=8, int shift=0 )
+
+    Draws several polygonal curves
+
+    :param img: The image
+
+    :param pts: Array of polygonal curves
+
+    :param npts: Array of polygon vertex counters
+
+    :param ncontours: The number of curves
+
+    :param isClosed: Indicates whether the drawn polylines are closed or not. If they are closed, the function draws the line from the last vertex of each curve to its first vertex
+
+    :param color: Polyline color
+
+    :param thickness: Thickness of the polyline edges
+
+    :param lineType: Type of the line segments, see the :func:`line` description
+
+    :param shift: The number of fractional bits in the vertex coordinates
+
+The function ``polylines`` draws one or more polygonal curves.
+
+.. index:: putText
+
+cv::putText
+-----------
+
+.. cfunction:: void putText( Mat\& img, const string\& text, Point org, int fontFace, double fontScale, Scalar color, int thickness=1, int lineType=8, bool bottomLeftOrigin=false )
+
+    Draws a text string
+
+    :param img: The image
+
+    :param text: The text string to be drawn
+
+    :param org: The bottom-left corner of the text string in the image
+
+    :param fontFace: The font type, one of ``FONT_HERSHEY_SIMPLEX``, ``FONT_HERSHEY_PLAIN``, ``FONT_HERSHEY_DUPLEX``, ``FONT_HERSHEY_COMPLEX``, ``FONT_HERSHEY_TRIPLEX``, ``FONT_HERSHEY_COMPLEX_SMALL``, ``FONT_HERSHEY_SCRIPT_SIMPLEX`` or ``FONT_HERSHEY_SCRIPT_COMPLEX``, where each of the font id's can be combined with ``FONT_HERSHEY_ITALIC`` to get the slanted letters.
+
+    :param fontScale: The font scale factor that is multiplied by the font-specific base size
+
+    :param color: The text color
+
+    :param thickness: Thickness of the lines used to draw the text
+
+    :param lineType: The line type; see ``line`` for details
+
+    :param bottomLeftOrigin: When true, the image data origin is at the bottom-left corner, otherwise it's at the top-left corner
+
+The function ``putText`` renders the specified text string in the image.
+Symbols that can not be rendered using the specified font are +replaced by question marks. See +:func:`getTextSize` +for a text rendering code example. + diff --git a/modules/core/doc/dynamic_structures.rst b/modules/core/doc/dynamic_structures.rst new file mode 100644 index 000000000..4e088e772 --- /dev/null +++ b/modules/core/doc/dynamic_structures.rst @@ -0,0 +1,6 @@ +Dynamic Structures +================== + +.. highlight:: cpp + + diff --git a/modules/core/doc/intro.rst b/modules/core/doc/intro.rst new file mode 100644 index 000000000..c40530715 --- /dev/null +++ b/modules/core/doc/intro.rst @@ -0,0 +1,248 @@ +************ +Introduction +************ + +.. highlight:: cpp + +OpenCV (Open Source Computer Vision Library: http://opencv.willowgarage.com/wiki/) is open-source BSD-licensed library that includes several hundreds computer vision algorithms. It is very popular in the Computer Vision community. Some people call it “de-facto standard” API. The document aims to specify the stable parts of the library, as well as some abstract interfaces for high-level interfaces, with the final goal to make it an official standard. + +API specifications in the document use the standard C++ (http://www.open-std.org/jtc1/sc22/wg21/) and the standard C++ library. + +The current OpenCV implementation has a modular structure (i.e. the binary package includes several shared or static libraries), where we have: + + * **core** - the compact module defining basic data structures, including the dense multi-dimensional array ``Mat``, and basic functions, used by all other modules. + * **imgproc** - image processing module that includes linear and non-linear image filtering, geometrical image transformations (resize, affine and perspective warping, generic table-based remap), color space conversion, histograms etc. + * **video** - video analysis module that includes motion estimation, background subtraction and object tracking algorithms. + * **calib3d** - basic multiple-view geometry algorithms, single and stereo camera calibration, object pose estimation, stereo correspondence algorithms, elements of 3d reconstruction. + * **features2d** - salient feature detectors, descriptors and the descriptor matchers. + * **objdetect** - detection of objects, instances of the predefined classes (e.g faces, eyes, mugs, people, cars etc.) + * **highgui** - easy-to-use interface to video capturing, image and video codecs APIs, as well as simple UI capabilities. + * **gpu** - GPU-accelerated algorithms from different OpenCV modules. + * ... some other helper modules, such as FLANN and Google test wrappers, Python bindings etc. + +Although the alternative implementations of the proposed standard may be structured differently, the proposed standard draft is organized by the functionality groups that reflect the decomposition of the library by modules. + +Below are the other main concepts of the OpenCV API, implied everywhere in the document. + +The API Concepts +================ + +*"cv"* namespace +---------------- + +All the OpenCV classes and functions are placed into *"cv"* namespace. Therefore, to access this functionality from your code, use +``cv::`` specifier or ``using namespace cv;`` directive: + +.. code-block:: c + + #include "opencv2/core/core.hpp" + ... + cv::Mat H = cv::findHomography(points1, points2, CV_RANSAC, 5); + ... + +or + +:: + + #include "opencv2/core/core.hpp" + using namespace cv; + ... + Mat H = findHomography(points1, points2, CV_RANSAC, 5 ); + ... 
+
+It is probable that some of the current or future OpenCV external names conflict with STL or other libraries; in this case, use explicit namespace specifiers to resolve the name conflicts:
+
+::
+
+    Mat a(100, 100, CV_32F);
+    randu(a, Scalar::all(1), Scalar::all(std::rand()));
+    cv::log(a, a);
+    a /= std::log(2.);
+
+
+Automatic Memory Management
+---------------------------
+
+OpenCV handles all the memory automatically.
+
+First of all, ``std::vector``, ``Mat`` and other data structures used by the functions and methods have destructors that deallocate the underlying memory buffers when needed.
+
+Secondly, in the case of ``Mat`` this *when needed* means that the destructors do not always deallocate the buffers, they take into account possible data sharing. That is, the destructor decrements the reference counter associated with the matrix data buffer, and the buffer is deallocated if and only if the reference counter reaches zero, that is, when no other structures refer to the same buffer. Similarly, when a ``Mat`` instance is copied, no actual data is copied; instead, the reference counter associated with it is incremented to memorize that there is another owner of the same data. There is also the ``Mat::clone`` method that creates a full copy of the matrix data. Here is an example
+
+::
+
+    // create a big 8Mb matrix
+    Mat A(1000, 1000, CV_64F);
+
+    // create another header for the same matrix;
+    // this is an instant operation, regardless of the matrix size.
+    Mat B = A;
+    // create another header for the 3-rd row of A; no data is copied either
+    Mat C = B.row(3);
+    // now create a separate copy of the matrix
+    Mat D = B.clone();
+    // copy the 5-th row of B to C, that is, copy the 5-th row of A
+    // to the 3-rd row of A.
+    B.row(5).copyTo(C);
+    // now let A and D share the data; after that the modified version
+    // of A is still referenced by B and C.
+    A = D;
+    // now make B an empty matrix (which references no memory buffers),
+    // but the modified version of A will still be referenced by C,
+    // despite that C is just a single row of the original A
+    B.release();
+
+    // finally, make a full copy of C. In result, the big modified
+    // matrix will be deallocated, since it's not referenced by anyone
+    C = C.clone();
+
+Therefore, the use of ``Mat`` and other basic structures is simple. But what about high-level classes or even user data types that have been created without automatic memory management in mind? For them OpenCV offers the ``Ptr<>`` template class, which is similar to ``std::shared_ptr`` from C++ TR1. That is, instead of using plain pointers::
+
+    T* ptr = new T(...);
+
+one can use::
+
+    Ptr<T> ptr = new T(...);
+
+That is, ``Ptr<T> ptr`` encapsulates a pointer to a ``T`` instance and a reference counter associated with the pointer. See the ``Ptr`` description for details.
+
+
+.. todo::
+
+    Should we replace Ptr<> with the semi-standard shared_ptr<>?
+
+Automatic Allocation of the Output Data
+---------------------------------------
+
+OpenCV not only deallocates the memory automatically, it can also allocate memory for the output function parameters automatically most of the time. That is, if a function has one or more input arrays ( ``cv::Mat`` instances) and some output arrays, the output arrays are automatically allocated or reallocated. The size and type of the output arrays are determined from the input arrays' size and type. If needed, the functions take extra parameters that help to figure out the output array properties.
+
+Here is an example: ::
+
+    #include "cv.h"
+    #include "highgui.h"
+
+    using namespace cv;
+
+    int main(int, char**)
+    {
+        VideoCapture cap(0);
+        if(!cap.isOpened()) return -1;
+
+        Mat frame, edges;
+        namedWindow("edges",1);
+        for(;;)
+        {
+            cap >> frame;
+            cvtColor(frame, edges, CV_BGR2GRAY);
+            GaussianBlur(edges, edges, Size(7,7), 1.5, 1.5);
+            Canny(edges, edges, 0, 30, 3);
+            imshow("edges", edges);
+            if(waitKey(30) >= 0) break;
+        }
+        return 0;
+    }
+..
+
+The array ``frame`` is automatically allocated by the ``>>`` operator, since the video frame resolution and bit-depth are known to the video capturing module. The array ``edges`` is automatically allocated by the ``cvtColor`` function. It will have the same size and bit-depth as the input array, and the number of channels will be 1, because we passed the color conversion code ``CV_BGR2GRAY`` (that means color to grayscale conversion). Note that ``frame`` and ``edges`` will be allocated only once during the first execution of the loop body, since all the next video frames will have the same resolution (unless the user somehow changes the video resolution, in which case the arrays will be automatically reallocated).
+
+The key component of this technology is the method ``Mat::create``. It takes the desired array size and type. If the array already has the specified size and type, the method does nothing. Otherwise, it releases the previously allocated data, if any (this part involves decrementing the reference counter and comparing it with zero), and then allocates a new buffer of the required size. Most functions call this ``Mat::create`` method for each output array, and this is how the automatic output data allocation is implemented.
+
+Some notable exceptions from this scheme are ``cv::mixChannels``, ``cv::RNG::fill`` and a few other functions and methods. They are not able to allocate the output array, so the user has to do that in advance.
+
+
+Saturation Arithmetics
+----------------------
+
+As a computer vision library, OpenCV deals a lot with image pixels that are often encoded in a compact 8- or 16-bit per channel form and thus have a limited value range. Furthermore, certain operations on images, like color space conversions, brightness/contrast adjustments, sharpening, complex interpolation (bi-cubic, Lanczos) can produce values out of the available range. If we just store the lowest 8 (16) bits of the result, that will result in some visual artifacts and may affect the further image analysis. To solve this problem, we use so-called *saturation* arithmetics, e.g. to store ``r``, the result of some operation, to an 8-bit image, we find the nearest value within the 0..255 range:
+
+.. math::
+
+    I(x,y)= \min ( \max (\textrm{round}(r), 0), 255)
+
+Similar rules are applied to 8-bit signed and 16-bit signed and unsigned types. This semantics is used everywhere in the library. In C++ code it is done using the ``saturate_cast<>`` functions that resemble the standard C++ cast operations. Here is the implementation of the above formula::
+
+    I.at<uchar>(y, x) = saturate_cast<uchar>(r);
+
+where ``cv::uchar`` is OpenCV's 8-bit unsigned integer type. In optimized SIMD code we use specialized instructions, like SSE2's ``paddusb``, ``packuswb`` etc. to achieve exactly the same behavior as in the C++ code.
+
+
+Fixed Pixel Types. Limited Use of Templates
+-------------------------------------------
+
+Templates are a great feature of C++ that enables implementation of very powerful, efficient and yet safe data structures and algorithms.
+However, the extensive use of templates may dramatically increase compile time and code size. Besides, it is difficult to separate interface and implementation when templates are used exclusively, which is fine for basic algorithms, but not good for computer vision libraries, where a single algorithm may span thousands of lines of code. Because of this, and also to simplify development of bindings for other languages, like Python, Java, Matlab, that do not have templates at all or have limited template capabilities, we prefer polymorphism and runtime dispatching over templates. In the places where runtime dispatching would be too slow (like pixel access operators), impossible (generic Ptr<> implementation) or just very inconvenient (saturate_cast<>()) we introduce small template classes, methods and functions. Everywhere else we prefer not to use templates.
+
+Because of this, there is a limited fixed set of primitive data types that the library can operate on. That is, array elements should have one of the following types:
+
+    * 8-bit unsigned integer (uchar)
+    * 8-bit signed integer (schar)
+    * 16-bit unsigned integer (ushort)
+    * 16-bit signed integer (short)
+    * 32-bit signed integer (int)
+    * 32-bit floating-point number (float)
+    * 64-bit floating-point number (double)
+    * a tuple of several elements, where all elements have the same type (one of the above). Arrays whose elements are such tuples are called multi-channel arrays, as opposed to single-channel arrays, whose elements are scalar values. The maximum possible number of channels is defined by the ``CV_CN_MAX`` constant (which is not smaller than 32).
+
+.. todo::
+    Need we extend the above list? Shouldn't we throw away 8-bit signed (schar)?
+
+For these basic types there is the enumeration::
+
+    enum { CV_8U=0, CV_8S=1, CV_16U=2, CV_16S=3, CV_32S=4, CV_32F=5, CV_64F=6 };
+
+Multi-channel ( ``n``-channel) types can be specified using ``CV_8UC1`` ... ``CV_64FC4`` constants (for a number of channels from 1 to 4), or using ``CV_8UC(n)`` ... ``CV_64FC(n)`` or ``CV_MAKETYPE(CV_8U, n)`` ... ``CV_MAKETYPE(CV_64F, n)`` macros when the number of channels is more than 4 or unknown at compile time.
+
+.. note::
+    ``CV_32FC1 == CV_32F``, ``CV_32FC2 == CV_32FC(2) == CV_MAKETYPE(CV_32F, 2)`` and ``CV_MAKETYPE(depth, n) == (depth&7) + ((n-1)<<3)``, that is, the type constant is formed from the ``depth``, taking the lowest 3 bits, and the number of channels minus 1, taking the next ``log2(CV_CN_MAX)`` bits.
+
+Here are some examples::
+
+    Mat mtx(3, 3, CV_32F); // make a 3x3 floating-point matrix
+    Mat cmtx(10, 1, CV_64FC2); // make a 10x1 2-channel floating-point
+                               // matrix (i.e. a 10-element complex vector)
+    Mat img(Size(1920, 1080), CV_8UC3); // make a 3-channel (color) image
+                                        // of 1920 columns and 1080 rows.
+    Mat grayscale(img.size(), CV_MAKETYPE(img.depth(), 1)); // make a 1-channel image of
+                                                            // the same size and same
+                                                            // channel type as img
+
+Arrays with more complex elements can not be constructed or processed using OpenCV. Furthermore, each function or method can handle only a subset of all possible array types. Usually, the more complex the algorithm, the smaller the supported subset of formats. Here are some typical examples of such limitations:
+
+    * The face detection algorithm only works with 8-bit grayscale or color images.
+    * Linear algebra functions and most of the machine learning algorithms work with floating-point arrays only.
+    * Basic functions, such as ``cv::add``, support all types, except for ``CV_8SC(n)``.
 * Color space conversion functions support 8-bit unsigned, 16-bit unsigned and 32-bit floating-point types.

The subset of supported types for each function has been defined by practical needs. All this information about supported types can be put together into a special table. In different implementations of the standard the tables may look different; for example, on embedded platforms the double-precision floating-point type (``CV_64F``) may be unavailable.

.. todo::
    Should we include such a table into the standard?
    Should we specify a minimum "must-have" set of supported formats for each function?


Error handling
--------------

OpenCV uses exceptions to signal critical errors. When the input data has the correct format and is within the specified value range, but the algorithm cannot succeed for some reason (e.g. the optimization algorithm did not converge), the function returns a special error code (typically, just a boolean variable).

The exceptions can be instances of the ``cv::Exception`` class or its derivatives. In turn, ``cv::Exception`` is a derivative of ``std::exception``, so it can be gracefully handled in the code using other standard C++ library components.

The exception is typically thrown using the ``CV_Error(errcode, description)`` macro, or its printf-like ``CV_Error_(errcode, printf-spec, (printf-args))`` variant, or using the ``CV_Assert(condition)`` macro that checks the condition and throws an exception when it is not satisfied. For performance-critical code there is ``CV_DbgAssert(condition)``, which is only retained in the Debug configuration. Thanks to the automatic memory management, all the intermediate buffers are automatically deallocated in case of a sudden error; the user only needs to put a try statement to catch the exceptions, if needed:

::

    try
    {
        ... // call OpenCV
    }
    catch( cv::Exception& e )
    {
        const char* err_msg = e.what();
        std::cout << "exception caught: " << err_msg << std::endl;
    }


Multi-threading and re-entrancy
-------------------------------

The current OpenCV implementation is fully re-entrant, and so should be any alternative implementation targeted for multi-threaded environments. That is, the same function, the same *constant* method of a class instance, or the same *non-constant* method of different class instances can be called from different threads. Also, the same ``cv::Mat`` can be used in different threads, because the reference-counting operations use the architecture-specific atomic instructions.
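As a hypothetical illustration of this guarantee (using C++11 ``std::thread`` purely for brevity; the guarantee itself does not depend on any particular threading API)::

    #include <thread> // C++11, used here only for illustration

    const Mat src = imread("test.png", 0); // shared read-only input
    Mat out1, out2;                        // a distinct output per thread
    std::thread t1([&]{ GaussianBlur(src, out1, Size(5,5), 1.0); });
    std::thread t2([&]{ GaussianBlur(src, out2, Size(5,5), 2.0); });
    t1.join(); t2.join(); // the same function runs safely in both threads

..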
diff --git a/modules/core/doc/operations_on_arrays.rst b/modules/core/doc/operations_on_arrays.rst
new file mode 100644
index 000000000..ad4877be1
--- /dev/null
+++ b/modules/core/doc/operations_on_arrays.rst
@@ -0,0 +1,6552 @@
Operations on Arrays
====================

.. highlight:: cpp

.. index:: abs

cv::abs
-------

.. cfunction:: MatExpr<...> abs(const Mat\& src)

.. cfunction:: MatExpr<...> abs(const MatExpr<...>\& src)

    Computes the absolute value of each matrix element.

    :param src: matrix or matrix expression

``abs`` is a meta-function that is expanded to one of the :func:`absdiff` forms:

* ``C = abs(A-B)`` is equivalent to ``absdiff(A, B, C)``

* ``C = abs(A)`` is equivalent to ``absdiff(A, Scalar::all(0), C)``

* ``C = Mat_<Vec<uchar,n> >(abs(A*alpha + beta))`` is equivalent to ``convertScaleAbs(A, C, alpha, beta)``

The output matrix will have the same size and the same type as the input one (except for the last case, where ``C`` will have ``depth=CV_8U``).

See also: :ref:`Matrix Expressions`, :func:`absdiff`

.. index:: absdiff

cv::absdiff
-----------

.. cfunction:: void absdiff(const Mat\& src1, const Mat\& src2, Mat\& dst)

.. cfunction:: void absdiff(const Mat\& src1, const Scalar\& sc, Mat\& dst)

.. cfunction:: void absdiff(const MatND\& src1, const MatND\& src2, MatND\& dst)

.. cfunction:: void absdiff(const MatND\& src1, const Scalar\& sc, MatND\& dst)

    Computes the per-element absolute difference between two arrays or between an array and a scalar.

    :param src1: The first input array

    :param src2: The second input array; must be the same size and same type as ``src1``

    :param sc: Scalar; the second input parameter

    :param dst: The destination array; it will have the same size and same type as ``src1``; see ``Mat::create``

The functions ``absdiff`` compute:

* the absolute difference between two arrays

  .. math::

      \texttt{dst} (I) = \texttt{saturate} (| \texttt{src1} (I) - \texttt{src2} (I)|)

* or the absolute difference between an array and a scalar:

  .. math::

      \texttt{dst} (I) = \texttt{saturate} (| \texttt{src1} (I) - \texttt{sc} |)

where ``I`` is a multi-dimensional index of the array elements. In the case of multi-channel arrays each channel is processed independently.

See also: :func:`abs`

.. index:: add

cv::add
-------

.. cfunction:: void add(const Mat\& src1, const Mat\& src2, Mat\& dst)

.. cfunction:: void add(const Mat\& src1, const Mat\& src2, Mat\& dst, const Mat\& mask)

.. cfunction:: void add(const Mat\& src1, const Scalar\& sc, Mat\& dst, const Mat\& mask=Mat())

.. cfunction:: void add(const MatND\& src1, const MatND\& src2, MatND\& dst)

.. cfunction:: void add(const MatND\& src1, const MatND\& src2, MatND\& dst, const MatND\& mask)

.. cfunction:: void add(const MatND\& src1, const Scalar\& sc, MatND\& dst, const MatND\& mask=MatND())

    Computes the per-element sum of two arrays or of an array and a scalar.

    :param src1: The first source array

    :param src2: The second source array. It must have the same size and same type as ``src1``

    :param sc: Scalar; the second input parameter

    :param dst: The destination array; it will have the same size and same type as ``src1``; see ``Mat::create``

    :param mask: The optional operation mask, an 8-bit single-channel array; specifies elements of the destination array to be changed

The functions ``add`` compute:

* the sum of two arrays:

  .. math::

      \texttt{dst} (I) = \texttt{saturate} ( \texttt{src1} (I) + \texttt{src2} (I)) \quad \texttt{if mask} (I) \ne0

* or the sum of an array and a scalar:

  .. math::

      \texttt{dst} (I) = \texttt{saturate} ( \texttt{src1} (I) + \texttt{sc} ) \quad \texttt{if mask} (I) \ne0

where ``I`` is a multi-dimensional index of the array elements.

The first function in the above list can be replaced with matrix expressions:

::

    dst = src1 + src2;
    dst += src1; // equivalent to add(dst, src1, dst);

..
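For instance, here is a minimal sketch of saturating and masked addition (the values and the mask are illustrative, not from the original reference)::

    Mat a = (Mat_<uchar>(1,3) << 200, 100, 50);
    Mat b = (Mat_<uchar>(1,3) << 100, 100, 100);
    Mat sum, maskedSum = Mat::zeros(a.size(), a.type());
    add(a, b, sum); // sum = [255, 200, 150]: 200+100=300 saturates to 255
    Mat mask = (Mat_<uchar>(1,3) << 255, 0, 255);
    add(a, b, maskedSum, mask); // only elements 0 and 2 are computed;
                                // element 1 keeps its initial zero

..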
+ +in the case of multi-channel arrays each channel is processed independently. + +See also: +:func:`subtract` +, +:func:`addWeighted` +, +:func:`scaleAdd` +, +:func:`convertScale` +, +:ref:`Matrix Expressions` +, +. + + +.. index:: addWeighted + + +cv::addWeighted +--------------- + +`id=0.137579334868 Comments from the Wiki `__ + + + + +.. cfunction:: void addWeighted(const Mat\& src1, double alpha, const Mat\& src2, double beta, double gamma, Mat\& dst) + + + +.. cfunction:: void addWeighted(const MatND\& src1, double alpha, const MatND\& src2, double beta, double gamma, MatND\& dst) + + Computes the weighted sum of two arrays. + + + + + + + :param src1: The first source array + + + :param alpha: Weight for the first array elements + + + :param src2: The second source array; must have the same size and same type as ``src1`` + + + :param beta: Weight for the second array elements + + + :param dst: The destination array; it will have the same size and same type as ``src1`` + + + :param gamma: Scalar, added to each sum + + + +The functions +``addWeighted`` +calculate the weighted sum of two arrays as follows: + + +.. math:: + + \texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} + \texttt{src2} (I)* \texttt{beta} + \texttt{gamma} ) + + +where +``I`` +is multi-dimensional index of array elements. + +The first function can be replaced with a matrix expression: + + + +:: + + + + dst = src1*alpha + src2*beta + gamma; + + +.. + +In the case of multi-channel arrays each channel is processed independently. + +See also: +:func:`add` +, +:func:`subtract` +, +:func:`scaleAdd` +, +:func:`convertScale` +, +:ref:`Matrix Expressions` +, +. + + +.. index:: bitwise_and + +.. _bitwise_and: + +bitwise_and +----------- + +`id=0.394569155604 Comments from the Wiki `__ + + + + +.. cfunction:: void bitwise_and(const Mat\& src1, const Mat\& src2, Mat\& dst, const Mat\& mask=Mat()) + + + +.. cfunction:: void bitwise_and(const Mat\& src1, const Scalar\& sc, Mat\& dst, const Mat\& mask=Mat()) + + + +.. cfunction:: void bitwise_and(const MatND\& src1, const MatND\& src2, MatND\& dst, const MatND\& mask=MatND()) + + + +.. cfunction:: void bitwise_and(const MatND\& src1, const Scalar\& sc, MatND\& dst, const MatND\& mask=MatND()) + + Calculates per-element bit-wise conjunction of two arrays and an array and a scalar. + + + + + + + :param src1: The first source array + + + :param src2: The second source array. It must have the same size and same type as ``src1`` + + + :param sc: Scalar; the second input parameter + + + :param dst: The destination array; it will have the same size and same type as ``src1`` ; see ``Mat::create`` + + + :param mask: The optional operation mask, 8-bit single channel array; + specifies elements of the destination array to be changed + + + +The functions +``bitwise_and`` +compute per-element bit-wise logical conjunction: + + + + +* + of two arrays + + + + .. math:: + + \texttt{dst} (I) = \texttt{src1} (I) \wedge \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0 + + + + +* + or array and a scalar: + + + + .. math:: + + \texttt{dst} (I) = \texttt{src1} (I) \wedge \texttt{sc} \quad \texttt{if mask} (I) \ne0 + + + + +In the case of floating-point arrays their machine-specific bit representations (usually IEEE754-compliant) are used for the operation, and in the case of multi-channel arrays each channel is processed independently. + +See also: +, +, + +.. index:: bitwise_not + +.. 
_bitwise_not: + +bitwise_not +----------- + +`id=0.0872507219493 Comments from the Wiki `__ + + + + +.. cfunction:: void bitwise_not(const Mat\& src, Mat\& dst) + + + +.. cfunction:: void bitwise_not(const MatND\& src, MatND\& dst) + + Inverts every bit of array + + + + + + + :param src1: The source array + + + :param dst: The destination array; it is reallocated to be of the same size and + the same type as ``src`` ; see ``Mat::create`` + + + :param mask: The optional operation mask, 8-bit single channel array; + specifies elements of the destination array to be changed + + + +The functions +``bitwise_not`` +compute per-element bit-wise inversion of the source array: + + +.. math:: + + \texttt{dst} (I) = \neg \texttt{src} (I) + + +In the case of floating-point source array its machine-specific bit representation (usually IEEE754-compliant) is used for the operation. in the case of multi-channel arrays each channel is processed independently. + +See also: +, +, + +.. index:: bitwise_or + +.. _bitwise_or: + +bitwise_or +---------- + +`id=0.482813081553 Comments from the Wiki `__ + + + + +.. cfunction:: void bitwise_or(const Mat\& src1, const Mat\& src2, Mat\& dst, const Mat\& mask=Mat()) + + + +.. cfunction:: void bitwise_or(const Mat\& src1, const Scalar\& sc, Mat\& dst, const Mat\& mask=Mat()) + + + +.. cfunction:: void bitwise_or(const MatND\& src1, const MatND\& src2, MatND\& dst, const MatND\& mask=MatND()) + + + +.. cfunction:: void bitwise_or(const MatND\& src1, const Scalar\& sc, MatND\& dst, const MatND\& mask=MatND()) + + Calculates per-element bit-wise disjunction of two arrays and an array and a scalar. + + + + + + + :param src1: The first source array + + + :param src2: The second source array. It must have the same size and same type as ``src1`` + + + :param sc: Scalar; the second input parameter + + + :param dst: The destination array; it is reallocated to be of the same size and + the same type as ``src1`` ; see ``Mat::create`` + + + :param mask: The optional operation mask, 8-bit single channel array; + specifies elements of the destination array to be changed + + + +The functions +``bitwise_or`` +compute per-element bit-wise logical disjunction + + + + +* + of two arrays + + + + .. math:: + + \texttt{dst} (I) = \texttt{src1} (I) \vee \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0 + + + + +* + or array and a scalar: + + + + .. math:: + + \texttt{dst} (I) = \texttt{src1} (I) \vee \texttt{sc} \quad \texttt{if mask} (I) \ne0 + + + + +In the case of floating-point arrays their machine-specific bit representations (usually IEEE754-compliant) are used for the operation. in the case of multi-channel arrays each channel is processed independently. + +See also: +, +, + +.. index:: bitwise_xor + +.. _bitwise_xor: + +bitwise_xor +----------- + +`id=0.0987299345573 Comments from the Wiki `__ + + + + +.. cfunction:: void bitwise_xor(const Mat\& src1, const Mat\& src2, Mat\& dst, const Mat\& mask=Mat()) + + + +.. cfunction:: void bitwise_xor(const Mat\& src1, const Scalar\& sc, Mat\& dst, const Mat\& mask=Mat()) + + + +.. cfunction:: void bitwise_xor(const MatND\& src1, const MatND\& src2, MatND\& dst, const MatND\& mask=MatND()) + + + +.. cfunction:: void bitwise_xor(const MatND\& src1, const Scalar\& sc, MatND\& dst, const MatND\& mask=MatND()) + + Calculates per-element bit-wise "exclusive or" operation on two arrays and an array and a scalar. + + + + + + + :param src1: The first source array + + + :param src2: The second source array. 
It must have the same size and same type as ``src1`` + + + :param sc: Scalar; the second input parameter + + + :param dst: The destination array; it is reallocated to be of the same size and + the same type as ``src1`` ; see ``Mat::create`` + + + :param mask: The optional operation mask, 8-bit single channel array; + specifies elements of the destination array to be changed + + + +The functions +``bitwise_xor`` +compute per-element bit-wise logical "exclusive or" operation + + + + + +* + on two arrays + + + + .. math:: + + \texttt{dst} (I) = \texttt{src1} (I) \oplus \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0 + + + + +* + or array and a scalar: + + + + .. math:: + + \texttt{dst} (I) = \texttt{src1} (I) \oplus \texttt{sc} \quad \texttt{if mask} (I) \ne0 + + + + +In the case of floating-point arrays their machine-specific bit representations (usually IEEE754-compliant) are used for the operation. in the case of multi-channel arrays each channel is processed independently. + +See also: +, +, + +.. index:: calcCovarMatrix + + +cv::calcCovarMatrix +------------------- + +`id=0.392280504732 Comments from the Wiki `__ + + + + +.. cfunction:: void calcCovarMatrix( const Mat* samples, int nsamples, Mat\& covar, Mat\& mean, int flags, int ctype=CV_64F) + + + +.. cfunction:: void calcCovarMatrix( const Mat\& samples, Mat\& covar, Mat\& mean, int flags, int ctype=CV_64F) + + Calculates covariation matrix of a set of vectors + + + + + + + :param samples: The samples, stored as separate matrices, or as rows or columns of a single matrix + + + :param nsamples: The number of samples when they are stored separately + + + :param covar: The output covariance matrix; it will have type= ``ctype`` and square size + + + :param mean: The input or output (depending on the flags) array - the mean (average) vector of the input vectors + + + :param flags: The operation flags, a combination of the following values + + * **CV_COVAR_SCRAMBLED** The output covariance matrix is calculated as: + + .. math:: + + \texttt{scale} \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...]^T \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...] + + , + that is, the covariance matrix will be :math:`\texttt{nsamples} \times \texttt{nsamples}` . + Such an unusual covariance matrix is used for fast PCA + of a set of very large vectors (see, for example, the EigenFaces technique + for face recognition). Eigenvalues of this "scrambled" matrix will + match the eigenvalues of the true covariance matrix and the "true" + eigenvectors can be easily calculated from the eigenvectors of the + "scrambled" covariance matrix. + + * **CV_COVAR_NORMAL** The output covariance matrix is calculated as: + + .. math:: + + \texttt{scale} \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...] \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...]^T + + , + that is, ``covar`` will be a square matrix + of the same size as the total number of elements in each + input vector. One and only one of ``CV_COVAR_SCRAMBLED`` and ``CV_COVAR_NORMAL`` must be specified + + * **CV_COVAR_USE_AVG** If the flag is specified, the function does not calculate ``mean`` from the input vectors, but, instead, uses the passed ``mean`` vector. 
This is useful if ``mean`` has been pre-computed or known a-priori, or if the covariance matrix is calculated by parts - in this case, ``mean`` is not a mean vector of the input sub-set of vectors, but rather the mean vector of the whole set. + + * **CV_COVAR_SCALE** If the flag is specified, the covariance matrix is scaled. In the "normal" mode ``scale`` is ``1./nsamples`` ; in the "scrambled" mode ``scale`` is the reciprocal of the total number of elements in each input vector. By default (if the flag is not specified) the covariance matrix is not scaled (i.e. ``scale=1`` ). + + + * **CV_COVAR_ROWS** [Only useful in the second variant of the function] The flag means that all the input vectors are stored as rows of the ``samples`` matrix. ``mean`` should be a single-row vector in this case. + + * **CV_COVAR_COLS** [Only useful in the second variant of the function] The flag means that all the input vectors are stored as columns of the ``samples`` matrix. ``mean`` should be a single-column vector in this case. + + + + + + +The functions +``calcCovarMatrix`` +calculate the covariance matrix +and, optionally, the mean vector of the set of input vectors. + +See also: +:func:`PCA` +, +:func:`mulTransposed` +, +:func:`Mahalanobis` + +.. index:: cartToPolar + + +cv::cartToPolar +--------------- + +`id=0.782591430352 Comments from the Wiki `__ + + + + +.. cfunction:: void cartToPolar(const Mat\& x, const Mat\& y, Mat\& magnitude, Mat\& angle, bool angleInDegrees=false) + + Calculates the magnitude and angle of 2d vectors. + + + + + + + :param x: The array of x-coordinates; must be single-precision or double-precision floating-point array + + + :param y: The array of y-coordinates; it must have the same size and same type as ``x`` + + + :param magnitude: The destination array of magnitudes of the same size and same type as ``x`` + + + :param angle: The destination array of angles of the same size and same type as ``x`` . + The angles are measured in radians :math:`(0` to :math:`2 \pi )` or in degrees (0 to 360 degrees). + + + :param angleInDegrees: The flag indicating whether the angles are measured in radians, which is default mode, or in degrees + + + +The function +``cartToPolar`` +calculates either the magnitude, angle, or both of every 2d vector (x(I),y(I)): + + + +.. math:: + + \begin{array}{l} \texttt{magnitude} (I)= \sqrt{\texttt{x}(I)^2+\texttt{y}(I)^2} , \\ \texttt{angle} (I)= \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))[ \cdot180 / \pi ] \end{array} + + +The angles are calculated with +:math:`\sim\,0.3^\circ` +accuracy. For the (0,0) point, the angle is set to 0. + + +.. index:: checkRange + + +cv::checkRange +-------------- + +`id=0.936497569482 Comments from the Wiki `__ + + + + +.. cfunction:: bool checkRange(const Mat\& src, bool quiet=true, Point* pos=0, double minVal=-DBL_MAX, double maxVal=DBL_MAX) + + + +.. cfunction:: bool checkRange(const MatND\& src, bool quiet=true, int* pos=0, double minVal=-DBL_MAX, double maxVal=DBL_MAX) + + Checks every element of an input array for invalid values. + + + + + + + :param src: The array to check + + + :param quiet: The flag indicating whether the functions quietly return false when the array elements are out of range, or they throw an exception. + + + :param pos: The optional output parameter, where the position of the first outlier is stored. 
In the second function ``pos`` , when not NULL, must be a pointer to array of ``src.dims`` elements + + + :param minVal: The inclusive lower boundary of valid values range + + + :param maxVal: The exclusive upper boundary of valid values range + + + +The functions +``checkRange`` +check that every array element is +neither NaN nor +:math:`\pm \infty` +. When +``minVal < -DBL_MAX`` +and +``maxVal < DBL_MAX`` +, then the functions also check that +each value is between +``minVal`` +and +``maxVal`` +. in the case of multi-channel arrays each channel is processed independently. +If some values are out of range, position of the first outlier is stored in +``pos`` +(when +:math:`\texttt{pos}\ne0` +), and then the functions either return false (when +``quiet=true`` +) or throw an exception. + + + +.. index:: compare + + +cv::compare +----------- + +`id=0.535792020128 Comments from the Wiki `__ + + + + +.. cfunction:: void compare(const Mat\& src1, const Mat\& src2, Mat\& dst, int cmpop) + + + +.. cfunction:: void compare(const Mat\& src1, double value, Mat\& dst, int cmpop) + + + +.. cfunction:: void compare(const MatND\& src1, const MatND\& src2, MatND\& dst, int cmpop) + + + +.. cfunction:: void compare(const MatND\& src1, double value, MatND\& dst, int cmpop) + + Performs per-element comparison of two arrays or an array and scalar value. + + + + + + + :param src1: The first source array + + + :param src2: The second source array; must have the same size and same type as ``src1`` + + + :param value: The scalar value to compare each array element with + + + :param dst: The destination array; will have the same size as ``src1`` and type= ``CV_8UC1`` + + + :param cmpop: The flag specifying the relation between the elements to be checked + + + * **CMP_EQ** :math:`\texttt{src1}(I) = \texttt{src2}(I)` or :math:`\texttt{src1}(I) = \texttt{value}` + + + * **CMP_GT** :math:`\texttt{src1}(I) > \texttt{src2}(I)` or :math:`\texttt{src1}(I) > \texttt{value}` + + + * **CMP_GE** :math:`\texttt{src1}(I) \geq \texttt{src2}(I)` or :math:`\texttt{src1}(I) \geq \texttt{value}` + + + * **CMP_LT** :math:`\texttt{src1}(I) < \texttt{src2}(I)` or :math:`\texttt{src1}(I) < \texttt{value}` + + + * **CMP_LE** :math:`\texttt{src1}(I) \leq \texttt{src2}(I)` or :math:`\texttt{src1}(I) \leq \texttt{value}` + + + * **CMP_NE** :math:`\texttt{src1}(I) \ne \texttt{src2}(I)` or :math:`\texttt{src1}(I) \ne \texttt{value}` + + + + + +The functions +``compare`` +compare each element of +``src1`` +with the corresponding element of +``src2`` +or with real scalar +``value`` +. When the comparison result is true, the corresponding element of destination array is set to 255, otherwise it is set to 0: + + + + +* + ``dst(I) = src1(I) cmpop src2(I) ? 255 : 0`` + + +* + ``dst(I) = src1(I) cmpop value ? 255 : 0`` + + +The comparison operations can be replaced with the equivalent matrix expressions: + + + + +:: + + + + Mat dst1 = src1 >= src2; + Mat dst2 = src1 < 8; + ... + + +.. + +See also: +:func:`checkRange` +, +:func:`min` +, +:func:`max` +, +:func:`threshold` +, +:ref:`Matrix Expressions` + +.. index:: completeSymm + + +cv::completeSymm +---------------- + +`id=0.645555893687 Comments from the Wiki `__ + + + + +.. cfunction:: void completeSymm(Mat\& mtx, bool lowerToUpper=false) + + Copies the lower or the upper half of a square matrix to another half. 
    :param mtx: Input-output floating-point square matrix

    :param lowerToUpper: If true, the lower half is copied to the upper half; otherwise, the upper half is copied to the lower half

The function ``completeSymm`` copies the lower or the upper half of a square matrix to its other half; the matrix diagonal remains unchanged:

* :math:`\texttt{mtx}_{ij}=\texttt{mtx}_{ji}` for :math:`i > j` if ``lowerToUpper=false``

* :math:`\texttt{mtx}_{ij}=\texttt{mtx}_{ji}` for :math:`i < j` if ``lowerToUpper=true``

See also: :func:`flip`, :func:`transpose`

.. index:: convertScaleAbs

cv::convertScaleAbs
-------------------

.. cfunction:: void convertScaleAbs(const Mat\& src, Mat\& dst, double alpha=1, double beta=0)

    Scales, computes absolute values, and converts the result to 8-bit.

    :param src: The source array

    :param dst: The destination array

    :param alpha: The optional scale factor

    :param beta: The optional delta added to the scaled values

On each element of the input array the function ``convertScaleAbs`` performs three operations sequentially: scaling, taking an absolute value, and conversion to an unsigned 8-bit type:

.. math::

    \texttt{dst} (I)= \texttt{saturate\_cast<uchar>} (| \texttt{src} (I)* \texttt{alpha} + \texttt{beta} |)

In the case of multi-channel arrays the function processes each channel independently. When the output is not 8-bit, the operation can be emulated by calling the ``Mat::convertTo`` method (or by using matrix expressions) and then by computing the absolute value of the result, for example:

::

    Mat_<float> A(30,30);
    randu(A, Scalar(-100), Scalar(100));
    Mat_<float> B = A*5 + 3;
    B = abs(B);
    // Mat_<float> B = abs(A*5+3) will also do the job,
    // but it will allocate a temporary matrix

..

See also: :func:`Mat::convertTo`, :func:`abs`

.. index:: countNonZero

cv::countNonZero
----------------

.. cfunction:: int countNonZero( const Mat\& mtx )

.. cfunction:: int countNonZero( const MatND\& mtx )

    Counts non-zero array elements.

    :param mtx: Single-channel array

The function ``countNonZero`` returns the number of non-zero elements in ``mtx``:

.. math::

    \sum _{I: \; \texttt{mtx} (I) \ne0 } 1

See also: :func:`mean`, :func:`meanStdDev`, :func:`norm`, :func:`minMaxLoc`, :func:`calcCovarMatrix`

.. index:: cubeRoot

cv::cubeRoot
------------

.. cfunction:: float cubeRoot(float val)

    Computes the cube root of the argument.

    :param val: The function argument

The function ``cubeRoot`` computes :math:`\sqrt[3]{\texttt{val}}`. Negative arguments are handled correctly; *NaN* and :math:`\pm\infty` are not handled. The accuracy approaches the maximum possible accuracy for single-precision data.

.. index:: cvarrToMat

cv::cvarrToMat
--------------

.. cfunction:: Mat cvarrToMat(const CvArr* src, bool copyData=false, bool allowND=true, int coiMode=0)

    Converts ``CvMat``, ``IplImage`` or ``CvMatND`` to ``cv::Mat``.

    :param src: The source ``CvMat``, ``IplImage`` or ``CvMatND``

    :param copyData: When it is false (default value), no data is copied, only the new header is created.
+ In this case the original array should not be deallocated while the new matrix header is used. The the parameter is true, all the data is copied, then user may deallocate the original array right after the conversion + + + :param allowND: When it is true (default value), then ``CvMatND`` is converted to ``Mat`` if it's possible + (e.g. then the data is contiguous). If it's not possible, or when the parameter is false, the function will report an error + + + :param coiMode: The parameter specifies how the IplImage COI (when set) is handled. + + + + * If ``coiMode=0`` , the function will report an error if COI is set. + + + * If ``coiMode=1`` , the function will never report an error; instead it returns the header to the whole original image and user will have to check and process COI manually, see :func:`extractImageCOI` . + + + + + +The function +``cvarrToMat`` +converts +:ref:`CvMat` +, +:ref:`IplImage` +or +:ref:`CvMatND` +header to +:func:`Mat` +header, and optionally duplicates the underlying data. The constructed header is returned by the function. + +When +``copyData=false`` +, the conversion is done really fast (in O(1) time) and the newly created matrix header will have +``refcount=0`` +, which means that no reference counting is done for the matrix data, and user has to preserve the data until the new header is destructed. Otherwise, when +``copyData=true`` +, the new buffer will be allocated and managed as if you created a new matrix from scratch and copy the data there. That is, +``cvarrToMat(src, true) :math:`\sim` cvarrToMat(src, false).clone()`` +(assuming that COI is not set). The function provides uniform way of supporting +:ref:`CvArr` +paradigm in the code that is migrated to use new-style data structures internally. The reverse transformation, from +:func:`Mat` +to +:ref:`CvMat` +or +:ref:`IplImage` +can be done by simple assignment: + + + + +:: + + + + CvMat* A = cvCreateMat(10, 10, CV_32F); + cvSetIdentity(A); + IplImage A1; cvGetImage(A, &A1); + Mat B = cvarrToMat(A); + Mat B1 = cvarrToMat(&A1); + IplImage C = B; + CvMat C1 = B1; + // now A, A1, B, B1, C and C1 are different headers + // for the same 10x10 floating-point array. + // note, that you will need to use "&" + // to pass C & C1 to OpenCV functions, e.g: + printf(" + + +.. + +Normally, the function is used to convert an old-style 2D array ( +:ref:`CvMat` +or +:ref:`IplImage` +) to +``Mat`` +, however, the function can also take +:ref:`CvMatND` +on input and create +:func:`Mat` +for it, if it's possible. And for +``CvMatND A`` +it is possible if and only if +``A.dim[i].size*A.dim.step[i] == A.dim.step[i-1]`` +for all or for all but one +``i, 0 < i < A.dims`` +. That is, the matrix data should be continuous or it should be representable as a sequence of continuous matrices. By using this function in this way, you can process +:ref:`CvMatND` +using arbitrary element-wise function. But for more complex operations, such as filtering functions, it will not work, and you need to convert +:ref:`CvMatND` +to +:func:`MatND` +using the corresponding constructor of the latter. + +The last parameter, +``coiMode`` +, specifies how to react on an image with COI set: by default it's 0, and then the function reports an error when an image with COI comes in. And +``coiMode=1`` +means that no error is signaled - user has to check COI presence and handle it manually. The modern structures, such as +:func:`Mat` +and +:func:`MatND` +do not support COI natively. 
To process individual channel of an new-style array, you will need either to organize loop over the array (e.g. using matrix iterators) where the channel of interest will be processed, or extract the COI using +:func:`mixChannels` +(for new-style arrays) or +:func:`extractImageCOI` +(for old-style arrays), process this individual channel and insert it back to the destination array if need (using +:func:`mixChannel` +or +:func:`insertImageCOI` +, respectively). + +See also: +:func:`cvGetImage` +, +:func:`cvGetMat` +, +:func:`cvGetMatND` +, +:func:`extractImageCOI` +, +:func:`insertImageCOI` +, +:func:`mixChannels` + +.. index:: dct + + +cv::dct +------- + +`id=0.198370476417 Comments from the Wiki `__ + + + + +.. cfunction:: void dct(const Mat\& src, Mat\& dst, int flags=0) + + Performs a forward or inverse discrete cosine transform of 1D or 2D array + + + + + + + :param src: The source floating-point array + + + :param dst: The destination array; will have the same size and same type as ``src`` + + + :param flags: Transformation flags, a combination of the following values + + * **DCT_INVERSE** do an inverse 1D or 2D transform instead of the default forward transform. + + * **DCT_ROWS** do a forward or inverse transform of every individual row of the input matrix. This flag allows user to transform multiple vectors simultaneously and can be used to decrease the overhead (which is sometimes several times larger than the processing itself), to do 3D and higher-dimensional transforms and so forth. + + + + + +The function +``dct`` +performs a forward or inverse discrete cosine transform (DCT) of a 1D or 2D floating-point array: + +Forward Cosine transform of 1D vector of +:math:`N` +elements: + + +.. math:: + + Y = C^{(N)} \cdot X + + +where + + +.. math:: + + C^{(N)}_{jk}= \sqrt{\alpha_j/N} \cos \left ( \frac{\pi(2k+1)j}{2N} \right ) + + +and +:math:`\alpha_0=1` +, +:math:`\alpha_j=2` +for +:math:`j > 0` +. + +Inverse Cosine transform of 1D vector of N elements: + + +.. math:: + + X = \left (C^{(N)} \right )^{-1} \cdot Y = \left (C^{(N)} \right )^T \cdot Y + + +(since +:math:`C^{(N)}` +is orthogonal matrix, +:math:`C^{(N)} \cdot \left(C^{(N)}\right)^T = I` +) + +Forward Cosine transform of 2D +:math:`M \times N` +matrix: + + +.. math:: + + Y = C^{(N)} \cdot X \cdot \left (C^{(N)} \right )^T + + +Inverse Cosine transform of 2D vector of +:math:`M \times N` +elements: + + +.. math:: + + X = \left (C^{(N)} \right )^T \cdot X \cdot C^{(N)} + + +The function chooses the mode of operation by looking at the flags and size of the input array: + + + + +* + if + ``(flags & DCT_INVERSE) == 0`` + , the function does forward 1D or 2D transform, otherwise it is inverse 1D or 2D transform. + + + +* + if + ``(flags & DCT_ROWS) :math:`\ne` 0`` + , the function performs 1D transform of each row. + + + +* + otherwise, if the array is a single column or a single row, the function performs 1D transform + + + +* + otherwise it performs 2D transform. + + +**Important note** +: currently cv::dct supports even-size arrays (2, 4, 6 ...). For data analysis and approximation you can pad the array when necessary. + +Also, the function's performance depends very much, and not monotonically, on the array size, see +:func:`getOptimalDFTSize` +. 
In the current implementation DCT of a vector of size +``N`` +is computed via DFT of a vector of size +``N/2`` +, thus the optimal DCT size +:math:`\texttt{N}^*\geq\texttt{N}` +can be computed as: + + + + +:: + + + + size_t getOptimalDCTSize(size_t N) { return 2*getOptimalDFTSize((N+1)/2); } + + +.. + +See also: +:func:`dft` +, +:func:`getOptimalDFTSize` +, +:func:`idct` + +.. index:: dft + + +cv::dft +------- + +`id=0.0357418232516 Comments from the Wiki `__ + + + + +.. cfunction:: void dft(const Mat\& src, Mat\& dst, int flags=0, int nonzeroRows=0) + + Performs a forward or inverse Discrete Fourier transform of 1D or 2D floating-point array. + + + + + + + :param src: The source array, real or complex + + + :param dst: The destination array, which size and type depends on the ``flags`` + + + :param flags: Transformation flags, a combination of the following values + + * **DFT_INVERSE** do an inverse 1D or 2D transform instead of the default forward transform. + + * **DFT_SCALE** scale the result: divide it by the number of array elements. Normally, it is combined with ``DFT_INVERSE`` + . + * **DFT_ROWS** do a forward or inverse transform of every individual row of the input matrix. This flag allows the user to transform multiple vectors simultaneously and can be used to decrease the overhead (which is sometimes several times larger than the processing itself), to do 3D and higher-dimensional transforms and so forth. + + * **DFT_COMPLEX_OUTPUT** then the function performs forward transformation of 1D or 2D real array, the result, though being a complex array, has complex-conjugate symmetry ( *CCS* ), see the description below. Such an array can be packed into real array of the same size as input, which is the fastest option and which is what the function does by default. However, you may wish to get the full complex array (for simpler spectrum analysis etc.). Pass the flag to tell the function to produce full-size complex output array. + + * **DFT_REAL_OUTPUT** then the function performs inverse transformation of 1D or 2D complex array, the result is normally a complex array of the same size. However, if the source array has conjugate-complex symmetry (for example, it is a result of forward transformation with ``DFT_COMPLEX_OUTPUT`` flag), then the output is real array. While the function itself does not check whether the input is symmetrical or not, you can pass the flag and then the function will assume the symmetry and produce the real output array. Note that when the input is packed real array and inverse transformation is executed, the function treats the input as packed complex-conjugate symmetrical array, so the output will also be real array + + + + + :param nonzeroRows: When the parameter :math:`\ne 0` , the function assumes that only the first ``nonzeroRows`` rows of the input array ( ``DFT_INVERSE`` is not set) or only the first ``nonzeroRows`` of the output array ( ``DFT_INVERSE`` is set) contain non-zeros, thus the function can handle the rest of the rows more efficiently and thus save some time. This technique is very useful for computing array cross-correlation or convolution using DFT + + + +Forward Fourier transform of 1D vector of N elements: + + +.. math:: + + Y = F^{(N)} \cdot X, + + +where +:math:`F^{(N)}_{jk}=\exp(-2\pi i j k/N)` +and +:math:`i=\sqrt{-1}` +Inverse Fourier transform of 1D vector of N elements: + + +.. 
math:: + + \begin{array}{l} X'= \left (F^{(N)} \right )^{-1} \cdot Y = \left (F^{(N)} \right )^* \cdot y \\ X = (1/N) \cdot X, \end{array} + + +where +:math:`F^*=\left(\textrm{Re}(F^{(N)})-\textrm{Im}(F^{(N)})\right)^T` +Forward Fourier transform of 2D vector of +:math:`M \times N` +elements: + + +.. math:: + + Y = F^{(M)} \cdot X \cdot F^{(N)} + + +Inverse Fourier transform of 2D vector of +:math:`M \times N` +elements: + + +.. math:: + + \begin{array}{l} X'= \left (F^{(M)} \right )^* \cdot Y \cdot \left (F^{(N)} \right )^* \\ X = \frac{1}{M \cdot N} \cdot X' \end{array} + + +In the case of real (single-channel) data, the packed format called +*CCS* +(complex-conjugate-symmetrical) that was borrowed from IPL and used to represent the result of a forward Fourier transform or input for an inverse Fourier transform: + + + +.. math:: + + \begin{bmatrix} Re Y_{0,0} & Re Y_{0,1} & Im Y_{0,1} & Re Y_{0,2} & Im Y_{0,2} & \cdots & Re Y_{0,N/2-1} & Im Y_{0,N/2-1} & Re Y_{0,N/2} \\ Re Y_{1,0} & Re Y_{1,1} & Im Y_{1,1} & Re Y_{1,2} & Im Y_{1,2} & \cdots & Re Y_{1,N/2-1} & Im Y_{1,N/2-1} & Re Y_{1,N/2} \\ Im Y_{1,0} & Re Y_{2,1} & Im Y_{2,1} & Re Y_{2,2} & Im Y_{2,2} & \cdots & Re Y_{2,N/2-1} & Im Y_{2,N/2-1} & Im Y_{1,N/2} \\ \hdotsfor{9} \\ Re Y_{M/2-1,0} & Re Y_{M-3,1} & Im Y_{M-3,1} & \hdotsfor{3} & Re Y_{M-3,N/2-1} & Im Y_{M-3,N/2-1}& Re Y_{M/2-1,N/2} \\ Im Y_{M/2-1,0} & Re Y_{M-2,1} & Im Y_{M-2,1} & \hdotsfor{3} & Re Y_{M-2,N/2-1} & Im Y_{M-2,N/2-1}& Im Y_{M/2-1,N/2} \\ Re Y_{M/2,0} & Re Y_{M-1,1} & Im Y_{M-1,1} & \hdotsfor{3} & Re Y_{M-1,N/2-1} & Im Y_{M-1,N/2-1}& Re Y_{M/2,N/2} \end{bmatrix} + + +in the case of 1D transform of real vector, the output will look as the first row of the above matrix. + +So, the function chooses the operation mode depending on the flags and size of the input array: + + + + +* + if + ``DFT_ROWS`` + is set or the input array has single row or single column then the function performs 1D forward or inverse transform (of each row of a matrix when + ``DFT_ROWS`` + is set, otherwise it will be 2D transform. + + + +* + if input array is real and + ``DFT_INVERSE`` + is not set, the function does forward 1D or 2D transform: + + + + + + * + when + ``DFT_COMPLEX_OUTPUT`` + is set then the output will be complex matrix of the same size as input. + + + + * + otherwise the output will be a real matrix of the same size as input. in the case of 2D transform it will use the packed format as shown above; in the case of single 1D transform it will look as the first row of the above matrix; in the case of multiple 1D transforms (when using + ``DCT_ROWS`` + flag) each row of the output matrix will look like the first row of the above matrix. + + + + + +* + otherwise, if the input array is complex and either + ``DFT_INVERSE`` + or + ``DFT_REAL_OUTPUT`` + are not set then the output will be a complex array of the same size as input and the function will perform the forward or inverse 1D or 2D transform of the whole input array or each row of the input array independently, depending on the flags + ``DFT_INVERSE`` + and + ``DFT_ROWS`` + . + + + +* + otherwise, i.e. when + ``DFT_INVERSE`` + is set, the input array is real, or it is complex but + ``DFT_REAL_OUTPUT`` + is set, the output will be a real array of the same size as input, and the function will perform 1D or 2D inverse transformation of the whole input array or each individual row, depending on the flags + ``DFT_INVERSE`` + and + ``DFT_ROWS`` + . + + +The scaling is done after the transformation if +``DFT_SCALE`` +is set. 
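As a small illustrative sketch (not part of the original reference), a forward transform of a real array followed by an inverse transform with ``DFT_SCALE`` reproduces the input::

    Mat x(1, 8, CV_32F), spectrum, y;
    randu(x, Scalar(0), Scalar(1));
    dft(x, spectrum); // real input yields a packed (CCS) spectrum
    dft(spectrum, y, DFT_INVERSE | DFT_SCALE | DFT_REAL_OUTPUT);
    // y is approximately equal to x

..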
+ +Unlike +:func:`dct` +, the function supports arrays of arbitrary size, but only those arrays are processed efficiently, which sizes can be factorized in a product of small prime numbers (2, 3 and 5 in the current implementation). Such an efficient DFT size can be computed using +:func:`getOptimalDFTSize` +method. + +Here is the sample on how to compute DFT-based convolution of two 2D real arrays: + + + +:: + + + + void convolveDFT(const Mat& A, const Mat& B, Mat& C) + { + // reallocate the output array if needed + C.create(abs(A.rows - B.rows)+1, abs(A.cols - B.cols)+1, A.type()); + Size dftSize; + // compute the size of DFT transform + dftSize.width = getOptimalDFTSize(A.cols + B.cols - 1); + dftSize.height = getOptimalDFTSize(A.rows + B.rows - 1); + + // allocate temporary buffers and initialize them with 0's + Mat tempA(dftSize, A.type(), Scalar::all(0)); + Mat tempB(dftSize, B.type(), Scalar::all(0)); + + // copy A and B to the top-left corners of tempA and tempB, respectively + Mat roiA(tempA, Rect(0,0,A.cols,A.rows)); + A.copyTo(roiA); + Mat roiB(tempB, Rect(0,0,B.cols,B.rows)); + B.copyTo(roiB); + + // now transform the padded A & B in-place; + // use "nonzeroRows" hint for faster processing + dft(tempA, tempA, 0, A.rows); + dft(tempB, tempB, 0, B.rows); + + // multiply the spectrums; + // the function handles packed spectrum representations well + mulSpectrums(tempA, tempB, tempA); + + // transform the product back from the frequency domain. + // Even though all the result rows will be non-zero, + // we need only the first C.rows of them, and thus we + // pass nonzeroRows == C.rows + dft(tempA, tempA, DFT_INVERSE + DFT_SCALE, C.rows); + + // now copy the result back to C. + tempA(Rect(0, 0, C.cols, C.rows)).copyTo(C); + + // all the temporary buffers will be deallocated automatically + } + + +.. + +What can be optimized in the above sample? + + + + +* + since we passed + :math:`\texttt{nonzeroRows} \ne 0` + to the forward transform calls and + since we copied + ``A`` + / + ``B`` + to the top-left corners of + ``tempA`` + / + ``tempB`` + , respectively, + it's not necessary to clear the whole + ``tempA`` + and + ``tempB`` + ; + it is only necessary to clear the + ``tempA.cols - A.cols`` + ( + ``tempB.cols - B.cols`` + ) + rightmost columns of the matrices. + + + +* + this DFT-based convolution does not have to be applied to the whole big arrays, + especially if + ``B`` + is significantly smaller than + ``A`` + or vice versa. + Instead, we can compute convolution by parts. For that we need to split the destination array + + ``C`` + into multiple tiles and for each tile estimate, which parts of + ``A`` + and + ``B`` + are required to compute convolution in this tile. If the tiles in + ``C`` + are too small, + the speed will decrease a lot, because of repeated work - in the ultimate case, when each tile in + ``C`` + is a single pixel, + the algorithm becomes equivalent to the naive convolution algorithm. + If the tiles are too big, the temporary arrays + ``tempA`` + and + ``tempB`` + become too big + and there is also slowdown because of bad cache locality. So there is optimal tile size somewhere in the middle. + + + +* + if the convolution is done by parts, since different tiles in + ``C`` + can be computed in parallel, the loop can be threaded. 
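As a quick usage sketch of the ``convolveDFT`` sample above (the array sizes here are illustrative)::

    Mat A(512, 512, CV_32F), B(31, 31, CV_32F), C;
    randu(A, Scalar(0), Scalar(1));
    randu(B, Scalar(0), Scalar(1));
    convolveDFT(A, B, C); // C is 482x482, the "valid" part of the convolution

..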
+ + +All of the above improvements have been implemented in +:func:`matchTemplate` +and +:func:`filter2D` +, therefore, by using them, you can get even better performance than with the above theoretically optimal implementation (though, those two functions actually compute cross-correlation, not convolution, so you will need to "flip" the kernel or the image around the center using +:func:`flip` +). + +See also: +:func:`dct` +, +:func:`getOptimalDFTSize` +, +:func:`mulSpectrums` +, +:func:`filter2D` +, +:func:`matchTemplate` +, +:func:`flip` +, +:func:`cartToPolar` +, +:func:`magnitude` +, +:func:`phase` + +.. index:: divide + + +cv::divide +---------- + +`id=0.814434783558 Comments from the Wiki `__ + + + + +.. cfunction:: void divide(const Mat\& src1, const Mat\& src2, Mat\& dst, double scale=1) + + + +.. cfunction:: void divide(double scale, const Mat\& src2, Mat\& dst) + + + +.. cfunction:: void divide(const MatND\& src1, const MatND\& src2, MatND\& dst, double scale=1) + + + +.. cfunction:: void divide(double scale, const MatND\& src2, MatND\& dst) + + Performs per-element division of two arrays or a scalar by an array. + + + + + + + :param src1: The first source array + + + :param src2: The second source array; should have the same size and same type as ``src1`` + + + :param scale: Scale factor + + + :param dst: The destination array; will have the same size and same type as ``src2`` + + + +The functions +``divide`` +divide one array by another: + + +.. math:: + + \texttt{dst(I) = saturate(src1(I)*scale/src2(I))} + + +or a scalar by array, when there is no +``src1`` +: + + +.. math:: + + \texttt{dst(I) = saturate(scale/src2(I))} + + +The result will have the same type as +``src1`` +. When +``src2(I)=0`` +, +``dst(I)=0`` +too. + +See also: +:func:`multiply` +, +:func:`add` +, +:func:`subtract` +, +:ref:`Matrix Expressions` + +.. index:: determinant + + +cv::determinant +--------------- + +`id=0.405627016388 Comments from the Wiki `__ + + + + +.. cfunction:: double determinant(const Mat\& mtx) + + Returns determinant of a square floating-point matrix. + + + + + + + :param mtx: The input matrix; must have ``CV_32FC1`` or ``CV_64FC1`` type and square size + + + +The function +``determinant`` +computes and returns determinant of the specified matrix. For small matrices ( +``mtx.cols=mtx.rows<=3`` +) +the direct method is used; for larger matrices the function uses LU factorization. + +For symmetric positive-determined matrices, it is also possible to compute +:func:`SVD` +: +:math:`\texttt{mtx}=U \cdot W \cdot V^T` +and then calculate the determinant as a product of the diagonal elements of +:math:`W` +. + +See also: +:func:`SVD` +, +:func:`trace` +, +:func:`invert` +, +:func:`solve` +, +:ref:`Matrix Expressions` + +.. index:: eigen + + +cv::eigen +--------- + +`id=0.190325886613 Comments from the Wiki `__ + + + + +.. cfunction:: bool eigen(const Mat\& src, Mat\& eigenvalues, int lowindex=-1, int highindex=-1) + + + +.. cfunction:: bool eigen(const Mat\& src, Mat\& eigenvalues, Mat\& eigenvectors, int lowindex=-1,int highindex=-1) + + Computes eigenvalues and eigenvectors of a symmetric matrix. + + + + + + + :param src: The input matrix; must have ``CV_32FC1`` or ``CV_64FC1`` type, square size and be symmetric: :math:`\texttt{src}^T=\texttt{src}` + + + :param eigenvalues: The output vector of eigenvalues of the same type as ``src`` ; The eigenvalues are stored in the descending order. 
+ + + :param eigenvectors: The output matrix of eigenvectors; It will have the same size and the same type as ``src`` ; The eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding eigenvalues + + + :param lowindex: Optional index of largest eigenvalue/-vector to calculate. + (See below.) + + + :param highindex: Optional index of smallest eigenvalue/-vector to calculate. + (See below.) + + + +The functions +``eigen`` +compute just eigenvalues, or eigenvalues and eigenvectors of symmetric matrix +``src`` +: + + + + +:: + + + + src*eigenvectors(i,:)' = eigenvalues(i)*eigenvectors(i,:)' (in MATLAB notation) + + +.. + +If either low- or highindex is supplied the other is required, too. +Indexing is 0-based. Example: To calculate the largest eigenvector/-value set +lowindex = highindex = 0. +For legacy reasons this function always returns a square matrix the same size +as the source matrix with eigenvectors and a vector the length of the source +matrix with eigenvalues. The selected eigenvectors/-values are always in the +first highindex - lowindex + 1 rows. + +See also: +:func:`SVD` +, +:func:`completeSymm` +, +:func:`PCA` + +.. index:: exp + + +cv::exp +------- + +`id=0.485225248728 Comments from the Wiki `__ + + + + +.. cfunction:: void exp(const Mat\& src, Mat\& dst) + + + +.. cfunction:: void exp(const MatND\& src, MatND\& dst) + + Calculates the exponent of every array element. + + + + + + + :param src: The source array + + + :param dst: The destination array; will have the same size and same type as ``src`` + + + +The function +``exp`` +calculates the exponent of every element of the input array: + + + +.. math:: + + \texttt{dst} [I] = e^{ \texttt{src} }(I) + + +The maximum relative error is about +:math:`7 \times 10^{-6}` +for single-precision and less than +:math:`10^{-10}` +for double-precision. Currently, the function converts denormalized values to zeros on output. Special values (NaN, +:math:`\pm \infty` +) are not handled. + +See also: +:func:`log` +, +:func:`cartToPolar` +, +:func:`polarToCart` +, +:func:`phase` +, +:func:`pow` +, +:func:`sqrt` +, +:func:`magnitude` + +.. index:: extractImageCOI + + +cv::extractImageCOI +------------------- + +`id=0.888628678983 Comments from the Wiki `__ + + + + +.. cfunction:: void extractImageCOI(const CvArr* src, Mat\& dst, int coi=-1) + + Extract the selected image channel + + + + + + + :param src: The source array. It should be a pointer to :ref:`CvMat` or :ref:`IplImage` + + + :param dst: The destination array; will have single-channel, and the same size and the same depth as ``src`` + + + :param coi: If the parameter is ``>=0`` , it specifies the channel to extract; + If it is ``<0`` , ``src`` must be a pointer to ``IplImage`` with valid COI set - then the selected COI is extracted. + + + +The function +``extractImageCOI`` +is used to extract image COI from an old-style array and put the result to the new-style C++ matrix. As usual, the destination matrix is reallocated using +``Mat::create`` +if needed. + +To extract a channel from a new-style matrix, use +:func:`mixChannels` +or +:func:`split` +See also: +:func:`mixChannels` +, +:func:`split` +, +:func:`merge` +, +:func:`cvarrToMat` +, +:func:`cvSetImageCOI` +, +:func:`cvGetImageCOI` + +.. index:: fastAtan2 + + +cv::fastAtan2 +------------- + +`id=0.823198179102 Comments from the Wiki `__ + + + + +.. 
cfunction:: float fastAtan2(float y, float x) + + Calculates the angle of a 2D vector in degrees + + + + + + + :param x: x-coordinate of the vector + + + :param y: y-coordinate of the vector + + + +The function +``fastAtan2`` +calculates the full-range angle of an input 2D vector. The angle is +measured in degrees and varies from +:math:`0^\circ` +to +:math:`360^\circ` +. The accuracy is about +:math:`0.3^\circ` +. + + +.. index:: flip + + +cv::flip +-------- + +`id=0.360483781891 Comments from the Wiki `__ + + + + +.. cfunction:: void flip(const Mat\& src, Mat\& dst, int flipCode) + + Flips a 2D array around vertical, horizontal or both axes. + + + + + + + :param src: The source array + + + :param dst: The destination array; will have the same size and same type as ``src`` + + + :param flipCode: Specifies how to flip the array: + 0 means flipping around the x-axis, positive (e.g., 1) means flipping around y-axis, and negative (e.g., -1) means flipping around both axes. See also the discussion below for the formulas. + + + +The function +``flip`` +flips the array in one of three different ways (row and column indices are 0-based): + + + +.. math:: + + \texttt{dst} _{ij} = \forkthree{\texttt{src}_{\texttt{src.rows}-i-1,j} }{if \texttt{flipCode} = 0} + { \texttt{src} _{i, \texttt{src.cols} -j-1}}{if \texttt{flipCode} > 0} + { \texttt{src} _{ \texttt{src.rows} -i-1, \texttt{src.cols} -j-1}}{if \texttt{flipCode} < 0} + + +The example scenarios of function use are: + + + + +* + vertical flipping of the image ( + :math:`\texttt{flipCode} = 0` + ) to switch between top-left and bottom-left image origin, which is a typical operation in video processing in Windows. + + + +* + horizontal flipping of the image with subsequent horizontal shift and absolute difference calculation to check for a vertical-axis symmetry ( + :math:`\texttt{flipCode} > 0` + ) + + + +* + simultaneous horizontal and vertical flipping of the image with subsequent shift and absolute difference calculation to check for a central symmetry ( + :math:`\texttt{flipCode} < 0` + ) + + + +* + reversing the order of 1d point arrays ( + :math:`\texttt{flipCode} > 0` + or + :math:`\texttt{flipCode} = 0` + ) + + +See also: +:func:`transpose` +, +:func:`repeat` +, +:func:`completeSymm` + +.. index:: gemm + + +cv::gemm +-------- + +`id=0.493765445577 Comments from the Wiki `__ + + + + +.. cfunction:: void gemm(const Mat\& src1, const Mat\& src2, double alpha, const Mat\& src3, double beta, Mat\& dst, int flags=0) + + Performs generalized matrix multiplication. + + + + + + + :param src1: The first multiplied input matrix; should have ``CV_32FC1`` , ``CV_64FC1`` , ``CV_32FC2`` or ``CV_64FC2`` type + + + :param src2: The second multiplied input matrix; should have the same type as ``src1`` + + + :param alpha: The weight of the matrix product + + + :param src3: The third optional delta matrix added to the matrix product; should have the same type as ``src1`` and ``src2`` + + + :param beta: The weight of ``src3`` + + + :param dst: The destination matrix; It will have the proper size and the same type as input matrices + + + :param flags: Operation flags: + + + * **GEMM_1_T** transpose ``src1`` + + + * **GEMM_2_T** transpose ``src2`` + + + * **GEMM_3_T** transpose ``src3`` + + + + + +The function performs generalized matrix multiplication and similar to the corresponding functions +``*gemm`` +in BLAS level 3. +For example, +``gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T)`` +corresponds to + + +.. 
math:: + + \texttt{dst} = \texttt{alpha} \cdot \texttt{src1} ^T \cdot \texttt{src2} + \texttt{beta} \cdot \texttt{src3} ^T + + +The function can be replaced with a matrix expression, e.g. the above call can be replaced with: + + + +:: + + + + dst = alpha*src1.t()*src2 + beta*src3.t(); + + +.. + +See also: +:func:`mulTransposed` +, +:func:`transform` +, +:ref:`Matrix Expressions` + +.. index:: getConvertElem + + +cv::getConvertElem +------------------ + +`id=0.501750836115 Comments from the Wiki `__ + + + + +.. cfunction:: ConvertData getConvertElem(int fromType, int toType) + + + +.. cfunction:: ConvertScaleData getConvertScaleElem(int fromType, int toType) + + + +.. cfunction:: typedef void (*ConvertData)(const void* from, void* to, int cn) + + + +.. cfunction:: typedef void (*ConvertScaleData)(const void* from, void* to, int cn, double alpha, double beta) + + Returns conversion function for a single pixel + + + + + + + :param fromType: The source pixel type + + + :param toType: The destination pixel type + + + :param from: Callback parameter: pointer to the input pixel + + + :param to: Callback parameter: pointer to the output pixel + + + :param cn: Callback parameter: the number of channels; can be arbitrary, 1, 100, 100000, ... + + + :param alpha: ConvertScaleData callback optional parameter: the scale factor + + + :param beta: ConvertScaleData callback optional parameter: the delta or offset + + + +The functions +``getConvertElem`` +and +``getConvertScaleElem`` +return pointers to the functions for converting individual pixels from one type to another. While the main function purpose is to convert single pixels (actually, for converting sparse matrices from one type to another), you can use them to convert the whole row of a dense matrix or the whole matrix at once, by setting +``cn = matrix.cols*matrix.rows*matrix.channels()`` +if the matrix data is continuous. + +See also: +:func:`Mat::convertTo` +, +:func:`MatND::convertTo` +, +:func:`SparseMat::convertTo` + +.. index:: getOptimalDFTSize + + +cv::getOptimalDFTSize +--------------------- + +`id=0.662659569491 Comments from the Wiki `__ + + + + +.. cfunction:: int getOptimalDFTSize(int vecsize) + + Returns optimal DFT size for a given vector size. + + + + + + + :param vecsize: Vector size + + + +DFT performance is not a monotonic function of a vector size, therefore, when you compute convolution of two arrays or do a spectral analysis of array, it usually makes sense to pad the input data with zeros to get a bit larger array that can be transformed much faster than the original one. +Arrays, which size is a power-of-two (2, 4, 8, 16, 32, ...) are the fastest to process, though, the arrays, which size is a product of 2's, 3's and 5's (e.g. 300 = 5*5*3*2*2), are also processed quite efficiently. + +The function +``getOptimalDFTSize`` +returns the minimum number +``N`` +that is greater than or equal to +``vecsize`` +, such that the DFT +of a vector of size +``N`` +can be computed efficiently. In the current implementation +:math:`N=2^p \times 3^q \times 5^r` +, for some +:math:`p` +, +:math:`q` +, +:math:`r` +. + +The function returns a negative number if +``vecsize`` +is too large (very close to +``INT_MAX`` +). + +While the function cannot be used directly to estimate the optimal vector size for DCT transform (since the current DCT implementation supports only even-size vectors), it can be easily computed as +``getOptimalDFTSize((vecsize+1)/2)*2`` +. 
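For instance, here is a minimal padding sketch (the sizes are illustrative, not from the original reference)::

    Mat src(1, 997, CV_32F), padded;
    randu(src, Scalar(0), Scalar(1));
    int n = getOptimalDFTSize(src.cols); // 997 is prime; n becomes 1000 = 2^3 * 5^3
    copyMakeBorder(src, padded, 0, 0, 0, n - src.cols,
                   BORDER_CONSTANT, Scalar::all(0)); // pad with zeros on the right
    dft(padded, padded); // transforms the padded vector much faster

..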
See also: :func:`dft`, :func:`dct`, :func:`idft`, :func:`idct`, :func:`mulSpectrums`

.. index:: idct

cv::idct
--------

.. cfunction:: void idct(const Mat\& src, Mat\& dst, int flags=0)

    Computes the inverse Discrete Cosine Transform of a 1D or 2D array.

    :param src: The source floating-point single-channel array

    :param dst: The destination array; will have the same size and same type as ``src``

    :param flags: The operation flags

``idct(src, dst, flags)`` is equivalent to ``dct(src, dst, flags | DCT_INVERSE)``. See :func:`dct` for details.

See also: :func:`dct`, :func:`dft`, :func:`idft`, :func:`getOptimalDFTSize`

.. index:: idft

cv::idft
--------

.. cfunction:: void idft(const Mat\& src, Mat\& dst, int flags=0, int nonzeroRows=0)

    Computes the inverse Discrete Fourier Transform of a 1D or 2D array.

    :param src: The source floating-point real or complex array

    :param dst: The destination array, whose size and type depend on the ``flags``

    :param flags: The operation flags. See :func:`dft`

    :param nonzeroRows: The number of ``dst`` rows to compute. The rest of the rows will have undefined content. See the convolution sample in the :func:`dft` description

``idft(src, dst, flags)`` is equivalent to ``dft(src, dst, flags | DFT_INVERSE)``. See :func:`dft` for details. Note that neither ``dft`` nor ``idft`` scales the result by default. Thus, you should pass ``DFT_SCALE`` to one of ``dft`` or ``idft`` explicitly to make these transforms mutually inverse.

See also: :func:`dft`, :func:`dct`, :func:`idct`, :func:`mulSpectrums`, :func:`getOptimalDFTSize`

.. index:: inRange

cv::inRange
-----------

.. cfunction:: void inRange(const Mat\& src, const Mat\& lowerb, const Mat\& upperb, Mat\& dst)

.. cfunction:: void inRange(const Mat\& src, const Scalar\& lowerb, const Scalar\& upperb, Mat\& dst)

.. cfunction:: void inRange(const MatND\& src, const MatND\& lowerb, const MatND\& upperb, MatND\& dst)

.. cfunction:: void inRange(const MatND\& src, const Scalar\& lowerb, const Scalar\& upperb, MatND\& dst)

    Checks if array elements lie between the elements of two other arrays.

    :param src: The first source array

    :param lowerb: The inclusive lower boundary array of the same size and type as ``src``

    :param upperb: The exclusive upper boundary array of the same size and type as ``src``

    :param dst: The destination array; will have the same size as ``src`` and ``CV_8U`` type

The functions ``inRange`` do the range check for every element of the input array:

.. math::

    \texttt{dst} (I)= \texttt{lowerb} (I)_0 \leq \texttt{src} (I)_0 < \texttt{upperb} (I)_0

for single-channel arrays,

.. math::

    \texttt{dst} (I)= \texttt{lowerb} (I)_0 \leq \texttt{src} (I)_0 < \texttt{upperb} (I)_0 \land \texttt{lowerb} (I)_1 \leq \texttt{src} (I)_1 < \texttt{upperb} (I)_1

for two-channel arrays, and so forth. ``dst(I)`` is set to 255 (all ``1``-bits) if ``src(I)`` is within the specified range, and 0 otherwise.

.. index:: invert

cv::invert
----------

.. cfunction:: double invert(const Mat\& src, Mat\& dst, int method=DECOMP_LU)

    Finds the inverse or pseudo-inverse of a matrix.

    :param src: The source floating-point :math:`M \times N` matrix

    :param dst: The destination matrix; will have :math:`N \times M` size and the same type as ``src``

    :param method: The inversion method:

        * **DECOMP_LU** Gaussian elimination with the optimal pivot element chosen

        * **DECOMP_SVD** Singular value decomposition (SVD) method

        * **DECOMP_CHOLESKY** Cholesky decomposition. The matrix must be symmetric and positive definite

The function ``invert`` inverts the matrix ``src`` and stores the result in ``dst``. When the matrix ``src`` is singular or non-square, the function computes the pseudo-inverse matrix, i.e. the matrix ``dst`` such that :math:`\|\texttt{src} \cdot \texttt{dst} - I\|` is minimal.

In the case of the ``DECOMP_LU`` method, the function returns the ``src`` determinant (``src`` must be square). If it is 0, the matrix is not inverted and ``dst`` is filled with zeros.

In the case of the ``DECOMP_SVD`` method, the function returns the inverse condition number of ``src`` (the ratio of the smallest singular value to the largest singular value), and 0 if ``src`` is singular. The SVD method calculates a pseudo-inverse matrix if ``src`` is singular.

Similarly to ``DECOMP_LU``, the ``DECOMP_CHOLESKY`` method works only with non-singular square matrices. In this case the function stores the inverted matrix in ``dst`` and returns non-zero; otherwise it returns 0.

See also: :func:`solve`, :func:`SVD`

.. index:: log

cv::log
-------

.. cfunction:: void log(const Mat\& src, Mat\& dst)

.. cfunction:: void log(const MatND\& src, MatND\& dst)

    Calculates the natural logarithm of every array element.

    :param src: The source array

    :param dst: The destination array; will have the same size and same type as ``src``

The function ``log`` calculates the natural logarithm of the absolute value of every element of the input array:

.. math::

    \texttt{dst} (I) = \fork{\log |\texttt{src}(I)|}{if $\texttt{src}(I) \ne 0$ }{\texttt{C}}{otherwise}

where ``C`` is a large negative number (about -700 in the current implementation). The maximum relative error is about :math:`7 \times 10^{-6}` for single-precision input and less than :math:`10^{-10}` for double-precision input. Special values (NaN, :math:`\pm \infty`) are not handled.

See also: :func:`exp`, :func:`cartToPolar`, :func:`polarToCart`, :func:`phase`, :func:`pow`, :func:`sqrt`, :func:`magnitude`

.. index:: LUT

cv::LUT
-------

.. cfunction:: void LUT(const Mat\& src, const Mat\& lut, Mat\& dst)

    Performs a look-up table transform of an array.

    :param src: Source array of 8-bit elements

    :param lut: Look-up table of 256 elements. In the case of a multi-channel source array, the table should either have a single channel (in this case the same table is used for all channels) or the same number of channels as the source array

    :param dst: Destination array; will have the same size and the same number of channels as ``src``, and the same depth as ``lut``

The function ``LUT`` fills the destination array with values from the look-up table. Indices of the entries are taken from the source array. That is, the function processes each element of ``src`` as follows:

.. math::

    \texttt{dst} (I) \leftarrow \texttt{lut(src(I) + d)}

where

.. math::

    d = \fork{0}{if \texttt{src} has depth \texttt{CV\_8U}}{128}{if \texttt{src} has depth \texttt{CV\_8S}}

See also: :func:`convertScaleAbs`, ``Mat::convertTo``

.. index:: magnitude

cv::magnitude
-------------

.. cfunction:: void magnitude(const Mat\& x, const Mat\& y, Mat\& magnitude)

    Calculates magnitudes of 2D vectors.

    :param x: The floating-point array of x-coordinates of the vectors

    :param y: The floating-point array of y-coordinates of the vectors; must have the same size as ``x``

    :param magnitude: The destination array; will have the same size and same type as ``x``

The function ``magnitude`` calculates the magnitude of 2D vectors formed from the corresponding elements of the ``x`` and ``y`` arrays:

.. math::

    \texttt{magnitude} (I) = \sqrt{\texttt{x}(I)^2 + \texttt{y}(I)^2}

See also: :func:`cartToPolar`, :func:`polarToCart`, :func:`phase`, :func:`sqrt`

.. index:: Mahalanobis

cv::Mahalanobis
---------------

.. cfunction:: double Mahalanobis(const Mat\& vec1, const Mat\& vec2, const Mat\& icovar)

    Calculates the Mahalanobis distance between two vectors.

    :param vec1: The first 1D source vector

    :param vec2: The second 1D source vector

    :param icovar: The inverse covariance matrix

The function ``Mahalanobis`` calculates and returns the weighted distance between two vectors:

.. math::

    d( \texttt{vec1} , \texttt{vec2} )= \sqrt{\sum_{i,j}{\texttt{icovar(i,j)}\cdot(\texttt{vec1}(i)-\texttt{vec2}(i))\cdot(\texttt{vec1}(j)-\texttt{vec2}(j))} }

The covariance matrix may be calculated using the :func:`calcCovarMatrix` function and then inverted using the :func:`invert` function (preferably using the ``DECOMP_SVD`` method, as the most accurate).

.. index:: max

cv::max
-------

.. cfunction:: Mat_Expr<...> max(const Mat\& src1, const Mat\& src2)

.. cfunction:: Mat_Expr<...> max(const Mat\& src1, double value)

.. cfunction:: Mat_Expr<...> max(double value, const Mat\& src1)

.. cfunction:: void max(const Mat\& src1, const Mat\& src2, Mat\& dst)

.. cfunction:: void max(const Mat\& src1, double value, Mat\& dst)

.. cfunction:: void max(const MatND\& src1, const MatND\& src2, MatND\& dst)

.. cfunction:: void max(const MatND\& src1, double value, MatND\& dst)

    Calculates the per-element maximum of two arrays, or of an array and a scalar.

    :param src1: The first source array

    :param src2: The second source array of the same size and type as ``src1``

    :param value: The real scalar value

    :param dst: The destination array; will have the same size and type as ``src1``

The functions ``max`` compute the per-element maximum of two arrays:

.. math::

    \texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{src2} (I))

or of an array and a scalar:

.. math::

    \texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{value} )

In the second variant, when the source array is multi-channel, each channel is compared with ``value`` independently.
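For example, a tiny sketch (not part of the original manual) showing both forms of ``max``:

::

    Mat a = (Mat_<float>(1,4) << -1, 2, 5, 0);
    Mat b = (Mat_<float>(1,4) <<  3, 1, 4, 0);

    Mat m;
    max(a, b, m);    // per-element maximum: m = [3, 2, 5, 0]
    max(a, 0.0, m);  // clip negative values to zero: m = [0, 2, 5, 0]

..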
The first 3 variants of the function listed above are actually a part of :ref:`Matrix Expressions`; they return an expression object that can be further transformed, assigned to a matrix, passed to a function, etc.

See also: :func:`min`, :func:`compare`, :func:`inRange`, :func:`minMaxLoc`, :ref:`Matrix Expressions`

.. index:: mean

cv::mean
--------

.. cfunction:: Scalar mean(const Mat\& mtx)

.. cfunction:: Scalar mean(const Mat\& mtx, const Mat\& mask)

.. cfunction:: Scalar mean(const MatND\& mtx)

.. cfunction:: Scalar mean(const MatND\& mtx, const MatND\& mask)

    Calculates the average (mean) of array elements.

    :param mtx: The source array; it should have 1 to 4 channels (so that the result can be stored in :func:`Scalar`)

    :param mask: The optional operation mask

The functions ``mean`` compute the mean value ``M`` of array elements, independently for each channel, and return it:

.. math::

    \begin{array}{l} N = \sum _{I: \; \texttt{mask} (I) \ne 0} 1 \\ M_c = \left ( \sum _{I: \; \texttt{mask} (I) \ne 0}{ \texttt{mtx} (I)_c} \right )/N \end{array}

When all the mask elements are 0's, the functions return ``Scalar::all(0)``.

See also: :func:`countNonZero`, :func:`meanStdDev`, :func:`norm`, :func:`minMaxLoc`

.. index:: meanStdDev

cv::meanStdDev
--------------

.. cfunction:: void meanStdDev(const Mat\& mtx, Scalar\& mean, Scalar\& stddev, const Mat\& mask=Mat())

.. cfunction:: void meanStdDev(const MatND\& mtx, Scalar\& mean, Scalar\& stddev, const MatND\& mask=MatND())

    Calculates the mean and standard deviation of array elements.

    :param mtx: The source array; it should have 1 to 4 channels (so that the results can be stored in :func:`Scalar`'s)

    :param mean: The output parameter: computed mean value

    :param stddev: The output parameter: computed standard deviation

    :param mask: The optional operation mask

The functions ``meanStdDev`` compute the mean and the standard deviation of array elements, independently for each channel, and return them via the output parameters:

.. math::

    \begin{array}{l} N = \sum _{I, \texttt{mask} (I) \ne 0} 1 \\ \texttt{mean} _c = \frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \texttt{src} (I)_c}{N} \\ \texttt{stddev} _c = \sqrt{\frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \left ( \texttt{src} (I)_c - \texttt{mean} _c \right )^2}{N}} \end{array}

When all the mask elements are 0's, the functions return ``mean=stddev=Scalar::all(0)``. Note that the computed standard deviation is only the diagonal of the complete normalized covariance matrix. If the full matrix is needed, you can reshape the multi-channel array :math:`M \times N` to the single-channel array :math:`M*N \times \texttt{mtx.channels}()` (only possible when the matrix is continuous) and then pass the matrix to :func:`calcCovarMatrix`.

See also: :func:`countNonZero`, :func:`mean`, :func:`norm`, :func:`minMaxLoc`, :func:`calcCovarMatrix`

.. index:: merge

cv::merge
---------

.. cfunction:: void merge(const Mat* mv, size_t count, Mat\& dst)

.. cfunction:: void merge(const vector<Mat>\& mv, Mat\& dst)

.. cfunction:: void merge(const MatND* mv, size_t count, MatND\& dst)

.. cfunction:: void merge(const vector<MatND>\& mv, MatND\& dst)

    Composes a multi-channel array from several single-channel arrays.

    :param mv: The source array or vector of single-channel matrices to be merged. All the matrices in ``mv`` must have the same size and the same type

    :param count: The number of source matrices when ``mv`` is a plain C array; must be greater than zero

    :param dst: The destination array; will have the same size and the same depth as ``mv[0]``, and the number of channels will match the number of source matrices

The functions ``merge`` merge several single-channel arrays (or rather interleave their elements) to make a single multi-channel array:

.. math::

    \texttt{dst} (I)_c = \texttt{mv} [c](I)

The function :func:`split` does the reverse operation. If you need to merge several multi-channel images or shuffle channels in some other advanced way, use :func:`mixChannels`.

See also: :func:`mixChannels`, :func:`split`, :func:`reshape`

.. index:: min

cv::min
-------

.. cfunction:: Mat_Expr<...> min(const Mat\& src1, const Mat\& src2)

.. cfunction:: Mat_Expr<...> min(const Mat\& src1, double value)

.. cfunction:: Mat_Expr<...> min(double value, const Mat\& src1)

.. cfunction:: void min(const Mat\& src1, const Mat\& src2, Mat\& dst)

.. cfunction:: void min(const Mat\& src1, double value, Mat\& dst)

.. cfunction:: void min(const MatND\& src1, const MatND\& src2, MatND\& dst)

.. cfunction:: void min(const MatND\& src1, double value, MatND\& dst)

    Calculates the per-element minimum of two arrays, or of an array and a scalar.

    :param src1: The first source array

    :param src2: The second source array of the same size and type as ``src1``

    :param value: The real scalar value

    :param dst: The destination array; will have the same size and type as ``src1``

The functions ``min`` compute the per-element minimum of two arrays:

.. math::

    \texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{src2} (I))

or of an array and a scalar:

.. math::

    \texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{value} )

In the second variant, when the source array is multi-channel, each channel is compared with ``value`` independently.

The first 3 variants of the function listed above are actually a part of :ref:`Matrix Expressions`; they return an expression object that can be further transformed, assigned to a matrix, passed to a function, etc.

See also: :func:`max`, :func:`compare`, :func:`inRange`, :func:`minMaxLoc`, :ref:`Matrix Expressions`

.. index:: minMaxLoc

cv::minMaxLoc
-------------

.. cfunction:: void minMaxLoc(const Mat\& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, const Mat\& mask=Mat())

.. cfunction:: void minMaxLoc(const MatND\& src, double* minVal, double* maxVal, int* minIdx=0, int* maxIdx=0, const MatND\& mask=MatND())

.. cfunction:: void minMaxLoc(const SparseMat\& src, double* minVal, double* maxVal, int* minIdx=0, int* maxIdx=0)

    Finds the global minimum and maximum in a whole array or sub-array.

    :param src: The source single-channel array

    :param minVal: Pointer to the returned minimum value; ``NULL`` if not required

    :param maxVal: Pointer to the returned maximum value; ``NULL`` if not required

    :param minLoc: Pointer to the returned minimum location (in the 2D case); ``NULL`` if not required

    :param maxLoc: Pointer to the returned maximum location (in the 2D case); ``NULL`` if not required

    :param minIdx: Pointer to the returned minimum location (in the nD case); ``NULL`` if not required; otherwise it must point to an array of ``src.dims`` elements, and the coordinates of the minimum element in each dimension will be stored there sequentially

    :param maxIdx: Pointer to the returned maximum location (in the nD case); ``NULL`` if not required

    :param mask: The optional mask used to select a sub-array

The functions ``minMaxLoc`` find the minimum and maximum element values and their positions. The extrema are searched across the whole array, or, if ``mask`` is not an empty array, in the specified array region.

The functions do not work with multi-channel arrays. If you need to find the minimum or maximum elements across all the channels, use :func:`reshape` first to reinterpret the array as single-channel, or extract the particular channel using :func:`extractImageCOI`, :func:`mixChannels` or :func:`split`.

In the case of a sparse matrix the minimum is found among non-zero elements only.

See also: :func:`max`, :func:`min`, :func:`compare`, :func:`inRange`, :func:`extractImageCOI`, :func:`mixChannels`, :func:`split`, :func:`reshape`.

.. index:: mixChannels

cv::mixChannels
---------------

.. cfunction:: void mixChannels(const Mat* srcv, int nsrc, Mat* dstv, int ndst, const int* fromTo, size_t npairs)

.. cfunction:: void mixChannels(const MatND* srcv, int nsrc, MatND* dstv, int ndst, const int* fromTo, size_t npairs)

.. cfunction:: void mixChannels(const vector<Mat>\& srcv, vector<Mat>\& dstv, const int* fromTo, int npairs)

.. cfunction:: void mixChannels(const vector<MatND>\& srcv, vector<MatND>\& dstv, const int* fromTo, int npairs)

    Copies specified channels from input arrays to the specified channels of output arrays.

    :param srcv: The input array or vector of matrices. All the matrices must have the same size and the same depth

    :param nsrc: The number of elements in ``srcv``

    :param dstv: The output array or vector of matrices. All the matrices *must be allocated*; their size and depth must be the same as in ``srcv[0]``

    :param ndst: The number of elements in ``dstv``

    :param fromTo: The array of index pairs, specifying which channels are copied and where. ``fromTo[k*2]`` is the 0-based index of the input channel in ``srcv`` and ``fromTo[k*2+1]`` is the index of the output channel in ``dstv``. Continuous channel numbering is used: the first input image channels are indexed from ``0`` to ``srcv[0].channels()-1``, the second input image channels are indexed from ``srcv[0].channels()`` to ``srcv[0].channels() + srcv[1].channels()-1``, etc., and the same scheme is used for the output image channels. As a special case, when ``fromTo[k*2]`` is negative, the corresponding output channel is filled with zeros

    :param npairs: The number of index pairs in ``fromTo``

The functions ``mixChannels`` provide an advanced mechanism for shuffling image channels; :func:`split`, :func:`merge` and some forms of :func:`cvtColor` are partial cases of ``mixChannels``.

As an example, this code splits a 4-channel RGBA image into a 3-channel BGR (i.e. with R and B channels swapped) image and a separate alpha channel image:

::

    Mat rgba( 100, 100, CV_8UC4, Scalar(1,2,3,4) );
    Mat bgr( rgba.rows, rgba.cols, CV_8UC3 );
    Mat alpha( rgba.rows, rgba.cols, CV_8UC1 );

    // forming an array of matrices is a quite efficient operation,
    // because the matrix data is not copied, only the headers
    Mat out[] = { bgr, alpha };
    // rgba[0] -> bgr[2], rgba[1] -> bgr[1],
    // rgba[2] -> bgr[0], rgba[3] -> alpha[0]
    int from_to[] = { 0,2, 1,1, 2,0, 3,3 };
    mixChannels( &rgba, 1, out, 2, from_to, 4 );

..

Note that, unlike many other new-style C++ functions in OpenCV (see the introduction section and :func:`Mat::create`), ``mixChannels`` requires the destination arrays to be pre-allocated before calling the function.

See also: :func:`split`, :func:`merge`, :func:`cvtColor`

.. index:: mulSpectrums

cv::mulSpectrums
----------------

.. cfunction:: void mulSpectrums(const Mat\& src1, const Mat\& src2, Mat\& dst, int flags, bool conj=false)

    Performs per-element multiplication of two Fourier spectrums.

    :param src1: The first source array

    :param src2: The second source array; must have the same size and the same type as ``src1``

    :param dst: The destination array; will have the same size and the same type as ``src1``

    :param flags: The same flags as passed to :func:`dft`; only the flag ``DFT_ROWS`` is checked for

    :param conj: The optional flag that conjugates the second source array before the multiplication (true) or not (false)

The function ``mulSpectrums`` performs per-element multiplication of two CCS-packed or complex matrices that are the results of a real or complex Fourier transform.

The function, together with :func:`dft` and :func:`idft`, may be used to calculate convolution (pass ``conj=false``) or correlation (pass ``conj=true``) of two arrays rapidly. When the arrays are complex, they are simply multiplied (per-element) with optional conjugation of the second array elements. When the arrays are real, they are assumed to be CCS-packed (see :func:`dft` for details).

.. index:: multiply

cv::multiply
------------

.. cfunction:: void multiply(const Mat\& src1, const Mat\& src2, Mat\& dst, double scale=1)

.. cfunction:: void multiply(const MatND\& src1, const MatND\& src2, MatND\& dst, double scale=1)

    Calculates the per-element scaled product of two arrays.

    :param src1: The first source array

    :param src2: The second source array of the same size and the same type as ``src1``

    :param dst: The destination array; will have the same size and the same type as ``src1``

    :param scale: The optional scale factor

The function ``multiply`` calculates the per-element product of two arrays:

.. math::

    \texttt{dst} (I)= \texttt{saturate} ( \texttt{scale} \cdot \texttt{src1} (I) \cdot \texttt{src2} (I))

There is also a :ref:`Matrix Expressions`-friendly variant of the first function; see :func:`Mat::mul`.
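For example, a tiny sketch (not part of the original manual) showing the function and its matrix-expression counterpart:

::

    Mat a(3, 3, CV_32F, Scalar(2)), b(3, 3, CV_32F, Scalar(4));

    Mat c;
    multiply(a, b, c, 0.5); // c(I) = 0.5*a(I)*b(I) = 4 for every element

    Mat d = a.mul(b, 0.5);  // the same per-element product as a matrix expression

..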
If you are looking for a matrix product, not a per-element product, see :func:`gemm`.

See also: :func:`add`, :func:`subtract`, :func:`divide`, :ref:`Matrix Expressions`, :func:`scaleAdd`, :func:`addWeighted`, :func:`accumulate`, :func:`accumulateProduct`, :func:`accumulateSquare`, :func:`Mat::convertTo`

.. index:: mulTransposed

cv::mulTransposed
-----------------

.. cfunction:: void mulTransposed( const Mat\& src, Mat\& dst, bool aTa, const Mat\& delta=Mat(), double scale=1, int rtype=-1 )

    Calculates the product of a matrix and its transposition.

    :param src: The source matrix

    :param dst: The destination square matrix

    :param aTa: Specifies the multiplication ordering; see the description below

    :param delta: The optional delta matrix, subtracted from ``src`` before the multiplication. When the matrix is empty (``delta=Mat()``), it is assumed to be zero, i.e. nothing is subtracted. If it has the same size as ``src``, it is simply subtracted; otherwise it is "repeated" (see :func:`repeat`) to cover the full ``src`` and then subtracted. The type of the delta matrix, when it is not empty, must be the same as the type of the created destination matrix; see the ``rtype`` description

    :param scale: The optional scale factor for the matrix product

    :param rtype: When it is negative, the destination matrix will have the same type as ``src``. Otherwise, it will have ``type=CV_MAT_DEPTH(rtype)``, which should be either ``CV_32F`` or ``CV_64F``

The function ``mulTransposed`` calculates the product of ``src`` and its transposition:

.. math::

    \texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} )^T ( \texttt{src} - \texttt{delta} )

if ``aTa=true``, and

.. math::

    \texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} ) ( \texttt{src} - \texttt{delta} )^T

otherwise. The function is used to compute the covariance matrix, and with zero delta it can be used as a faster substitute for the general matrix product :math:`A*B` when :math:`B=A^T`.

See also: :func:`calcCovarMatrix`, :func:`gemm`, :func:`repeat`, :func:`reduce`

.. index:: norm

cv::norm
--------

.. cfunction:: double norm(const Mat\& src1, int normType=NORM_L2)

.. cfunction:: double norm(const Mat\& src1, const Mat\& src2, int normType=NORM_L2)

.. cfunction:: double norm(const Mat\& src1, int normType, const Mat\& mask)

.. cfunction:: double norm(const Mat\& src1, const Mat\& src2, int normType, const Mat\& mask)

.. cfunction:: double norm(const MatND\& src1, int normType=NORM_L2, const MatND\& mask=MatND())

.. cfunction:: double norm(const MatND\& src1, const MatND\& src2, int normType=NORM_L2, const MatND\& mask=MatND())

.. cfunction:: double norm( const SparseMat\& src, int normType )

    Calculates the absolute array norm, an absolute difference norm, or a relative difference norm.

    :param src1: The first source array

    :param src2: The second source array of the same size and the same type as ``src1``

    :param normType: Type of the norm; see the discussion below

    :param mask: The optional operation mask

The functions ``norm`` calculate the absolute norm of ``src1`` (when there is no ``src2``):

.. math::

    norm = \forkthree{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if $\texttt{normType} = \texttt{NORM\_INF}$ }
    { \| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if $\texttt{normType} = \texttt{NORM\_L1}$ }
    { \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if $\texttt{normType} = \texttt{NORM\_L2}$ }

or an absolute or relative difference norm if ``src2`` is there:

.. math::

    norm = \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if $\texttt{normType} = \texttt{NORM\_INF}$ }
    { \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if $\texttt{normType} = \texttt{NORM\_L1}$ }
    { \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if $\texttt{normType} = \texttt{NORM\_L2}$ }

or

.. math::

    norm = \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if $\texttt{normType} = \texttt{NORM\_RELATIVE\_INF}$ }
    { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if $\texttt{normType} = \texttt{NORM\_RELATIVE\_L1}$ }
    { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if $\texttt{normType} = \texttt{NORM\_RELATIVE\_L2}$ }

The functions ``norm`` return the calculated norm.

When there is a ``mask`` parameter and it is not empty (it should then have type ``CV_8U`` and the same size as ``src1``), the norm is computed only over the region specified by the mask.

Multiple-channel source arrays are treated as single-channel ones, that is, the results for all channels are combined.

.. index:: normalize

cv::normalize
-------------

.. cfunction:: void normalize( const Mat\& src, Mat\& dst, double alpha=1, double beta=0, int normType=NORM_L2, int rtype=-1, const Mat\& mask=Mat())

.. cfunction:: void normalize( const MatND\& src, MatND\& dst, double alpha=1, double beta=0, int normType=NORM_L2, int rtype=-1, const MatND\& mask=MatND())

.. cfunction:: void normalize( const SparseMat\& src, SparseMat\& dst, double alpha, int normType )

    Normalizes the array's norm or value range.

    :param src: The source array

    :param dst: The destination array; will have the same size as ``src``

    :param alpha: The norm value to normalize to, or the lower range boundary in the case of range normalization

    :param beta: The upper range boundary in the case of range normalization; not used for norm normalization

    :param normType: The normalization type; see the discussion below

    :param rtype: When the parameter is negative, the destination array will have the same type as ``src``; otherwise it will have the same number of channels as ``src`` and the depth ``=CV_MAT_DEPTH(rtype)``

    :param mask: The optional operation mask

The functions ``normalize`` scale and shift the source array elements, so that

.. math::

    \| \texttt{dst} \| _{L_p}= \texttt{alpha}

(where :math:`p=\infty`, 1 or 2) when ``normType=NORM_INF``, ``NORM_L1`` or ``NORM_L2``, respectively, or so that

.. math::

    \min _I \texttt{dst} (I)= \texttt{alpha} , \, \, \max _I \texttt{dst} (I)= \texttt{beta}

when ``normType=NORM_MINMAX`` (for dense arrays only).
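For example, a small sketch (not part of the original manual) of the two normalization modes:

::

    Mat src = (Mat_<float>(1,3) << 1, 2, 7);
    Mat dst;

    // scale the elements so that the L2 norm of dst equals 1
    normalize(src, dst, 1, 0, NORM_L2);

    // stretch the value range to [0, 255] and convert to 8-bit
    normalize(src, dst, 0, 255, NORM_MINMAX, CV_8U);

..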
The optional mask specifies the sub-array to be normalized; that is, the norm or min-max values are computed over the sub-array, and then this sub-array is modified to be normalized. If you want to only use the mask to compute the norm or min-max, but modify the whole array, you can use :func:`norm` and :func:`Mat::convertScale` / :func:`MatND::convertScale` / :func:`SparseMat::convertScale` separately.

In the case of sparse matrices, only the non-zero values are analyzed and transformed. Because of this, the range transformation for sparse matrices is not allowed, since it can shift the zero level.

See also: :func:`norm`, :func:`Mat::convertScale`, :func:`MatND::convertScale`, :func:`SparseMat::convertScale`

.. index:: PCA

.. _PCA:

PCA
---

.. ctype:: PCA

Class for Principal Component Analysis

::

    class PCA
    {
    public:
        // default constructor
        PCA();
        // computes PCA for a set of vectors stored as data rows or columns.
        PCA(const Mat& data, const Mat& mean, int flags, int maxComponents=0);
        // computes PCA for a set of vectors stored as data rows or columns
        PCA& operator()(const Mat& data, const Mat& mean, int flags, int maxComponents=0);
        // projects a vector into the principal component space
        Mat project(const Mat& vec) const;
        void project(const Mat& vec, Mat& result) const;
        // reconstructs the vector from its PC projection
        Mat backProject(const Mat& vec) const;
        void backProject(const Mat& vec, Mat& result) const;

        // eigenvectors of the PC space, stored as the matrix rows
        Mat eigenvectors;
        // the corresponding eigenvalues; not used for PCA compression/decompression
        Mat eigenvalues;
        // mean vector, subtracted from the projected vector
        // or added to the reconstructed vector
        Mat mean;
    };

..

The class ``PCA`` is used to compute a special basis for a set of vectors. The basis consists of eigenvectors of the covariance matrix computed from the input set of vectors, and the class can transform vectors to/from the new coordinate space defined by this basis. Usually, in this new coordinate system each vector from the original set (and any linear combination of such vectors) can be quite accurately approximated by taking just its first few components, corresponding to the eigenvectors of the largest eigenvalues of the covariance matrix. Geometrically it means that we compute the projection of the vector onto a subspace formed by a few eigenvectors corresponding to the dominant eigenvalues of the covariance matrix, and usually such a projection is very close to the original vector. That is, we can represent the original vector from a high-dimensional space with a much shorter vector consisting of the projected vector's coordinates in the subspace. Such a transformation is also known as the Karhunen-Loeve Transform, or KLT. See
http://en.wikipedia.org/wiki/Principal_component_analysis

The following sample is a function that takes two matrices. The first one stores a set of vectors (a row per vector) that is used to compute PCA; the second one stores another "test" set of vectors (a row per vector). These are first compressed with PCA, then reconstructed back, and then the reconstruction error norm is computed and printed for each vector.

::

    PCA compressPCA(const Mat& pcaset, int maxComponents,
                    const Mat& testset, Mat& compressed)
    {
        PCA pca(pcaset, // pass the data
                Mat(), // we do not have a pre-computed mean vector,
                       // so let the PCA engine compute it
                CV_PCA_DATA_AS_ROW, // indicate that the vectors
                                    // are stored as matrix rows
                                    // (use CV_PCA_DATA_AS_COL if the vectors are
                                    // the matrix columns)
                maxComponents // specify how many principal components to retain
                );
        // if there is no test data, just return the computed basis, ready-to-use
        if( !testset.data )
            return pca;
        CV_Assert( testset.cols == pcaset.cols );

        compressed.create(testset.rows, maxComponents, testset.type());

        Mat reconstructed;
        for( int i = 0; i < testset.rows; i++ )
        {
            Mat vec = testset.row(i), coeffs = compressed.row(i);
            // compress the vector, the result will be stored
            // in the i-th row of the output matrix
            pca.project(vec, coeffs);
            // and then reconstruct it
            pca.backProject(coeffs, reconstructed);
            // and measure the error
            printf("%d. diff = %g\n", i, norm(vec, reconstructed, NORM_L2));
        }
        return pca;
    }

..

See also: :func:`calcCovarMatrix`, :func:`mulTransposed`, :func:`SVD`, :func:`dft`, :func:`dct`

.. index:: PCA::PCA

cv::PCA::PCA
------------

.. cfunction:: PCA::PCA()

.. cfunction:: PCA::PCA(const Mat\& data, const Mat\& mean, int flags, int maxComponents=0)

    PCA constructors

    :param data: the input samples, stored as the matrix rows or as the matrix columns

    :param mean: the optional mean value. If the matrix is empty (``Mat()``), the mean is computed from the data

    :param flags: operation flags. Currently the parameter is only used to specify the data layout:

        * **CV_PCA_DATA_AS_ROW** Indicates that the input samples are stored as matrix rows

        * **CV_PCA_DATA_AS_COL** Indicates that the input samples are stored as matrix columns

    :param maxComponents: The maximum number of components that PCA should retain. By default, all the components are retained

The default constructor initializes an empty PCA structure. The second constructor initializes the structure and calls :func:`PCA::operator ()`.

.. index:: PCA::operator ()

cv::PCA::operator ()
--------------------

.. cfunction:: PCA\& PCA::operator()(const Mat\& data, const Mat\& mean, int flags, int maxComponents=0)

    Performs Principal Component Analysis of the supplied dataset.

    :param data: the input samples, stored as the matrix rows or as the matrix columns

    :param mean: the optional mean value. If the matrix is empty (``Mat()``), the mean is computed from the data

    :param flags: operation flags. Currently the parameter is only used to specify the data layout:

        * **CV_PCA_DATA_AS_ROW** Indicates that the input samples are stored as matrix rows

        * **CV_PCA_DATA_AS_COL** Indicates that the input samples are stored as matrix columns

    :param maxComponents: The maximum number of components that PCA should retain. By default, all the components are retained

The operator performs PCA of the supplied dataset. It is safe to reuse the same PCA structure for multiple datasets. That is, if the structure has been previously used with another dataset, the existing internal data is reclaimed and the new ``eigenvalues``, ``eigenvectors`` and ``mean`` are allocated and computed.
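For example, a tiny sketch (not part of the original manual; the dataset sizes are arbitrary) of reusing one ``PCA`` object:

::

    PCA pca;
    Mat setA(100, 16, CV_32F), setB(200, 16, CV_32F);
    randu(setA, Scalar::all(0), Scalar::all(1));
    randu(setB, Scalar::all(0), Scalar::all(1));

    pca(setA, Mat(), CV_PCA_DATA_AS_ROW, 4); // basis computed from setA
    pca(setB, Mat(), CV_PCA_DATA_AS_ROW, 4); // internal data reclaimed, recomputed from setB

    Mat coeffs = pca.project(setB.row(0));   // 1x4 row of PC coefficients

..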
+ +The computed eigenvalues are sorted from the largest to the smallest and the corresponding eigenvectors are stored as +``PCA::eigenvectors`` +rows. + + +.. index:: PCA::project + + +cv::PCA::project +---------------- + +`id=0.706397581604 Comments from the Wiki `__ + + + + +.. cfunction:: Mat PCA::project(const Mat\& vec) const + + + +.. cfunction:: void PCA::project(const Mat\& vec, Mat\& result) const + + Project vector(s) to the principal component subspace + + + + + + + :param vec: the input vector(s). They have to have the same dimensionality and the same layout as the input data used at PCA phase. That is, if ``CV_PCA_DATA_AS_ROWS`` had been specified, then ``vec.cols==data.cols`` (that's vectors' dimensionality) and ``vec.rows`` is the number of vectors to project; and similarly for the ``CV_PCA_DATA_AS_COLS`` case. + + + :param result: the output vectors. Let's now consider ``CV_PCA_DATA_AS_COLS`` case. In this case the output matrix will have as many columns as the number of input vectors, i.e. ``result.cols==vec.cols`` and the number of rows will match the number of principal components (e.g. ``maxComponents`` parameter passed to the constructor). + + + +The methods project one or more vectors to the principal component subspace, where each vector projection is represented by coefficients in the principal component basis. The first form of the method returns the matrix that the second form writes to the result. So the first form can be used as a part of expression, while the second form can be more efficient in a processing loop. + + +.. index:: PCA::backProject + + +cv::PCA::backProject +-------------------- + +`id=0.730132482088 Comments from the Wiki `__ + + + + +.. cfunction:: Mat PCA::backProject(const Mat\& vec) const + + + +.. cfunction:: void PCA::backProject(const Mat\& vec, Mat\& result) const + + Reconstruct vectors from their PC projections. + + + + + + + :param vec: Coordinates of the vectors in the principal component subspace. The layout and size are the same as of ``PCA::project`` output vectors. + + + :param result: The reconstructed vectors. The layout and size are the same as of ``PCA::project`` input vectors. + + + +The methods are inverse operations to +:func:`PCA::project` +. They take PC coordinates of projected vectors and reconstruct the original vectors. Of course, unless all the principal components have been retained, the reconstructed vectors will be different from the originals, but typically the difference will be small is if the number of components is large enough (but still much smaller than the original vector dimensionality) - that's why PCA is used after all. + + +.. index:: perspectiveTransform + + +cv::perspectiveTransform +------------------------ + +`id=0.283194607337 Comments from the Wiki `__ + + + + +.. cfunction:: void perspectiveTransform(const Mat\& src, Mat\& dst, const Mat\& mtx ) + + Performs perspective matrix transformation of vectors. + + + + + + + :param src: The source two-channel or three-channel floating-point array; + each element is 2D/3D vector to be transformed + + + :param dst: The destination array; it will have the same size and same type as ``src`` + + + :param mtx: :math:`3\times 3` or :math:`4 \times 4` transformation matrix + + + +The function +``perspectiveTransform`` +transforms every element of +``src`` +, +by treating it as 2D or 3D vector, in the following way (here 3D vector transformation is shown; in the case of 2D vector transformation the +:math:`z` +component is omitted): + + + +.. 
math:: + + (x, y, z) \rightarrow (x'/w, y'/w, z'/w) + + +where + + + +.. math:: + + (x', y', z', w') = \texttt{mat} \cdot \begin{bmatrix} x & y & z & 1 \end{bmatrix} + + +and + + +.. math:: + + w = \fork{w'}{if $w' \ne 0$}{\infty}{otherwise} + + +Note that the function transforms a sparse set of 2D or 3D vectors. If you want to transform an image using perspective transformation, use +:func:`warpPerspective` +. If you have an inverse task, i.e. want to compute the most probable perspective transformation out of several pairs of corresponding points, you can use +:func:`getPerspectiveTransform` +or +:func:`findHomography` +. + +See also: +:func:`transform` +, +:func:`warpPerspective` +, +:func:`getPerspectiveTransform` +, +:func:`findHomography` + +.. index:: phase + + +cv::phase +--------- + +`id=0.241585072137 Comments from the Wiki `__ + + + + +.. cfunction:: void phase(const Mat\& x, const Mat\& y, Mat\& angle, bool angleInDegrees=false) + + Calculates the rotation angle of 2d vectors + + + + + + + :param x: The source floating-point array of x-coordinates of 2D vectors + + + :param y: The source array of y-coordinates of 2D vectors; must have the same size and the same type as ``x`` + + + :param angle: The destination array of vector angles; it will have the same size and same type as ``x`` + + + :param angleInDegrees: When it is true, the function will compute angle in degrees, otherwise they will be measured in radians + + + +The function +``phase`` +computes the rotation angle of each 2D vector that is formed from the corresponding elements of +``x`` +and +``y`` +: + + + +.. math:: + + \texttt{angle} (I) = \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I)) + + +The angle estimation accuracy is +:math:`\sim\,0.3^\circ` +, when +``x(I)=y(I)=0`` +, the corresponding +``angle`` +(I) is set to +:math:`0` +. + +See also: + + +.. index:: polarToCart + + +cv::polarToCart +--------------- + +`id=0.368600095428 Comments from the Wiki `__ + + + + +.. cfunction:: void polarToCart(const Mat\& magnitude, const Mat\& angle, Mat\& x, Mat\& y, bool angleInDegrees=false) + + Computes x and y coordinates of 2D vectors from their magnitude and angle. + + + + + + + :param magnitude: The source floating-point array of magnitudes of 2D vectors. It can be an empty matrix ( ``=Mat()`` ) - in this case the function assumes that all the magnitudes are =1. If it's not empty, it must have the same size and same type as ``angle`` + + + :param angle: The source floating-point array of angles of the 2D vectors + + + :param x: The destination array of x-coordinates of 2D vectors; will have the same size and the same type as ``angle`` + + + :param y: The destination array of y-coordinates of 2D vectors; will have the same size and the same type as ``angle`` + + + :param angleInDegrees: When it is true, the input angles are measured in degrees, otherwise they are measured in radians + + + +The function +``polarToCart`` +computes the cartesian coordinates of each 2D vector represented by the corresponding elements of +``magnitude`` +and +``angle`` +: + + + +.. math:: + + \begin{array}{l} \texttt{x} (I) = \texttt{magnitude} (I) \cos ( \texttt{angle} (I)) \\ \texttt{y} (I) = \texttt{magnitude} (I) \sin ( \texttt{angle} (I)) \\ \end{array} + + +The relative accuracy of the estimated coordinates is +:math:`\sim\,10^{-6}` +. + +See also: +:func:`cartToPolar` +, +:func:`magnitude` +, +:func:`phase` +, +:func:`exp` +, +:func:`log` +, +:func:`pow` +, +:func:`sqrt` + +.. 
index:: pow + + +cv::pow +------- + +`id=0.84185995318 Comments from the Wiki `__ + + + + +.. cfunction:: void pow(const Mat\& src, double p, Mat\& dst) + + + +.. cfunction:: void pow(const MatND\& src, double p, MatND\& dst) + + Raises every array element to a power. + + + + + + + :param src: The source array + + + :param p: The exponent of power + + + :param dst: The destination array; will have the same size and the same type as ``src`` + + + +The function +``pow`` +raises every element of the input array to +``p`` +: + + + +.. math:: + + \texttt{dst} (I) = \fork{\texttt{src}(I)^p}{if \texttt{p} is integer}{|\texttt{src}(I)|^p}{otherwise} + + +That is, for a non-integer power exponent the absolute values of input array elements are used. However, it is possible to get true values for negative values using some extra operations, as the following example, computing the 5th root of array +``src`` +, shows: + + + + +:: + + + + Mat mask = src < 0; + pow(src, 1./5, dst); + subtract(Scalar::all(0), dst, dst, mask); + + +.. + +For some values of +``p`` +, such as integer values, 0.5, and -0.5, specialized faster algorithms are used. + +See also: +:func:`sqrt` +, +:func:`exp` +, +:func:`log` +, +:func:`cartToPolar` +, +:func:`polarToCart` + +RNG +--- + + +Random number generator class. + + + + +:: + + + + class CV_EXPORTS RNG + { + public: + enum { UNIFORM=0, NORMAL=1 }; + + // constructors + RNG(); + RNG(uint64 state); + + // returns 32-bit unsigned random number + unsigned next(); + + // return random numbers of the specified type + operator uchar(); + operator schar(); + operator ushort(); + operator short(); + operator unsigned(); + // returns a random integer sampled uniformly from [0, N). + unsigned operator()(unsigned N); + unsigned operator()(); + operator int(); + operator float(); + operator double(); + // returns a random number sampled uniformly from [a, b) range + int uniform(int a, int b); + float uniform(float a, float b); + double uniform(double a, double b); + + // returns Gaussian random number with zero mean. + double gaussian(double sigma); + + // fills array with random numbers sampled from the specified distribution + void fill( Mat& mat, int distType, const Scalar& a, const Scalar& b ); + void fill( MatND& mat, int distType, const Scalar& a, const Scalar& b ); + + // internal state of the RNG (could change in the future) + uint64 state; + }; + + +.. + +The class +``RNG`` +implements random number generator. It encapsulates the RNG state (currently, a 64-bit integer) and has methods to return scalar random values and to fill arrays with random values. Currently it supports uniform and Gaussian (normal) distributions. The generator uses Multiply-With-Carry algorithm, introduced by G. Marsaglia ( +http://en.wikipedia.org/wiki/Multiply-with-carry +). Gaussian-distribution random numbers are generated using Ziggurat algorithm ( +http://en.wikipedia.org/wiki/Ziggurat_algorithm +), introduced by G. Marsaglia and W. W. Tsang. + + +.. index:: RNG::RNG + + +cv::RNG::RNG +------------ + +`id=0.330961593443 Comments from the Wiki `__ + + + + +.. cfunction:: RNG::RNG() + + + +.. cfunction:: RNG::RNG(uint64 state) + + RNG constructors + + + + + + + :param state: the 64-bit value used to initialize the RNG + + + +These are the RNG constructors. The first form sets the state to some pre-defined value, equal to +``2**32-1`` +in the current implementation. The second form sets the state to the specified value. 
If the user passed +``state=0`` +, the constructor uses the above default value instead, to avoid the singular random number sequence, consisting of all zeros. + + +.. index:: RNG::next + + +cv::RNG::next +------------- + +`id=0.0230494628234 Comments from the Wiki `__ + + + + +.. cfunction:: unsigned RNG::next() + + Returns the next random number + + + +The method updates the state using MWC algorithm and returns the next 32-bit random number. + + + +.. index:: RNG::operator T + + +cv::RNG::operator T +------------------- + +`id=0.204237361191 Comments from the Wiki `__ + + + + +.. cfunction:: RNG::operator uchar() RNG::operator schar() RNG::operator ushort() RNG::operator short() RNG::operator unsigned() RNG::operator int() RNG::operator float() RNG::operator double() + + Returns the next random number of the specified type + + + +Each of the methods updates the state using MWC algorithm and returns the next random number of the specified type. In the case of integer types the returned number is from the whole available value range for the specified type. In the case of floating-point types the returned value is from +``[0,1)`` +range. + + +.. index:: RNG::operator () + + +cv::RNG::operator () +-------------------- + +`id=0.485824565656 Comments from the Wiki `__ + + + + +.. cfunction:: unsigned RNG::operator ()() + + + +.. cfunction:: unsigned RNG::operator ()(unsigned N) + + Returns the next random number + + + + + + + :param N: The upper non-inclusive boundary of the returned random number + + + +The methods transforms the state using MWC algorithm and returns the next random number. The first form is equivalent to +:func:`RNG::next` +, the second form returns the random number modulo +``N`` +, i.e. the result will be in the range +``[0, N)`` +. + + +.. index:: RNG::uniform + + +cv::RNG::uniform +---------------- + +`id=0.978193043655 Comments from the Wiki `__ + + + + +.. cfunction:: int RNG::uniform(int a, int b) + + + +.. cfunction:: float RNG::uniform(float a, float b) + + + +.. cfunction:: double RNG::uniform(double a, double b) + + Returns the next random number sampled from the uniform distribution + + + + + + + :param a: The lower inclusive boundary of the returned random numbers + + + :param b: The upper non-inclusive boundary of the returned random numbers + + + +The methods transforms the state using MWC algorithm and returns the next uniformly-distributed random number of the specified type, deduced from the input parameter type, from the range +``[a, b)`` +. There is one nuance, illustrated by the following sample: + + + + +:: + + + + cv::RNG rng; + + // will always produce 0 + double a = rng.uniform(0, 1); + + // will produce double from [0, 1) + double a1 = rng.uniform((double)0, (double)1); + + // will produce float from [0, 1) + double b = rng.uniform(0.f, 1.f); + + // will produce double from [0, 1) + double c = rng.uniform(0., 1.); + + // will likely cause compiler error because of ambiguity: + // RNG::uniform(0, (int)0.999999)? or RNG::uniform((double)0, 0.99999)? + double d = rng.uniform(0, 0.999999); + + +.. + +That is, the compiler does not take into account type of the variable that you assign the result of +``RNG::uniform`` +to, the only thing that matters to it is the type of +``a`` +and +``b`` +parameters. So if you want a floating-point random number, but the range boundaries are integer numbers, either put dots in the end, if they are constants, or use explicit type cast operators, as in +``a1`` +initialization above. + + + +.. 
index:: RNG::gaussian + + +cv::RNG::gaussian +----------------- + +`id=0.910634999394 Comments from the Wiki `__ + + + + +.. cfunction:: double RNG::gaussian(double sigma) + + Returns the next random number sampled from the Gaussian distribution + + + + + + + :param sigma: The standard deviation of the distribution + + + +The methods transforms the state using MWC algorithm and returns the next random number from the Gaussian distribution +``N(0,sigma)`` +. That is, the mean value of the returned random numbers will be zero and the standard deviation will be the specified +``sigma`` +. + + + +.. index:: RNG::fill + + +cv::RNG::fill +------------- + +`id=0.874686220628 Comments from the Wiki `__ + + + + +.. cfunction:: void RNG::fill( Mat\& mat, int distType, const Scalar\& a, const Scalar\& b ) + + + +.. cfunction:: void RNG::fill( MatND\& mat, int distType, const Scalar\& a, const Scalar\& b ) + + Fill arrays with random numbers + + + + + + + :param mat: 2D or N-dimensional matrix. Currently matrices with more than 4 channels are not supported by the methods. Use :func:`reshape` as a possible workaround. + + + :param distType: The distribution type, ``RNG::UNIFORM`` or ``RNG::NORMAL`` + + + :param a: The first distribution parameter. In the case of uniform distribution this is inclusive lower boundary. In the case of normal distribution this is mean value. + + + :param b: The second distribution parameter. In the case of uniform distribution this is non-inclusive upper boundary. In the case of normal distribution this is standard deviation. + + + +Each of the methods fills the matrix with the random values from the specified distribution. As the new numbers are generated, the RNG state is updated accordingly. In the case of multiple-channel images every channel is filled independently, i.e. RNG can not generate samples from multi-dimensional Gaussian distribution with non-diagonal covariation matrix directly. To do that, first, generate matrix from the distribution +:math:`N(0, I_n)` +, i.e. Gaussian distribution with zero mean and identity covariation matrix, and then transform it using +:func:`transform` +and the specific covariation matrix. + + +.. index:: randu + + +cv::randu +--------- + +`id=0.406065156237 Comments from the Wiki `__ + + + + +.. cfunction:: template _Tp randu() + + + +.. cfunction:: void randu(Mat\& mtx, const Scalar\& low, const Scalar\& high) + + Generates a single uniformly-distributed random number or array of random numbers + + + + + + + :param mtx: The output array of random numbers. The array must be pre-allocated and have 1 to 4 channels + + + :param low: The inclusive lower boundary of the generated random numbers + + + :param high: The exclusive upper boundary of the generated random numbers + + + +The template functions +``randu`` +generate and return the next uniformly-distributed random value of the specified type. +``randu()`` +is equivalent to +``(int)theRNG();`` +etc. See +:func:`RNG` +description. + +The second non-template variant of the function fills the matrix +``mtx`` +with uniformly-distributed random numbers from the specified range: + + + +.. math:: + + \texttt{low} _c \leq \texttt{mtx} (I)_c < \texttt{high} _c + + +See also: +:func:`RNG` +, +:func:`randn` +, +:func:`theRNG` +. + + +.. index:: randn + + +cv::randn +--------- + +`id=0.137293032285 Comments from the Wiki `__ + + + + +.. 
cfunction:: void randn(Mat\& mtx, const Scalar\& mean, const Scalar\& stddev) + + Fills array with normally distributed random numbers + + + + + + + :param mtx: The output array of random numbers. The array must be pre-allocated and have 1 to 4 channels + + + :param mean: The mean value (expectation) of the generated random numbers + + + :param stddev: The standard deviation of the generated random numbers + + + +The function +``randn`` +fills the matrix +``mtx`` +with normally distributed random numbers with the specified mean and standard deviation. +is applied to the generated numbers (i.e. the values are clipped) + +See also: +:func:`RNG` +, +:func:`randu` + +.. index:: randShuffle + + +cv::randShuffle +--------------- + +`id=0.303710586038 Comments from the Wiki `__ + + + + +.. cfunction:: void randShuffle(Mat\& mtx, double iterFactor=1., RNG* rng=0) + + Shuffles the array elements randomly + + + + + + + :param mtx: The input/output numerical 1D array + + + :param iterFactor: The scale factor that determines the number of random swap operations. See the discussion + + + :param rng: The optional random number generator used for shuffling. If it is zero, :func:`theRNG` () is used instead + + + +The function +``randShuffle`` +shuffles the specified 1D array by randomly choosing pairs of elements and swapping them. The number of such swap operations will be +``mtx.rows*mtx.cols*iterFactor`` +See also: +:func:`RNG` +, +:func:`sort` + +.. index:: reduce + + +cv::reduce +---------- + +`id=0.874689843107 Comments from the Wiki `__ + + + + +.. cfunction:: void reduce(const Mat\& mtx, Mat\& vec, int dim, int reduceOp, int dtype=-1) + + Reduces a matrix to a vector + + + + + + + :param mtx: The source 2D matrix + + + :param vec: The destination vector. Its size and type is defined by ``dim`` and ``dtype`` parameters + + + :param dim: The dimension index along which the matrix is reduced. 0 means that the matrix is reduced to a single row and 1 means that the matrix is reduced to a single column + + + :param reduceOp: The reduction operation, one of: + + * **CV_REDUCE_SUM** The output is the sum of all of the matrix's rows/columns. + + * **CV_REDUCE_AVG** The output is the mean vector of all of the matrix's rows/columns. + + * **CV_REDUCE_MAX** The output is the maximum (column/row-wise) of all of the matrix's rows/columns. + + * **CV_REDUCE_MIN** The output is the minimum (column/row-wise) of all of the matrix's rows/columns. + + + + + :param dtype: When it is negative, the destination vector will have the same type as the source matrix, otherwise, its type will be ``CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), mtx.channels())`` + + + +The function +``reduce`` +reduces matrix to a vector by treating the matrix rows/columns as a set of 1D vectors and performing the specified operation on the vectors until a single row/column is obtained. For example, the function can be used to compute horizontal and vertical projections of an raster image. In the case of +``CV_REDUCE_SUM`` +and +``CV_REDUCE_AVG`` +the output may have a larger element bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction modes. + +See also: +:func:`repeat` + +.. index:: repeat + + +cv::repeat +---------- + +`id=0.382463471543 Comments from the Wiki `__ + + + + +.. cfunction:: void repeat(const Mat\& src, int ny, int nx, Mat\& dst) + + + +.. cfunction:: Mat repeat(const Mat\& src, int ny, int nx) + + Fill the destination array with repeated copies of the source array. 
+ + + + + + + :param src: The source array to replicate + + + :param dst: The destination array; will have the same type as ``src`` + + + :param ny: How many times ``src`` is repeated along the vertical axis + + + :param nx: How many times ``src`` is repeated along the horizontal axis + + + +The functions +:func:`repeat` +duplicate the source array one or more times along each of the two axes: + + + +.. math:: + + \texttt{dst} _{ij}= \texttt{src} _{i \mod \texttt{src.rows} , \; j \mod \texttt{src.cols} } + + +The second variant of the function is more convenient to use with +:ref:`Matrix Expressions` +. + +See also: +:func:`reduce` +, +:ref:`Matrix Expressions` + +.. index:: saturate_cast + +.. _saturate_cast: + +saturate_cast +------------- + +`id=0.402640107889 Comments from the Wiki `__ + + + + +.. cfunction:: template<typename _Tp> inline _Tp saturate_cast(unsigned char v) + + + +.. cfunction:: template<typename _Tp> inline _Tp saturate_cast(signed char v) + + + +.. cfunction:: template<typename _Tp> inline _Tp saturate_cast(unsigned short v) + + + +.. cfunction:: template<typename _Tp> inline _Tp saturate_cast(signed short v) + + + +.. cfunction:: template<typename _Tp> inline _Tp saturate_cast(int v) + + + +.. cfunction:: template<typename _Tp> inline _Tp saturate_cast(unsigned int v) + + + +.. cfunction:: template<typename _Tp> inline _Tp saturate_cast(float v) + + + +.. cfunction:: template<typename _Tp> inline _Tp saturate_cast(double v) + + Template function for accurate conversion from one primitive type to another + + + + + + + :param v: The function parameter + + + +The functions +``saturate_cast`` +resemble the standard C++ cast operations, such as +``static_cast<T>()`` +etc. They perform an efficient and accurate conversion from one primitive type to another, see the introduction. "saturate" in the name means that when the input value +``v`` +is out of the range of the target type, the result will not be formed just by taking the low bits of the input, but instead the value will be clipped. For example: + + + + +:: + + + + uchar a = saturate_cast<uchar>(-100); // a = 0 (UCHAR_MIN) + short b = saturate_cast<short>(33333.33333); // b = 32767 (SHRT_MAX) + + +.. + +Such clipping is done when the target type is +``unsigned char, signed char, unsigned short or signed short`` +- for 32-bit integers no clipping is done. + +When the parameter is a floating-point value and the target type is an integer (8-, 16- or 32-bit), the floating-point value is first rounded to the nearest integer and then clipped if needed (when the target type is 8- or 16-bit). + +This operation is used in most simple or complex image processing functions in OpenCV. + +See also: +:func:`add` +, +:func:`subtract` +, +:func:`multiply` +, +:func:`divide` +, +:func:`Mat::convertTo` + +.. index:: scaleAdd + + +cv::scaleAdd +------------ + +`id=0.728943838546 Comments from the Wiki `__ + + + + +.. cfunction:: void scaleAdd(const Mat\& src1, double scale, const Mat\& src2, Mat\& dst) + + + +.. cfunction:: void scaleAdd(const MatND\& src1, double scale, const MatND\& src2, MatND\& dst) + + Calculates the sum of a scaled array and another array. + + + + + + + :param src1: The first source array + + + :param scale: Scale factor for the first array + + + :param src2: The second source array; must have the same size and the same type as ``src1`` + + + :param dst: The destination array; will have the same size and the same type as ``src1`` + + + +The function +``scaleAdd`` +is one of the classical primitive linear algebra operations, known as +``DAXPY`` +or +``SAXPY`` +in +`BLAS `_ +. It calculates the sum of a scaled array and another array: + + +
.. math:: + + \texttt{dst} (I)= \texttt{scale} \cdot \texttt{src1} (I) + \texttt{src2} (I) + + +The function can also be emulated with a matrix expression, for example: + + + + +:: + + + + Mat A(3, 3, CV_64F); + ... + A.row(0) = A.row(1)*2 + A.row(2); + + +.. + +See also: +:func:`add` +, +:func:`addWeighted` +, +:func:`subtract` +, +:func:`Mat::dot` +, +:func:`Mat::convertTo` +, +:ref:`Matrix Expressions` + +.. index:: setIdentity + + +cv::setIdentity +--------------- + +`id=0.506815953929 Comments from the Wiki `__ + + + + +.. cfunction:: void setIdentity(Mat\& dst, const Scalar\& value=Scalar(1)) + + Initializes a scaled identity matrix + + + + + + + :param dst: The matrix to initialize (not necessarily square) + + + :param value: The value to assign to the diagonal elements + + + +The function +:func:`setIdentity` +initializes a scaled identity matrix: + + + +.. math:: + + \texttt{dst} (i,j)= \fork{\texttt{value}}{ if $i=j$}{0}{otherwise} + + +The function can also be emulated using the matrix initializers and the matrix expressions: + + + +:: + + + + Mat A = Mat::eye(4, 3, CV_32F)*5; + // A will be set to [[5, 0, 0], [0, 5, 0], [0, 0, 5], [0, 0, 0]] + + +.. + +See also: +:func:`Mat::zeros` +, +:func:`Mat::ones` +, +:ref:`Matrix Expressions` +, +:func:`Mat::setTo` +, +:func:`Mat::operator=` + + +.. index:: solve + + +cv::solve +--------- + +`id=0.0254819200657 Comments from the Wiki `__ + + + + +.. cfunction:: bool solve(const Mat\& src1, const Mat\& src2, Mat\& dst, int flags=DECOMP_LU) + + Solves one or more linear systems or least-squares problems. + + + + + + + :param src1: The input matrix on the left-hand side of the system + + + :param src2: The input matrix on the right-hand side of the system + + + :param dst: The output solution + + + :param flags: The solution (matrix inversion) method + + + * **DECOMP_LU** Gaussian elimination with optimal pivot element chosen + + + * **DECOMP_CHOLESKY** Cholesky :math:`LL^T` factorization; the matrix ``src1`` must be symmetric and positive definite + + + * **DECOMP_EIG** Eigenvalue decomposition; the matrix ``src1`` must be symmetric + + + * **DECOMP_SVD** Singular value decomposition (SVD) method; the system can be over-determined and/or the matrix ``src1`` can be singular + + + * **DECOMP_QR** QR factorization; the system can be over-determined and/or the matrix ``src1`` can be singular + + + * **DECOMP_NORMAL** While all the previous flags are mutually exclusive, this flag can be used together with any of the previous ones. It means that the normal equations :math:`\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}` are solved instead of the original system :math:`\texttt{src1}\cdot\texttt{dst}=\texttt{src2}` + + + + + +The function +``solve`` +solves a linear system or least-squares problem (the latter is possible with SVD or QR methods, or by specifying the flag +``DECOMP_NORMAL`` +): + + + +.. math:: + + \texttt{dst} = \arg \min _X \| \texttt{src1} \cdot \texttt{X} - \texttt{src2} \| + + +If +``DECOMP_LU`` +or +``DECOMP_CHOLESKY`` +method is used, the function returns 1 if +``src1`` +(or +:math:`\texttt{src1}^T\texttt{src1}` +) is non-singular and 0 otherwise; in the latter case +``dst`` +is not valid. Other methods find some pseudo-solution in the case of a singular left-hand side. + +Note that if you want to find a unit-norm solution of an under-determined singular system +:math:`\texttt{src1}\cdot\texttt{dst}=0` +, the function +``solve`` +will not do the work. Use +:func:`SVD::solveZ` +instead.
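For illustration, here is a minimal sketch of solving a small over-determined system in the least-squares sense with ``DECOMP_SVD``; the matrix values are arbitrary:

::

    // fit y = p0 + p1*x to the points (1,6), (2,0), (3,0)
    // in the least-squares sense using SVD
    Mat A = (Mat_<double>(3,2) << 1, 1,
                                  1, 2,
                                  1, 3);
    Mat b = (Mat_<double>(3,1) << 6, 0, 0);
    Mat p;
    solve(A, b, p, DECOMP_SVD); // p is the 2x1 least-squares solution

..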
+ +See also: +:func:`invert` +, +:func:`SVD` +, +:func:`eigen` + +.. index:: solveCubic + + +cv::solveCubic +-------------- + +`id=0.907626960436 Comments from the Wiki `__ + + + + +.. cfunction:: void solveCubic(const Mat\& coeffs, Mat\& roots) + + Finds the real roots of a cubic equation. + + + + + + + :param coeffs: The equation coefficients, an array of 3 or 4 elements + + + :param roots: The destination array of real roots which will have 1 or 3 elements + + + +The function +``solveCubic`` +finds the real roots of a cubic equation: + +(if coeffs is a 4-element vector) + + + +.. math:: + + \texttt{coeffs} [0] x^3 + \texttt{coeffs} [1] x^2 + \texttt{coeffs} [2] x + \texttt{coeffs} [3] = 0 + + +or (if coeffs is 3-element vector): + + + +.. math:: + + x^3 + \texttt{coeffs} [0] x^2 + \texttt{coeffs} [1] x + \texttt{coeffs} [2] = 0 + + +The roots are stored to +``roots`` +array. + + +.. index:: solvePoly + + +cv::solvePoly +------------- + +`id=0.212310882152 Comments from the Wiki `__ + + + + +.. cfunction:: void solvePoly(const Mat\& coeffs, Mat\& roots, int maxIters=20, int fig=100) + + Finds the real or complex roots of a polynomial equation + + + + + + + :param coeffs: The array of polynomial coefficients + + + :param roots: The destination (complex) array of roots + + + :param maxIters: The maximum number of iterations the algorithm does + + + :param fig: + + + +The function +``solvePoly`` +finds real and complex roots of a polynomial equation: + + +.. math:: + + \texttt{coeffs} [0] x^{n} + \texttt{coeffs} [1] x^{n-1} + ... + \texttt{coeffs} [n-1] x + \texttt{coeffs} [n] = 0 + + + +.. index:: sort + + +cv::sort +-------- + +`id=0.518013202191 Comments from the Wiki `__ + + + + +.. cfunction:: void sort(const Mat\& src, Mat\& dst, int flags) + + Sorts each row or each column of a matrix + + + + + + + :param src: The source single-channel array + + + :param dst: The destination array of the same size and the same type as ``src`` + + + :param flags: The operation flags, a combination of the following values: + + + * **CV_SORT_EVERY_ROW** Each matrix row is sorted independently + + + * **CV_SORT_EVERY_COLUMN** Each matrix column is sorted independently. This flag and the previous one are mutually exclusive + + + * **CV_SORT_ASCENDING** Each matrix row is sorted in the ascending order + + + * **CV_SORT_DESCENDING** Each matrix row is sorted in the descending order. This flag and the previous one are also mutually exclusive + + + + + +The function +``sort`` +sorts each matrix row or each matrix column in ascending or descending order. If you want to sort matrix rows or columns lexicographically, you can use STL +``std::sort`` +generic function with the proper comparison predicate. + +See also: +:func:`sortIdx` +, +:func:`randShuffle` + +.. index:: sortIdx + + +cv::sortIdx +----------- + +`id=0.741410729345 Comments from the Wiki `__ + + + + +.. cfunction:: void sortIdx(const Mat\& src, Mat\& dst, int flags) + + Sorts each row or each column of a matrix + + + + + + + :param src: The source single-channel array + + + :param dst: The destination integer array of the same size as ``src`` + + + :param flags: The operation flags, a combination of the following values: + + + * **CV_SORT_EVERY_ROW** Each matrix row is sorted independently + + + * **CV_SORT_EVERY_COLUMN** Each matrix column is sorted independently. 
This flag and the previous one are mutually exclusive + + + * **CV_SORT_ASCENDING** Each matrix row is sorted in the ascending order + + + * **CV_SORT_DESCENDING** Each matrix row is sorted in the descending order. This flag and the previous one are also mutually exclusive + + + + + +The function +``sortIdx`` +sorts each matrix row or each matrix column in ascending or descending order. Instead of reordering the elements themselves, it stores the indices of sorted elements in the destination array. For example: + + + + +:: + + + + Mat A = Mat::eye(3,3,CV_32F), B; + sortIdx(A, B, CV_SORT_EVERY_ROW + CV_SORT_ASCENDING); + // B will probably contain + // (because of equal elements in A some permutations are possible): + // [[1, 2, 0], [0, 2, 1], [0, 1, 2]] + + +.. + +See also: +:func:`sort` +, +:func:`randShuffle` + +.. index:: split + + +cv::split +--------- + +`id=0.930887143858 Comments from the Wiki `__ + + + + +.. cfunction:: void split(const Mat\& mtx, Mat* mv) + + + +.. cfunction:: void split(const Mat\& mtx, vector\& mv) + + + +.. cfunction:: void split(const MatND\& mtx, MatND* mv) + + + +.. cfunction:: void split(const MatND\& mtx, vector\& mv) + + Divides multi-channel array into several single-channel arrays + + + + + + + :param mtx: The source multi-channel array + + + :param mv: The destination array or vector of arrays; The number of arrays must match ``mtx.channels()`` . The arrays themselves will be reallocated if needed + + + +The functions +``split`` +split multi-channel array into separate single-channel arrays: + + + +.. math:: + + \texttt{mv} [c](I) = \texttt{mtx} (I)_c + + +If you need to extract a single-channel or do some other sophisticated channel permutation, use +:func:`mixChannels` +See also: +:func:`merge` +, +:func:`mixChannels` +, +:func:`cvtColor` + +.. index:: sqrt + + +cv::sqrt +-------- + +`id=0.282274283871 Comments from the Wiki `__ + + + + +.. cfunction:: void sqrt(const Mat\& src, Mat\& dst) + + + +.. cfunction:: void sqrt(const MatND\& src, MatND\& dst) + + Calculates square root of array elements + + + + + + + :param src: The source floating-point array + + + :param dst: The destination array; will have the same size and the same type as ``src`` + + + +The functions +``sqrt`` +calculate square root of each source array element. in the case of multi-channel arrays each channel is processed independently. The function accuracy is approximately the same as of the built-in +``std::sqrt`` +. + +See also: +:func:`pow` +, +:func:`magnitude` + +.. index:: subtract + + +cv::subtract +------------ + +`id=0.874100847221 Comments from the Wiki `__ + + + + +.. cfunction:: void subtract(const Mat\& src1, const Mat\& src2, Mat\& dst) + + + +.. cfunction:: void subtract(const Mat\& src1, const Mat\& src2, Mat\& dst, const Mat\& mask) + + + +.. cfunction:: void subtract(const Mat\& src1, const Scalar\& sc, Mat\& dst, const Mat\& mask=Mat()) + + + +.. cfunction:: void subtract(const Scalar\& sc, const Mat\& src2, Mat\& dst, const Mat\& mask=Mat()) + + + +.. cfunction:: void subtract(const MatND\& src1, const MatND\& src2, MatND\& dst) + + + +.. cfunction:: void subtract(const MatND\& src1, const MatND\& src2, MatND\& dst, const MatND\& mask) + + + +.. cfunction:: void subtract(const MatND\& src1, const Scalar\& sc, MatND\& dst, const MatND\& mask=MatND()) + + + +.. 
cfunction:: void subtract(const Scalar\& sc, const MatND\& src2, MatND\& dst, const MatND\& mask=MatND()) + + Calculates per-element difference between two arrays or array and a scalar + + + + + + + :param src1: The first source array + + + :param src2: The second source array. It must have the same size and same type as ``src1`` + + + :param sc: Scalar; the first or the second input parameter + + + :param dst: The destination array; it will have the same size and same type as ``src1`` ; see ``Mat::create`` + + + :param mask: The optional operation mask, 8-bit single channel array; + specifies elements of the destination array to be changed + + + +The functions +``subtract`` +compute + + + + + +* + the difference between two arrays + + + + .. math:: + + \texttt{dst} (I) = \texttt{saturate} ( \texttt{src1} (I) - \texttt{src2} (I)) \quad \texttt{if mask} (I) \ne0 + + + + +* + the difference between array and a scalar: + + + + .. math:: + + \texttt{dst} (I) = \texttt{saturate} ( \texttt{src1} (I) - \texttt{sc} ) \quad \texttt{if mask} (I) \ne0 + + + + +* + the difference between scalar and an array: + + + + .. math:: + + \texttt{dst} (I) = \texttt{saturate} ( \texttt{sc} - \texttt{src2} (I)) \quad \texttt{if mask} (I) \ne0 + + + + +where +``I`` +is multi-dimensional index of array elements. + +The first function in the above list can be replaced with matrix expressions: + + + +:: + + + + dst = src1 - src2; + dst -= src2; // equivalent to subtract(dst, src2, dst); + + +.. + +See also: +:func:`add` +, +:func:`addWeighted` +, +:func:`scaleAdd` +, +:func:`convertScale` +, +:ref:`Matrix Expressions` +, +. + + +.. index:: SVD + +.. _SVD: + +SVD +--- + +`id=0.878765672851 Comments from the Wiki `__ + +.. ctype:: SVD + + + +Class for computing Singular Value Decomposition + + + + +:: + + + + class SVD + { + public: + enum { MODIFY_A=1, NO_UV=2, FULL_UV=4 }; + // default empty constructor + SVD(); + // decomposes A into u, w and vt: A = u*w*vt; + // u and vt are orthogonal, w is diagonal + SVD( const Mat& A, int flags=0 ); + // decomposes A into u, w and vt. + SVD& operator ()( const Mat& A, int flags=0 ); + + // finds such vector x, norm(x)=1, so that A*x = 0, + // where A is singular matrix + static void solveZ( const Mat& A, Mat& x ); + // does back-subsitution: + // x = vt.t()*inv(w)*u.t()*rhs ~ inv(A)*rhs + void backSubst( const Mat& rhs, Mat& x ) const; + + Mat u; // the left orthogonal matrix + Mat w; // vector of singular values + Mat vt; // the right orthogonal matrix + }; + + +.. + +The class +``SVD`` +is used to compute Singular Value Decomposition of a floating-point matrix and then use it to solve least-square problems, under-determined linear systems, invert matrices, compute condition numbers etc. +For a bit faster operation you can pass +``flags=SVD::MODIFY_A|...`` +to modify the decomposed matrix when it is not necessarily to preserve it. If you want to compute condition number of a matrix or absolute value of its determinant - you do not need +``u`` +and +``vt`` +, so you can pass +``flags=SVD::NO_UV|...`` +. Another flag +``FULL_UV`` +indicates that full-size +``u`` +and +``vt`` +must be computed, which is not necessary most of the time. + +See also: +:func:`invert` +, +:func:`solve` +, +:func:`eigen` +, +:func:`determinant` + +.. index:: SVD::SVD + + +cv::SVD::SVD +------------ + +`id=0.534955433029 Comments from the Wiki `__ + + + + +.. cfunction:: SVD::SVD() + + + +.. 
cfunction:: SVD::SVD( const Mat\& A, int flags=0 ) + + SVD constructors + + + + + + + :param A: The decomposed matrix + + + :param flags: Operation flags + + + + + * **SVD::MODIFY_A** The algorithm can modify the decomposed matrix. It can save some space and speed-up processing a bit + + + * **SVD::NO_UV** Indicates that only the vector of singular values ``w`` is to be computed, while ``u`` and ``vt`` will be set to empty matrices + + + * **SVD::FULL_UV** When the matrix is not square, by default the algorithm produces ``u`` and ``vt`` matrices of sufficiently large size for the further ``A`` reconstruction. If, however, ``FULL_UV`` flag is specified, ``u`` and ``vt`` will be full-size square orthogonal matrices. + + + + + +The first constructor initializes empty +``SVD`` +structure. The second constructor initializes empty +``SVD`` +structure and then calls +:func:`SVD::operator ()` +. + + + +.. index:: SVD::operator () + + +cv::SVD::operator () +-------------------- + +`id=0.679798629401 Comments from the Wiki `__ + + + + +.. cfunction:: SVD\& SVD::operator ()( const Mat\& A, int flags=0 ) + + Performs SVD of a matrix + + + + + + + :param A: The decomposed matrix + + + :param flags: Operation flags + + + + + * **SVD::MODIFY_A** The algorithm can modify the decomposed matrix. It can save some space and speed-up processing a bit + + + * **SVD::NO_UV** Only singular values are needed. The algorithm will not compute ``u`` and ``vt`` matrices + + + * **SVD::FULL_UV** When the matrix is not square, by default the algorithm produces ``u`` and ``vt`` matrices of sufficiently large size for the further ``A`` reconstruction. If, however, ``FULL_UV`` flag is specified, ``u`` and ``vt`` will be full-size square orthogonal matrices. + + + + + +The operator performs singular value decomposition of the supplied matrix. The +``u`` +, +``vt`` +and the vector of singular values +``w`` +are stored in the structure. The same +``SVD`` +structure can be reused many times with different matrices. Each time, if needed, the previous +``u`` +, +``vt`` +and +``w`` +are reclaimed and the new matrices are created, which is all handled by +:func:`Mat::create` +. + + +.. index:: SVD::solveZ + + +cv::SVD::solveZ +--------------- + +`id=0.571955065721 Comments from the Wiki `__ + + + + +.. cfunction:: static void SVD::solveZ( const Mat\& A, Mat\& x ) + + Solves under-determined singular linear system + + + + + + + :param A: The left-hand-side matrix. + + + :param x: The found solution + + + +The method finds unit-length solution +**x** +of the under-determined system +:math:`A x = 0` +. Theory says that such system has infinite number of solutions, so the algorithm finds the unit-length solution as the right singular vector corresponding to the smallest singular value (which should be 0). In practice, because of round errors and limited floating-point accuracy, the input matrix can appear to be close-to-singular rather than just singular. So, strictly speaking, the algorithm solves the following problem: + + + +.. math:: + + x^* = \arg \min _{x: \| x \| =1} \| A \cdot x \| + + + +.. index:: SVD::backSubst + + +cv::SVD::backSubst +------------------ + +`id=0.7874695872 Comments from the Wiki `__ + + + + +.. 
cfunction:: void SVD::backSubst( const Mat\& rhs, Mat\& x ) const + + Performs singular value back substitution + + + + + + + :param rhs: The right-hand side of a linear system :math:`\texttt{A} \texttt{x} = \texttt{rhs}` being solved, where ``A`` is the matrix passed to :func:`SVD::SVD` or :func:`SVD::operator ()` + + + :param x: The found solution of the system + + + +The method computes back substitution for the specified right-hand side: + + + +.. math:: + + \texttt{x} = \texttt{vt} ^T \cdot diag( \texttt{w} )^{-1} \cdot \texttt{u} ^T \cdot \texttt{rhs} \sim \texttt{A} ^{-1} \cdot \texttt{rhs} + + +Using this technique you can either get a very accurate solution of a consistent linear system, or the best (in the least-squares terms) pseudo-solution of an overdetermined linear system. Note that explicit SVD with the further back substitution only makes sense if you need to solve many linear systems with the same left-hand side (e.g. ``A`` ). If all you need is to solve a single system (possibly with multiple ``rhs`` immediately available), simply call :func:`solve` and pass ``cv::DECOMP_SVD`` there - it will do absolutely the same thing. + + +.. index:: sum + + +cv::sum +------- + +`id=0.557000171762 Comments from the Wiki `__ + + + + +.. cfunction:: Scalar sum(const Mat\& mtx) + + + +.. cfunction:: Scalar sum(const MatND\& mtx) + + Calculates the sum of array elements + + + + + + + :param mtx: The source array; must have 1 to 4 channels + + + +The functions +``sum`` +calculate and return the sum of array elements, independently for each channel. + +See also: +:func:`countNonZero` +, +:func:`mean` +, +:func:`meanStdDev` +, +:func:`norm` +, +:func:`minMaxLoc` +, +:func:`reduce` + +.. index:: theRNG + + +cv::theRNG +---------- + +`id=0.270205102539 Comments from the Wiki `__ + + + + +.. cfunction:: RNG\& theRNG() + + Returns the default random number generator + + + +The function +``theRNG`` +returns the default random number generator. For each thread there is a separate random number generator, so you can use the function safely in multi-threaded environments. If you just need to get a single random number using this generator or initialize an array, you can use +:func:`randu` +or +:func:`randn` +instead. But if you are going to generate many random numbers inside a loop, it will be much faster to use this function to retrieve the generator and then use +``RNG::operator _Tp()`` +. + +See also: +:func:`RNG` +, +:func:`randu` +, +:func:`randn` + +.. index:: trace + + +cv::trace +--------- + +`id=0.290293111938 Comments from the Wiki `__ + + + + +.. cfunction:: Scalar trace(const Mat\& mtx) + + Returns the trace of a matrix + + + + + + + :param mtx: The source matrix + + + +The function +``trace`` +returns the sum of the diagonal elements of the matrix +``mtx`` +. + + + +.. math:: + + \mathrm{tr} ( \texttt{mtx} ) = \sum _i \texttt{mtx} (i,i) + + + +.. index:: transform + + +cv::transform +------------- + +`id=0.76029170559 Comments from the Wiki `__ + + + + +.. cfunction:: void transform(const Mat\& src, Mat\& dst, const Mat\& mtx ) + + Performs matrix transformation of every array element.
+ + + + + + + :param src: The source array; must have as many channels (1 to 4) as ``mtx.cols`` or ``mtx.cols-1`` + + + :param dst: The destination array; will have the same size and depth as ``src`` and as many channels as ``mtx.rows`` + + + :param mtx: The transformation matrix + + + +The function +``transform`` +performs matrix transformation of every element of array +``src`` +and stores the results in +``dst`` +: + + + +.. math:: + + \texttt{dst} (I) = \texttt{mtx} \cdot \texttt{src} (I) + + +(when +``mtx.cols=src.channels()`` +), or + + + +.. math:: + + \texttt{dst} (I) = \texttt{mtx} \cdot [ \texttt{src} (I); 1] + + +(when +``mtx.cols=src.channels()+1`` +) + +That is, every element of an +``N`` +-channel array +``src`` +is +considered as +``N`` +-element vector, which is transformed using +a +:math:`\texttt{M} \times \texttt{N}` +or +:math:`\texttt{M} \times \texttt{N+1}` +matrix +``mtx`` +into +an element of +``M`` +-channel array +``dst`` +. + +The function may be used for geometrical transformation of +:math:`N` +-dimensional +points, arbitrary linear color space transformation (such as various kinds of RGB +:math:`\rightarrow` +YUV transforms), shuffling the image channels and so forth. + +See also: +:func:`perspectiveTransform` +, +:func:`getAffineTransform` +, +:func:`estimateRigidTransform` +, +:func:`warpAffine` +, +:func:`warpPerspective` + +.. index:: transpose + + +cv::transpose +------------- + +`id=0.171952414584 Comments from the Wiki `__ + + + + +.. cfunction:: void transpose(const Mat\& src, Mat\& dst) + + Transposes a matrix + + + + + + + :param src: The source array + + + :param dst: The destination array of the same type as ``src`` + + + +The function +:func:`transpose` +transposes the matrix +``src`` +: + + + +.. math:: + + \texttt{dst} (i,j) = \texttt{src} (j,i) + + +Note that no complex conjugation is done in the case of a complex +matrix, it should be done separately if needed. + diff --git a/modules/core/doc/utility_and_system_functions_and_macros.rst b/modules/core/doc/utility_and_system_functions_and_macros.rst new file mode 100644 index 000000000..9473871c1 --- /dev/null +++ b/modules/core/doc/utility_and_system_functions_and_macros.rst @@ -0,0 +1,568 @@ +Utility and System Functions and Macros +======================================= + +.. highlight:: cpp + + + +.. index:: alignPtr + + +cv::alignPtr +------------ + +`id=0.732441674276 Comments from the Wiki `__ + + + + +.. cfunction:: template _Tp* alignPtr(_Tp* ptr, int n=sizeof(_Tp)) + + Aligns pointer to the specified number of bytes + + + + + + + :param ptr: The aligned pointer + + + :param n: The alignment size; must be a power of two + + + +The function returns the aligned pointer of the same type as the input pointer: + + +.. math:: + + \texttt{(\_Tp*)(((size\_t)ptr + n-1) \& -n)} + + + +.. index:: alignSize + + +cv::alignSize +------------- + +`id=0.0293178300141 Comments from the Wiki `__ + + + + +.. cfunction:: size_t alignSize(size_t sz, int n) + + Aligns a buffer size to the specified number of bytes + + + + + + + :param sz: The buffer size to align + + + :param n: The alignment size; must be a power of two + + + +The function returns the minimum number that is greater or equal to +``sz`` +and is divisble by +``n`` +: + + +.. math:: + + \texttt{(sz + n-1) \& -n} + + + +.. index:: allocate + + +cv::allocate +------------ + +`id=0.672857293821 Comments from the Wiki `__ + + + + +.. 
cfunction:: template<typename _Tp> _Tp* allocate(size_t n) + + Allocates an array of elements + + + + + + + :param n: The number of elements to allocate + + + +The generic function +``allocate`` +allocates a buffer for the specified number of elements. For each element the default constructor is called. + + + +.. index:: deallocate + + +cv::deallocate +-------------- + +`id=0.907199792708 Comments from the Wiki `__ + + + + +.. cfunction:: template<typename _Tp> void deallocate(_Tp* ptr, size_t n) + + Deallocates an array of elements + + + + + + + :param ptr: Pointer to the deallocated buffer + + + :param n: The number of elements in the buffer + + + +The generic function +``deallocate`` +deallocates the buffer allocated with +:func:`allocate` +. The number of elements must match the number passed to +:func:`allocate` +. + + +.. index:: CV_Assert + +.. _CV_Assert: + +CV_Assert +--------- + +`id=0.132247699783 Comments from the Wiki `__ + + + + +.. cfunction:: CV_Assert(expr) + + Checks a condition at runtime. + + + + + + +:: + + + + #define CV_Assert( expr ) ... + #define CV_DbgAssert(expr) ... + + +.. + + + + + :param expr: The checked expression + + + +The macros +``CV_Assert`` +and +``CV_DbgAssert`` +evaluate the specified expression and, if it is 0, raise an error (see +:func:`error` +). The macro +``CV_Assert`` +checks the condition in both Debug and Release configurations, while +``CV_DbgAssert`` +is only retained in the Debug configuration. + + +.. index:: error + + +cv::error +--------- + +`id=0.274198769781 Comments from the Wiki `__ + + + + +.. cfunction:: void error( const Exception\& exc ) + + + +.. cfunction:: \#define CV_Error( code, msg ) <...> + + + +.. cfunction:: \#define CV_Error_( code, args ) <...> + + Signals an error and raises the exception + + + + + + + :param exc: The exception to throw + + + :param code: The error code, normally a negative value. The list of pre-defined error codes can be found in ``cxerror.h`` + + + :param msg: Text of the error message + + + :param args: printf-like formatted error message in parentheses + + + +The function and the helper macros +``CV_Error`` +and +``CV_Error_`` +call the error handler. Currently, the error handler prints the error code ( +``exc.code`` +), the context ( +``exc.file`` +, +``exc.line`` +) and the error message +``exc.err`` +to the standard error stream +``stderr`` +. In the Debug configuration it then provokes a memory access violation, so that the execution stack and all the parameters can be analyzed in a debugger. In the Release configuration the exception +``exc`` +is thrown. + +The macro +``CV_Error_`` +can be used to construct the error message on the fly to include some dynamic information, for example: + + + + +:: + + + + // note the extra parentheses around the formatted text message + CV_Error_(CV_StsOutOfRange, + ("the matrix element (%d,%d)=%g is out of range", + i, j, mtx.at<float>(i,j))); + + +.. + + +.. index:: Exception + +.. _Exception: + +Exception +--------- + +`id=0.792198322059 Comments from the Wiki `__ + +.. 
ctype:: Exception + + + +The exception class passed to error + + + + +:: + + + + class Exception + { + public: + // various constructors and the copy operation + Exception() { code = 0; line = 0; } + Exception(int _code, const string& _err, + const string& _func, const string& _file, int _line); + Exception(const Exception& exc); + Exception& operator = (const Exception& exc); + + // the error code + int code; + // the error text message + string err; + // function name where the error happened + string func; + // the source file name where the error happened + string file; + // the source file line where the error happened + int line; + }; + + +.. + +The class +``Exception`` +encapsulates all or almost all the necessary information about the error happened in the program. The exception is usually constructed and thrown implicitly, via +``CV_Error`` +and +``CV_Error_`` +macros, see +:func:`error` +. + + + +.. index:: fastMalloc + + +cv::fastMalloc +-------------- + +`id=0.913748026438 Comments from the Wiki `__ + + + + +.. cfunction:: void* fastMalloc(size_t size) + + Allocates aligned memory buffer + + + + + + + :param size: The allocated buffer size + + + +The function allocates buffer of the specified size and returns it. When the buffer size is 16 bytes or more, the returned buffer is aligned on 16 bytes. + + +.. index:: fastFree + + +cv::fastFree +------------ + +`id=0.486348253472 Comments from the Wiki `__ + + + + +.. cfunction:: void fastFree(void* ptr) + + Deallocates memory buffer + + + + + + + :param ptr: Pointer to the allocated buffer + + + +The function deallocates the buffer, allocated with +:func:`fastMalloc` +. +If NULL pointer is passed, the function does nothing. + + +.. index:: format + + +cv::format +---------- + +`id=0.359045522761 Comments from the Wiki `__ + + + + +.. cfunction:: string format( const char* fmt, ... ) + + Returns a text string formatted using printf-like expression + + + + + + + :param fmt: The printf-compatible formatting specifiers + + + +The function acts like +``sprintf`` +, but forms and returns STL string. It can be used for form the error message in +:func:`Exception` +constructor. + + +.. index:: getNumThreads + + +cv::getNumThreads +----------------- + +`id=0.665594834701 Comments from the Wiki `__ + + + + +.. cfunction:: int getNumThreads() + + Returns the number of threads used by OpenCV + + + +The function returns the number of threads that is used by OpenCV. + +See also: +:func:`setNumThreads` +, +:func:`getThreadNum` +. + + + +.. index:: getThreadNum + + +cv::getThreadNum +---------------- + +`id=0.835208450402 Comments from the Wiki `__ + + + + +.. cfunction:: int getThreadNum() + + Returns index of the currently executed thread + + + +The function returns 0-based index of the currently executed thread. The function is only valid inside a parallel OpenMP region. When OpenCV is built without OpenMP support, the function always returns 0. + +See also: +:func:`setNumThreads` +, +:func:`getNumThreads` +. + + +.. index:: getTickCount + + +cv::getTickCount +---------------- + +`id=0.682548115061 Comments from the Wiki `__ + + + + +.. cfunction:: int64 getTickCount() + + Returns the number of ticks + + + +The function returns the number of ticks since the certain event (e.g. when the machine was turned on). +It can be used to initialize +:func:`RNG` +or to measure a function execution time by reading the tick count before and after the function call. See also the tick frequency. + + +.. 
index:: getTickFrequency + + +cv::getTickFrequency +-------------------- + +`id=0.85013360741 Comments from the Wiki `__ + + + + +.. cfunction:: double getTickFrequency() + + Returns the number of ticks per second + + + +The function returns the number of ticks per second. +That is, the following code computes the execution time in seconds. + + + +:: + + + + double t = (double)getTickCount(); + // do something ... + t = ((double)getTickCount() - t)/getTickFrequency(); + + +.. + + +.. index:: setNumThreads + + +cv::setNumThreads +----------------- + +`id=0.215563071229 Comments from the Wiki `__ + + + + +.. cfunction:: void setNumThreads(int nthreads) + + Sets the number of threads used by OpenCV + + + + + + + :param nthreads: The number of threads used by OpenCV + + + +The function sets the number of threads used by OpenCV in parallel OpenMP regions. If +``nthreads=0`` +, the function will use the default number of threads, which is usually equal to the number of the processing cores. + +See also: +:func:`getNumThreads` +, +:func:`getThreadNum` diff --git a/modules/core/doc/xml_yaml_persistence.rst b/modules/core/doc/xml_yaml_persistence.rst new file mode 100644 index 000000000..59da9eb28 --- /dev/null +++ b/modules/core/doc/xml_yaml_persistence.rst @@ -0,0 +1,203 @@ +XML/YAML Persistence +==================== + +.. highlight:: cpp + + + +.. index:: FileStorage + +.. _FileStorage: + +FileStorage +----------- + +`id=0.36488878292 Comments from the Wiki `__ + +.. ctype:: FileStorage + + + +The XML/YAML file storage class + + + + +:: + + + + class FileStorage + { + public: + enum { READ=0, WRITE=1, APPEND=2 }; + enum { UNDEFINED=0, VALUE_EXPECTED=1, NAME_EXPECTED=2, INSIDE_MAP=4 }; + // the default constructor + FileStorage(); + // the constructor that opens the file for reading + // (flags=FileStorage::READ) or writing (flags=FileStorage::WRITE) + FileStorage(const string& filename, int flags); + // wraps the already opened CvFileStorage* + FileStorage(CvFileStorage* fs); + // the destructor; closes the file if needed + virtual ~FileStorage(); + + // opens the specified file for reading (flags=FileStorage::READ) + // or writing (flags=FileStorage::WRITE) + virtual bool open(const string& filename, int flags); + // checks if the storage is opened + virtual bool isOpened() const; + // closes the file + virtual void release(); + + // returns the first top-level node + FileNode getFirstTopLevelNode() const; + // returns the root file node + // (it's the parent of the first top-level node) + FileNode root(int streamidx=0) const; + // returns the top-level node by name + FileNode operator[](const string& nodename) const; + FileNode operator[](const char* nodename) const; + + // returns the underlying CvFileStorage* + CvFileStorage* operator *() { return fs; } + const CvFileStorage* operator *() const { return fs; } + + // writes the certain number of elements of the specified format + // (see DataType) without any headers + void writeRaw( const string& fmt, const uchar* vec, size_t len ); + + // writes an old-style object (CvMat, CvMatND etc.) + void writeObj( const string& name, const void* obj ); + + // returns the default object name from the filename + // (used by cvSave() with the default object name etc.) + static string getDefaultObjectName(const string& filename); + + Ptr fs; + string elname; + vector structs; + int state; + }; + + +.. + + +.. index:: FileNode + +.. _FileNode: + +FileNode +-------- + +`id=0.228849909258 Comments from the Wiki `__ + +.. 
ctype:: FileNode + + + +The XML/YAML file node class + + + + +:: + + + + class CV_EXPORTS FileNode + { + public: + enum { NONE=0, INT=1, REAL=2, FLOAT=REAL, STR=3, + STRING=STR, REF=4, SEQ=5, MAP=6, TYPE_MASK=7, + FLOW=8, USER=16, EMPTY=32, NAMED=64 }; + FileNode(); + FileNode(const CvFileStorage* fs, const CvFileNode* node); + FileNode(const FileNode& node); + FileNode operator[](const string& nodename) const; + FileNode operator[](const char* nodename) const; + FileNode operator[](int i) const; + int type() const; + int rawDataSize(const string& fmt) const; + bool empty() const; + bool isNone() const; + bool isSeq() const; + bool isMap() const; + bool isInt() const; + bool isReal() const; + bool isString() const; + bool isNamed() const; + string name() const; + size_t size() const; + operator int() const; + operator float() const; + operator double() const; + operator string() const; + + FileNodeIterator begin() const; + FileNodeIterator end() const; + + void readRaw( const string& fmt, uchar* vec, size_t len ) const; + void* readObj() const; + + // do not use wrapper pointer classes for better efficiency + const CvFileStorage* fs; + const CvFileNode* node; + }; + + +.. + + +.. index:: FileNodeIterator + +.. _FileNodeIterator: + +FileNodeIterator +---------------- + +`id=0.575104633905 Comments from the Wiki `__ + +.. ctype:: FileNodeIterator + + + +The XML/YAML file node iterator class + + + + +:: + + + + class CV_EXPORTS FileNodeIterator + { + public: + FileNodeIterator(); + FileNodeIterator(const CvFileStorage* fs, + const CvFileNode* node, size_t ofs=0); + FileNodeIterator(const FileNodeIterator& it); + FileNode operator *() const; + FileNode operator ->() const; + + FileNodeIterator& operator ++(); + FileNodeIterator operator ++(int); + FileNodeIterator& operator --(); + FileNodeIterator operator --(int); + FileNodeIterator& operator += (int); + FileNodeIterator& operator -= (int); + + FileNodeIterator& readRaw( const string& fmt, uchar* vec, + size_t maxCount=(size_t)INT_MAX ); + + const CvFileStorage* fs; + const CvFileNode* container; + CvSeqReader reader; + size_t remaining; + }; + + +.. + diff --git a/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst b/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst new file mode 100644 index 000000000..515df4b9a --- /dev/null +++ b/modules/features2d/doc/common_interfaces_of_descriptor_extractors.rst @@ -0,0 +1,446 @@ +Common Interfaces of Descriptor Extractors +========================================== + +.. highlight:: cpp + + +Extractors of keypoint descriptors in OpenCV have wrappers with common interface that enables to switch easily +between different algorithms solving the same problem. This section is devoted to computing descriptors +that are represented as vectors in a multidimensional space. All objects that implement ''vector'' +descriptor extractors inherit +:func:`DescriptorExtractor` +interface. + + +.. index:: DescriptorExtractor + +.. _DescriptorExtractor: + +DescriptorExtractor +------------------- + +`id=0.00924308242838 Comments from the Wiki `__ + +.. ctype:: DescriptorExtractor + + + +Abstract base class for computing descriptors for image keypoints. 
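A brief usage sketch before the class synopsis below; the image filename, and the assumption that keypoints were already filled in by some :func:`FeatureDetector`, are illustrative only, and :func:`SurfDescriptorExtractor` (described later in this section) serves as the concrete extractor:

::

    Mat img = imread("image.png", 0);   // assumed input image
    vector<KeyPoint> keypoints;
    // ... detect keypoints in img here ...

    // compute a descriptor for every detected keypoint;
    // keypoints for which a descriptor cannot be computed are removed
    SurfDescriptorExtractor extractor;
    Mat descriptors;
    extractor.compute(img, keypoints, descriptors);
    // row i of descriptors is the descriptor for keypoints[i]

..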
+ + + + +:: + + + + class CV_EXPORTS DescriptorExtractor + { + public: + virtual ~DescriptorExtractor(); + + void compute( const Mat& image, vector& keypoints, + Mat& descriptors ) const; + void compute( const vector& images, vector >& keypoints, + vector& descriptors ) const; + + virtual void read( const FileNode& ); + virtual void write( FileStorage& ) const; + + virtual int descriptorSize() const = 0; + virtual int descriptorType() const = 0; + + static Ptr create( const string& descriptorExtractorType ); + + protected: + ... + }; + + +.. + +In this interface we assume a keypoint descriptor can be represented as a +dense, fixed-dimensional vector of some basic type. Most descriptors used +in practice follow this pattern, as it makes it very easy to compute +distances between descriptors. Therefore we represent a collection of +descriptors as a +:func:`Mat` +, where each row is one keypoint descriptor. + + +.. index:: DescriptorExtractor::compute + + +cv::DescriptorExtractor::compute +-------------------------------- + +`id=0.622580160404 Comments from the Wiki `__ + + + + +.. cfunction:: void DescriptorExtractor::compute( const Mat\& image, vector\& keypoints, Mat\& descriptors ) const + + Compute the descriptors for a set of keypoints detected in an image (first variant) +or image set (second variant). + + + + + + + :param image: The image. + + + :param keypoints: The keypoints. Keypoints for which a descriptor cannot be computed are removed. + + + :param descriptors: The descriptors. Row i is the descriptor for keypoint i. + + + + + +.. cfunction:: void DescriptorExtractor::compute( const vector\& images, vector >\& keypoints, vector\& descriptors ) const + + + + + + + * **images** The image set. + + + * **keypoints** Input keypoints collection. keypoints[i] is keypoints + detected in images[i]. Keypoints for which a descriptor + can not be computed are removed. + + + * **descriptors** Descriptor collection. descriptors[i] are descriptors computed for + a set keypoints[i]. + + + + +.. index:: DescriptorExtractor::read + + +cv::DescriptorExtractor::read +----------------------------- + +`id=0.708176779821 Comments from the Wiki `__ + + + + +.. cfunction:: void DescriptorExtractor::read( const FileNode\& fn ) + + Read descriptor extractor object from file node. + + + + + + + :param fn: File node from which detector will be read. + + + + +.. index:: DescriptorExtractor::write + + +cv::DescriptorExtractor::write +------------------------------ + +`id=0.206682397054 Comments from the Wiki `__ + + + + +.. cfunction:: void DescriptorExtractor::write( FileStorage\& fs ) const + + Write descriptor extractor object to file storage. + + + + + + + :param fs: File storage in which detector will be written. + + + + +.. index:: DescriptorExtractor::create + + +cv::DescriptorExtractor::create +------------------------------- + +`id=0.923714079643 Comments from the Wiki `__ + + +:func:`DescriptorExtractor` + + +.. cfunction:: Ptr DescriptorExtractor::create( const string\& descriptorExtractorType ) + + Descriptor extractor factory that creates of given type with +default parameters (rather using default constructor). + + + + + + + :param descriptorExtractorType: Descriptor extractor type. + + + +Now the following descriptor extractor types are supported: +\ +``"SIFT"`` +-- +:func:`SiftFeatureDetector` +, +\ +``"SURF"`` +-- +:func:`SurfFeatureDetector` +, +\ +``"BRIEF"`` +-- +:func:`BriefFeatureDetector` +. 
+\ +Also combined format is supported: descriptor extractor adapter name ( +``"Opponent"`` +-- +:func:`OpponentColorDescriptorExtractor` +) + descriptor extractor name (see above), +e.g. +``"OpponentSIFT"`` +, etc. + + +.. index:: SiftDescriptorExtractor + +.. _SiftDescriptorExtractor: + +SiftDescriptorExtractor +----------------------- + +`id=0.676546819501 Comments from the Wiki `__ + +.. ctype:: SiftDescriptorExtractor + + + +Wrapping class for descriptors computing using +:func:`SIFT` +class. + + + + +:: + + + + class SiftDescriptorExtractor : public DescriptorExtractor + { + public: + SiftDescriptorExtractor( + const SIFT::DescriptorParams& descriptorParams=SIFT::DescriptorParams(), + const SIFT::CommonParams& commonParams=SIFT::CommonParams() ); + SiftDescriptorExtractor( double magnification, bool isNormalize=true, + bool recalculateAngles=true, int nOctaves=SIFT::CommonParams::DEFAULT_NOCTAVES, + int nOctaveLayers=SIFT::CommonParams::DEFAULT_NOCTAVE_LAYERS, + int firstOctave=SIFT::CommonParams::DEFAULT_FIRST_OCTAVE, + int angleMode=SIFT::CommonParams::FIRST_ANGLE ); + + virtual void read (const FileNode &fn); + virtual void write (FileStorage &fs) const; + virtual int descriptorSize() const; + virtual int descriptorType() const; + protected: + ... + } + + +.. + + +.. index:: SurfDescriptorExtractor + +.. _SurfDescriptorExtractor: + +SurfDescriptorExtractor +----------------------- + +`id=0.638581739296 Comments from the Wiki `__ + +.. ctype:: SurfDescriptorExtractor + + + +Wrapping class for descriptors computing using +:func:`SURF` +class. + + + + +:: + + + + class SurfDescriptorExtractor : public DescriptorExtractor + { + public: + SurfDescriptorExtractor( int nOctaves=4, + int nOctaveLayers=2, bool extended=false ); + + virtual void read (const FileNode &fn); + virtual void write (FileStorage &fs) const; + virtual int descriptorSize() const; + virtual int descriptorType() const; + protected: + ... + } + + +.. + + +.. index:: CalonderDescriptorExtractor + +.. _CalonderDescriptorExtractor: + +CalonderDescriptorExtractor +--------------------------- + +`id=0.301561509204 Comments from the Wiki `__ + +.. ctype:: CalonderDescriptorExtractor + + + +Wrapping class for descriptors computing using +:func:`RTreeClassifier` +class. + + + + +:: + + + + template + class CalonderDescriptorExtractor : public DescriptorExtractor + { + public: + CalonderDescriptorExtractor( const string& classifierFile ); + + virtual void read( const FileNode &fn ); + virtual void write( FileStorage &fs ) const; + virtual int descriptorSize() const; + virtual int descriptorType() const; + protected: + ... + } + + +.. + + +.. index:: OpponentColorDescriptorExtractor + +.. _OpponentColorDescriptorExtractor: + +OpponentColorDescriptorExtractor +-------------------------------- + +`id=0.081563051622 Comments from the Wiki `__ + +.. ctype:: OpponentColorDescriptorExtractor + + + +Adapts a descriptor extractor to compute descripors in Opponent Color Space +(refer to van de Sande et al., CGIV 2008 "Color Descriptors for Object Category Recognition"). +Input RGB image is transformed in Opponent Color Space. Then unadapted descriptor extractor +(set in constructor) computes descriptors on each of the three channel and concatenate +them into a single color descriptor. 
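As a short illustration (the class synopsis follows), the adapter can also be obtained through :func:`DescriptorExtractor::create` using the combined-name format described above:

::

    // SURF descriptors computed in the Opponent Color Space
    Ptr<DescriptorExtractor> extractor =
        DescriptorExtractor::create("OpponentSURF");

..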
+ + + + +:: + + + + class OpponentColorDescriptorExtractor : public DescriptorExtractor + { + public: + OpponentColorDescriptorExtractor( const Ptr& dextractor ); + + virtual void read( const FileNode& ); + virtual void write( FileStorage& ) const; + virtual int descriptorSize() const; + virtual int descriptorType() const; + protected: + ... + }; + + +.. + + +.. index:: BriefDescriptorExtractor + +.. _BriefDescriptorExtractor: + +BriefDescriptorExtractor +------------------------ + +`id=0.207875021385 Comments from the Wiki `__ + +.. ctype:: BriefDescriptorExtractor + + + +Class for computing BRIEF descriptors described in paper of Calonder M., Lepetit V., +Strecha C., Fua P.: ''BRIEF: Binary Robust Independent Elementary Features.'' +11th European Conference on Computer Vision (ECCV), Heraklion, Crete. LNCS Springer, September 2010. + + + + +:: + + + + class BriefDescriptorExtractor : public DescriptorExtractor + { + public: + static const int PATCH_SIZE = 48; + static const int KERNEL_SIZE = 9; + + // bytes is a length of descriptor in bytes. It can be equal 16, 32 or 64 bytes. + BriefDescriptorExtractor( int bytes = 32 ); + + virtual void read( const FileNode& ); + virtual void write( FileStorage& ) const; + virtual int descriptorSize() const; + virtual int descriptorType() const; + protected: + ... + }; + + +.. + diff --git a/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst new file mode 100644 index 000000000..d01047b35 --- /dev/null +++ b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst @@ -0,0 +1,637 @@ +Common Interfaces of Descriptor Matchers +======================================== + +.. highlight:: cpp + + +Matchers of keypoint descriptors in OpenCV have wrappers with common interface that enables to switch easily +between different algorithms solving the same problem. This section is devoted to matching descriptors +that are represented as vectors in a multidimensional space. All objects that implement ''vector'' +descriptor matchers inherit +:func:`DescriptorMatcher` +interface. + + +.. index:: DMatch + +.. _DMatch: + +DMatch +------ + +`id=0.193402930617 Comments from the Wiki `__ + +.. ctype:: DMatch + + + +Match between two keypoint descriptors: query descriptor index, +train descriptor index, train image index and distance between descriptors. + + + + +:: + + + + struct DMatch + { + DMatch() : queryIdx(-1), trainIdx(-1), imgIdx(-1), + distance(std::numeric_limits::max()) {} + DMatch( int _queryIdx, int _trainIdx, float _distance ) : + queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(-1), + distance(_distance) {} + DMatch( int _queryIdx, int _trainIdx, int _imgIdx, float _distance ) : + queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(_imgIdx), + distance(_distance) {} + + int queryIdx; // query descriptor index + int trainIdx; // train descriptor index + int imgIdx; // train image index + + float distance; + + // less is better + bool operator<( const DMatch &m ) const; + }; + + +.. + + +.. index:: DescriptorMatcher + +.. _DescriptorMatcher: + +DescriptorMatcher +----------------- + +`id=0.0185035556985 Comments from the Wiki `__ + +.. ctype:: DescriptorMatcher + + + +Abstract base class for matching keypoint descriptors. It has two groups +of match methods: for matching descriptors of one image with other image or +with image set. 
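A minimal matching sketch before the class synopsis below, assuming ``queryDescriptors`` and ``trainDescriptors`` are ``CV_32F`` matrices already produced by a descriptor extractor; :func:`BruteForceMatcher` (described later in this section) is the concrete matcher:

::

    // find the best train match for every query descriptor
    BruteForceMatcher<L2<float> > matcher;
    vector<DMatch> matches;
    matcher.match(queryDescriptors, trainDescriptors, matches);
    // matches[i].trainIdx is the index of the best train descriptor
    // for query descriptor i

..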
+ + + + +:: + + + + class DescriptorMatcher + { + public: + virtual ~DescriptorMatcher(); + + virtual void add( const vector& descriptors ); + + const vector& getTrainDescriptors() const; + virtual void clear(); + bool empty() const; + virtual bool isMaskSupported() const = 0; + + virtual void train(); + + /* + * Group of methods to match descriptors from image pair. + */ + void match( const Mat& queryDescriptors, const Mat& trainDescriptors, + vector& matches, const Mat& mask=Mat() ) const; + void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, + vector >& matches, int k, + const Mat& mask=Mat(), bool compactResult=false ) const; + void radiusMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, + vector >& matches, float maxDistance, + const Mat& mask=Mat(), bool compactResult=false ) const; + /* + * Group of methods to match descriptors from one image to image set. + */ + void match( const Mat& queryDescriptors, vector& matches, + const vector& masks=vector() ); + void knnMatch( const Mat& queryDescriptors, vector >& matches, + int k, const vector& masks=vector(), + bool compactResult=false ); + void radiusMatch( const Mat& queryDescriptors, vector >& matches, + float maxDistance, const vector& masks=vector(), + bool compactResult=false ); + + virtual void read( const FileNode& ); + virtual void write( FileStorage& ) const; + + virtual Ptr clone( bool emptyTrainData=false ) const = 0; + + static Ptr create( const string& descriptorMatcherType ); + + protected: + vector trainDescCollection; + ... + }; + + +.. + + +.. index:: DescriptorMatcher::add + + +cv::DescriptorMatcher::add +-------------------------- + +`id=0.549221986718 Comments from the Wiki `__ + + +```` + + +.. cfunction:: void add( const vector\& descriptors ) + + Add descriptors to train descriptor collection. If collection trainDescCollectionis not empty +the new descriptors are added to existing train descriptors. + + + + + + + :param descriptors: Descriptors to add. Each ``descriptors[i]`` is a set of descriptors + from the same (one) train image. + + + + +.. index:: DescriptorMatcher::getTrainDescriptors + + +cv::DescriptorMatcher::getTrainDescriptors +------------------------------------------ + +`id=0.354691082433 Comments from the Wiki `__ + + +```` + + +.. cfunction:: const vector\& getTrainDescriptors() const + + Returns constant link to the train descriptor collection (i.e. trainDescCollection). + + + + +.. index:: DescriptorMatcher::clear + + +cv::DescriptorMatcher::clear +---------------------------- + +`id=0.776403699262 Comments from the Wiki `__ + + + + +.. cfunction:: void DescriptorMatcher::clear() + + Clear train descriptor collection. + + + + +.. index:: DescriptorMatcher::empty + + +cv::DescriptorMatcher::empty +---------------------------- + +`id=0.186730120991 Comments from the Wiki `__ + + + + +.. cfunction:: bool DescriptorMatcher::empty() const + + Return true if there are not train descriptors in collection. + + + + +.. index:: DescriptorMatcher::isMaskSupported + + +cv::DescriptorMatcher::isMaskSupported +-------------------------------------- + +`id=0.4880242426 Comments from the Wiki `__ + + + + +.. cfunction:: bool DescriptorMatcher::isMaskSupported() + + Returns true if descriptor matcher supports masking permissible matches. + + + + +.. index:: DescriptorMatcher::train + + +cv::DescriptorMatcher::train +---------------------------- + +`id=0.708209257367 Comments from the Wiki `__ + + + + +.. 
cfunction:: void DescriptorMatcher::train() + + Train descriptor matcher (e.g. train flann index). In all methods to match the method train() +is run every time before matching. Some descriptor matchers (e.g. BruteForceMatcher) have empty +implementation of this method, other matchers realy train their inner structures (e.g. FlannBasedMatcher +trains flann::Index) + + + + +.. index:: DescriptorMatcher::match + + +cv::DescriptorMatcher::match +---------------------------- + +`id=0.803878673329 Comments from the Wiki `__ + + +```` +```` +```` +```` + + +.. cfunction:: void DescriptorMatcher::match( const Mat\& queryDescriptors, const Mat\& trainDescriptors, vector\& matches, const Mat\& mask=Mat() ) const + + Find the best match for each descriptor from a query set with train descriptors. +Supposed that the query descriptors are of keypoints detected on the same query image. +In first variant of this method train descriptors are set as input argument and +supposed that they are of keypoints detected on the same train image. In second variant +of the method train descriptors collection that was set using addmethod is used. +Optional mask (or masks) can be set to describe which descriptors can be matched. queryDescriptors[i]can be matched with trainDescriptors[j]only if mask.at(i,j)is non-zero. + + + + + +.. cfunction:: void DescriptorMatcher::match( const Mat\& queryDescriptors, vector\& matches, const vector\& masks=vector() ) + + + + + + + :param queryDescriptors: Query set of descriptors. + + + :param trainDescriptors: Train set of descriptors. This will not be added to train descriptors collection + stored in class object. + + + :param matches: Matches. If some query descriptor masked out in ``mask`` no match will be added for this descriptor. + So ``matches`` size may be less query descriptors count. + + + :param mask: Mask specifying permissible matches between input query and train matrices of descriptors. + + + :param masks: The set of masks. Each ``masks[i]`` specifies permissible matches between input query descriptors + and stored train descriptors from i-th image (i.e. ``trainDescCollection[i])`` . + + + + +.. index:: DescriptorMatcher::knnMatch + + +cv::DescriptorMatcher::knnMatch +------------------------------- + +`id=0.510078848403 Comments from the Wiki `__ + + +:func:`DescriptorMatcher::match` + + +.. cfunction:: void DescriptorMatcher::knnMatch( const Mat\& queryDescriptors, const Mat\& trainDescriptors, vector >\& matches, int k, const Mat\& mask=Mat(), bool compactResult=false ) const + + Find the k best matches for each descriptor from a query set with train descriptors. +Found k (or less if not possible) matches are returned in distance increasing order. +Details about query and train descriptors see in . + + + + + +.. cfunction:: void DescriptorMatcher::knnMatch( const Mat\& queryDescriptors, vector >\& matches, int k, const vector\& masks=vector(), bool compactResult=false ) + + + + + + + :param queryDescriptors, trainDescriptors, mask, masks: See in :func:`DescriptorMatcher::match` . + + + :param matches: Mathes. Each ``matches[i]`` is k or less matches for the same query descriptor. + + + :param k: Count of best matches will be found per each query descriptor (or less if it's not possible). + + + :param compactResult: It's used when mask (or masks) is not empty. If ``compactResult`` is false ``matches`` vector will have the same size as ``queryDescriptors`` rows. 
If ``compactResult`` + is true ``matches`` vector will not contain matches for fully masked out query descriptors. + + + + +.. index:: DescriptorMatcher::radiusMatch + + +cv::DescriptorMatcher::radiusMatch +---------------------------------- + +`id=0.763278154174 Comments from the Wiki `__ + + +:func:`DescriptorMatcher::match` + + +.. cfunction:: void DescriptorMatcher::radiusMatch( const Mat\& queryDescriptors, const Mat\& trainDescriptors, vector >\& matches, float maxDistance, const Mat\& mask=Mat(), bool compactResult=false ) const + + Find the best matches for each query descriptor which have distance less than given threshold. +Found matches are returned in distance increasing order. Details about query and train +descriptors see in . + + + + + +.. cfunction:: void DescriptorMatcher::radiusMatch( const Mat\& queryDescriptors, vector >\& matches, float maxDistance, const vector\& masks=vector(), bool compactResult=false ) + + + + + + + :param queryDescriptors, trainDescriptors, mask, masks: See in :func:`DescriptorMatcher::match` . + + + :param matches, compactResult: See in :func:`DescriptorMatcher::knnMatch` . + + + :param maxDistance: The threshold to found match distances. + + + + +.. index:: DescriptorMatcher::clone + + +cv::DescriptorMatcher::clone +---------------------------- + +`id=0.743679534249 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr \\DescriptorMatcher::clone( bool emptyTrainData ) const + + Clone the matcher. + + + + + + + :param emptyTrainData: If emptyTrainData is false the method create deep copy of the object, i.e. copies + both parameters and train data. If emptyTrainData is true the method create object copy with current parameters + but with empty train data.. + + + + +.. index:: DescriptorMatcher::create + + +cv::DescriptorMatcher::create +----------------------------- + +`id=0.681869512138 Comments from the Wiki `__ + + +:func:`DescriptorMatcher` + + +.. cfunction:: Ptr DescriptorMatcher::create( const string\& descriptorMatcherType ) + + Descriptor matcher factory that creates of +given type with default parameters (rather using default constructor). + + + + + + + :param descriptorMatcherType: Descriptor matcher type. + + + +Now the following matcher types are supported: +``"BruteForce"`` +(it uses +``L2`` +), +``"BruteForce-L1"`` +, +``"BruteForce-Hamming"`` +, +``"BruteForce-HammingLUT"`` +, +``"FlannBased"`` +. + + +.. index:: BruteForceMatcher + +.. _BruteForceMatcher: + +BruteForceMatcher +----------------- + +`id=0.47821275438 Comments from the Wiki `__ + +.. ctype:: BruteForceMatcher + + + +Brute-force descriptor matcher. For each descriptor in the first set, this matcher finds the closest +descriptor in the second set by trying each one. This descriptor matcher supports masking +permissible matches between descriptor sets. + + + + +:: + + + + template + class BruteForceMatcher : public DescriptorMatcher + { + public: + BruteForceMatcher( Distance d = Distance() ); + virtual ~BruteForceMatcher(); + + virtual bool isMaskSupported() const; + virtual Ptr clone( bool emptyTrainData=false ) const; + protected: + ... + } + + +.. + +For efficiency, BruteForceMatcher is templated on the distance metric. +For float descriptors, a common choice would be +``L2`` +. 
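For orientation, here is a minimal matching sketch. It assumes ``query`` and ``train`` are ``CV_32F`` descriptor matrices computed elsewhere (e.g. by a descriptor extractor); the function and variable names are placeholders, not part of the API:

::

    #include <opencv2/features2d/features2d.hpp>
    using namespace cv;

    // query and train are assumed to be CV_32F descriptor matrices,
    // one row per keypoint, produced by some DescriptorExtractor
    void matchFloatDescriptors( const Mat& query, const Mat& train )
    {
        BruteForceMatcher<L2<float> > matcher;   // squared Euclidean distance
        std::vector<DMatch> matches;
        matcher.match( query, train, matches );  // matches[i] is the best train
                                                 // match of the i-th query row
    }

..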
Class of supported distances are: + + + + +:: + + + + template + struct Accumulator + { + typedef T Type; + }; + + template<> struct Accumulator { typedef unsigned int Type; }; + template<> struct Accumulator { typedef unsigned int Type; }; + template<> struct Accumulator { typedef int Type; }; + template<> struct Accumulator { typedef int Type; }; + + /* + * Squared Euclidean distance functor + */ + template + struct L2 + { + typedef T ValueType; + typedef typename Accumulator::Type ResultType; + + ResultType operator()( const T* a, const T* b, int size ) const; + }; + + /* + * Manhattan distance (city block distance) functor + */ + template + struct CV_EXPORTS L1 + { + typedef T ValueType; + typedef typename Accumulator::Type ResultType; + + ResultType operator()( const T* a, const T* b, int size ) const; + ... + }; + + /* + * Hamming distance (city block distance) functor + */ + struct HammingLUT + { + typedef unsigned char ValueType; + typedef int ResultType; + + ResultType operator()( const unsigned char* a, const unsigned char* b, + int size ) const; + ... + }; + + struct Hamming + { + typedef unsigned char ValueType; + typedef int ResultType; + + ResultType operator()( const unsigned char* a, const unsigned char* b, + int size ) const; + ... + }; + + +.. + + +.. index:: FlannBasedMatcher + +.. _FlannBasedMatcher: + +FlannBasedMatcher +----------------- + +`id=0.721140850904 Comments from the Wiki `__ + +.. ctype:: FlannBasedMatcher + + + +Flann based descriptor matcher. This matcher trains +:func:`flann::Index` +on +train descriptor collection and calls it's nearest search methods to find best matches. +So this matcher may be faster in cases of matching to large train collection than +brute force matcher. +``FlannBasedMatcher`` +does not support masking permissible +matches between descriptor sets, because +:func:`flann::Index` +does not +support this. + + + + +:: + + + + class FlannBasedMatcher : public DescriptorMatcher + { + public: + FlannBasedMatcher( + const Ptr& indexParams=new flann::KDTreeIndexParams(), + const Ptr& searchParams=new flann::SearchParams() ); + + virtual void add( const vector& descriptors ); + virtual void clear(); + + virtual void train(); + virtual bool isMaskSupported() const; + + virtual Ptr clone( bool emptyTrainData=false ) const; + protected: + ... + }; + + +.. + diff --git a/modules/features2d/doc/common_interfaces_of_feature_detectors.rst b/modules/features2d/doc/common_interfaces_of_feature_detectors.rst new file mode 100644 index 000000000..4c352af7d --- /dev/null +++ b/modules/features2d/doc/common_interfaces_of_feature_detectors.rst @@ -0,0 +1,1045 @@ +Common Interfaces of Feature Detectors +====================================== + +.. highlight:: cpp + + +Feature detectors in OpenCV have wrappers with common interface that enables to switch easily +between different algorithms solving the same problem. All objects that implement keypoint detectors +inherit +:func:`FeatureDetector` +interface. + + +.. index:: KeyPoint + +.. _KeyPoint: + +KeyPoint +-------- + +`id=0.685159926523 Comments from the Wiki `__ + +.. ctype:: KeyPoint + + + +Data structure for salient point detectors. 
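One common use of this structure, shown here as a hedged sketch, is converting detected keypoints to bare ``Point2f`` coordinates (e.g. to feed geometric functions); ``keypoints`` is assumed to come from some feature detector, and the full class definition follows below:

::

    #include <opencv2/features2d/features2d.hpp>
    using namespace cv;

    // keypoints are assumed to be produced by a FeatureDetector
    void keypointsToPoints( const std::vector<KeyPoint>& keypoints )
    {
        std::vector<Point2f> points;
        KeyPoint::convert( keypoints, points ); // keeps pt, drops size/angle/response
    }

..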
+ + + + +:: + + + + class KeyPoint + { + public: + // the default constructor + KeyPoint() : pt(0,0), size(0), angle(-1), response(0), octave(0), + class_id(-1) {} + // the full constructor + KeyPoint(Point2f _pt, float _size, float _angle=-1, + float _response=0, int _octave=0, int _class_id=-1) + : pt(_pt), size(_size), angle(_angle), response(_response), + octave(_octave), class_id(_class_id) {} + // another form of the full constructor + KeyPoint(float x, float y, float _size, float _angle=-1, + float _response=0, int _octave=0, int _class_id=-1) + : pt(x, y), size(_size), angle(_angle), response(_response), + octave(_octave), class_id(_class_id) {} + // converts vector of keypoints to vector of points + static void convert(const std::vector& keypoints, + std::vector& points2f, + const std::vector& keypointIndexes=std::vector()); + // converts vector of points to the vector of keypoints, where each + // keypoint is assigned the same size and the same orientation + static void convert(const std::vector& points2f, + std::vector& keypoints, + float size=1, float response=1, int octave=0, + int class_id=-1); + + // computes overlap for pair of keypoints; + // overlap is a ratio between area of keypoint regions intersection and + // area of keypoint regions union (now keypoint region is circle) + static float overlap(const KeyPoint& kp1, const KeyPoint& kp2); + + Point2f pt; // coordinates of the keypoints + float size; // diameter of the meaningfull keypoint neighborhood + float angle; // computed orientation of the keypoint (-1 if not applicable) + float response; // the response by which the most strong keypoints + // have been selected. Can be used for the further sorting + // or subsampling + int octave; // octave (pyramid layer) from which the keypoint has been extracted + int class_id; // object class (if the keypoints need to be clustered by + // an object they belong to) + }; + + // writes vector of keypoints to the file storage + void write(FileStorage& fs, const string& name, const vector& keypoints); + // reads vector of keypoints from the specified file storage node + void read(const FileNode& node, CV_OUT vector& keypoints); + + +.. + + +.. index:: FeatureDetector + +.. _FeatureDetector: + +FeatureDetector +--------------- + +`id=0.920345215316 Comments from the Wiki `__ + +.. ctype:: FeatureDetector + + + +Abstract base class for 2D image feature detectors. + + + + +:: + + + + class CV_EXPORTS FeatureDetector + { + public: + virtual ~FeatureDetector(); + + void detect( const Mat& image, vector& keypoints, + const Mat& mask=Mat() ) const; + + void detect( const vector& images, + vector >& keypoints, + const vector& masks=vector() ) const; + + virtual void read(const FileNode&); + virtual void write(FileStorage&) const; + + static Ptr create( const string& detectorType ); + + protected: + ... + }; + + +.. + + +.. index:: FeatureDetector::detect + + +cv::FeatureDetector::detect +--------------------------- + +`id=0.445108206821 Comments from the Wiki `__ + + + + +.. cfunction:: void FeatureDetector::detect( const Mat\& image, vector\& keypoints, const Mat\& mask=Mat() ) const + + Detect keypoints in an image (first variant) or image set (second variant). + + + + + + + :param image: The image. + + + :param keypoints: The detected keypoints. + + + :param mask: Mask specifying where to look for keypoints (optional). Must be a char matrix + with non-zero values in the region of interest. + + + + + +.. 
cfunction:: void FeatureDetector::detect( const vector\& images, vector >\& keypoints, const vector\& masks=vector() ) const + + + + + + + * **images** Images set. + + + * **keypoints** Collection of keypoints detected in an input images. keypoints[i] is a set of keypoints detected in an images[i]. + + + * **masks** Masks for each input image specifying where to look for keypoints (optional). masks[i] is a mask for images[i]. + Each element of ``masks`` vector must be a char matrix with non-zero values in the region of interest. + + + + +.. index:: FeatureDetector::read + + +cv::FeatureDetector::read +------------------------- + +`id=0.762313089054 Comments from the Wiki `__ + + + + +.. cfunction:: void FeatureDetector::read( const FileNode\& fn ) + + Read feature detector object from file node. + + + + + + + :param fn: File node from which detector will be read. + + + + +.. index:: FeatureDetector::write + + +cv::FeatureDetector::write +-------------------------- + +`id=0.537131606226 Comments from the Wiki `__ + + + + +.. cfunction:: void FeatureDetector::write( FileStorage\& fs ) const + + Write feature detector object to file storage. + + + + + + + :param fs: File storage in which detector will be written. + + + + +.. index:: FeatureDetector::create + + +cv::FeatureDetector::create +--------------------------- + +`id=0.999180942051 Comments from the Wiki `__ + + +:func:`FeatureDetector` + + +.. cfunction:: Ptr FeatureDetector::create( const string\& detectorType ) + + Feature detector factory that creates of given type with +default parameters (rather using default constructor). + + + + + + + :param detectorType: Feature detector type. + + + +Now the following detector types are supported: +\ +``"FAST"`` +-- +:func:`FastFeatureDetector` +, +\ +``"STAR"`` +-- +:func:`StarFeatureDetector` +, +\ +``"SIFT"`` +-- +:func:`SiftFeatureDetector` +, +\ +``"SURF"`` +-- +:func:`SurfFeatureDetector` +, +\ +``"MSER"`` +-- +:func:`MserFeatureDetector` +, +\ +``"GFTT"`` +-- +:func:`GfttFeatureDetector` +, +\ +``"HARRIS"`` +-- +:func:`HarrisFeatureDetector` +. +\ +Also combined format is supported: feature detector adapter name ( +``"Grid"`` +-- +:func:`GridAdaptedFeatureDetector` +, +``"Pyramid"`` +-- +:func:`PyramidAdaptedFeatureDetector` +) + feature detector name (see above), +e.g. +``"GridFAST"`` +, +``"PyramidSTAR"`` +, etc. + + +.. index:: FastFeatureDetector + +.. _FastFeatureDetector: + +FastFeatureDetector +------------------- + +`id=0.162253794116 Comments from the Wiki `__ + +.. ctype:: FastFeatureDetector + + + +Wrapping class for feature detection using +:func:`FAST` +method. + + + + +:: + + + + class FastFeatureDetector : public FeatureDetector + { + public: + FastFeatureDetector( int threshold=1, bool nonmaxSuppression=true ); + virtual void read( const FileNode& fn ); + virtual void write( FileStorage& fs ) const; + protected: + ... + }; + + +.. + + +.. index:: GoodFeaturesToTrackDetector + +.. _GoodFeaturesToTrackDetector: + +GoodFeaturesToTrackDetector +--------------------------- + +`id=0.728462673768 Comments from the Wiki `__ + +.. ctype:: GoodFeaturesToTrackDetector + + + +Wrapping class for feature detection using +:func:`goodFeaturesToTrack` +function. 
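To make the factory naming concrete, a minimal sketch (``image`` is assumed to be a loaded 8-bit image; error handling is omitted):

::

    #include <opencv2/features2d/features2d.hpp>
    using namespace cv;

    // image is assumed to be a loaded 8-bit grayscale Mat
    void detectCorners( const Mat& image )
    {
        // "GFTT" names the wrapper class documented below
        Ptr<FeatureDetector> detector = FeatureDetector::create( "GFTT" );
        std::vector<KeyPoint> keypoints;
        detector->detect( image, keypoints );
    }

..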
+ + + + +:: + + + + class GoodFeaturesToTrackDetector : public FeatureDetector + { + public: + class Params + { + public: + Params( int maxCorners=1000, double qualityLevel=0.01, + double minDistance=1., int blockSize=3, + bool useHarrisDetector=false, double k=0.04 ); + void read( const FileNode& fn ); + void write( FileStorage& fs ) const; + + int maxCorners; + double qualityLevel; + double minDistance; + int blockSize; + bool useHarrisDetector; + double k; + }; + + GoodFeaturesToTrackDetector( const GoodFeaturesToTrackDetector::Params& params= + GoodFeaturesToTrackDetector::Params() ); + GoodFeaturesToTrackDetector( int maxCorners, double qualityLevel, + double minDistance, int blockSize=3, + bool useHarrisDetector=false, double k=0.04 ); + virtual void read( const FileNode& fn ); + virtual void write( FileStorage& fs ) const; + protected: + ... + }; + + +.. + + +.. index:: MserFeatureDetector + +.. _MserFeatureDetector: + +MserFeatureDetector +------------------- + +`id=0.958798683591 Comments from the Wiki `__ + +.. ctype:: MserFeatureDetector + + + +Wrapping class for feature detection using +:func:`MSER` +class. + + + + +:: + + + + class MserFeatureDetector : public FeatureDetector + { + public: + MserFeatureDetector( CvMSERParams params=cvMSERParams() ); + MserFeatureDetector( int delta, int minArea, int maxArea, + double maxVariation, double minDiversity, + int maxEvolution, double areaThreshold, + double minMargin, int edgeBlurSize ); + virtual void read( const FileNode& fn ); + virtual void write( FileStorage& fs ) const; + protected: + ... + }; + + +.. + + +.. index:: StarFeatureDetector + +.. _StarFeatureDetector: + +StarFeatureDetector +------------------- + +`id=0.336277450587 Comments from the Wiki `__ + +.. ctype:: StarFeatureDetector + + + +Wrapping class for feature detection using +:func:`StarDetector` +class. + + + + +:: + + + + class StarFeatureDetector : public FeatureDetector + { + public: + StarFeatureDetector( int maxSize=16, int responseThreshold=30, + int lineThresholdProjected = 10, + int lineThresholdBinarized=8, int suppressNonmaxSize=5 ); + virtual void read( const FileNode& fn ); + virtual void write( FileStorage& fs ) const; + protected: + ... + }; + + +.. + + +.. index:: SiftFeatureDetector + +.. _SiftFeatureDetector: + +SiftFeatureDetector +------------------- + +`id=0.680185509584 Comments from the Wiki `__ + +.. ctype:: SiftFeatureDetector + + + +Wrapping class for feature detection using +:func:`SIFT` +class. + + + + +:: + + + + class SiftFeatureDetector : public FeatureDetector + { + public: + SiftFeatureDetector( + const SIFT::DetectorParams& detectorParams=SIFT::DetectorParams(), + const SIFT::CommonParams& commonParams=SIFT::CommonParams() ); + SiftFeatureDetector( double threshold, double edgeThreshold, + int nOctaves=SIFT::CommonParams::DEFAULT_NOCTAVES, + int nOctaveLayers=SIFT::CommonParams::DEFAULT_NOCTAVE_LAYERS, + int firstOctave=SIFT::CommonParams::DEFAULT_FIRST_OCTAVE, + int angleMode=SIFT::CommonParams::FIRST_ANGLE ); + virtual void read( const FileNode& fn ); + virtual void write( FileStorage& fs ) const; + protected: + ... + }; + + +.. + + +.. index:: SurfFeatureDetector + +.. _SurfFeatureDetector: + +SurfFeatureDetector +------------------- + +`id=0.650289797279 Comments from the Wiki `__ + +.. ctype:: SurfFeatureDetector + + + +Wrapping class for feature detection using +:func:`SURF` +class. 
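As a sketch of the common detector interface, detection can be restricted by a mask (``image`` and ``mask`` are assumed inputs; as described above, the mask must be a char matrix with non-zero values over the region of interest):

::

    #include <opencv2/features2d/features2d.hpp>
    using namespace cv;

    // image: 8-bit input; mask: CV_8U, non-zero over the region of interest
    void detectSurfInRoi( const Mat& image, const Mat& mask )
    {
        SurfFeatureDetector detector( 400. /* hessianThreshold */ );
        std::vector<KeyPoint> keypoints;
        detector.detect( image, keypoints, mask ); // keypoints only where mask != 0
    }

..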
+ + + + +:: + + + + class SurfFeatureDetector : public FeatureDetector + { + public: + SurfFeatureDetector( double hessianThreshold = 400., int octaves = 3, + int octaveLayers = 4 ); + virtual void read( const FileNode& fn ); + virtual void write( FileStorage& fs ) const; + protected: + ... + }; + + +.. + + +.. index:: GridAdaptedFeatureDetector + +.. _GridAdaptedFeatureDetector: + +GridAdaptedFeatureDetector +-------------------------- + +`id=0.491825982044 Comments from the Wiki `__ + +.. ctype:: GridAdaptedFeatureDetector + + + +Adapts a detector to partition the source image into a grid and detect +points in each cell. + + + + +:: + + + + class GridAdaptedFeatureDetector : public FeatureDetector + { + public: + /* + * detector Detector that will be adapted. + * maxTotalKeypoints Maximum count of keypoints detected on the image. + * Only the strongest keypoints will be keeped. + * gridRows Grid rows count. + * gridCols Grid column count. + */ + GridAdaptedFeatureDetector( const Ptr& detector, + int maxTotalKeypoints, int gridRows=4, + int gridCols=4 ); + virtual void read( const FileNode& fn ); + virtual void write( FileStorage& fs ) const; + protected: + ... + }; + + +.. + + +.. index:: PyramidAdaptedFeatureDetector + +.. _PyramidAdaptedFeatureDetector: + +PyramidAdaptedFeatureDetector +----------------------------- + +`id=0.661979316427 Comments from the Wiki `__ + +.. ctype:: PyramidAdaptedFeatureDetector + + + +Adapts a detector to detect points over multiple levels of a Gaussian +pyramid. Useful for detectors that are not inherently scaled. + + + + +:: + + + + class PyramidAdaptedFeatureDetector : public FeatureDetector + { + public: + PyramidAdaptedFeatureDetector( const Ptr& detector, + int levels=2 ); + virtual void read( const FileNode& fn ); + virtual void write( FileStorage& fs ) const; + protected: + ... + }; + + +.. + + +.. index:: DynamicAdaptedFeatureDetector + +.. _DynamicAdaptedFeatureDetector: + +DynamicAdaptedFeatureDetector +----------------------------- + +`id=0.279668593953 Comments from the Wiki `__ + +.. ctype:: DynamicAdaptedFeatureDetector + + + +An adaptively adjusting detector that iteratively detects until the desired number +of features are found. + +If the detector is persisted, it will "remember" the parameters +used on the last detection. In this way, the detector may be used for consistent numbers +of keypoints in a sets of images that are temporally related such as video streams or +panorama series. + +The DynamicAdaptedFeatureDetector uses another detector such as FAST or SURF to do the dirty work, +with the help of an AdjusterAdapter. +After a detection, and an unsatisfactory number of features are detected, +the AdjusterAdapter will adjust the detection parameters so that the next detection will +result in more or less features. This is repeated until either the number of desired features are found +or the parameters are maxed out. + +Adapters can easily be implemented for any detector via the +AdjusterAdapter interface. + +Beware that this is not thread safe - as the adjustment of parameters breaks the const +of the detection routine... + +Here is a sample of how to create a DynamicAdaptedFeatureDetector. + + + +:: + + + + //sample usage: + //will create a detector that attempts to find + //100 - 110 FAST Keypoints, and will at most run + //FAST feature detection 10 times until that + //number of keypoints are found + Ptr detector(new DynamicAdaptedFeatureDetector (100, 110, 10, + new FastAdjuster(20,true))); + + +.. 
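Continuing that sample, here is a hedged sketch of how such a detector is then used on an image sequence (``frames`` is a placeholder). Note that the sample above and the class declaration below disagree on the constructor argument order; this sketch follows the declaration, with the adjuster first:

::

    #include <opencv2/features2d/features2d.hpp>
    using namespace cv;

    // frames is assumed to be a sequence of 8-bit images, e.g. video frames
    void detectOverSequence( const std::vector<Mat>& frames )
    {
        Ptr<FeatureDetector> detector(
            new DynamicAdaptedFeatureDetector( new FastAdjuster(20, true),
                                               100, 110, 10 ) );
        std::vector<KeyPoint> keypoints;
        for( size_t i = 0; i < frames.size(); i++ )
        {
            // the adjusted FAST threshold carries over between calls, keeping
            // the keypoint count near the 100-110 range (hence not thread safe)
            detector->detect( frames[i], keypoints );
        }
    }

..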
+ + + + +:: + + + + class DynamicAdaptedFeatureDetector: public FeatureDetector + { + public: + DynamicAdaptedFeatureDetector( const Ptr& adjaster, + int min_features=400, int max_features=500, int max_iters=5 ); + ... + }; + + +.. + + +.. index:: DynamicAdaptedFeatureDetector::DynamicAdaptedFeatureDetector + + +cv::DynamicAdaptedFeatureDetector::DynamicAdaptedFeatureDetector +---------------------------------------------------------------- + +`id=0.380560280503 Comments from the Wiki `__ + + + + +.. cfunction:: DynamicAdaptedFeatureDetector::DynamicAdaptedFeatureDetector( const Ptr\& adjaster, int min_features, int max_features, int max_iters ) + + DynamicAdaptedFeatureDetector constructor. + + + + + + :param adjaster: An :func:`AdjusterAdapter` that will do the detection and parameter + adjustment + + + :param min_features: This minimum desired number features. + + + :param max_features: The maximum desired number of features. + + + :param max_iters: The maximum number of times to try to adjust the feature detector parameters. For the :func:`FastAdjuster` this number can be high, + but with Star or Surf, many iterations can get time consuming. At each iteration the detector is rerun, so keep this in mind when choosing this value. + + + + +.. index:: AdjusterAdapter + +.. _AdjusterAdapter: + +AdjusterAdapter +--------------- + +`id=0.944457420305 Comments from the Wiki `__ + +.. ctype:: AdjusterAdapter + + + +A feature detector parameter adjuster interface, this is used by the +:func:`DynamicAdaptedFeatureDetector` +and is a wrapper for +:func:`FeatureDetecto` +r that allow them to be adjusted after a detection. + +See +:func:`FastAdjuster` +, +:func:`StarAdjuster` +, +:func:`SurfAdjuster` +for concrete implementations. + + + +:: + + + + class AdjusterAdapter: public FeatureDetector + { + public: + virtual ~AdjusterAdapter() {} + virtual void tooFew(int min, int n_detected) = 0; + virtual void tooMany(int max, int n_detected) = 0; + virtual bool good() const = 0; + }; + + +.. + + +.. index:: AdjusterAdapter::tooFew + + +cv::AdjusterAdapter::tooFew +--------------------------- + +`id=0.0196686779941 Comments from the Wiki `__ + + + + +.. cfunction:: virtual void tooFew(int min, int n_detected) = 0 + + + +Too few features were detected so, adjust the detector parameters accordingly - so that the next +detection detects more features. + + + + :param min: This minimum desired number features. + + + :param n_detected: The actual number detected last run. + + + +An example implementation of this is + + + +:: + + + + void FastAdjuster::tooFew(int min, int n_detected) + { + thresh_--; + } + + +.. + + +.. index:: AdjusterAdapter::tooMany + + +cv::AdjusterAdapter::tooMany +---------------------------- + +`id=0.25730243639 Comments from the Wiki `__ + + + + +.. cfunction:: virtual void tooMany(int max, int n_detected) = 0 + + Too many features were detected so, adjust the detector parameters accordingly - so that the next +detection detects less features. + + + + + + :param max: This maximum desired number features. + + + :param n_detected: The actual number detected last run. + + + +An example implementation of this is + + + +:: + + + + void FastAdjuster::tooMany(int min, int n_detected) + { + thresh_++; + } + + +.. + + +.. index:: AdjusterAdapter::good + + +cv::AdjusterAdapter::good +------------------------- + +`id=0.221086243146 Comments from the Wiki `__ + + + + +.. cfunction:: virtual bool good() const = 0 + + Are params maxed out or still valid? 
Returns false if the parameters can't be adjusted any more. + + +An example implementation of this is + + + +:: + + + + bool FastAdjuster::good() const + { + return (thresh_ > 1) && (thresh_ < 200); + } + + +.. + + +.. index:: FastAdjuster + +.. _FastAdjuster: + +FastAdjuster +------------ + +`id=0.622540715413 Comments from the Wiki `__ + +.. ctype:: FastAdjuster + + + +An +:func:`AdjusterAdapter` +for the +:func:`FastFeatureDetector` +. This will basically decrement or increment the +threshhold by 1 + + + + +:: + + + + class FastAdjuster FastAdjuster: public AdjusterAdapter + { + public: + FastAdjuster(int init_thresh = 20, bool nonmax = true); + ... + }; + + +.. + + +.. index:: StarAdjuster + +.. _StarAdjuster: + +StarAdjuster +------------ + +`id=0.351049315282 Comments from the Wiki `__ + +.. ctype:: StarAdjuster + + + +An +:func:`AdjusterAdapter` +for the +:func:`StarFeatureDetector` +. This adjusts the responseThreshhold of +StarFeatureDetector. + + + +:: + + + + class StarAdjuster: public AdjusterAdapter + { + StarAdjuster(double initial_thresh = 30.0); + ... + }; + + +.. + + +.. index:: SurfAdjuster + +.. _SurfAdjuster: + +SurfAdjuster +------------ + +`id=0.268271433862 Comments from the Wiki `__ + +.. ctype:: SurfAdjuster + + + +An +:func:`AdjusterAdapter` +for the +:func:`SurfFeatureDetector` +. This adjusts the hessianThreshold of +SurfFeatureDetector. + + + +:: + + + + class SurfAdjuster: public SurfAdjuster + { + SurfAdjuster(); + ... + }; + + +.. + diff --git a/modules/features2d/doc/common_interfaces_of_generic_descriptor_matchers.rst b/modules/features2d/doc/common_interfaces_of_generic_descriptor_matchers.rst new file mode 100644 index 000000000..0a3c854c3 --- /dev/null +++ b/modules/features2d/doc/common_interfaces_of_generic_descriptor_matchers.rst @@ -0,0 +1,677 @@ +Common Interfaces of Generic Descriptor Matchers +================================================ + +.. highlight:: cpp + + +Matchers of keypoint descriptors in OpenCV have wrappers with common interface that enables to switch easily +between different algorithms solving the same problem. This section is devoted to matching descriptors +that can not be represented as vectors in a multidimensional space. +``GenericDescriptorMatcher`` +is a more generic interface for descriptors. It does not make any assumptions about descriptor representation. +Every descriptor with +:func:`DescriptorExtractor` +interface has a wrapper with +``GenericDescriptorMatcher`` +interface (see +:func:`VectorDescriptorMatcher` +). +There are descriptors such as One way descriptor and Ferns that have +``GenericDescriptorMatcher`` +interface implemented, but do not support +:func:`DescriptorExtractor` +. + + +.. index:: GenericDescriptorMatcher + +.. _GenericDescriptorMatcher: + +GenericDescriptorMatcher +------------------------ + +`id=0.973387347242 Comments from the Wiki `__ + +.. ctype:: GenericDescriptorMatcher + + + +Abstract interface for a keypoint descriptor extracting and matching. +There is +:func:`DescriptorExtractor` +and +:func:`DescriptorMatcher` +for these purposes too, but their interfaces are intended for descriptors +represented as vectors in a multidimensional space. +``GenericDescriptorMatcher`` +is a more generic interface for descriptors. +As +:func:`DescriptorMatcher` +, +``GenericDescriptorMatcher`` +has two groups +of match methods: for matching keypoints of one image with other image or +with image set. 
+ + + + +:: + + + + class GenericDescriptorMatcher + { + public: + GenericDescriptorMatcher(); + virtual ~GenericDescriptorMatcher(); + + virtual void add( const vector& images, + vector >& keypoints ); + + const vector& getTrainImages() const; + const vector >& getTrainKeypoints() const; + virtual void clear(); + + virtual void train() = 0; + + virtual bool isMaskSupported() = 0; + + void classify( const Mat& queryImage, + vector& queryKeypoints, + const Mat& trainImage, + vector& trainKeypoints ) const; + void classify( const Mat& queryImage, + vector& queryKeypoints ); + + /* + * Group of methods to match keypoints from image pair. + */ + void match( const Mat& queryImage, vector& queryKeypoints, + const Mat& trainImage, vector& trainKeypoints, + vector& matches, const Mat& mask=Mat() ) const; + void knnMatch( const Mat& queryImage, vector& queryKeypoints, + const Mat& trainImage, vector& trainKeypoints, + vector >& matches, int k, + const Mat& mask=Mat(), bool compactResult=false ) const; + void radiusMatch( const Mat& queryImage, vector& queryKeypoints, + const Mat& trainImage, vector& trainKeypoints, + vector >& matches, float maxDistance, + const Mat& mask=Mat(), bool compactResult=false ) const; + /* + * Group of methods to match keypoints from one image to image set. + */ + void match( const Mat& queryImage, vector& queryKeypoints, + vector& matches, const vector& masks=vector() ); + void knnMatch( const Mat& queryImage, vector& queryKeypoints, + vector >& matches, int k, + const vector& masks=vector(), bool compactResult=false ); + void radiusMatch( const Mat& queryImage, vector& queryKeypoints, + vector >& matches, float maxDistance, + const vector& masks=vector(), bool compactResult=false ); + + virtual void read( const FileNode& ); + virtual void write( FileStorage& ) const; + + virtual Ptr clone( bool emptyTrainData=false ) const = 0; + + protected: + ... + }; + + +.. + + +.. index:: GenericDescriptorMatcher::add + + +cv::GenericDescriptorMatcher::add +--------------------------------- + +`id=0.507600777855 Comments from the Wiki `__ + + + + +.. cfunction:: void GenericDescriptorMatcher::add( const vector\& images, vector >\& keypoints ) + + Adds images and keypoints from them to the train collection (descriptors are supposed to be calculated here). +If train collection is not empty new image and keypoints from them will be added to +existing data. + + + + + + + :param images: Image collection. + + + :param keypoints: Point collection. Assumes that ``keypoints[i]`` are keypoints + detected in an image ``images[i]`` . + + + + +.. index:: GenericDescriptorMatcher::getTrainImages + + +cv::GenericDescriptorMatcher::getTrainImages +-------------------------------------------- + +`id=0.520364236881 Comments from the Wiki `__ + + + + +.. cfunction:: const vector\& GenericDescriptorMatcher::getTrainImages() const + + Returns train image collection. + + + + +.. index:: GenericDescriptorMatcher::getTrainKeypoints + + +cv::GenericDescriptorMatcher::getTrainKeypoints +----------------------------------------------- + +`id=0.179197628979 Comments from the Wiki `__ + + + + +.. cfunction:: const vector >\& GenericDescriptorMatcher::getTrainKeypoints() const + + Returns train keypoints collection. + + + + +.. index:: GenericDescriptorMatcher::clear + + +cv::GenericDescriptorMatcher::clear +----------------------------------- + +`id=0.163507435554 Comments from the Wiki `__ + + + + +.. cfunction:: void GenericDescriptorMatcher::clear() + + Clear train collection (iamges and keypoints). 
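To make the collection-based workflow concrete, here is a sketch using :func:`VectorDescriptorMatcher` (one concrete ``GenericDescriptorMatcher``, documented below); the train images and their keypoints are assumed to be prepared by a feature detector beforehand:

::

    #include <opencv2/features2d/features2d.hpp>
    using namespace cv;

    // trainImages/trainKeypoints and queryImage/queryKeypoints are assumed
    // to be prepared by a FeatureDetector beforehand
    void matchAgainstCollection( const Mat& queryImage,
                                 std::vector<KeyPoint>& queryKeypoints,
                                 const std::vector<Mat>& trainImages,
                                 std::vector<std::vector<KeyPoint> >& trainKeypoints )
    {
        VectorDescriptorMatcher matcher( new SurfDescriptorExtractor,
                                         new BruteForceMatcher<L2<float> > );
        matcher.add( trainImages, trainKeypoints ); // descriptors are computed here
        matcher.train();
        std::vector<DMatch> matches;
        matcher.match( queryImage, queryKeypoints, matches );
    }

..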
+ + + + +.. index:: GenericDescriptorMatcher::train + + +cv::GenericDescriptorMatcher::train +----------------------------------- + +`id=0.270072381935 Comments from the Wiki `__ + + + + +.. cfunction:: void GenericDescriptorMatcher::train() + + Train the object, e.g. tree-based structure to extract descriptors or +to optimize descriptors matching. + + + + +.. index:: GenericDescriptorMatcher::isMaskSupported + + +cv::GenericDescriptorMatcher::isMaskSupported +--------------------------------------------- + +`id=0.208711469863 Comments from the Wiki `__ + + + + +.. cfunction:: void GenericDescriptorMatcher::isMaskSupported() + + Returns true if generic descriptor matcher supports masking permissible matches. + + + + +.. index:: GenericDescriptorMatcher::classify + + +cv::GenericDescriptorMatcher::classify +-------------------------------------- + +`id=0.550844968727 Comments from the Wiki `__ + + +:func:`GenericDescriptorMatcher::add` + + +.. cfunction:: void GenericDescriptorMatcher::classify( const Mat\& queryImage, vector\& queryKeypoints, const Mat\& trainImage, vector\& trainKeypoints ) const + + Classifies query keypoints under keypoints of one train image qiven as input argument +(first version of the method) or train image collection that set using (second version). + + + + + +.. cfunction:: void GenericDescriptorMatcher::classify( const Mat\& queryImage, vector\& queryKeypoints ) + + + + + + + :param queryImage: The query image. + + + :param queryKeypoints: Keypoints from the query image. + + + :param trainImage: The train image. + + + :param trainKeypoints: Keypoints from the train image. + + + + +.. index:: GenericDescriptorMatcher::match + + +cv::GenericDescriptorMatcher::match +----------------------------------- + +`id=0.91509902003 Comments from the Wiki `__ + + +:func:`GenericDescriptorMatcher::add` +:func:`DescriptorMatcher::match` + + +.. cfunction:: void GenericDescriptorMatcher::match( const Mat\& queryImage, vector\& queryKeypoints, const Mat\& trainImage, vector\& trainKeypoints, vector\& matches, const Mat\& mask=Mat() ) const + + Find best match for query keypoints to the training set. In first version of method +one train image and keypoints detected on it - are input arguments. In second version +query keypoints are matched to training collectin that set using . As in the mask can be set. + + + + + +.. cfunction:: void GenericDescriptorMatcher::match( const Mat\& queryImage, vector\& queryKeypoints, vector\& matches, const vector\& masks=vector() ) + + + + + + + :param queryImage: Query image. + + + :param queryKeypoints: Keypoints detected in ``queryImage`` . + + + :param trainImage: Train image. This will not be added to train image collection + stored in class object. + + + :param trainKeypoints: Keypoints detected in ``trainImage`` . They will not be added to train points collection + stored in class object. + + + :param matches: Matches. If some query descriptor (keypoint) masked out in ``mask`` + no match will be added for this descriptor. + So ``matches`` size may be less query keypoints count. + + + :param mask: Mask specifying permissible matches between input query and train keypoints. + + + :param masks: The set of masks. Each ``masks[i]`` specifies permissible matches between input query keypoints + and stored train keypointss from i-th image. + + + + +.. 
index:: GenericDescriptorMatcher::knnMatch + + +cv::GenericDescriptorMatcher::knnMatch +-------------------------------------- + +`id=0.828361496735 Comments from the Wiki `__ + + +:func:`GenericDescriptorMatcher::match` +:func:`DescriptorMatcher::knnMatch` + + +.. cfunction:: void GenericDescriptorMatcher::knnMatch( const Mat\& queryImage, vector\& queryKeypoints, const Mat\& trainImage, vector\& trainKeypoints, vector >\& matches, int k, const Mat\& mask=Mat(), bool compactResult=false ) const + + Find the knn best matches for each keypoint from a query set with train keypoints. +Found knn (or less if not possible) matches are returned in distance increasing order. +Details see in and . + + + + + +.. cfunction:: void GenericDescriptorMatcher::knnMatch( const Mat\& queryImage, vector\& queryKeypoints, vector >\& matches, int k, const vector\& masks=vector(), bool compactResult=false ) + + + + +.. index:: GenericDescriptorMatcher::radiusMatch + + +cv::GenericDescriptorMatcher::radiusMatch +----------------------------------------- + +`id=0.732845229707 Comments from the Wiki `__ + + +:func:`GenericDescriptorMatcher::match` +:func:`DescriptorMatcher::radiusMatch` + + +.. cfunction:: void GenericDescriptorMatcher::radiusMatch( const Mat\& queryImage, vector\& queryKeypoints, const Mat\& trainImage, vector\& trainKeypoints, vector >\& matches, float maxDistance, const Mat\& mask=Mat(), bool compactResult=false ) const + + Find the best matches for each query keypoint which have distance less than given threshold. +Found matches are returned in distance increasing order. Details see in and . + + + + + +.. cfunction:: void GenericDescriptorMatcher::radiusMatch( const Mat\& queryImage, vector\& queryKeypoints, vector >\& matches, float maxDistance, const vector\& masks=vector(), bool compactResult=false ) + + + + +.. index:: GenericDescriptorMatcher::read + + +cv::GenericDescriptorMatcher::read +---------------------------------- + +`id=0.937930388921 Comments from the Wiki `__ + + + + +.. cfunction:: void GenericDescriptorMatcher::read( const FileNode\& fn ) + + Reads matcher object from a file node. + + + + +.. index:: GenericDescriptorMatcher::write + + +cv::GenericDescriptorMatcher::write +----------------------------------- + +`id=0.509497773169 Comments from the Wiki `__ + + + + +.. cfunction:: void GenericDescriptorMatcher::write( FileStorage\& fs ) const + + Writes match object to a file storage + + + + +.. index:: GenericDescriptorMatcher::clone + + +cv::GenericDescriptorMatcher::clone +----------------------------------- + +`id=0.864304581549 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr\\GenericDescriptorMatcher::clone( bool emptyTrainData ) const + + Clone the matcher. + + + + + + + :param emptyTrainData: If emptyTrainData is false the method create deep copy of the object, i.e. copies + both parameters and train data. If emptyTrainData is true the method create object copy with current parameters + but with empty train data. + + + + +.. index:: OneWayDescriptorMatcher + +.. _OneWayDescriptorMatcher: + +OneWayDescriptorMatcher +----------------------- + +`id=0.295296902287 Comments from the Wiki `__ + +.. ctype:: OneWayDescriptorMatcher + + + +Wrapping class for computing, matching and classification of descriptors using +:func:`OneWayDescriptorBase` +class. 
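A construction sketch, with the caveat that the ``Params`` fields (shown in the class listing below) point at precomputed training data; the ``"pca.yml"`` file name here is hypothetical:

::

    #include <opencv2/features2d/features2d.hpp>
    using namespace cv;

    void createOneWayMatcher()
    {
        OneWayDescriptorMatcher::Params params;
        params.pcaFilename = "pca.yml"; // hypothetical path to precomputed PCA data
        OneWayDescriptorMatcher matcher( params );
        // train images and keypoints are then supplied via add() and train(),
        // and query keypoints are classified via classify() or match()
    }

..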
+ + + + +:: + + + + class OneWayDescriptorMatcher : public GenericDescriptorMatcher + { + public: + class Params + { + public: + static const int POSE_COUNT = 500; + static const int PATCH_WIDTH = 24; + static const int PATCH_HEIGHT = 24; + static float GET_MIN_SCALE() { return 0.7f; } + static float GET_MAX_SCALE() { return 1.5f; } + static float GET_STEP_SCALE() { return 1.2f; } + + Params( int poseCount = POSE_COUNT, + Size patchSize = Size(PATCH_WIDTH, PATCH_HEIGHT), + string pcaFilename = string(), + string trainPath = string(), string trainImagesList = string(), + float minScale = GET_MIN_SCALE(), float maxScale = GET_MAX_SCALE(), + float stepScale = GET_STEP_SCALE() ); + + int poseCount; + Size patchSize; + string pcaFilename; + string trainPath; + string trainImagesList; + + float minScale, maxScale, stepScale; + }; + + OneWayDescriptorMatcher( const Params& params=Params() ); + virtual ~OneWayDescriptorMatcher(); + + void initialize( const Params& params, const Ptr& base=Ptr() ); + + // Clears keypoints storing in collection and OneWayDescriptorBase + virtual void clear(); + + virtual void train(); + + virtual bool isMaskSupported(); + + virtual void read( const FileNode &fn ); + virtual void write( FileStorage& fs ) const; + + virtual Ptr clone( bool emptyTrainData=false ) const; + protected: + ... + }; + + +.. + + +.. index:: FernDescriptorMatcher + +.. _FernDescriptorMatcher: + +FernDescriptorMatcher +--------------------- + +`id=0.410971973421 Comments from the Wiki `__ + +.. ctype:: FernDescriptorMatcher + + + +Wrapping class for computing, matching and classification of descriptors using +:func:`FernClassifier` +class. + + + + +:: + + + + class FernDescriptorMatcher : public GenericDescriptorMatcher + { + public: + class Params + { + public: + Params( int nclasses=0, + int patchSize=FernClassifier::PATCH_SIZE, + int signatureSize=FernClassifier::DEFAULT_SIGNATURE_SIZE, + int nstructs=FernClassifier::DEFAULT_STRUCTS, + int structSize=FernClassifier::DEFAULT_STRUCT_SIZE, + int nviews=FernClassifier::DEFAULT_VIEWS, + int compressionMethod=FernClassifier::COMPRESSION_NONE, + const PatchGenerator& patchGenerator=PatchGenerator() ); + + Params( const string& filename ); + + int nclasses; + int patchSize; + int signatureSize; + int nstructs; + int structSize; + int nviews; + int compressionMethod; + PatchGenerator patchGenerator; + + string filename; + }; + + FernDescriptorMatcher( const Params& params=Params() ); + virtual ~FernDescriptorMatcher(); + + virtual void clear(); + + virtual void train(); + + virtual bool isMaskSupported(); + + virtual void read( const FileNode &fn ); + virtual void write( FileStorage& fs ) const; + + virtual Ptr clone( bool emptyTrainData=false ) const; + + protected: + ... + }; + + +.. + + +.. index:: VectorDescriptorMatcher + +.. _VectorDescriptorMatcher: + +VectorDescriptorMatcher +----------------------- + +`id=0.89575693039 Comments from the Wiki `__ + +.. ctype:: VectorDescriptorMatcher + + + +Class used for matching descriptors that can be described as vectors in a finite-dimensional space. 
+ + + + +:: + + + + class CV_EXPORTS VectorDescriptorMatcher : public GenericDescriptorMatcher + { + public: + VectorDescriptorMatcher( const Ptr& extractor, const Ptr& matcher ); + virtual ~VectorDescriptorMatcher(); + + virtual void add( const vector& imgCollection, + vector >& pointCollection ); + virtual void clear(); + virtual void train(); + virtual bool isMaskSupported(); + + virtual void read( const FileNode& fn ); + virtual void write( FileStorage& fs ) const; + + virtual Ptr clone( bool emptyTrainData=false ) const; + + protected: + ... + }; + + +.. + +Example of creating: + + + +:: + + + + VectorDescriptorMatcher matcher( new SurfDescriptorExtractor, + new BruteForceMatcher > ); + + +.. + diff --git a/modules/features2d/doc/drawing_function_of_keypoints_and_matches.rst b/modules/features2d/doc/drawing_function_of_keypoints_and_matches.rst new file mode 100644 index 000000000..dfb160436 --- /dev/null +++ b/modules/features2d/doc/drawing_function_of_keypoints_and_matches.rst @@ -0,0 +1,140 @@ +Drawing Function of Keypoints and Matches +========================================= + +.. highlight:: cpp + + + +.. index:: drawMatches + + +cv::drawMatches +--------------- + +`id=0.919261687295 Comments from the Wiki `__ + + + + +.. cfunction:: void drawMatches( const Mat\& img1, const vector\& keypoints1, const Mat\& img2, const vector\& keypoints2, const vector\& matches1to2, Mat\& outImg, const Scalar\& matchColor=Scalar::all(-1), const Scalar\& singlePointColor=Scalar::all(-1), const vector\& matchesMask=vector(), int flags=DrawMatchesFlags::DEFAULT ) + + This function draws matches of keypints from two images on output image. +Match is a line connecting two keypoints (circles). + + + + + +.. cfunction:: void drawMatches( const Mat\& img1, const vector\& keypoints1, const Mat\& img2, const vector\& keypoints2, const vector >\& matches1to2, Mat\& outImg, const Scalar\& matchColor=Scalar::all(-1), const Scalar\& singlePointColor=Scalar::all(-1), const vector>\& matchesMask= vector >(), int flags=DrawMatchesFlags::DEFAULT ) + + + + + + + :param img1: First source image. + + + :param keypoints1: Keypoints from first source image. + + + :param img2: Second source image. + + + :param keypoints2: Keypoints from second source image. + + + :param matches: Matches from first image to second one, i.e. ``keypoints1[i]`` + has corresponding point ``keypoints2[matches[i]]`` . + + + :param outImg: Output image. Its content depends on ``flags`` value + what is drawn in output image. See below possible ``flags`` bit values. + + + :param matchColor: Color of matches (lines and connected keypoints). + If ``matchColor==Scalar::all(-1)`` color will be generated randomly. + + + :param singlePointColor: Color of single keypoints (circles), i.e. keypoints not having the matches. + If ``singlePointColor==Scalar::all(-1)`` color will be generated randomly. + + + :param matchesMask: Mask determining which matches will be drawn. If mask is empty all matches will be drawn. + + + :param flags: Each bit of ``flags`` sets some feature of drawing. + Possible ``flags`` bit values is defined by ``DrawMatchesFlags`` , see below. + + + + + + +:: + + + + struct DrawMatchesFlags + { + enum{ DEFAULT = 0, // Output image matrix will be created (Mat::create), + // i.e. existing memory of output image may be reused. + // Two source image, matches and single keypoints + // will be drawn. + // For each keypoint only the center point will be + // drawn (without the circle around keypoint with + // keypoint size and orientation). 
+ DRAW_OVER_OUTIMG = 1, // Output image matrix will not be + // created (Mat::create). Matches will be drawn + // on existing content of output image. + NOT_DRAW_SINGLE_POINTS = 2, // Single keypoints will not be drawn. + DRAW_RICH_KEYPOINTS = 4 // For each keypoint the circle around + // keypoint with keypoint size and orientation will + // be drawn. + }; + }; + + +.. + + +.. index:: drawKeypoints + + +cv::drawKeypoints +----------------- + +`id=0.694314481427 Comments from the Wiki `__ + + + + +.. cfunction:: void drawKeypoints( const Mat\& image, const vector\& keypoints, Mat\& outImg, const Scalar\& color=Scalar::all(-1), int flags=DrawMatchesFlags::DEFAULT ) + + Draw keypoints. + + + + + + + :param image: Source image. + + + :param keypoints: Keypoints from source image. + + + :param outImg: Output image. Its content depends on ``flags`` value + what is drawn in output image. See possible ``flags`` bit values. + + + :param color: Color of keypoints + + . + + :param flags: Each bit of ``flags`` sets some feature of drawing. + Possible ``flags`` bit values is defined by ``DrawMatchesFlags`` , + see above in :func:`drawMatches` . + + + diff --git a/modules/features2d/doc/feature_detection_and_description.rst b/modules/features2d/doc/feature_detection_and_description.rst new file mode 100644 index 000000000..28baf634b --- /dev/null +++ b/modules/features2d/doc/feature_detection_and_description.rst @@ -0,0 +1,975 @@ +Feature detection and description +================================= + +.. highlight:: cpp + + + +.. index:: FAST + + +cv::FAST +-------- + +`id=0.180338558353 Comments from the Wiki `__ + + + + +.. cfunction:: void FAST( const Mat\& image, vector\& keypoints, int threshold, bool nonmaxSupression=true ) + + Detects corners using FAST algorithm by E. Rosten (''Machine learning for high-speed corner detection'', 2006). + + + + + + :param image: The image. Keypoints (corners) will be detected on this. + + + :param keypoints: Keypoints detected on the image. + + + :param threshold: Threshold on difference between intensity of center pixel and + pixels on circle around this pixel. See description of the algorithm. + + + :param nonmaxSupression: If it is true then non-maximum supression will be applied to detected corners (keypoints). + + + + +.. index:: MSER + +.. _MSER: + +MSER +---- + +`id=0.0333368188128 Comments from the Wiki `__ + +.. ctype:: MSER + + + +Maximally-Stable Extremal Region Extractor + + + + +:: + + + + class MSER : public CvMSERParams + { + public: + // default constructor + MSER(); + // constructor that initializes all the algorithm parameters + MSER( int _delta, int _min_area, int _max_area, + float _max_variation, float _min_diversity, + int _max_evolution, double _area_threshold, + double _min_margin, int _edge_blur_size ); + // runs the extractor on the specified image; returns the MSERs, + // each encoded as a contour (vector, see findContours) + // the optional mask marks the area where MSERs are searched for + void operator()( const Mat& image, vector >& msers, const Mat& mask ) const; + }; + + +.. + +The class encapsulates all the parameters of MSER (see +http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions +) extraction algorithm. + + +.. index:: StarDetector + +.. _StarDetector: + +StarDetector +------------ + +`id=0.378812518152 Comments from the Wiki `__ + +.. 
ctype:: StarDetector + + + +Implements Star keypoint detector + + + + +:: + + + + class StarDetector : CvStarDetectorParams + { + public: + // default constructor + StarDetector(); + // the full constructor initialized all the algorithm parameters: + // maxSize - maximum size of the features. The following + // values of the parameter are supported: + // 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128 + // responseThreshold - threshold for the approximated laplacian, + // used to eliminate weak features. The larger it is, + // the less features will be retrieved + // lineThresholdProjected - another threshold for the laplacian to + // eliminate edges + // lineThresholdBinarized - another threshold for the feature + // size to eliminate edges. + // The larger the 2 threshold, the more points you get. + StarDetector(int maxSize, int responseThreshold, + int lineThresholdProjected, + int lineThresholdBinarized, + int suppressNonmaxSize); + + // finds keypoints in an image + void operator()(const Mat& image, vector& keypoints) const; + }; + + +.. + +The class implements a modified version of CenSurE keypoint detector described in +Agrawal08 + +.. index:: SIFT + +.. _SIFT: + +SIFT +---- + +`id=0.385373212311 Comments from the Wiki `__ + +.. ctype:: SIFT + + + +Class for extracting keypoints and computing descriptors using approach named Scale Invariant Feature Transform (SIFT). + + + + +:: + + + + class CV_EXPORTS SIFT + { + public: + struct CommonParams + { + static const int DEFAULT_NOCTAVES = 4; + static const int DEFAULT_NOCTAVE_LAYERS = 3; + static const int DEFAULT_FIRST_OCTAVE = -1; + enum{ FIRST_ANGLE = 0, AVERAGE_ANGLE = 1 }; + + CommonParams(); + CommonParams( int _nOctaves, int _nOctaveLayers, int _firstOctave, + int _angleMode ); + int nOctaves, nOctaveLayers, firstOctave; + int angleMode; + }; + + struct DetectorParams + { + static double GET_DEFAULT_THRESHOLD() + { return 0.04 / SIFT::CommonParams::DEFAULT_NOCTAVE_LAYERS / 2.0; } + static double GET_DEFAULT_EDGE_THRESHOLD() { return 10.0; } + + DetectorParams(); + DetectorParams( double _threshold, double _edgeThreshold ); + double threshold, edgeThreshold; + }; + + struct DescriptorParams + { + static double GET_DEFAULT_MAGNIFICATION() { return 3.0; } + static const bool DEFAULT_IS_NORMALIZE = true; + static const int DESCRIPTOR_SIZE = 128; + + DescriptorParams(); + DescriptorParams( double _magnification, bool _isNormalize, + bool _recalculateAngles ); + double magnification; + bool isNormalize; + bool recalculateAngles; + }; + + SIFT(); + //! sift-detector constructor + SIFT( double _threshold, double _edgeThreshold, + int _nOctaves=CommonParams::DEFAULT_NOCTAVES, + int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS, + int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE, + int _angleMode=CommonParams::FIRST_ANGLE ); + //! sift-descriptor constructor + SIFT( double _magnification, bool _isNormalize=true, + bool _recalculateAngles = true, + int _nOctaves=CommonParams::DEFAULT_NOCTAVES, + int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS, + int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE, + int _angleMode=CommonParams::FIRST_ANGLE ); + SIFT( const CommonParams& _commParams, + const DetectorParams& _detectorParams = DetectorParams(), + const DescriptorParams& _descriptorParams = DescriptorParams() ); + + //! returns the descriptor size in floats (128) + int descriptorSize() const { return DescriptorParams::DESCRIPTOR_SIZE; } + //! 
finds the keypoints using SIFT algorithm + void operator()(const Mat& img, const Mat& mask, + vector& keypoints) const; + //! finds the keypoints and computes descriptors for them using SIFT algorithm. + //! Optionally it can compute descriptors for the user-provided keypoints + void operator()(const Mat& img, const Mat& mask, + vector& keypoints, + Mat& descriptors, + bool useProvidedKeypoints=false) const; + + CommonParams getCommonParams () const { return commParams; } + DetectorParams getDetectorParams () const { return detectorParams; } + DescriptorParams getDescriptorParams () const { return descriptorParams; } + protected: + ... + }; + + +.. + + +.. index:: SURF + +.. _SURF: + +SURF +---- + +`id=0.43149154692 Comments from the Wiki `__ + +.. ctype:: SURF + + + +Class for extracting Speeded Up Robust Features from an image. + + + + +:: + + + + class SURF : public CvSURFParams + { + public: + // default constructor + SURF(); + // constructor that initializes all the algorithm parameters + SURF(double _hessianThreshold, int _nOctaves=4, + int _nOctaveLayers=2, bool _extended=false); + // returns the number of elements in each descriptor (64 or 128) + int descriptorSize() const; + // detects keypoints using fast multi-scale Hessian detector + void operator()(const Mat& img, const Mat& mask, + vector& keypoints) const; + // detects keypoints and computes the SURF descriptors for them; + // output vector "descriptors" stores elements of descriptors and has size + // equal descriptorSize()*keypoints.size() as each descriptor is + // descriptorSize() elements of this vector. + void operator()(const Mat& img, const Mat& mask, + vector& keypoints, + vector& descriptors, + bool useProvidedKeypoints=false) const; + }; + + +.. + +The class +``SURF`` +implements Speeded Up Robust Features descriptor +Bay06 +. +There is fast multi-scale Hessian keypoint detector that can be used to find the keypoints +(which is the default option), but the descriptors can be also computed for the user-specified keypoints. +The function can be used for object tracking and localization, image stitching etc. See the +``find_obj.cpp`` +demo in OpenCV samples directory. + + +.. index:: RandomizedTree + +.. _RandomizedTree: + +RandomizedTree +-------------- + +`id=0.539311466248 Comments from the Wiki `__ + +.. 
ctype:: RandomizedTree + + + +The class contains base structure for +``RTreeClassifier`` + + + +:: + + + + class CV_EXPORTS RandomizedTree + { + public: + friend class RTreeClassifier; + + RandomizedTree(); + ~RandomizedTree(); + + void train(std::vector const& base_set, + cv::RNG &rng, int depth, int views, + size_t reduced_num_dim, int num_quant_bits); + void train(std::vector const& base_set, + cv::RNG &rng, PatchGenerator &make_patch, int depth, + int views, size_t reduced_num_dim, int num_quant_bits); + + // following two funcs are EXPERIMENTAL + //(do not use unless you know exactly what you do) + static void quantizeVector(float *vec, int dim, int N, float bnds[2], + int clamp_mode=0); + static void quantizeVector(float *src, int dim, int N, float bnds[2], + uchar *dst); + + // patch_data must be a 32x32 array (no row padding) + float* getPosterior(uchar* patch_data); + const float* getPosterior(uchar* patch_data) const; + uchar* getPosterior2(uchar* patch_data); + + void read(const char* file_name, int num_quant_bits); + void read(std::istream &is, int num_quant_bits); + void write(const char* file_name) const; + void write(std::ostream &os) const; + + int classes() { return classes_; } + int depth() { return depth_; } + + void discardFloatPosteriors() { freePosteriors(1); } + + inline void applyQuantization(int num_quant_bits) + { makePosteriors2(num_quant_bits); } + + private: + int classes_; + int depth_; + int num_leaves_; + std::vector nodes_; + float **posteriors_; // 16-bytes aligned posteriors + uchar **posteriors2_; // 16-bytes aligned posteriors + std::vector leaf_counts_; + + void createNodes(int num_nodes, cv::RNG &rng); + void allocPosteriorsAligned(int num_leaves, int num_classes); + void freePosteriors(int which); + // which: 1=posteriors_, 2=posteriors2_, 3=both + void init(int classes, int depth, cv::RNG &rng); + void addExample(int class_id, uchar* patch_data); + void finalize(size_t reduced_num_dim, int num_quant_bits); + int getIndex(uchar* patch_data) const; + inline float* getPosteriorByIndex(int index); + inline uchar* getPosteriorByIndex2(int index); + inline const float* getPosteriorByIndex(int index) const; + void convertPosteriorsToChar(); + void makePosteriors2(int num_quant_bits); + void compressLeaves(size_t reduced_num_dim); + void estimateQuantPercForPosteriors(float perc[2]); + }; + + +.. + + +.. index:: RandomizedTree::train + + +cv::RandomizedTree::train +------------------------- + +`id=0.360469298211 Comments from the Wiki `__ + + + + +.. cfunction:: void train(std::vector const\& base_set, cv::RNG \&rng, PatchGenerator \&make_patch, int depth, int views, size_t reduced_num_dim, int num_quant_bits) + + Trains a randomized tree using input set of keypoints + + + + + +.. cfunction:: void train(std::vector const\& base_set, cv::RNG \&rng, PatchGenerator \&make_patch, int depth, int views, size_t reduced_num_dim, int num_quant_bits) + + + + + + {Vector of + ``BaseKeypoint`` + type. Contains keypoints from the image are used for training} + {Random numbers generator is used for training} + {Patch generator is used for training} + {Maximum tree depth} + + {Number of dimensions are used in compressed signature} + {Number of bits are used for quantization} + + + +.. index:: RandomizedTree::read + + +cv::RandomizedTree::read +------------------------ + +`id=0.663893576705 Comments from the Wiki `__ + + + + +.. cfunction:: read(const char* file_name, int num_quant_bits) + + Reads pre-saved randomized tree from file or stream + + + + +.. 
cfunction:: read(std::istream \&is, int num_quant_bits) + + + + + + + :param file_name: Filename of file contains randomized tree data + + + :param is: Input stream associated with file contains randomized tree data + + {Number of bits are used for quantization} + + + +.. index:: RandomizedTree::write + + +cv::RandomizedTree::write +------------------------- + +`id=0.640726433619 Comments from the Wiki `__ + + + + +.. cfunction:: void write(const char* file_name) const + + Writes current randomized tree to a file or stream + + + + +.. cfunction:: void write(std::ostream \&os) const + + + + + + + :param file_name: Filename of file where randomized tree data will be stored + + + :param is: Output stream associated with file where randomized tree data will be stored + + + + +.. index:: RandomizedTree::applyQuantization + + +cv::RandomizedTree::applyQuantization +------------------------------------- + +`id=0.113364904421 Comments from the Wiki `__ + + + + +.. cfunction:: void applyQuantization(int num_quant_bits) + + Applies quantization to the current randomized tree + + + + + {Number of bits are used for quantization} + + + +.. index:: RTreeNode + +.. _RTreeNode: + +RTreeNode +--------- + +`id=0.718763052087 Comments from the Wiki `__ + +.. ctype:: RTreeNode + + + +The class contains base structure for +``RandomizedTree`` + + + +:: + + + + struct RTreeNode + { + short offset1, offset2; + + RTreeNode() {} + + RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2) + : offset1(y1*PATCH_SIZE + x1), + offset2(y2*PATCH_SIZE + x2) + {} + + //! Left child on 0, right child on 1 + inline bool operator() (uchar* patch_data) const + { + return patch_data[offset1] > patch_data[offset2]; + } + }; + + +.. + + +.. index:: RTreeClassifier + +.. _RTreeClassifier: + +RTreeClassifier +--------------- + +`id=0.477872539921 Comments from the Wiki `__ + +.. ctype:: RTreeClassifier + + + +The class contains +``RTreeClassifier`` +. 
+.. index:: RTreeNode
+
+.. _RTreeNode:
+
+RTreeNode
+---------
+
+.. ctype:: RTreeNode
+
+The class contains the base structure for ``RandomizedTree``. ::
+
+    struct RTreeNode
+    {
+        short offset1, offset2;
+
+        RTreeNode() {}
+
+        RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2)
+            : offset1(y1*PATCH_SIZE + x1),
+              offset2(y2*PATCH_SIZE + x2)
+        {}
+
+        //! Left child on 0, right child on 1
+        inline bool operator() (uchar* patch_data) const
+        {
+            return patch_data[offset1] > patch_data[offset2];
+        }
+    };
+
+..
+
+.. index:: RTreeClassifier
+
+.. _RTreeClassifier:
+
+RTreeClassifier
+---------------
+
+.. ctype:: RTreeClassifier
+
+The class contains an ensemble of ``RandomizedTree`` instances. It represents the Calonder descriptor, which was originally introduced by Michael Calonder. ::
+
+    class CV_EXPORTS RTreeClassifier
+    {
+    public:
+        static const int DEFAULT_TREES = 48;
+        static const size_t DEFAULT_NUM_QUANT_BITS = 4;
+
+        RTreeClassifier();
+
+        void train(std::vector<BaseKeypoint> const& base_set,
+                   cv::RNG &rng,
+                   int num_trees = RTreeClassifier::DEFAULT_TREES,
+                   int depth = DEFAULT_DEPTH,
+                   int views = DEFAULT_VIEWS,
+                   size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
+                   int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
+                   bool print_status = true);
+        void train(std::vector<BaseKeypoint> const& base_set,
+                   cv::RNG &rng,
+                   PatchGenerator &make_patch,
+                   int num_trees = RTreeClassifier::DEFAULT_TREES,
+                   int depth = DEFAULT_DEPTH,
+                   int views = DEFAULT_VIEWS,
+                   size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
+                   int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
+                   bool print_status = true);
+
+        // sig must point to a memory block of at least
+        // classes()*sizeof(float|uchar) bytes
+        void getSignature(IplImage *patch, uchar *sig);
+        void getSignature(IplImage *patch, float *sig);
+        void getSparseSignature(IplImage *patch, float *sig,
+                                float thresh);
+
+        static int countNonZeroElements(float *vec, int n, double tol=1e-10);
+        static inline void safeSignatureAlloc(uchar **sig, int num_sig=1,
+                                              int sig_len=176);
+        static inline uchar* safeSignatureAlloc(int num_sig=1,
+                                                int sig_len=176);
+
+        inline int classes() { return classes_; }
+        inline int original_num_classes()
+        { return original_num_classes_; }
+
+        void setQuantization(int num_quant_bits);
+        void discardFloatPosteriors();
+
+        void read(const char* file_name);
+        void read(std::istream &is);
+        void write(const char* file_name) const;
+        void write(std::ostream &os) const;
+
+        std::vector<RandomizedTree> trees_;
+
+    private:
+        int classes_;
+        int num_quant_bits_;
+        uchar **posteriors_;
+        ushort *ptemp_;
+        int original_num_classes_;
+        bool keep_floats_;
+    };
+
+..
+
+.. index:: RTreeClassifier::train
+
+cv::RTreeClassifier::train
+--------------------------
+
+.. cfunction:: void train(std::vector<BaseKeypoint> const& base_set, cv::RNG &rng, int num_trees = RTreeClassifier::DEFAULT_TREES, int depth = DEFAULT_DEPTH, int views = DEFAULT_VIEWS, size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM, int num_quant_bits = DEFAULT_NUM_QUANT_BITS, bool print_status = true)
+
+    Trains a randomized tree classifier using an input set of keypoints.
+
+.. cfunction:: void train(std::vector<BaseKeypoint> const& base_set, cv::RNG &rng, PatchGenerator &make_patch, int num_trees = RTreeClassifier::DEFAULT_TREES, int depth = DEFAULT_DEPTH, int views = DEFAULT_VIEWS, size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM, int num_quant_bits = DEFAULT_NUM_QUANT_BITS, bool print_status = true)
+
+    :param base_set: Vector of ``BaseKeypoint`` type. Contains the keypoints from the image used for training.
+
+    :param rng: Random number generator used for training.
+
+    :param make_patch: Patch generator used for training.
+
+    :param num_trees: Number of randomized trees used in the ``RTreeClassifier``.
+
+    :param depth: Maximum tree depth.
+
+    :param views: Number of random views of each keypoint neighborhood to generate.
+
+    :param reduced_num_dim: Number of dimensions used in the compressed signature.
+
+    :param num_quant_bits: Number of bits used for quantization.
+
+    :param print_status: Print the current status of training on the console.
+
+.. index:: RTreeClassifier::getSignature
+
+cv::RTreeClassifier::getSignature
+---------------------------------
+
+.. cfunction:: void getSignature(IplImage *patch, uchar *sig)
+
+    Returns a signature for an image patch.
+.. cfunction:: void getSignature(IplImage *patch, float *sig)
+
+    :param patch: Image patch to calculate the signature for.
+
+    :param sig: Output signature (array dimension is ``reduced_num_dim``).
+
+.. index:: RTreeClassifier::getSparseSignature
+
+cv::RTreeClassifier::getSparseSignature
+---------------------------------------
+
+.. cfunction:: void getSparseSignature(IplImage *patch, float *sig, float thresh)
+
+    Similar to ``getSignature``, but removes all signature elements below the given threshold, so that the signature is compressed.
+
+    :param patch: Image patch to calculate the signature for.
+
+    :param sig: Output signature (array dimension is ``reduced_num_dim``).
+
+    :param thresh: Threshold used for compressing the signature.
+
+.. index:: RTreeClassifier::countNonZeroElements
+
+cv::RTreeClassifier::countNonZeroElements
+-----------------------------------------
+
+.. cfunction:: static int countNonZeroElements(float *vec, int n, double tol=1e-10)
+
+    Returns the number of non-zero elements in the input array.
+
+    :param vec: Input vector containing float elements.
+
+    :param n: Input vector size.
+
+    :param tol: Threshold used for counting elements. All elements with an absolute value less than ``tol`` are treated as zeros.
+
+.. index:: RTreeClassifier::read
+
+cv::RTreeClassifier::read
+-------------------------
+
+.. cfunction:: void read(const char* file_name)
+
+    Reads a pre-saved ``RTreeClassifier`` from a file or stream.
+
+.. cfunction:: void read(std::istream &is)
+
+    :param file_name: Name of the file containing randomized tree data.
+
+    :param is: Input stream associated with the file containing randomized tree data.
+
+.. index:: RTreeClassifier::write
+
+cv::RTreeClassifier::write
+--------------------------
+
+.. cfunction:: void write(const char* file_name) const
+
+    Writes the current ``RTreeClassifier`` to a file or stream.
+
+.. cfunction:: void write(std::ostream &os) const
+
+    :param file_name: Name of the file where randomized tree data will be stored.
+
+    :param os: Output stream associated with the file where randomized tree data will be stored.
+
+.. index:: RTreeClassifier::setQuantization
+
+cv::RTreeClassifier::setQuantization
+------------------------------------
+
+.. cfunction:: void setQuantization(int num_quant_bits)
+
+    Applies quantization to the current randomized tree.
+
+    :param num_quant_bits: Number of bits used for quantization.
+
+Below is an example of ``RTreeClassifier`` usage for feature matching. There are test and train images, and features are extracted from both with SURF. The output is the :math:`best\_corr` and :math:`best\_corr\_idx` arrays, which keep the best probabilities and the corresponding feature indices for every feature of the test image.
+::
+
+    CvMemStorage* storage = cvCreateMemStorage(0);
+    CvSeq *objectKeypoints = 0, *objectDescriptors = 0;
+    CvSeq *imageKeypoints = 0, *imageDescriptors = 0;
+    CvSURFParams params = cvSURFParams(500, 1);
+    cvExtractSURF( test_image, 0, &imageKeypoints, &imageDescriptors,
+                   storage, params );
+    cvExtractSURF( train_image, 0, &objectKeypoints, &objectDescriptors,
+                   storage, params );
+
+    cv::RTreeClassifier detector;
+    int patch_width = cv::PATCH_SIZE;
+    int patch_height = cv::PATCH_SIZE;
+    vector<cv::BaseKeypoint> base_set;
+    int i=0;
+    CvSURFPoint* point;
+    for (i=0;i<(n_points > 0 ? n_points : objectKeypoints->total);i++)
+    {
+        point=(CvSURFPoint*)cvGetSeqElem(objectKeypoints,i);
+        base_set.push_back(
+            cv::BaseKeypoint(point->pt.x,point->pt.y,train_image));
+    }
+
+    //Detector training
+    cv::RNG rng( cvGetTickCount() );
+    cv::PatchGenerator gen(0,255,2,false,0.7,1.3,-CV_PI/3,CV_PI/3,
+                           -CV_PI/3,CV_PI/3);
+
+    printf("RTree Classifier training...\n");
+    detector.train(base_set,rng,gen,24,cv::DEFAULT_DEPTH,2000,
+                   (int)base_set.size(), detector.DEFAULT_NUM_QUANT_BITS);
+    printf("Done\n");
+
+    float* signature = new float[detector.original_num_classes()];
+    float* best_corr;
+    int* best_corr_idx;
+    if (imageKeypoints->total > 0)
+    {
+        best_corr = new float[imageKeypoints->total];
+        best_corr_idx = new int[imageKeypoints->total];
+    }
+
+    for(i=0; i < imageKeypoints->total; i++)
+    {
+        point=(CvSURFPoint*)cvGetSeqElem(imageKeypoints,i);
+        int part_idx = -1;
+        float prob = 0.0f;
+
+        CvRect roi = cvRect((int)(point->pt.x) - patch_width/2,
+                            (int)(point->pt.y) - patch_height/2,
+                            patch_width, patch_height);
+        cvSetImageROI(test_image, roi);
+        roi = cvGetImageROI(test_image);
+        if(roi.width != patch_width || roi.height != patch_height)
+        {
+            best_corr_idx[i] = part_idx;
+            best_corr[i] = prob;
+        }
+        else
+        {
+            cvSetImageROI(test_image, roi);
+            IplImage* roi_image =
+                cvCreateImage(cvSize(roi.width, roi.height),
+                              test_image->depth, test_image->nChannels);
+            cvCopy(test_image,roi_image);
+
+            detector.getSignature(roi_image, signature);
+            for (int j = 0; j< detector.original_num_classes();j++)
+            {
+                if (prob < signature[j])
+                {
+                    part_idx = j;
+                    prob = signature[j];
+                }
+            }
+
+            best_corr_idx[i] = part_idx;
+            best_corr[i] = prob;
+
+            if (roi_image)
+                cvReleaseImage(&roi_image);
+        }
+        cvResetImageROI(test_image);
+    }
+
+..
diff --git a/modules/features2d/doc/features2d.rst b/modules/features2d/doc/features2d.rst
new file mode 100644
index 000000000..9569bb8a8
--- /dev/null
+++ b/modules/features2d/doc/features2d.rst
@@ -0,0 +1,14 @@
+*********************
+2D Features Framework
+*********************
+
+.. toctree::
+   :maxdepth: 2
+
+   feature_detection_and_description
+   common_interfaces_of_feature_detectors
+   common_interfaces_of_descriptor_extractors
+   common_interfaces_of_descriptor_matchers
+   common_interfaces_of_generic_descriptor_matchers
+   drawing_function_of_keypoints_and_matches
+   object_categorization
diff --git a/modules/features2d/doc/object_categorization.rst b/modules/features2d/doc/object_categorization.rst
new file mode 100644
index 000000000..32ecbe9df
--- /dev/null
+++ b/modules/features2d/doc/object_categorization.rst
@@ -0,0 +1,408 @@
+Object Categorization
+=====================
+
+.. highlight:: cpp
+
+This section describes some approaches based on local 2D features that are used for object categorization.
+
+.. index:: BOWTrainer
+
+.. _BOWTrainer:
+
+BOWTrainer
+----------
+.. ctype:: BOWTrainer
+
+Abstract base class for training a ''bag of visual words'' vocabulary from a set of descriptors.
+See, e.g., ''Visual Categorization with Bags of Keypoints'' by Gabriella Csurka, Christopher R. Dance,
+Lixin Fan, Jutta Willamowski, Cedric Bray, 2004. ::
+
+    class BOWTrainer
+    {
+    public:
+        BOWTrainer(){}
+        virtual ~BOWTrainer(){}
+
+        void add( const Mat& descriptors );
+        const vector<Mat>& getDescriptors() const;
+        int descripotorsCount() const;
+
+        virtual void clear();
+
+        virtual Mat cluster() const = 0;
+        virtual Mat cluster( const Mat& descriptors ) const = 0;
+
+    protected:
+        ...
+    };
+
+..
+
+.. index:: BOWTrainer::add
+
+cv::BOWTrainer::add
+-------------------
+
+.. cfunction:: void BOWTrainer::add( const Mat& descriptors )
+
+    Adds descriptors to the training set. The training set is clustered using the ``cluster`` method to construct the vocabulary.
+
+    :param descriptors: Descriptors to add to the training set. Each row of the ``descriptors`` matrix is one descriptor.
+
+.. index:: BOWTrainer::getDescriptors
+
+cv::BOWTrainer::getDescriptors
+------------------------------
+
+.. cfunction:: const vector<Mat>& BOWTrainer::getDescriptors() const
+
+    Returns the training set of descriptors.
+
+.. index:: BOWTrainer::descripotorsCount
+
+cv::BOWTrainer::descripotorsCount
+---------------------------------
+
+.. cfunction:: int BOWTrainer::descripotorsCount() const
+
+    Returns the count of all descriptors stored in the training set.
+
+.. index:: BOWTrainer::cluster
+
+cv::BOWTrainer::cluster
+-----------------------
+
+.. cfunction:: Mat BOWTrainer::cluster() const
+
+    Clusters the train descriptors. The vocabulary consists of the cluster centers, so this method returns the vocabulary. In the first variant of the method, the train descriptors stored in the object are clustered; in the second variant, the input descriptors are clustered.
+
+.. cfunction:: Mat BOWTrainer::cluster( const Mat& descriptors ) const
+
+    :param descriptors: Descriptors to cluster. Each row of the ``descriptors`` matrix is one descriptor. The descriptors are not added to the inner train descriptor set.
+
+.. index:: BOWKMeansTrainer
+
+.. _BOWKMeansTrainer:
+
+BOWKMeansTrainer
+----------------
+
+.. ctype:: BOWKMeansTrainer
+
+:func:`kmeans`-based class to train a visual vocabulary using the ''bag of visual words'' approach. ::
+
+    class BOWKMeansTrainer : public BOWTrainer
+    {
+    public:
+        BOWKMeansTrainer( int clusterCount, const TermCriteria& termcrit=TermCriteria(),
+                          int attempts=3, int flags=KMEANS_PP_CENTERS );
+        virtual ~BOWKMeansTrainer(){}
+
+        // Returns trained vocabulary (i.e. cluster centers).
+        virtual Mat cluster() const;
+        virtual Mat cluster( const Mat& descriptors ) const;
+
+    protected:
+        ...
+    };
+
+..
+
+To gain an understanding of the constructor parameters, see the :func:`kmeans` function arguments.
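+For illustration, a vocabulary could be built like this (a sketch; ``allDescriptors`` is a hypothetical ``CV_32F`` matrix holding descriptors collected from all training images)::
+
+    cv::TermCriteria tc(cv::TermCriteria::MAX_ITER + cv::TermCriteria::EPS, 10, 0.001);
+    cv::BOWKMeansTrainer bowTrainer(100 /*vocabulary size*/, tc, 3, cv::KMEANS_PP_CENTERS);
+    bowTrainer.add(allDescriptors);
+    cv::Mat vocabulary = bowTrainer.cluster();  // 100 x descriptorSize, CV_32F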
+.. index:: BOWImgDescriptorExtractor
+
+.. _BOWImgDescriptorExtractor:
+
+BOWImgDescriptorExtractor
+-------------------------
+
+.. ctype:: BOWImgDescriptorExtractor
+
+Class to compute an image descriptor using the ''bag of visual words''. In brief, the computation consists of the following steps:
+
+1. Compute descriptors for the given image and its keypoint set.
+2. Find the nearest visual words from the vocabulary for each keypoint descriptor.
+3. Compute the image descriptor as a normalized histogram of vocabulary words encountered in the image, i.e. the ``i``-th bin of the histogram is the frequency of the ``i``-th word of the vocabulary in the given image.
+
+::
+
+    class BOWImgDescriptorExtractor
+    {
+    public:
+        BOWImgDescriptorExtractor( const Ptr<DescriptorExtractor>& dextractor,
+                                   const Ptr<DescriptorMatcher>& dmatcher );
+        virtual ~BOWImgDescriptorExtractor(){}
+
+        void setVocabulary( const Mat& vocabulary );
+        const Mat& getVocabulary() const;
+        void compute( const Mat& image, vector<KeyPoint>& keypoints,
+                      Mat& imgDescriptor,
+                      vector<vector<int> >* pointIdxsOfClusters=0,
+                      Mat* descriptors=0 );
+        int descriptorSize() const;
+        int descriptorType() const;
+
+    protected:
+        ...
+    };
+
+..
+
+.. index:: BOWImgDescriptorExtractor::BOWImgDescriptorExtractor
+
+cv::BOWImgDescriptorExtractor::BOWImgDescriptorExtractor
+--------------------------------------------------------
+
+.. cfunction:: BOWImgDescriptorExtractor::BOWImgDescriptorExtractor( const Ptr<DescriptorExtractor>& dextractor, const Ptr<DescriptorMatcher>& dmatcher )
+
+    Constructor.
+
+    :param dextractor: Descriptor extractor that is used to compute descriptors for the input image and its keypoints.
+
+    :param dmatcher: Descriptor matcher that is used to find the nearest word of the trained vocabulary for each keypoint descriptor of the image.
+
+.. index:: BOWImgDescriptorExtractor::setVocabulary
+
+cv::BOWImgDescriptorExtractor::setVocabulary
+--------------------------------------------
+
+.. cfunction:: void BOWImgDescriptorExtractor::setVocabulary( const Mat& vocabulary )
+
+    Sets the visual vocabulary.
+
+    :param vocabulary: Vocabulary (can be trained using an inheritor of :func:`BOWTrainer` ). Each row of the vocabulary is one visual word (cluster center).
+
+.. index:: BOWImgDescriptorExtractor::getVocabulary
+
+cv::BOWImgDescriptorExtractor::getVocabulary
+--------------------------------------------
+
+.. cfunction:: const Mat& BOWImgDescriptorExtractor::getVocabulary() const
+
+    Returns the set vocabulary.
+
+.. index:: BOWImgDescriptorExtractor::compute
+
+cv::BOWImgDescriptorExtractor::compute
+--------------------------------------
+
+.. cfunction:: void BOWImgDescriptorExtractor::compute( const Mat& image, vector<KeyPoint>& keypoints, Mat& imgDescriptor, vector<vector<int> >* pointIdxsOfClusters=0, Mat* descriptors=0 )
+
+    Computes the image descriptor using the set visual vocabulary.
+
+    :param image: The image for which the descriptor is computed.
+
+    :param keypoints: Keypoints detected in the input image.
+
+    :param imgDescriptor: Output, i.e. the computed image descriptor.
+
+    :param pointIdxsOfClusters: Indices of the keypoints that belong to each cluster, i.e. ``pointIdxsOfClusters[i]`` contains the indices of the keypoints that belong to the ``i``-th cluster (word of the vocabulary) (returned if it is not 0).
+
+    :param descriptors: Descriptors of the image keypoints (returned if it is not 0).
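+Putting it together, an image descriptor might be computed as follows (a sketch; the factory names ``"SURF"`` and ``"BruteForce"``, as well as the ``vocabulary`` matrix from the trainer above, are assumptions of this example)::
+
+    cv::Ptr<cv::DescriptorExtractor> extractor =
+        cv::DescriptorExtractor::create("SURF");
+    cv::Ptr<cv::DescriptorMatcher> matcher =
+        cv::DescriptorMatcher::create("BruteForce");
+
+    cv::BOWImgDescriptorExtractor bowExtractor(extractor, matcher);
+    bowExtractor.setVocabulary(vocabulary);
+
+    std::vector<cv::KeyPoint> keypoints;   // detected beforehand
+    cv::Mat bowDescriptor;                 // 1 x vocabulary-size histogram
+    bowExtractor.compute(image, keypoints, bowDescriptor);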
+.. index:: BOWImgDescriptorExtractor::descriptorSize
+
+cv::BOWImgDescriptorExtractor::descriptorSize
+---------------------------------------------
+
+.. cfunction:: int BOWImgDescriptorExtractor::descriptorSize() const
+
+    Returns the image descriptor size if the vocabulary was set, and 0 otherwise.
+
+.. index:: BOWImgDescriptorExtractor::descriptorType
+
+cv::BOWImgDescriptorExtractor::descriptorType
+---------------------------------------------
+
+.. cfunction:: int BOWImgDescriptorExtractor::descriptorType() const
+
+    Returns the image descriptor type.
diff --git a/modules/gpu/doc/camera_calibration_and_3d_reconstruction.rst b/modules/gpu/doc/camera_calibration_and_3d_reconstruction.rst
new file mode 100644
index 000000000..85a2df5e8
--- /dev/null
+++ b/modules/gpu/doc/camera_calibration_and_3d_reconstruction.rst
@@ -0,0 +1,903 @@
+Camera Calibration and 3d Reconstruction
+========================================
+
+.. highlight:: cpp
+
+.. index:: gpu::StereoBM_GPU
+
+.. _gpu::StereoBM_GPU:
+
+gpu::StereoBM_GPU
+-----------------
+
+.. ctype:: gpu::StereoBM_GPU
+
+The class for computing stereo correspondence using the block matching algorithm. ::
+
+    class StereoBM_GPU
+    {
+    public:
+        enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
+
+        enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
+
+        StereoBM_GPU();
+        StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP,
+                     int winSize = DEFAULT_WINSZ);
+
+        void operator() (const GpuMat& left, const GpuMat& right,
+                         GpuMat& disparity);
+        void operator() (const GpuMat& left, const GpuMat& right,
+                         GpuMat& disparity, const Stream & stream);
+
+        static bool checkIfGpuCallReasonable();
+
+        int preset;
+        int ndisp;
+        int winSize;
+
+        float avergeTexThreshold;
+
+        ...
+    };
+
+..
+
+This class computes the disparity map using the block matching algorithm. The class also performs pre- and post-filtering steps: Sobel prefiltering (if the ``PREFILTER_XSOBEL`` flag is set) and low-textureness filtering (if ``avergeTexThreshold`` :math:`> 0`). If ``avergeTexThreshold = 0``, low-textureness filtering is disabled; otherwise the disparity is set to 0 in each point ``(x, y)`` where for the left image
+
+.. math::
+
+    \sum HorizontalGradientsInWindow(x, y, winSize) < (winSize \cdot winSize) \cdot avergeTexThreshold
+
+i.e. the input left image is low-textured.
+
+.. index:: cv::gpu::StereoBM_GPU::StereoBM_GPU
+
+.. _cv::gpu::StereoBM_GPU::StereoBM_GPU:
+
+cv::gpu::StereoBM_GPU::StereoBM_GPU
+-----------------------------------
+
+.. cfunction:: StereoBM_GPU::StereoBM_GPU()
+
+.. cfunction:: StereoBM_GPU::StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ)
+
+    ``StereoBM_GPU`` constructors.
+
+    :param preset: Preset:
+
+        * **BASIC_PRESET** Without preprocessing.
+
+        * **PREFILTER_XSOBEL** Sobel prefilter.
+
+    :param ndisparities: Number of disparities. Must be a multiple of 8 and less than or equal to 256.
+
+    :param winSize: Block size.
+.. index:: cv::gpu::StereoBM_GPU::operator ()
+
+.. _cv::gpu::StereoBM_GPU::operator ():
+
+cv::gpu::StereoBM_GPU::operator ()
+----------------------------------
+
+.. cfunction:: void StereoBM_GPU::operator() (const GpuMat& left, const GpuMat& right, GpuMat& disparity)
+
+.. cfunction:: void StereoBM_GPU::operator() (const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream)
+
+    The stereo correspondence operator. Finds the disparity for the specified rectified stereo pair.
+
+    :param left: Left image; supports only the ``CV_8UC1`` type.
+
+    :param right: Right image with the same size and the same type as the left one.
+
+    :param disparity: Output disparity map. It will be a ``CV_8UC1`` image with the same size as the input images.
+
+    :param stream: Stream for the asynchronous version.
+
+.. index:: cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable
+
+.. _cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable:
+
+cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable
+-----------------------------------------------
+
+.. cfunction:: bool StereoBM_GPU::checkIfGpuCallReasonable()
+
+    A heuristic that tries to estimate whether the current GPU will be faster than the CPU for this algorithm. It queries the currently active device.
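+A minimal end-to-end call might look like this (a sketch; ``left`` and ``right`` are assumed to be rectified ``CV_8UC1`` ``cv::Mat`` images)::
+
+    cv::gpu::GpuMat d_left, d_right, d_disp;
+    d_left.upload(left);
+    d_right.upload(right);
+
+    cv::gpu::StereoBM_GPU bm(cv::gpu::StereoBM_GPU::BASIC_PRESET, 64, 19);
+    bm(d_left, d_right, d_disp);   // d_disp: CV_8UC1 disparity map
+
+    cv::Mat disp;
+    d_disp.download(disp);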
+.. index:: gpu::StereoBeliefPropagation
+
+.. _gpu::StereoBeliefPropagation:
+
+gpu::StereoBeliefPropagation
+----------------------------
+
+.. ctype:: gpu::StereoBeliefPropagation
+
+The class for computing stereo correspondence using the belief propagation algorithm. ::
+
+    class StereoBeliefPropagation
+    {
+    public:
+        enum { DEFAULT_NDISP = 64 };
+        enum { DEFAULT_ITERS = 5 };
+        enum { DEFAULT_LEVELS = 5 };
+
+        static void estimateRecommendedParams(int width, int height,
+            int& ndisp, int& iters, int& levels);
+
+        explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
+            int iters = DEFAULT_ITERS,
+            int levels = DEFAULT_LEVELS,
+            int msg_type = CV_32F);
+        StereoBeliefPropagation(int ndisp, int iters, int levels,
+            float max_data_term, float data_weight,
+            float max_disc_term, float disc_single_jump,
+            int msg_type = CV_32F);
+
+        void operator()(const GpuMat& left, const GpuMat& right,
+                        GpuMat& disparity);
+        void operator()(const GpuMat& left, const GpuMat& right,
+                        GpuMat& disparity, Stream& stream);
+        void operator()(const GpuMat& data, GpuMat& disparity);
+        void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream);
+
+        int ndisp;
+
+        int iters;
+        int levels;
+
+        float max_data_term;
+        float data_weight;
+        float max_disc_term;
+        float disc_single_jump;
+
+        int msg_type;
+
+        ...
+    };
+
+..
+
+The class implements the Pedro F. Felzenszwalb algorithm felzenszwalb_bp. It can compute its own data cost (using a truncated linear model) or use a user-provided data cost.
+
+**Please note:** ``StereoBeliefPropagation`` requires a lot of memory:
+
+.. math::
+
+    width \_ step \cdot height \cdot ndisp \cdot 4 \cdot (1 + 0.25)
+
+for message storage, and
+
+.. math::
+
+    width \_ step \cdot height \cdot ndisp \cdot (1 + 0.25 + 0.0625 + \dotsm + \frac{1}{4^{levels}})
+
+for data cost storage. ``width_step`` is the number of bytes in a line including the padding.
+
+.. index:: gpu::StereoBeliefPropagation::StereoBeliefPropagation
+
+cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation
+---------------------------------------------------------
+
+.. cfunction:: StereoBeliefPropagation::StereoBeliefPropagation( int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int msg_type = CV_32F)
+
+.. cfunction:: StereoBeliefPropagation::StereoBeliefPropagation( int ndisp, int iters, int levels, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int msg_type = CV_32F)
+
+    ``StereoBeliefPropagation`` constructors.
+
+    :param ndisp: Number of disparities.
+
+    :param iters: Number of BP iterations on each level.
+
+    :param levels: Number of levels.
+
+    :param max_data_term: Threshold for data cost truncation.
+
+    :param data_weight: Data weight.
+
+    :param max_disc_term: Threshold for discontinuity truncation.
+
+    :param disc_single_jump: Discontinuity single jump.
+
+    :param msg_type: Type for messages. Supports ``CV_16SC1`` and ``CV_32FC1``.
+
+``StereoBeliefPropagation`` uses a truncated linear model for the data cost and the discontinuity term:
+
+.. math::
+
+    DataCost = data \_ weight \cdot \min ( \lvert I_2-I_1 \rvert , max \_ data \_ term)
+
+.. math::
+
+    DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)
+
+For more details, please see felzenszwalb_bp.
+
+By default ``StereoBeliefPropagation`` uses floating-point arithmetic and the ``CV_32FC1`` type for messages. It can also use fixed-point arithmetic and the ``CV_16SC1`` type for messages for better performance. To avoid overflow in that case, the parameters must satisfy
+
+.. math::
+
+    10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX
+
+.. index:: gpu::StereoBeliefPropagation::estimateRecommendedParams
+
+cv::gpu::StereoBeliefPropagation::estimateRecommendedParams
+-----------------------------------------------------------
+
+.. cfunction:: void StereoBeliefPropagation::estimateRecommendedParams( int width, int height, int& ndisp, int& iters, int& levels)
+
+    A heuristic that tries to compute recommended parameters (``ndisp``, ``iters`` and ``levels``) for the specified image size (``width`` and ``height``).
+
+.. index:: gpu::StereoBeliefPropagation::operator ()
+
+cv::gpu::StereoBeliefPropagation::operator ()
+---------------------------------------------
+
+.. cfunction:: void StereoBeliefPropagation::operator()( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
+
+.. cfunction:: void StereoBeliefPropagation::operator()( const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream)
+
+    The stereo correspondence operator. Finds the disparity for the specified rectified stereo pair or data cost.
+
+    :param left: Left image; supports the ``CV_8UC1``, ``CV_8UC3`` and ``CV_8UC4`` types.
+
+    :param right: Right image with the same size and the same type as the left one.
+
+    :param disparity: Output disparity map. If ``disparity`` is empty, the output type will be ``CV_16SC1``; otherwise the output type will be ``disparity.type()``.
+
+    :param stream: Stream for the asynchronous version.
+
+.. cfunction:: void StereoBeliefPropagation::operator()( const GpuMat& data, GpuMat& disparity)
+
+.. cfunction:: void StereoBeliefPropagation::operator()( const GpuMat& data, GpuMat& disparity, Stream& stream)
+
+    * **data** The user-specified data cost. It must have ``msg_type`` type and :math:`\texttt{imgRows} \cdot \texttt{ndisp} \times \texttt{imgCols}` size.
+
+    * **disparity** Output disparity map. If ``disparity`` is empty, the output type will be ``CV_16SC1``; otherwise the output type will be ``disparity.type()``.
+    * **stream** Stream for the asynchronous version.
+
+.. index:: gpu::StereoConstantSpaceBP
+
+.. _gpu::StereoConstantSpaceBP:
+
+gpu::StereoConstantSpaceBP
+--------------------------
+
+.. ctype:: gpu::StereoConstantSpaceBP
+
+The class for computing stereo correspondence using the constant space belief propagation algorithm. ::
+
+    class StereoConstantSpaceBP
+    {
+    public:
+        enum { DEFAULT_NDISP = 128 };
+        enum { DEFAULT_ITERS = 8 };
+        enum { DEFAULT_LEVELS = 4 };
+        enum { DEFAULT_NR_PLANE = 4 };
+
+        static void estimateRecommendedParams(int width, int height,
+            int& ndisp, int& iters, int& levels, int& nr_plane);
+
+        explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP,
+            int iters = DEFAULT_ITERS,
+            int levels = DEFAULT_LEVELS,
+            int nr_plane = DEFAULT_NR_PLANE,
+            int msg_type = CV_32F);
+        StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
+            float max_data_term, float data_weight,
+            float max_disc_term, float disc_single_jump,
+            int min_disp_th = 0,
+            int msg_type = CV_32F);
+
+        void operator()(const GpuMat& left, const GpuMat& right,
+                        GpuMat& disparity);
+        void operator()(const GpuMat& left, const GpuMat& right,
+                        GpuMat& disparity, Stream& stream);
+
+        int ndisp;
+
+        int iters;
+        int levels;
+
+        int nr_plane;
+
+        float max_data_term;
+        float data_weight;
+        float max_disc_term;
+        float disc_single_jump;
+
+        int min_disp_th;
+
+        int msg_type;
+
+        bool use_local_init_data_cost;
+
+        ...
+    };
+
+..
+
+The class implements the Q. Yang algorithm qx_csbp. ``StereoConstantSpaceBP`` supports both local minimum and global minimum data cost initialization algorithms. For more details, please see the paper. By default the local algorithm is used; to enable the global algorithm, set ``use_local_init_data_cost`` to false.
+
+.. index:: gpu::StereoConstantSpaceBP::StereoConstantSpaceBP
+
+cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP
+-----------------------------------------------------
+
+.. cfunction:: StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int nr_plane = DEFAULT_NR_PLANE, int msg_type = CV_32F)
+
+.. cfunction:: StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th = 0, int msg_type = CV_32F)
+
+    ``StereoConstantSpaceBP`` constructors.
+
+    :param ndisp: Number of disparities.
+
+    :param iters: Number of BP iterations on each level.
+
+    :param levels: Number of levels.
+
+    :param nr_plane: Number of disparity levels on the first level.
+
+    :param max_data_term: Truncation of the data cost.
+
+    :param data_weight: Data weight.
+
+    :param max_disc_term: Truncation of the discontinuity term.
+
+    :param disc_single_jump: Discontinuity single jump.
+
+    :param min_disp_th: Minimal disparity threshold.
+
+    :param msg_type: Type for messages. Supports ``CV_16SC1`` and ``CV_32FC1``.
+
+``StereoConstantSpaceBP`` uses a truncated linear model for the data cost and the discontinuity term:
+
+.. math::
+
+    DataCost = data \_ weight \cdot \min ( \lvert I_2-I_1 \rvert , max \_ data \_ term)
+
+.. math::
+
+    DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)
+
+For more details, please see qx_csbp.
+
+By default ``StereoConstantSpaceBP`` uses floating-point arithmetic and the ``CV_32FC1`` type for messages. It can also use fixed-point arithmetic and the ``CV_16SC1`` type for messages for better performance. To avoid overflow in that case, the parameters must satisfy
+
+.. math::
+
+    10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX
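+For instance, a fixed-point instance might be set up like this (a sketch; the parameter values are illustrative, and the overflow constraint above is checked explicitly; ``SHRT_MAX`` comes from ``<climits>``)::
+
+    int ndisp = 128, iters = 8, levels = 4, nr_plane = 4;
+    float max_data_term = 10.0f;
+    CV_Assert(10 * (1 << (levels - 1)) * max_data_term < SHRT_MAX);
+
+    cv::gpu::StereoConstantSpaceBP csbp(ndisp, iters, levels, nr_plane,
+        max_data_term, 0.075f /*data_weight*/, 1.7f /*max_disc_term*/,
+        1.0f /*disc_single_jump*/, 0 /*min_disp_th*/, CV_16SC1);
+    csbp(d_left, d_right, d_disp);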
+.. index:: gpu::StereoConstantSpaceBP::estimateRecommendedParams
+
+cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams
+---------------------------------------------------------
+
+.. cfunction:: void StereoConstantSpaceBP::estimateRecommendedParams( int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
+
+    A heuristic that tries to compute parameters (``ndisp``, ``iters``, ``levels`` and ``nr_plane``) for the specified image size (``width`` and ``height``).
+
+.. index:: gpu::StereoConstantSpaceBP::operator ()
+
+cv::gpu::StereoConstantSpaceBP::operator ()
+-------------------------------------------
+
+.. cfunction:: void StereoConstantSpaceBP::operator()( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
+
+.. cfunction:: void StereoConstantSpaceBP::operator()( const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream)
+
+    The stereo correspondence operator. Finds the disparity for the specified rectified stereo pair.
+
+    :param left: Left image; supports the ``CV_8UC1``, ``CV_8UC3`` and ``CV_8UC4`` types.
+
+    :param right: Right image with the same size and the same type as the left one.
+
+    :param disparity: Output disparity map. If ``disparity`` is empty, the output type will be ``CV_16SC1``; otherwise the output type will be ``disparity.type()``.
+
+    :param stream: Stream for the asynchronous version.
+
+.. index:: gpu::DisparityBilateralFilter
+
+.. _gpu::DisparityBilateralFilter:
+
+gpu::DisparityBilateralFilter
+-----------------------------
+
+.. ctype:: gpu::DisparityBilateralFilter
+
+The class for disparity map refinement using joint bilateral filtering. ::
+
+    class CV_EXPORTS DisparityBilateralFilter
+    {
+    public:
+        enum { DEFAULT_NDISP = 64 };
+        enum { DEFAULT_RADIUS = 3 };
+        enum { DEFAULT_ITERS = 1 };
+
+        explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP,
+            int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS);
+
+        DisparityBilateralFilter(int ndisp, int radius, int iters,
+            float edge_threshold, float max_disc_threshold,
+            float sigma_range);
+
+        void operator()(const GpuMat& disparity, const GpuMat& image,
+                        GpuMat& dst);
+        void operator()(const GpuMat& disparity, const GpuMat& image,
+                        GpuMat& dst, Stream& stream);
+
+        ...
+    };
+
+..
+
+The class implements the Q. Yang algorithm qx_csbp.
+
+.. index:: gpu::DisparityBilateralFilter::DisparityBilateralFilter
+
+cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter
+-----------------------------------------------------------
+
+.. cfunction:: DisparityBilateralFilter::DisparityBilateralFilter( int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS)
+
+.. cfunction:: DisparityBilateralFilter::DisparityBilateralFilter( int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range)
+
+    ``DisparityBilateralFilter`` constructors.
+    :param ndisp: Number of disparities.
+
+    :param radius: Filter radius.
+
+    :param iters: Number of iterations.
+
+    :param edge_threshold: Threshold for edges.
+
+    :param max_disc_threshold: Constant to reject outliers.
+
+    :param sigma_range: Filter range.
+
+.. index:: gpu::DisparityBilateralFilter::operator ()
+
+cv::gpu::DisparityBilateralFilter::operator ()
+----------------------------------------------
+
+.. cfunction:: void DisparityBilateralFilter::operator()( const GpuMat& disparity, const GpuMat& image, GpuMat& dst)
+
+.. cfunction:: void DisparityBilateralFilter::operator()( const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream)
+
+    Refines a disparity map using joint bilateral filtering.
+
+    :param disparity: Input disparity map; supports the ``CV_8UC1`` and ``CV_16SC1`` types.
+
+    :param image: Input image; supports the ``CV_8UC1`` and ``CV_8UC3`` types.
+
+    :param dst: Destination disparity map; will have the same size and type as ``disparity``.
+
+    :param stream: Stream for the asynchronous version.
+
+.. index:: gpu::drawColorDisp
+
+cv::gpu::drawColorDisp
+----------------------
+
+.. cfunction:: void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp)
+
+.. cfunction:: void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, const Stream& stream)
+
+    Colors a disparity image.
+
+    :param src_disp: Source disparity image. Supports the ``CV_8UC1`` and ``CV_16SC1`` types.
+
+    :param dst_disp: Output disparity image. Will have the same size as ``src_disp`` and ``CV_8UC4`` type in ``BGRA`` format (alpha = 255).
+
+    :param ndisp: Number of disparities.
+
+    :param stream: Stream for the asynchronous version.
+
+This function converts the :math:`[0..ndisp)` disparity interval to the :math:`[0..240]` hue range (with saturation and value set to 1) in the ``HSV`` color space, and then converts ``HSV`` to ``RGB``.
+
+.. index:: gpu::reprojectImageTo3D
+
+cv::gpu::reprojectImageTo3D
+---------------------------
+
+.. cfunction:: void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q)
+
+.. cfunction:: void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const Stream& stream)
+
+    Reprojects a disparity image to 3D space.
+
+    :param disp: Input disparity image; supports the ``CV_8U`` and ``CV_16S`` types.
+
+    :param xyzw: Output 4-channel floating-point image of the same size as ``disp``. Each element of ``xyzw(x,y)`` will contain the 3D coordinates ``(x,y,z,1)`` of the point ``(x,y)``, computed from the disparity map.
+
+    :param Q: :math:`4 \times 4` perspective transformation matrix that can be obtained via :ref:`StereoRectify`.
+
+    :param stream: Stream for the asynchronous version.
+
+See also: :func:`reprojectImageTo3D`.
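+Taken together, a disparity map can be refined and visualized like this (a sketch, reusing the hypothetical ``d_left`` and ``d_disp`` matrices from the block matching example above)::
+
+    cv::gpu::DisparityBilateralFilter dbf(64 /*ndisp*/);
+    cv::gpu::GpuMat d_refined, d_color;
+    dbf(d_disp, d_left, d_refined);                  // joint bilateral refinement
+    cv::gpu::drawColorDisp(d_refined, d_color, 64);  // BGRA visualization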
diff --git a/modules/gpu/doc/data_structures.rst b/modules/gpu/doc/data_structures.rst
new file mode 100644
index 000000000..eca8d3122
--- /dev/null
+++ b/modules/gpu/doc/data_structures.rst
@@ -0,0 +1,597 @@
+Data Structures
+===============
+
+.. highlight:: cpp
+
+.. index:: gpu::DevMem2D_
+
+.. _gpu::DevMem2D_:
+
+gpu::DevMem2D_
+--------------
+
+.. ctype:: gpu::DevMem2D_
+
+This is a simple lightweight class that encapsulates pitched memory on the GPU. It is intended to be passed to nvcc-compiled code, i.e. CUDA kernels, and is therefore used internally by OpenCV and by users who write their own device code. Its members can be called both from host and from device code. ::
+
+    template <typename T> struct DevMem2D_
+    {
+        int cols;
+        int rows;
+        T* data;
+        size_t step;
+
+        DevMem2D_() : cols(0), rows(0), data(0), step(0){};
+        DevMem2D_(int rows_, int cols_, T *data_, size_t step_);
+
+        template <typename U>
+        explicit DevMem2D_(const DevMem2D_<U>& d);
+
+        typedef T elem_type;
+        enum { elem_size = sizeof(elem_type) };
+
+        __CV_GPU_HOST_DEVICE__ size_t elemSize() const;
+
+        /* returns pointer to the beginning of the given image row */
+        __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
+        __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
+    };
+
+..
+
+.. index:: gpu::PtrStep_
+
+.. _gpu::PtrStep_:
+
+gpu::PtrStep_
+-------------
+
+.. ctype:: gpu::PtrStep_
+
+This structure is similar to ``DevMem2D_`` but contains only a pointer and a row step. The width and height fields are excluded for performance reasons. The structure is intended for internal use or for users who write their own device code. ::
+
+    template <typename T> struct PtrStep_
+    {
+        T* data;
+        size_t step;
+
+        PtrStep_();
+        PtrStep_(const DevMem2D_<T>& mem);
+
+        typedef T elem_type;
+        enum { elem_size = sizeof(elem_type) };
+
+        __CV_GPU_HOST_DEVICE__ size_t elemSize() const;
+        __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
+        __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
+    };
+
+..
+
+.. index:: gpu::PtrElemStep_
+
+.. _gpu::PtrElemStep_:
+
+gpu::PtrElemStep_
+-----------------
+
+.. ctype:: gpu::PtrElemStep_
+
+This structure is similar to ``DevMem2D_`` but contains only a pointer and a row step in elements. The width and height fields are excluded for performance reasons. This class can only be constructed if ``sizeof(T)`` is a multiple of 256. The structure is intended for internal use or for users who write their own device code. ::
+
+    template <typename T> struct PtrElemStep_ : public PtrStep_<T>
+    {
+        PtrElemStep_(const DevMem2D_<T>& mem);
+        __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
+        __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
+    };
+
+..
+
+.. index:: gpu::GpuMat
+
+.. _gpu::GpuMat:
+
+gpu::GpuMat
+-----------
+
+.. ctype:: gpu::GpuMat
+
+The base storage class for GPU memory with reference counting. Its interface is almost the same as the :func:`Mat` interface with some limitations, so using it should not be a problem. The limitations are: no arbitrary dimensions support (only 2D), no functions that return references to its data (because references on the GPU are not valid for the CPU), and no expression templates technique support. Because of the last limitation, please take care with overloaded matrix operators -- they cause memory allocations. The ``GpuMat`` class is convertible to ``DevMem2D_`` and ``PtrStep_`` so it can be passed directly to kernels.
+
+**Please note:** In contrast with :func:`Mat`, in most cases ``GpuMat::isContinuous() == false``, i.e. rows are aligned to a size depending on the hardware. A single-row ``GpuMat``, however, is always a continuous matrix.
+::
+
+    class CV_EXPORTS GpuMat
+    {
+    public:
+        //! default constructor
+        GpuMat();
+
+        GpuMat(int rows, int cols, int type);
+        GpuMat(Size size, int type);
+
+        .....
+
+        //! builds GpuMat from Mat. Performs blocking upload to device.
+        explicit GpuMat (const Mat& m);
+
+        //! returns lightweight DevMem2D_ structure for passing
+        //! to nvcc-compiled code. Contains size, data ptr and step.
+        template <class T> operator DevMem2D_<T>() const;
+        template <class T> operator PtrStep_<T>() const;
+
+        //! performs blocking upload of data to GpuMat.
+        void upload(const cv::Mat& m);
+        void upload(const CudaMem& m, Stream& stream);
+
+        //! downloads data from device to host memory. Blocking calls.
+        operator Mat() const;
+        void download(cv::Mat& m) const;
+
+        //! download async
+        void download(CudaMem& m, Stream& stream) const;
+    };
+
+..
+
+**Please note:** It is bad practice to leave static or global ``GpuMat`` variables allocated, i.e. to rely on their destructors. The destruction order of such variables and of the CUDA context is undefined, and the GPU memory release function returns an error if the CUDA context has been destroyed beforehand.
+
+See also: :func:`Mat`
+
+.. index:: gpu::CudaMem
+
+.. _gpu::CudaMem:
+
+gpu::CudaMem
+------------
+
+.. ctype:: gpu::CudaMem
+
+This is a class with reference counting that wraps special memory type allocation functions from CUDA. Its interface is also :func:`Mat`-like, but with an additional memory type parameter:
+
+* ``ALLOC_PAGE_LOCKED`` Sets the page-locked memory type, commonly used for fast and asynchronous upload/download of data from/to the GPU.
+
+* ``ALLOC_ZEROCOPY`` Specifies zero-copy memory allocation, i.e. with the possibility to map host memory to the GPU address space, if supported.
+
+* ``ALLOC_WRITE_COMBINED`` Sets a write-combined buffer which is not cached by the CPU. Such buffers are used to supply the GPU with data when the GPU only reads it. The advantage is better CPU cache utilization.
+
+Please note that the allocation size of such memory types is usually limited. For more details, please see the "CUDA 2.2 Pinned Memory APIs" document or the "CUDA_C Programming Guide". ::
+
+    class CV_EXPORTS CudaMem
+    {
+    public:
+        enum { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2,
+               ALLOC_WRITE_COMBINED = 4 };
+
+        CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
+
+        //! creates from cv::Mat with data copying
+        explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED);
+
+        ......
+
+        void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
+
+        //! returns matrix header with disabled ref. counting for CudaMem data.
+        Mat createMatHeader() const;
+        operator Mat() const;
+
+        //! maps host memory into device address space
+        GpuMat createGpuMatHeader() const;
+        operator GpuMat() const;
+
+        //! if host memory can be mapped to gpu address space
+        static bool canMapHostMemory();
+
+        int alloc_type;
+    };
+
+..
+
+.. index:: gpu::CudaMem::createMatHeader
+
+cv::gpu::CudaMem::createMatHeader
+---------------------------------
+
+.. cfunction:: Mat CudaMem::createMatHeader() const
+
+.. cfunction:: CudaMem::operator Mat() const
+
+    Creates a :func:`Mat` header without reference counting for the ``CudaMem`` data.
+
+.. index:: gpu::CudaMem::createGpuMatHeader
+
+cv::gpu::CudaMem::createGpuMatHeader
+------------------------------------
+
+.. cfunction:: GpuMat CudaMem::createGpuMatHeader() const
+
+.. cfunction:: CudaMem::operator GpuMat() const
+
+    Maps CPU memory to the GPU address space and creates a :func:`gpu::GpuMat` header without reference counting for it. This can be done only if the memory was allocated with the ``ALLOC_ZEROCOPY`` flag and if it is supported by the hardware (laptops often share video and CPU memory, so the address spaces can be mapped, which eliminates an extra copy).
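+A zero-copy mapping might be used like this (a sketch; the buffer size and type are arbitrary)::
+
+    if (cv::gpu::CudaMem::canMapHostMemory())
+    {
+        cv::gpu::CudaMem host_buf(cv::Size(1024, 768), CV_8UC1,
+                                  cv::gpu::CudaMem::ALLOC_ZEROCOPY);
+        cv::Mat h = host_buf;          // CPU view of the buffer, no copy
+        cv::gpu::GpuMat d = host_buf;  // GPU view of the same memory
+    }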
+.. index:: gpu::CudaMem::canMapHostMemory
+
+cv::gpu::CudaMem::canMapHostMemory
+----------------------------------
+
+.. cfunction:: static bool CudaMem::canMapHostMemory()
+
+    Returns true if the current hardware supports address space mapping and ``ALLOC_ZEROCOPY`` memory allocation.
+
+.. index:: gpu::Stream
+
+.. _gpu::Stream:
+
+gpu::Stream
+-----------
+
+.. ctype:: gpu::Stream
+
+This class encapsulates a queue of asynchronous calls. Some functions have overloads with an additional :func:`gpu::Stream` parameter. The overloads do the initialization work (allocate output buffers, upload constants, etc.), start the GPU kernel and return before the results are ready. Whether all operations are complete can be checked via :func:`gpu::Stream::queryIfComplete()`. Asynchronous uploads/downloads have to be performed from/to page-locked buffers, i.e. using :func:`gpu::CudaMem` or a :func:`Mat` header that points to a region of :func:`gpu::CudaMem`.
+
+**Please note the limitation:** it is currently not guaranteed that everything works properly if one operation is enqueued twice with different data. Some functions use constant GPU memory, and the next call may update that memory before the previous call has finished. Calling different operations asynchronously is safe, however, because each operation has its own constant buffer. Memory copy/upload/download/set operations on buffers held by the user are also safe. ::
+
+    class CV_EXPORTS Stream
+    {
+    public:
+        Stream();
+        ~Stream();
+
+        Stream(const Stream&);
+        Stream& operator=(const Stream&);
+
+        bool queryIfComplete();
+        void waitForCompletion();
+
+        //! downloads asynchronously.
+        // Warning! cv::Mat must point to page locked memory
+        // (i.e. to CudaMem data or to its subMat)
+        void enqueueDownload(const GpuMat& src, CudaMem& dst);
+        void enqueueDownload(const GpuMat& src, Mat& dst);
+
+        //! uploads asynchronously.
+        // Warning! cv::Mat must point to page locked memory
+        // (i.e. to CudaMem data or to its ROI)
+        void enqueueUpload(const CudaMem& src, GpuMat& dst);
+        void enqueueUpload(const Mat& src, GpuMat& dst);
+
+        void enqueueCopy(const GpuMat& src, GpuMat& dst);
+
+        void enqueueMemSet(const GpuMat& src, Scalar val);
+        void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask);
+
+        // converts matrix type, ex from float to uchar depending on type
+        void enqueueConvert(const GpuMat& src, GpuMat& dst, int type,
+                            double a = 1, double b = 0);
+    };
+
+..
+
+.. index:: gpu::Stream::queryIfComplete
+
+cv::gpu::Stream::queryIfComplete
+--------------------------------
+
+.. cfunction:: bool Stream::queryIfComplete()
+
+    Returns true if the current stream queue is finished, otherwise false.
+
+.. index:: gpu::Stream::waitForCompletion
+
+cv::gpu::Stream::waitForCompletion
+----------------------------------
+
+.. cfunction:: void Stream::waitForCompletion()
+
+    Blocks until all operations in the stream are complete.
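+A typical asynchronous round trip might look like this (a sketch; ``src`` is an arbitrary host ``cv::Mat`` and the enqueued GPU work is elided)::
+
+    cv::gpu::CudaMem page_locked(src.size(), src.type(),
+                                 cv::gpu::CudaMem::ALLOC_PAGE_LOCKED);
+    cv::Mat h_src = page_locked;   // header over page-locked memory
+    src.copyTo(h_src);
+
+    cv::gpu::Stream stream;
+    cv::gpu::GpuMat d_src;
+    stream.enqueueUpload(page_locked, d_src);  // returns immediately
+    // ... enqueue asynchronous GPU operations on d_src here ...
+    stream.waitForCompletion();                // block until the queue drains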
+.. ctype:: gpu::StreamAccessor
+
+This class provides the possibility to get a ``cudaStream_t`` from :func:`gpu::Stream`. It is declared in ``stream_accessor.hpp`` because that is the only public header that depends on the CUDA Runtime API; including it will bring that dependency into your code. ::
+
+    struct StreamAccessor
+    {
+        CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
+    };
+
+..
+
+.. index:: gpu::createContinuous
+
+cv::gpu::createContinuous
+-------------------------
+
+.. cfunction:: void createContinuous(int rows, int cols, int type, GpuMat& m)
+
+    Creates a continuous matrix in GPU memory.
+
+    :param rows: Row count.
+
+    :param cols: Column count.
+
+    :param type: Type of the matrix.
+
+    :param m: Destination matrix. It will only be reshaped if it has the proper type and area ( ``rows`` :math:`\times` ``cols`` ).
+
+Also the following wrappers are available:
+
+.. cfunction:: GpuMat createContinuous(int rows, int cols, int type)
+
+.. cfunction:: void createContinuous(Size size, int type, GpuMat& m)
+
+.. cfunction:: GpuMat createContinuous(Size size, int type)
+
+A matrix is called continuous if its elements are stored continuously, i.e. without gaps at the end of each row.
+
+.. index:: gpu::ensureSizeIsEnough
+
+cv::gpu::ensureSizeIsEnough
+---------------------------
+
+.. cfunction:: void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
+
+    Ensures that the size of the matrix is big enough and that the matrix has the proper type. The function does not reallocate memory if the matrix already has the proper attributes.
+
+    :param rows: Minimum desired number of rows.
+
+    :param cols: Minimum desired number of cols.
+
+    :param type: Desired matrix type.
+
+    :param m: Destination matrix.
+
+Also the following wrapper is available:
+
+.. cfunction:: void ensureSizeIsEnough(Size size, int type, GpuMat& m)
diff --git a/modules/gpu/doc/feature_detection_and_description.rst b/modules/gpu/doc/feature_detection_and_description.rst
new file mode 100644
index 000000000..3ade1dd4d
--- /dev/null
+++ b/modules/gpu/doc/feature_detection_and_description.rst
@@ -0,0 +1,661 @@
+Feature Detection and Description
+=================================
+
+.. highlight:: cpp
+
+.. index:: gpu::SURF_GPU
+
+.. _gpu::SURF_GPU:
+
+gpu::SURF_GPU
+-------------
+
+.. ctype:: gpu::SURF_GPU
+
+Class for extracting Speeded Up Robust Features from an image. ::
+
+    class SURF_GPU : public SURFParams_GPU
+    {
+    public:
+        //! returns the descriptor size in float's (64 or 128)
+        int descriptorSize() const;
+
+        //! upload host keypoints to device memory
+        static void uploadKeypoints(const vector<KeyPoint>& keypoints,
+                                    GpuMat& keypointsGPU);
+        //! download keypoints from device to host memory
+        static void downloadKeypoints(const GpuMat& keypointsGPU,
+                                      vector<KeyPoint>& keypoints);
+        //! download descriptors from device to host memory
+        static void downloadDescriptors(const GpuMat& descriptorsGPU,
+                                        vector<float>& descriptors);
+
+        void operator()(const GpuMat& img, const GpuMat& mask,
+                        GpuMat& keypoints);
+
+        void operator()(const GpuMat& img, const GpuMat& mask,
+                        GpuMat& keypoints, GpuMat& descriptors,
+                        bool useProvidedKeypoints = false,
+                        bool calcOrientation = true);
+
+        void operator()(const GpuMat& img, const GpuMat& mask,
+                        std::vector<KeyPoint>& keypoints);
+
+        void operator()(const GpuMat& img, const GpuMat& mask,
+                        std::vector<KeyPoint>& keypoints, GpuMat& descriptors,
+                        bool useProvidedKeypoints = false,
+                        bool calcOrientation = true);
+
+        void operator()(const GpuMat& img, const GpuMat& mask,
+                        std::vector<KeyPoint>& keypoints,
+                        std::vector<float>& descriptors,
+                        bool useProvidedKeypoints = false,
+                        bool calcOrientation = true);
+
+        GpuMat sum;
+        GpuMat sumf;
+
+        GpuMat mask1;
+        GpuMat maskSum;
+
+        GpuMat hessianBuffer;
+        GpuMat maxPosBuffer;
+        GpuMat featuresBuffer;
+    };
+
+..
+
+The class ``SURF_GPU`` implements the Speeded Up Robust Features descriptor. There is a fast multi-scale Hessian keypoint detector that can be used to find the keypoints (which is the default option), but the descriptors can also be computed for user-specified keypoints. Only 8-bit grayscale images are supported.
+
+The class ``SURF_GPU`` can store results in GPU and CPU memory, and provides static functions to convert results between the CPU and GPU versions ( ``uploadKeypoints``, ``downloadKeypoints``, ``downloadDescriptors`` ). CPU results have the same format as the results of the CPU implementation of SURF. GPU results are stored in ``GpuMat``: the ``keypoints`` matrix is a one-row matrix of the ``CV_32FC6`` type, containing 6 float values per feature: ``x, y, size, response, angle, octave``; the ``descriptors`` matrix is an :math:`\texttt{nFeatures} \times \texttt{descriptorSize}` matrix of the ``CV_32FC1`` type.
+
+The class ``SURF_GPU`` uses some buffers and provides access to them. All buffers can be safely released between function calls.
+
+See also: :func:`SURF`.
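+Detection and extraction might then look like this (a sketch; ``img`` is assumed to be an 8-bit grayscale ``cv::Mat`` and the default SURF parameters are used)::
+
+    cv::gpu::SURF_GPU surf;
+    cv::gpu::GpuMat d_img(img);
+    cv::gpu::GpuMat d_keypoints, d_descriptors;
+    surf(d_img, cv::gpu::GpuMat() /*no mask*/, d_keypoints, d_descriptors);
+
+    std::vector<cv::KeyPoint> keypoints;
+    cv::gpu::SURF_GPU::downloadKeypoints(d_keypoints, keypoints);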
+.. index:: gpu::BruteForceMatcher_GPU
+
+.. _gpu::BruteForceMatcher_GPU:
+
+gpu::BruteForceMatcher_GPU
+--------------------------
+
+.. ctype:: gpu::BruteForceMatcher_GPU
+
+Brute-force descriptor matcher. For each descriptor in the first set, this matcher finds the closest descriptor in the second set by trying each one. This descriptor matcher supports masking permissible matches between descriptor sets. ::
+
+    template <class Distance>
+    class BruteForceMatcher_GPU
+    {
+    public:
+        // Add descriptors to train descriptor collection.
+        void add(const std::vector<GpuMat>& descCollection);
+
+        // Get train descriptors collection.
+        const std::vector<GpuMat>& getTrainDescriptors() const;
+
+        // Clear train descriptors collection.
+        void clear();
+
+        // Return true if there are no train descriptors in collection.
+        bool empty() const;
+
+        // Return true if the matcher supports mask in match methods.
+        bool isMaskSupported() const;
+
+        void matchSingle(const GpuMat& queryDescs, const GpuMat& trainDescs,
+                         GpuMat& trainIdx, GpuMat& distance,
+                         const GpuMat& mask = GpuMat());
+
+        static void matchDownload(const GpuMat& trainIdx,
+            const GpuMat& distance, std::vector<DMatch>& matches);
+
+        void match(const GpuMat& queryDescs, const GpuMat& trainDescs,
+                   std::vector<DMatch>& matches, const GpuMat& mask = GpuMat());
+
+        void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection,
+            const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+
+        void matchCollection(const GpuMat& queryDescs,
+                             const GpuMat& trainCollection,
+                             GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
+                             const GpuMat& maskCollection);
+
+        static void matchDownload(const GpuMat& trainIdx, GpuMat& imgIdx,
+            const GpuMat& distance, std::vector<DMatch>& matches);
+
+        void match(const GpuMat& queryDescs, std::vector<DMatch>& matches,
+                   const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+
+        void knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
+            GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
+            const GpuMat& mask = GpuMat());
+
+        static void knnMatchDownload(const GpuMat& trainIdx,
+            const GpuMat& distance, std::vector< std::vector<DMatch> >& matches,
+            bool compactResult = false);
+
+        void knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
+            std::vector< std::vector<DMatch> >& matches, int k,
+            const GpuMat& mask = GpuMat(), bool compactResult = false);
+
+        void knnMatch(const GpuMat& queryDescs,
+            std::vector< std::vector<DMatch> >& matches, int knn,
+            const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+            bool compactResult = false );
+
+        void radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
+            GpuMat& trainIdx, GpuMat& nMatches, GpuMat& distance,
+            float maxDistance, const GpuMat& mask = GpuMat());
+
+        static void radiusMatchDownload(const GpuMat& trainIdx,
+            const GpuMat& nMatches, const GpuMat& distance,
+            std::vector< std::vector<DMatch> >& matches,
+            bool compactResult = false);
+
+        void radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
+            std::vector< std::vector<DMatch> >& matches, float maxDistance,
+            const GpuMat& mask = GpuMat(), bool compactResult = false);
+
+        void radiusMatch(const GpuMat& queryDescs,
+            std::vector< std::vector<DMatch> >& matches, float maxDistance,
+            const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+            bool compactResult = false);
+
+    private:
+        std::vector<GpuMat> trainDescCollection;
+    };
+
+..
+
+The class ``BruteForceMatcher_GPU`` has an interface similar to the class :func:`DescriptorMatcher`. It has two groups of match methods: for matching descriptors of one image against another image, or against an image set. All functions also come in two variants: saving results to GPU memory or to CPU memory.
+
+The ``Distance`` template parameter is kept for CPU/GPU interface similarity. ``BruteForceMatcher_GPU`` supports only the ``L1`` and ``L2`` distance types.
+
+See also: :func:`DescriptorMatcher`, :func:`BruteForceMatcher`.
+
+.. index:: cv::gpu::BruteForceMatcher_GPU::match
+
+.. _cv::gpu::BruteForceMatcher_GPU::match:
+
+cv::gpu::BruteForceMatcher_GPU::match
+-------------------------------------
+
+.. cfunction:: void match(const GpuMat& queryDescs, const GpuMat& trainDescs, std::vector<DMatch>& matches, const GpuMat& mask = GpuMat())
+
+.. cfunction:: void match(const GpuMat& queryDescs, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks = std::vector<GpuMat>())
+
+    Finds the best match for each descriptor from a query set with train descriptors.
+
+See also: :func:`DescriptorMatcher::match`.
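+In the simplest case, two GPU descriptor sets are matched like this (a sketch; the descriptor matrices are assumed to come, e.g., from ``SURF_GPU`` above)::
+
+    cv::gpu::BruteForceMatcher_GPU< cv::L2<float> > matcher;
+    std::vector<cv::DMatch> matches;
+    matcher.match(d_query_descriptors, d_train_descriptors, matches);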
+.. index:: cv::gpu::BruteForceMatcher_GPU::matchSingle
+
+.. _cv::gpu::BruteForceMatcher_GPU::matchSingle:
+
+cv::gpu::BruteForceMatcher_GPU::matchSingle
+-------------------------------------------
+
+`id=0.230978706047 Comments from the Wiki `__
+
+.. cfunction:: void matchSingle(const GpuMat\& queryDescs, const GpuMat\& trainDescs, GpuMat\& trainIdx, GpuMat\& distance, const GpuMat\& mask = GpuMat())
+
+    Finds the best match for each query descriptor. Results will be stored to GPU memory.
+
+    :param queryDescs: Query set of descriptors.
+
+    :param trainDescs: Train set of descriptors. This will not be added to the train descriptors collection stored in the class object.
+
+    :param trainIdx: One-row ``CV_32SC1`` matrix. Will contain the best train index for each query. If some query descriptors are masked out in ``mask`` , it will contain -1.
+
+    :param distance: One-row ``CV_32FC1`` matrix. Will contain the best distance for each query. If some query descriptors are masked out in ``mask`` , it will contain ``FLT_MAX`` .
+
+    :param mask: Mask specifying permissible matches between the input query and train matrices of descriptors.
+
+
+.. index:: cv::gpu::BruteForceMatcher_GPU::matchCollection
+
+.. _cv::gpu::BruteForceMatcher_GPU::matchCollection:
+
+cv::gpu::BruteForceMatcher_GPU::matchCollection
+-----------------------------------------------
+
+`id=0.934341769456 Comments from the Wiki `__
+
+.. cfunction:: void matchCollection(const GpuMat\& queryDescs, const GpuMat\& trainCollection, GpuMat\& trainIdx, GpuMat\& imgIdx, GpuMat\& distance, const GpuMat\& maskCollection)
+
+    Finds the best match for each query descriptor from the train collection. Results will be stored to GPU memory.
+
+    :param queryDescs: Query set of descriptors.
+
+    :param trainCollection: ``GpuMat`` containing the train collection. It can be obtained from the train descriptors collection that was set using the ``add`` method, by means of ``makeGpuCollection`` . Or it can contain a user-defined collection. It must be a one-row matrix; each element is a ``DevMem2D`` that points to one train descriptors matrix.
+
+    :param trainIdx: One-row ``CV_32SC1`` matrix. Will contain the best train index for each query. If some query descriptors are masked out in ``maskCollection`` , it will contain -1.
+
+    :param imgIdx: One-row ``CV_32SC1`` matrix. Will contain the train image index for each query. If some query descriptors are masked out in ``maskCollection`` , it will contain -1.
+
+    :param distance: One-row ``CV_32FC1`` matrix. Will contain the best distance for each query. If some query descriptors are masked out in ``maskCollection`` , it will contain ``FLT_MAX`` .
+
+    :param maskCollection: ``GpuMat`` containing a set of masks. It can be obtained from ``std::vector<GpuMat>`` by means of ``makeGpuCollection`` . Or it can contain a user-defined mask set. It must be an empty matrix or a one-row matrix; each element is a ``PtrStep`` that points to one mask.
+
+
+.. index:: cv::gpu::BruteForceMatcher_GPU::makeGpuCollection
+
+.. _cv::gpu::BruteForceMatcher_GPU::makeGpuCollection:
+
+cv::gpu::BruteForceMatcher_GPU::makeGpuCollection
+-------------------------------------------------
+
+`id=0.285830043662 Comments from the Wiki `__
+
+.. cfunction:: void makeGpuCollection(GpuMat\& trainCollection, GpuMat\& maskCollection, const vector<GpuMat>\& masks = std::vector<GpuMat>())
+
+    Makes a GPU collection of train descriptors and masks in a format suitable for the ``matchCollection`` function.
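+
+The collection-matching functions above are typically used together. The following is a minimal sketch under the API documented here; ``queryDescs`` and ``trainDescsVec`` (a ``std::vector<gpu::GpuMat>`` of per-image descriptors) are assumed to exist:
+
+::
+
+    gpu::BruteForceMatcher_GPU< L2<float> > matcher;
+    matcher.add(trainDescsVec);                  // store the train collection
+
+    gpu::GpuMat trainCollection, maskCollection;
+    matcher.makeGpuCollection(trainCollection, maskCollection);
+
+    gpu::GpuMat trainIdx, imgIdx, distance;      // results stay in GPU memory
+    matcher.matchCollection(queryDescs, trainCollection,
+                            trainIdx, imgIdx, distance, maskCollection);
+
+    std::vector<DMatch> matches;                 // DMatch::imgIdx gives the train image
+    matcher.matchDownload(trainIdx, imgIdx, distance, matches);
+
+..
+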
+.. index:: cv::gpu::BruteForceMatcher_GPU::matchDownload
+
+.. _cv::gpu::BruteForceMatcher_GPU::matchDownload:
+
+cv::gpu::BruteForceMatcher_GPU::matchDownload
+---------------------------------------------
+
+`id=0.171611509706 Comments from the Wiki `__
+
+.. cfunction:: void matchDownload(const GpuMat\& trainIdx, const GpuMat\& distance, std::vector<DMatch>\& matches)
+
+.. cfunction:: void matchDownload(const GpuMat\& trainIdx, GpuMat\& imgIdx, const GpuMat\& distance, std::vector<DMatch>\& matches)
+
+    Downloads the ``trainIdx`` , ``imgIdx`` and ``distance`` matrices obtained via ``matchSingle`` or ``matchCollection`` to a CPU vector of ``DMatch`` .
+
+
+.. index:: cv::gpu::BruteForceMatcher_GPU::knnMatch
+
+.. _cv::gpu::BruteForceMatcher_GPU::knnMatch:
+
+cv::gpu::BruteForceMatcher_GPU::knnMatch
+----------------------------------------
+
+`id=0.619005099272 Comments from the Wiki `__
+
+.. cfunction:: void knnMatch(const GpuMat\& queryDescs, const GpuMat\& trainDescs, std::vector< std::vector<DMatch> >\& matches, int k, const GpuMat\& mask = GpuMat(), bool compactResult = false)
+
+    Finds the k best matches for each descriptor from a query set with train descriptors. The found k (or fewer, if not possible) matches are returned in increasing order of distance.
+
+.. cfunction:: void knnMatch(const GpuMat\& queryDescs, std::vector< std::vector<DMatch> >\& matches, int k, const std::vector<GpuMat>\& masks = std::vector<GpuMat>(), bool compactResult = false )
+
+See also: :func:`DescriptorMatcher::knnMatch`.
+
+
+.. index:: cv::gpu::BruteForceMatcher_GPU::knnMatch
+
+.. _cv::gpu::BruteForceMatcher_GPU::knnMatch:
+
+cv::gpu::BruteForceMatcher_GPU::knnMatch
+----------------------------------------
+
+`id=0.852761934257 Comments from the Wiki `__
+
+.. cfunction:: void knnMatch(const GpuMat\& queryDescs, const GpuMat\& trainDescs, GpuMat\& trainIdx, GpuMat\& distance, GpuMat\& allDist, int k, const GpuMat\& mask = GpuMat())
+
+    Finds the k best matches for each descriptor from a query set with train descriptors. The found k (or fewer, if not possible) matches are returned in increasing order of distance. Results will be stored to GPU memory.
+
+    :param queryDescs: Query set of descriptors.
+
+    :param trainDescs: Train set of descriptors. This will not be added to the train descriptors collection stored in the class object.
+
+    :param trainIdx: Matrix with :math:`\texttt{nQuery} \times \texttt{k}` size and ``CV_32SC1`` type. ``trainIdx.at<int>(queryIdx, i)`` will contain the index of the i-th best train. If some query descriptors are masked out in ``mask`` , it will contain -1.
+
+    :param distance: Matrix with :math:`\texttt{nQuery} \times \texttt{k}` size and ``CV_32FC1`` type. Will contain the distance for each query and the i-th best train. If some query descriptors are masked out in ``mask`` , it will contain ``FLT_MAX`` .
+
+    :param allDist: Buffer to store all distances between query descriptors and train descriptors. It will have :math:`\texttt{nQuery} \times \texttt{nTrain}` size and ``CV_32FC1`` type. ``allDist.at<float>(queryIdx, trainIdx)`` will contain ``FLT_MAX`` if ``trainIdx`` is one of the k best; otherwise it will contain the distance between the ``queryIdx`` and ``trainIdx`` descriptors.
+
+    :param k: Number of best matches to be found per each query descriptor (or fewer, if not possible).
+
+    :param mask: Mask specifying permissible matches between the input query and train matrices of descriptors.
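+
+A common use of ``knnMatch`` with ``k = 2`` is Lowe-style ratio-test filtering. A short sketch (the 0.8 threshold is illustrative; ``queryDescs`` and ``trainDescs`` are assumed to exist):
+
+::
+
+    gpu::BruteForceMatcher_GPU< L2<float> > matcher;
+    std::vector< std::vector<DMatch> > knn;
+    matcher.knnMatch(queryDescs, trainDescs, knn, 2);   // two best matches per query
+
+    std::vector<DMatch> good;
+    for (size_t i = 0; i < knn.size(); ++i)
+    {
+        // Keep a match only if it is clearly better than the runner-up.
+        if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
+            good.push_back(knn[i][0]);
+    }
+
+..
+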
+.. index:: cv::gpu::BruteForceMatcher_GPU::knnMatchDownload
+
+.. _cv::gpu::BruteForceMatcher_GPU::knnMatchDownload:
+
+cv::gpu::BruteForceMatcher_GPU::knnMatchDownload
+------------------------------------------------
+
+`id=0.735745722087 Comments from the Wiki `__
+
+.. cfunction:: void knnMatchDownload(const GpuMat\& trainIdx, const GpuMat\& distance, std::vector< std::vector<DMatch> >\& matches, bool compactResult = false)
+
+    Downloads the ``trainIdx`` and ``distance`` matrices obtained via ``knnMatch`` to a CPU vector of ``DMatch`` . If ``compactResult`` is true, the ``matches`` vector will not contain matches for fully masked-out query descriptors.
+
+
+.. index:: cv::gpu::BruteForceMatcher_GPU::radiusMatch
+
+.. _cv::gpu::BruteForceMatcher_GPU::radiusMatch:
+
+cv::gpu::BruteForceMatcher_GPU::radiusMatch
+-------------------------------------------
+
+`id=0.964758287221 Comments from the Wiki `__
+
+.. cfunction:: void radiusMatch(const GpuMat\& queryDescs, const GpuMat\& trainDescs, std::vector< std::vector<DMatch> >\& matches, float maxDistance, const GpuMat\& mask = GpuMat(), bool compactResult = false)
+
+    Finds the best matches for each query descriptor that have a distance less than the given threshold. The found matches are returned in increasing order of distance.
+
+.. cfunction:: void radiusMatch(const GpuMat\& queryDescs, std::vector< std::vector<DMatch> >\& matches, float maxDistance, const std::vector<GpuMat>\& masks = std::vector<GpuMat>(), bool compactResult = false)
+
+This function works only on devices with Compute Capability :math:`>=` 1.1.
+
+See also: :func:`DescriptorMatcher::radiusMatch`.
+
+
+.. index:: cv::gpu::BruteForceMatcher_GPU::radiusMatch
+
+.. _cv::gpu::BruteForceMatcher_GPU::radiusMatch:
+
+cv::gpu::BruteForceMatcher_GPU::radiusMatch
+-------------------------------------------
+
+`id=0.499772925784 Comments from the Wiki `__
+
+.. cfunction:: void radiusMatch(const GpuMat\& queryDescs, const GpuMat\& trainDescs, GpuMat\& trainIdx, GpuMat\& nMatches, GpuMat\& distance, float maxDistance, const GpuMat\& mask = GpuMat())
+
+    Finds the best matches for each query descriptor that have a distance less than the given threshold. Results will be stored to GPU memory.
+
+    :param queryDescs: Query set of descriptors.
+
+    :param trainDescs: Train set of descriptors. This will not be added to the train descriptors collection stored in the class object.
+
+    :param trainIdx: ``trainIdx.at<int>(queryIdx, i)`` will contain the i-th train index ``(i < min(nMatches.at<int>(0, queryIdx), trainIdx.cols))`` . If ``trainIdx`` is empty, it will be created with size :math:`\texttt{nQuery} \times \texttt{nTrain}` . Or it can be allocated by the user (it must have ``nQuery`` rows and ``CV_32SC1`` type). ``cols`` can be less than ``nTrain`` , but then the matcher may not find all matches, because it does not have enough memory to store the results.
+
+    :param nMatches: ``nMatches.at<int>(0, queryIdx)`` will contain the match count for ``queryIdx`` . Note that ``nMatches`` can be greater than ``trainIdx.cols`` ; this means that the matcher did not find all matches, because it did not have enough memory to store them.
+
+    :param distance: ``distance.at<float>(queryIdx, i)`` will contain the i-th distance ``(i < min(nMatches.at<int>(0, queryIdx), trainIdx.cols))`` . If ``trainIdx`` is empty, it will be created with size :math:`\texttt{nQuery} \times \texttt{nTrain}` . Otherwise it must also be allocated by the user (it must have the same size as ``trainIdx`` and ``CV_32FC1`` type).
+
+    :param maxDistance: Distance threshold.
+
+    :param mask: Mask specifying permissible matches between the input query and train matrices of descriptors.
+
+In contrast to the overloads above, the results are not sorted in increasing order of distance.
+
+This function works only on devices with Compute Capability :math:`>=` 1.1.
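+
+A sketch of the GPU-memory variant followed by the download step described next (the 0.25f threshold is illustrative; ``queryDescs`` and ``trainDescs`` are assumed to exist):
+
+::
+
+    gpu::BruteForceMatcher_GPU< L2<float> > matcher;
+    gpu::GpuMat trainIdx, nMatches, distance;
+    matcher.radiusMatch(queryDescs, trainDescs,
+                        trainIdx, nMatches, distance, 0.25f);
+
+    std::vector< std::vector<DMatch> > matches;   // one bucket per query descriptor
+    matcher.radiusMatchDownload(trainIdx, nMatches, distance, matches);
+
+    // Unlike match() and knnMatch(), these buckets are not sorted by distance.
+
+..
+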
+.. index:: cv::gpu::BruteForceMatcher_GPU::radiusMatchDownload
+
+.. _cv::gpu::BruteForceMatcher_GPU::radiusMatchDownload:
+
+cv::gpu::BruteForceMatcher_GPU::radiusMatchDownload
+---------------------------------------------------
+
+`id=0.627360663551 Comments from the Wiki `__
+
+.. cfunction:: void radiusMatchDownload(const GpuMat\& trainIdx, const GpuMat\& nMatches, const GpuMat\& distance, std::vector< std::vector<DMatch> >\& matches, bool compactResult = false)
+
+    Downloads the ``trainIdx`` , ``nMatches`` and ``distance`` matrices obtained via ``radiusMatch`` to a CPU vector of ``DMatch`` . If ``compactResult`` is true, the ``matches`` vector will not contain matches for fully masked-out query descriptors.
+
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
new file mode 100644
index 000000000..c24e509a4
--- /dev/null
+++ b/modules/gpu/doc/gpu.rst
@@ -0,0 +1,18 @@
+*******************************
+GPU-accelerated Computer Vision
+*******************************
+
+.. toctree::
+    :maxdepth: 2
+
+    gpu_gpu_module_introduction
+    gpu_initalization_and_information
+    gpu_data_structures
+    gpu_operations_on_matrices
+    gpu_per-element_operations
+    gpu_image_processing
+    gpu_matrix_reductions
+    gpu_object_detection
+    gpu_feature_detection_and_description
+    gpu_image_filtering
+    gpu_camera_calibration_and_3d_reconstruction
diff --git a/modules/gpu/doc/image_filtering.rst b/modules/gpu/doc/image_filtering.rst
new file mode 100644
index 000000000..e25af1fc4
--- /dev/null
+++ b/modules/gpu/doc/image_filtering.rst
@@ -0,0 +1,1400 @@
+Image Filtering
+===============
+
+.. highlight:: cpp
+
+Functions and classes described in this section are used to perform various linear or non-linear filtering operations on 2D images.
+
+See also:
+
+.. index:: gpu::BaseRowFilter_GPU
+
+.. _gpu::BaseRowFilter_GPU:
+
+gpu::BaseRowFilter_GPU
+----------------------
+
+`id=0.764022574035 Comments from the Wiki `__
+
+.. ctype:: gpu::BaseRowFilter_GPU
+
+The base class for linear or non-linear filters that process rows of 2D arrays. Such filters are used for the "horizontal" filtering passes in separable filters.
+
+::
+
+    class BaseRowFilter_GPU
+    {
+    public:
+        BaseRowFilter_GPU(int ksize_, int anchor_);
+        virtual ~BaseRowFilter_GPU() {}
+        virtual void operator()(const GpuMat& src, GpuMat& dst) = 0;
+        int ksize, anchor;
+    };
+
+..
+
+**Please note:** This class doesn't allocate memory for the destination image. Usually this class is used inside :func:`gpu::FilterEngine_GPU`.
+
+
+.. index:: gpu::BaseColumnFilter_GPU
+
+.. _gpu::BaseColumnFilter_GPU:
+
+gpu::BaseColumnFilter_GPU
+-------------------------
+
+`id=0.823235413785 Comments from the Wiki `__
+
+.. ctype:: gpu::BaseColumnFilter_GPU
+
+The base class for linear or non-linear filters that process columns of 2D arrays. Such filters are used for the "vertical" filtering passes in separable filters.
+
+::
+
+    class BaseColumnFilter_GPU
+    {
+    public:
+        BaseColumnFilter_GPU(int ksize_, int anchor_);
+        virtual ~BaseColumnFilter_GPU() {}
+        virtual void operator()(const GpuMat& src, GpuMat& dst) = 0;
+        int ksize, anchor;
+    };
+
+..
+
+**Please note:** This class doesn't allocate memory for the destination image. Usually this class is used inside :func:`gpu::FilterEngine_GPU`.
+
+
+.. index:: gpu::BaseFilter_GPU
+
+.. _gpu::BaseFilter_GPU:
+
+gpu::BaseFilter_GPU
+-------------------
+
+`id=0.757554322631 Comments from the Wiki `__
+
+.. ctype:: gpu::BaseFilter_GPU
+
+The base class for non-separable 2D filters.
+ + + + +:: + + + + class CV_EXPORTS BaseFilter_GPU + { + public: + BaseFilter_GPU(const Size& ksize_, const Point& anchor_); + virtual ~BaseFilter_GPU() {} + virtual void operator()(const GpuMat& src, GpuMat& dst) = 0; + Size ksize; + Point anchor; + }; + + +.. + +**Please note:** +This class doesn't allocate memory for destination image. Usually this class is used inside +. + + + +.. index:: gpu::FilterEngine_GPU + +.. _gpu::FilterEngine_GPU: + +gpu::FilterEngine_GPU +--------------------- + +`id=0.649282430206 Comments from the Wiki `__ + +.. ctype:: gpu::FilterEngine_GPU + + + +The base class for Filter Engine. + + + + +:: + + + + class CV_EXPORTS FilterEngine_GPU + { + public: + virtual ~FilterEngine_GPU() {} + + virtual void apply(const GpuMat& src, GpuMat& dst, + Rect roi = Rect(0,0,-1,-1)) = 0; + }; + + +.. + +The class can be used to apply an arbitrary filtering operation to an image. It contains all the necessary intermediate buffers. Pointers to the initialized +``FilterEngine_GPU`` +instances are returned by various +``create*Filter_GPU`` +functions, see below, and they are used inside high-level functions such as +:func:`gpu::filter2D` +, +:func:`gpu::erode` +, +:func:`gpu::Sobel` +etc. + +By using +``FilterEngine_GPU`` +instead of functions you can avoid unnecessary memory allocation for intermediate buffers and get much better performance: + + + + +:: + + + + while (...) + { + cv::gpu::GpuMat src = getImg(); + cv::gpu::GpuMat dst; + // Allocate and release buffers at each iterations + cv::gpu::GaussianBlur(src, dst, ksize, sigma1); + } + + // Allocate buffers only once + cv::Ptr filter = + cv::gpu::createGaussianFilter_GPU(CV_8UC4, ksize, sigma1); + while (...) + { + cv::gpu::GpuMat src = getImg(); + cv::gpu::GpuMat dst; + filter->apply(src, dst, cv::Rect(0, 0, src.cols, src.rows)); + } + // Release buffers only once + filter.release(); + + +.. + +``FilterEngine_GPU`` +can process a rectangular sub-region of an image. By default, if +``roi == Rect(0,0,-1,-1)`` +, +``FilterEngine_GPU`` +processes inner region of image ( +``Rect(anchor.x, anchor.y, src_size.width - ksize.width, src_size.height - ksize.height)`` +), because some filters doesn't check if indices are outside the image for better perfomace. See below which filters supports processing the whole image and which not and image type limitations. + +**Please note:** +The GPU filters doesn't support the in-place mode. + +See also: +, +, +, +, +, +, +, +, +, +, + +.. index:: cv::gpu::createFilter2D_GPU + +.. _cv::gpu::createFilter2D_GPU: + +cv::gpu::createFilter2D_GPU +--------------------------- + +`id=0.305010054416 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr createFilter2D_GPU( const Ptr\& filter2D, int srcType, int dstType) + + Creates non-separable filter engine with the specified filter. + + + + + + {Non-separable 2D filter.} + + :param srcType: Input image type. It must be supported by ``filter2D`` . + + + :param dstType: Output image type. It must be supported by ``filter2D`` . + + + +Usually this function is used inside high-level functions, like +, +. + + + +.. index:: cv::gpu::createSeparableFilter_GPU + +.. _cv::gpu::createSeparableFilter_GPU: + +cv::gpu::createSeparableFilter_GPU +---------------------------------- + +`id=0.487586153702 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr createSeparableFilter_GPU( const Ptr\& rowFilter, const Ptr\& columnFilter, int srcType, int bufType, int dstType) + + Creates separable filter engine with the specified filters. 
+ + + + + + {"Horizontal" 1D filter.} + {"Vertical" 1D filter.} + + :param srcType: Input image type. It must be supported by ``rowFilter`` . + + + :param bufType: Buffer image type. It must be supported by ``rowFilter`` and ``columnFilter`` . + + + :param dstType: Output image type. It must be supported by ``columnFilter`` . + + + +Usually this function is used inside high-level functions, like +. + + + +.. index:: cv::gpu::getRowSumFilter_GPU + +.. _cv::gpu::getRowSumFilter_GPU: + +cv::gpu::getRowSumFilter_GPU +---------------------------- + +`id=0.462298689415 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1) + + Creates horizontal 1D box filter. + + + + + + + :param srcType: Input image type. Only ``CV_8UC1`` type is supported for now. + + + :param sumType: Output image type. Only ``CV_32FC1`` type is supported for now. + + + :param ksize: Kernel size. + + + :param anchor: Anchor point. The default value (-1) means that the anchor is at the kernel center. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + + + +.. index:: cv::gpu::getColumnSumFilter_GPU + +.. _cv::gpu::getColumnSumFilter_GPU: + +cv::gpu::getColumnSumFilter_GPU +------------------------------- + +`id=0.000409435820019 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1) + + Creates vertical 1D box filter. + + + + + + + :param sumType: Input image type. Only ``CV_8UC1`` type is supported for now. + + + :param dstType: Output image type. Only ``CV_32FC1`` type is supported for now. + + + :param ksize: Kernel size. + + + :param anchor: Anchor point. The default value (-1) means that the anchor is at the kernel center. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + + + +.. index:: cv::gpu::createBoxFilter_GPU + +.. _cv::gpu::createBoxFilter_GPU: + +cv::gpu::createBoxFilter_GPU +---------------------------- + +`id=0.843050874841 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr createBoxFilter_GPU(int srcType, int dstType, const Size\& ksize, const Point\& anchor = Point(-1,-1)) + + Creates normalized 2D box filter. + + + + + +.. cfunction:: Ptr getBoxFilter_GPU(int srcType, int dstType, const Size\& ksize, Point anchor = Point(-1, -1)) + + + + + + + :param srcType: Input image type. Supports ``CV_8UC1`` and ``CV_8UC4`` . + + + :param dstType: Output image type. Supports only the same as source type. + + + :param ksize: Kernel size. + + + :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + +See also: +:func:`boxFilter` +. + + + +.. index:: gpu::boxFilter + + +cv::gpu::boxFilter +------------------ + +`id=0.55747872173 Comments from the Wiki `__ + + + + +.. cfunction:: void boxFilter(const GpuMat\& src, GpuMat\& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1)) + + Smooths the image using the normalized box filter. + + + + + + + :param src: Input image. Supports ``CV_8UC1`` and ``CV_8UC4`` source types. + + + :param dst: Output image type. Will have the same size and the same type as ``src`` . + + + :param ddepth: Output image depth. 
Support only the same as source depth ( ``CV_8U`` ) or -1 what means use source depth. + + + :param ksize: Kernel size. + + + :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + +See also: +:func:`boxFilter` +, +. + + + +.. index:: gpu::blur + + +cv::gpu::blur +------------- + +`id=0.311993477932 Comments from the Wiki `__ + + + + +.. cfunction:: void blur(const GpuMat\& src, GpuMat\& dst, Size ksize, Point anchor = Point(-1,-1)) + + A synonym for normalized box filter. + + + + + + + :param src: Input image. Supports ``CV_8UC1`` and ``CV_8UC4`` source type. + + + :param dst: Output image type. Will have the same size and the same type as ``src`` . + + + :param ksize: Kernel size. + + + :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + +See also: +:func:`blur` +, +:func:`gpu::boxFilter` +. + + + +.. index:: cv::gpu::createMorphologyFilter_GPU + +.. _cv::gpu::createMorphologyFilter_GPU: + +cv::gpu::createMorphologyFilter_GPU +----------------------------------- + +`id=0.426788597288 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr createMorphologyFilter_GPU(int op, int type, const Mat\& kernel, const Point\& anchor = Point(-1,-1), int iterations = 1) + + Creates 2D morphological filter. + + + + + +.. cfunction:: Ptr getMorphologyFilter_GPU(int op, int type, const Mat\& kernel, const Size\& ksize, Point anchor=Point(-1,-1)) + + + + + + {Morphology operation id. Only + ``MORPH_ERODE`` + and + ``MORPH_DILATE`` + are supported.} + + :param type: Input/output image type. Only ``CV_8UC1`` and ``CV_8UC4`` are supported. + + + :param kernel: 2D 8-bit structuring element for the morphological operation. + + + :param size: Horizontal or vertical structuring element size for separable morphological operations. + + + :param anchor: Anchor position within the structuring element; negative values mean that the anchor is at the center. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + +See also: +:func:`createMorphologyFilter` +. + + + +.. index:: gpu::erode + + +cv::gpu::erode +-------------- + +`id=0.358632610182 Comments from the Wiki `__ + + + + +.. cfunction:: void erode(const GpuMat\& src, GpuMat\& dst, const Mat\& kernel, Point anchor = Point(-1, -1), int iterations = 1) + + Erodes an image by using a specific structuring element. + + + + + + + :param src: Source image. Only ``CV_8UC1`` and ``CV_8UC4`` types are supported. + + + :param dst: Destination image. It will have the same size and the same type as ``src`` . + + + :param kernel: Structuring element used for dilation. If ``kernel=Mat()`` , a :math:`3 \times 3` rectangular structuring element is used. + + + :param anchor: Position of the anchor within the element. The default value :math:`(-1, -1)` means that the anchor is at the element center. + + + :param iterations: Number of times erosion to be applied. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + +See also: +:func:`erode` +, +. + + + +.. 
index:: gpu::dilate + + +cv::gpu::dilate +--------------- + +`id=0.875844424053 Comments from the Wiki `__ + + + + +.. cfunction:: void dilate(const GpuMat\& src, GpuMat\& dst, const Mat\& kernel, Point anchor = Point(-1, -1), int iterations = 1) + + Dilates an image by using a specific structuring element. + + + + + + + :param src: Source image. Supports ``CV_8UC1`` and ``CV_8UC4`` source types. + + + :param dst: Destination image. It will have the same size and the same type as ``src`` . + + + :param kernel: Structuring element used for dilation. If ``kernel=Mat()`` , a :math:`3 \times 3` rectangular structuring element is used. + + + :param anchor: Position of the anchor within the element. The default value :math:`(-1, -1)` means that the anchor is at the element center. + + + :param iterations: Number of times dilation to be applied. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + +See also: +:func:`dilate` +, +. + + + +.. index:: gpu::morphologyEx + + +cv::gpu::morphologyEx +--------------------- + +`id=0.0523750844864 Comments from the Wiki `__ + + + + +.. cfunction:: void morphologyEx(const GpuMat\& src, GpuMat\& dst, int op, const Mat\& kernel, Point anchor = Point(-1, -1), int iterations = 1) + + Applies an advanced morphological operation to image. + + + + + + + :param src: Source image. Supports ``CV_8UC1`` and ``CV_8UC4`` source type. + + + :param dst: Destination image. It will have the same size and the same type as ``src`` + + + :param op: Type of morphological operation, one of the following: + + * **MORPH_OPEN** opening + + * **MORPH_CLOSE** closing + + * **MORPH_GRADIENT** morphological gradient + + * **MORPH_TOPHAT** "top hat" + + * **MORPH_BLACKHAT** "black hat" + + + + + :param kernel: Structuring element. + + + :param anchor: Position of the anchor within the element. The default value Point(-1, -1) means that the anchor is at the element center. + + + :param iterations: Number of times erosion and dilation to be applied. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + +See also: +:func:`morphologyEx` +. + + + +.. index:: cv::gpu::createLinearFilter_GPU + +.. _cv::gpu::createLinearFilter_GPU: + +cv::gpu::createLinearFilter_GPU +------------------------------- + +`id=0.974980429209 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr createLinearFilter_GPU(int srcType, int dstType, const Mat\& kernel, const Point\& anchor = Point(-1,-1)) + + Creates the non-separable linear filter. + + + + + +.. cfunction:: Ptr getLinearFilter_GPU(int srcType, int dstType, const Mat\& kernel, const Size\& ksize, Point anchor = Point(-1, -1)) + + + + + + + :param srcType: Input image type. Supports ``CV_8UC1`` and ``CV_8UC4`` . + + + :param dstType: Output image type. Supports only the same as source type. + + + :param kernel: 2D array of filter coefficients. This filter works with integers kernels, if ``kernel`` has ``float`` or ``double`` type it will be used fixed point arithmetic. + + + :param ksize: Kernel size. + + + :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + +See also: +:func:`createLinearFilter` +. + + + +.. 
index:: gpu::filter2D
+
+cv::gpu::filter2D
+-----------------
+
+`id=0.363503314642 Comments from the Wiki `__
+
+.. cfunction:: void filter2D(const GpuMat\& src, GpuMat\& dst, int ddepth, const Mat\& kernel, Point anchor=Point(-1,-1))
+
+    Applies a non-separable 2D linear filter to an image.
+
+    :param src: Source image. Supports ``CV_8UC1`` and ``CV_8UC4`` source types.
+
+    :param dst: Destination image. It will have the same size and the same number of channels as ``src`` .
+
+    :param ddepth: The desired depth of the destination image. If it is negative, it will be the same as ``src.depth()`` . Supports only the same depth as the source image.
+
+    :param kernel: 2D array of filter coefficients. This filter works with integer kernels; if ``kernel`` has ``float`` or ``double`` type, fixed-point arithmetic will be used.
+
+    :param anchor: Anchor of the kernel that indicates the relative position of a filtered point within the kernel. The anchor should lie within the kernel. The special default value (-1,-1) means that the anchor is at the kernel center.
+
+**Please note:** This filter doesn't check out-of-border accesses, so only a proper submatrix of a bigger matrix has to be passed to it.
+
+See also: :func:`filter2D`.
+
+
+.. index:: gpu::Laplacian
+
+cv::gpu::Laplacian
+------------------
+
+`id=0.507549694241 Comments from the Wiki `__
+
+.. cfunction:: void Laplacian(const GpuMat\& src, GpuMat\& dst, int ddepth, int ksize = 1, double scale = 1)
+
+    Applies the Laplacian operator to an image.
+
+    :param src: Source image. Supports ``CV_8UC1`` and ``CV_8UC4`` source types.
+
+    :param dst: Destination image; will have the same size and the same number of channels as ``src`` .
+
+    :param ddepth: Desired depth of the destination image. Supports only the same depth as the source image.
+
+    :param ksize: Aperture size used to compute the second-derivative filters, see :func:`getDerivKernels` . It must be positive and odd. Supports only ``ksize`` = 1 and ``ksize`` = 3.
+
+    :param scale: Optional scale factor for the computed Laplacian values (by default, no scaling is applied, see :func:`getDerivKernels` ).
+
+**Please note:** This filter doesn't check out-of-border accesses, so only a proper submatrix of a bigger matrix has to be passed to it.
+
+See also: :func:`Laplacian`, :func:`gpu::filter2D`.
+
+
+.. index:: cv::gpu::getLinearRowFilter_GPU
+
+.. _cv::gpu::getLinearRowFilter_GPU:
+
+cv::gpu::getLinearRowFilter_GPU
+-------------------------------
+
+`id=0.598565723713 Comments from the Wiki `__
+
+.. cfunction:: Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat\& rowKernel, int anchor = -1, int borderType = BORDER_CONSTANT)
+
+    Creates a primitive row filter with the specified kernel.
+
+    :param srcType: Source array type. Supports only ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` source types.
+
+    :param bufType: Intermediate buffer type; must have as many channels as ``srcType`` .
+
+    :param rowKernel: Filter coefficients.
+
+    :param anchor: Anchor position within the kernel; negative values mean that the anchor is positioned at the aperture center.
+
+    :param borderType: Pixel extrapolation method; see :func:`borderInterpolate` . See below for limitations.
+
+There are two versions of the algorithm: NPP and OpenCV. The NPP version is called when ``srcType == CV_8UC1`` or ``srcType == CV_8UC4`` and ``bufType == srcType`` ; otherwise the OpenCV version is called.
The NPP version supports only the ``BORDER_CONSTANT`` border type and doesn't check indices outside the image. The OpenCV version supports only ``CV_32F`` buffer depth and the ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` and ``BORDER_CONSTANT`` border types, and checks indices outside the image.
+
+See also: :func:`createSeparableLinearFilter`.
+
+
+.. index:: cv::gpu::getLinearColumnFilter_GPU
+
+.. _cv::gpu::getLinearColumnFilter_GPU:
+
+cv::gpu::getLinearColumnFilter_GPU
+----------------------------------
+
+`id=0.79193716532 Comments from the Wiki `__
+
+.. cfunction:: Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat\& columnKernel, int anchor = -1, int borderType = BORDER_CONSTANT)
+
+    Creates a primitive column filter with the specified kernel.
+
+    :param bufType: Intermediate buffer type; must have as many channels as ``dstType`` .
+
+    :param dstType: Destination array type. Supports ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` destination types.
+
+    :param columnKernel: Filter coefficients.
+
+    :param anchor: Anchor position within the kernel; negative values mean that the anchor is positioned at the aperture center.
+
+    :param borderType: Pixel extrapolation method; see :func:`borderInterpolate` . See below for limitations.
+
+There are two versions of the algorithm: NPP and OpenCV. The NPP version is called when ``dstType == CV_8UC1`` or ``dstType == CV_8UC4`` and ``bufType == dstType`` ; otherwise the OpenCV version is called. The NPP version supports only the ``BORDER_CONSTANT`` border type and doesn't check indices outside the image. The OpenCV version supports only ``CV_32F`` buffer depth and the ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` and ``BORDER_CONSTANT`` border types, and checks indices outside the image.
+
+See also: :func:`createSeparableLinearFilter`.
+
+
+.. index:: cv::gpu::createSeparableLinearFilter_GPU
+
+.. _cv::gpu::createSeparableLinearFilter_GPU:
+
+cv::gpu::createSeparableLinearFilter_GPU
+----------------------------------------
+
+`id=0.165460890318 Comments from the Wiki `__
+
+.. cfunction:: Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat\& rowKernel, const Mat\& columnKernel, const Point\& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1)
+
+    Creates the separable linear filter engine.
+
+    :param srcType: Source array type. Supports ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` source types.
+
+    :param dstType: Destination array type. Supports ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` destination types.
+
+    :param rowKernel, columnKernel: Filter coefficients.
+
+    :param anchor: Anchor position within the kernel; negative values mean that the anchor is positioned at the aperture center.
+
+    :param rowBorderType, columnBorderType: Pixel extrapolation method in the horizontal and the vertical directions; see :func:`borderInterpolate` . For limitations, see :func:`gpu::getLinearRowFilter_GPU` and :func:`gpu::getLinearColumnFilter_GPU` .
+
+See also: :func:`gpu::getLinearRowFilter_GPU`, :func:`gpu::getLinearColumnFilter_GPU`, :func:`createSeparableLinearFilter`.
+
+
+.. index:: gpu::sepFilter2D
+
+cv::gpu::sepFilter2D
+--------------------
+
+`id=0.596796635286 Comments from the Wiki `__
+
+.. cfunction:: void sepFilter2D(const GpuMat\& src, GpuMat\& dst, int ddepth, const Mat\& kernelX, const Mat\& kernelY, Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1)
+
+    Applies a separable 2D linear filter to the image.
+
+    :param src: Source image.
Supports ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` source types. + + + :param dst: Destination image; will have the same size and the same number of channels as ``src`` . + + + :param ddepth: Destination image depth. Supports ``CV_8U`` , ``CV_16S`` , ``CV_32S`` and ``CV_32F`` . + + + :param kernelX, kernelY: Filter coefficients. + + + :param anchor: Anchor position within the kernel; The default value :math:`(-1, 1)` means that the anchor is at the kernel center. + + + :param rowBorderType, columnBorderType: Pixel extrapolation method; see :func:`borderInterpolate` . + + + +See also: +, +:func:`sepFilter2D` +. + + + +.. index:: cv::gpu::createDerivFilter_GPU + +.. _cv::gpu::createDerivFilter_GPU: + +cv::gpu::createDerivFilter_GPU +------------------------------ + +`id=0.813463149816 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1) + + Creates filter engine for the generalized Sobel operator. + + + + + + + :param srcType: Source image type. Supports ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` source types. + + + :param dstType: Destination image type; must have as many channels as ``srcType`` . Supports ``CV_8U`` , ``CV_16S`` , ``CV_32S`` and ``CV_32F`` depths. + + + :param dx: Derivative order in respect with x. + + + :param dy: Derivative order in respect with y. + + + :param ksize: Aperture size; see :func:`getDerivKernels` . + + + :param rowBorderType, columnBorderType: Pixel extrapolation method; see :func:`borderInterpolate` . + + + +See also: +, +:func:`createDerivFilter` +. + + + +.. index:: gpu::Sobel + + +cv::gpu::Sobel +-------------- + +`id=0.268268712309 Comments from the Wiki `__ + + + + +.. cfunction:: void Sobel(const GpuMat\& src, GpuMat\& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1) + + Applies generalized Sobel operator to the image. + + + + + + + :param src: Source image. Supports ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` source types. + + + :param dst: Destination image. Will have the same size and number of channels as source image. + + + :param ddepth: Destination image depth. Supports ``CV_8U`` , ``CV_16S`` , ``CV_32S`` and ``CV_32F`` . + + + :param dx: Derivative order in respect with x. + + + :param dy: Derivative order in respect with y. + + + :param ksize: Size of the extended Sobel kernel, must be 1, 3, 5 or 7. + + + :param scale: Optional scale factor for the computed derivative values (by default, no scaling is applied, see :func:`getDerivKernels` ). + + + :param rowBorderType, columnBorderType: Pixel extrapolation method; see :func:`borderInterpolate` . + + + +See also: +, +:func:`Sobel` +. + + + +.. index:: gpu::Scharr + + +cv::gpu::Scharr +--------------- + +`id=0.233481407766 Comments from the Wiki `__ + + + + +.. cfunction:: void Scharr(const GpuMat\& src, GpuMat\& dst, int ddepth, int dx, int dy, double scale = 1, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1) + + Calculates the first x- or y- image derivative using Scharr operator. + + + + + + + :param src: Source image. Supports ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` source types. + + + :param dst: Destination image; will have the same size and the same number of channels as ``src`` . 
+ + + :param ddepth: Destination image depth. Supports ``CV_8U`` , ``CV_16S`` , ``CV_32S`` and ``CV_32F`` . + + + :param xorder: Order of the derivative x. + + + :param yorder: Order of the derivative y. + + + :param scale: Optional scale factor for the computed derivative values (by default, no scaling is applied, see :func:`getDerivKernels` ). + + + :param rowBorderType, columnBorderType: Pixel extrapolation method, see :func:`borderInterpolate` + + + +See also: +, +:func:`Scharr` +. + + + +.. index:: cv::gpu::createGaussianFilter_GPU + +.. _cv::gpu::createGaussianFilter_GPU: + +cv::gpu::createGaussianFilter_GPU +--------------------------------- + +`id=0.963815998788 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr createGaussianFilter_GPU(int type, Size ksize, double sigmaX, double sigmaY = 0, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1) + + Creates Gaussian filter engine. + + + + + + + :param type: Source and the destination image type. Supports ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` . + + + :param ksize: Aperture size; see :func:`getGaussianKernel` . + + + :param sigmaX: Gaussian sigma in the horizontal direction; see :func:`getGaussianKernel` . + + + :param sigmaY: Gaussian sigma in the vertical direction; if 0, then :math:`\texttt{sigmaY}\leftarrow\texttt{sigmaX}` . + + + :param rowBorderType, columnBorderType: Which border type to use; see :func:`borderInterpolate` . + + + +See also: +, +:func:`createGaussianFilter` +. + + + +.. index:: gpu::GaussianBlur + + +cv::gpu::GaussianBlur +--------------------- + +`id=0.207256970896 Comments from the Wiki `__ + + + + +.. cfunction:: void GaussianBlur(const GpuMat\& src, GpuMat\& dst, Size ksize, double sigmaX, double sigmaY = 0, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1) + + Smooths the image using Gaussian filter. + + + + + + + :param src: Source image. Supports ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` source types. + + + :param dst: Destination image; will have the same size and the same type as ``src`` . + + + :param ksize: Gaussian kernel size; ``ksize.width`` and ``ksize.height`` can differ, but they both must be positive and odd. Or, they can be zero's, then they are computed from ``sigmaX`` amd ``sigmaY`` . + + + :param sigmaX, sigmaY: Gaussian kernel standard deviations in X and Y direction. If ``sigmaY`` is zero, it is set to be equal to ``sigmaX`` . If they are both zeros, they are computed from ``ksize.width`` and ``ksize.height`` , respectively, see :func:`getGaussianKernel` . To fully control the result regardless of possible future modification of all this semantics, it is recommended to specify all of ``ksize`` , ``sigmaX`` and ``sigmaY`` . + + + :param rowBorderType, columnBorderType: Pixel extrapolation method; see :func:`borderInterpolate` . + + + +See also: +, +:func:`GaussianBlur` +. + + + +.. index:: cv::gpu::getMaxFilter_GPU + +.. _cv::gpu::getMaxFilter_GPU: + +cv::gpu::getMaxFilter_GPU +------------------------- + +`id=0.531725137196 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr getMaxFilter_GPU(int srcType, int dstType, const Size\& ksize, Point anchor = Point(-1,-1)) + + Creates maximum filter. + + + + + + + :param srcType: Input image type. Supports only ``CV_8UC1`` and ``CV_8UC4`` . + + + :param dstType: Output image type. Supports only the same type as source. + + + :param ksize: Kernel size. + + + :param anchor: Anchor point. 
The default value (-1) means that the anchor is at the kernel center. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. + + + +.. index:: cv::gpu::getMinFilter_GPU + +.. _cv::gpu::getMinFilter_GPU: + +cv::gpu::getMinFilter_GPU +------------------------- + +`id=0.894261463231 Comments from the Wiki `__ + + + + +.. cfunction:: Ptr getMinFilter_GPU(int srcType, int dstType, const Size\& ksize, Point anchor = Point(-1,-1)) + + Creates minimum filter. + + + + + + + :param srcType: Input image type. Supports only ``CV_8UC1`` and ``CV_8UC4`` . + + + :param dstType: Output image type. Supports only the same type as source. + + + :param ksize: Kernel size. + + + :param anchor: Anchor point. The default value (-1) means that the anchor is at the kernel center. + + + +**Please note:** +This filter doesn't check out of border accesses, so only proper submatrix of bigger matrix have to be passed to it. diff --git a/modules/gpu/doc/image_processing.rst b/modules/gpu/doc/image_processing.rst new file mode 100644 index 000000000..65a571469 --- /dev/null +++ b/modules/gpu/doc/image_processing.rst @@ -0,0 +1,1254 @@ +Image Processing +================ + +.. highlight:: cpp + + + +.. index:: gpu::meanShiftFiltering + + +cv::gpu::meanShiftFiltering +--------------------------- + +`id=0.23602781775 Comments from the Wiki `__ + + + + +.. cfunction:: void meanShiftFiltering(const GpuMat\& src, GpuMat\& dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)) + + Performs mean-shift filtering for each point of the source image. It maps each point of the source image into another point, and as the result we have new color and new position of each point. + + + + + + + :param src: Source image. Only ``CV_8UC4`` images are supported for now. + + + :param dst: Destination image, containing color of mapped points. Will have the same size and type as ``src`` . + + + :param sp: Spatial window radius. + + + :param sr: Color window radius. + + + :param criteria: Termination criteria. See . + + + + +.. index:: gpu::meanShiftProc + + +cv::gpu::meanShiftProc +---------------------- + +`id=0.410728382716 Comments from the Wiki `__ + + + + +.. cfunction:: void meanShiftProc(const GpuMat\& src, GpuMat\& dstr, GpuMat\& dstsp, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)) + + Performs mean-shift procedure and stores information about processed points (i.e. their colors and positions) into two images. + + + + + + + :param src: Source image. Only ``CV_8UC4`` images are supported for now. + + + :param dstr: Destination image, containing color of mapped points. Will have the same size and type as ``src`` . + + + :param dstsp: Destination image, containing position of mapped points. Will have the same size as ``src`` and ``CV_16SC2`` type. + + + :param sp: Spatial window radius. + + + :param sr: Color window radius. + + + :param criteria: Termination criteria. See . + + + +See also: +:func:`gpu::meanShiftFiltering` +. + + + +.. index:: gpu::meanShiftSegmentation + + +cv::gpu::meanShiftSegmentation +------------------------------ + +`id=0.906705807016 Comments from the Wiki `__ + + + + +.. 
cfunction:: void meanShiftSegmentation(const GpuMat\& src, Mat\& dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
+
+    Performs mean-shift segmentation of the source image and eliminates small segments.
+
+    :param src: Source image. Only ``CV_8UC4`` images are supported for now.
+
+    :param dst: Segmented image. Will have the same size and type as ``src`` .
+
+    :param sp: Spatial window radius.
+
+    :param sr: Color window radius.
+
+    :param minsize: Minimum segment size. Smaller segments will be merged.
+
+    :param criteria: Termination criteria. See ``TermCriteria`` .
+
+
+.. index:: gpu::integral
+
+cv::gpu::integral
+-----------------
+
+`id=0.330912131672 Comments from the Wiki `__
+
+.. cfunction:: void integral(const GpuMat\& src, GpuMat\& sum)
+
+.. cfunction:: void integral(const GpuMat\& src, GpuMat\& sum, GpuMat\& sqsum)
+
+    Computes an integral image and a squared integral image.
+
+    :param src: Source image. Only ``CV_8UC1`` images are supported for now.
+
+    :param sum: Integral image. Will contain 32-bit unsigned integer values packed into ``CV_32SC1`` .
+
+    :param sqsum: Squared integral image. Will have ``CV_32FC1`` type.
+
+See also: :func:`integral`.
+
+
+.. index:: gpu::sqrIntegral
+
+cv::gpu::sqrIntegral
+--------------------
+
+`id=0.244733485755 Comments from the Wiki `__
+
+.. cfunction:: void sqrIntegral(const GpuMat\& src, GpuMat\& sqsum)
+
+    Computes a squared integral image.
+
+    :param src: Source image. Only ``CV_8UC1`` images are supported for now.
+
+    :param sqsum: Squared integral image. Will contain 64-bit unsigned integer values packed into ``CV_64FC1`` .
+
+
+.. index:: gpu::columnSum
+
+cv::gpu::columnSum
+------------------
+
+`id=0.436764189613 Comments from the Wiki `__
+
+.. cfunction:: void columnSum(const GpuMat\& src, GpuMat\& sum)
+
+    Computes a vertical (column) sum.
+
+    :param src: Source image. Only ``CV_32FC1`` images are supported for now.
+
+    :param sum: Destination image. Will have ``CV_32FC1`` type.
+
+
+.. index:: gpu::cornerHarris
+
+cv::gpu::cornerHarris
+---------------------
+
+`id=0.31083169009 Comments from the Wiki `__
+
+.. cfunction:: void cornerHarris(const GpuMat\& src, GpuMat\& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101)
+
+    Computes the Harris cornerness criteria at each image pixel.
+
+    :param src: Source image. Only ``CV_8UC1`` and ``CV_32FC1`` images are supported for now.
+
+    :param dst: Destination image. Will have the same size and ``CV_32FC1`` type and contain cornerness values.
+
+    :param blockSize: Neighborhood size.
+
+    :param ksize: Aperture parameter for the Sobel operator.
+
+    :param k: Harris detector free parameter.
+
+    :param borderType: Pixel extrapolation method. Only ``BORDER_REFLECT101`` and ``BORDER_REPLICATE`` are supported for now.
+
+See also: :func:`cornerHarris`.
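+
+A minimal sketch of computing the Harris response map on the GPU (the ``blockSize`` , ``ksize`` and ``k`` values are illustrative; ``grayCpu`` is an assumed ``CV_8UC1`` host image):
+
+::
+
+    gpu::GpuMat src(grayCpu);   // upload the grayscale input
+    gpu::GpuMat dst;            // receives the CV_32FC1 cornerness map
+    gpu::cornerHarris(src, dst, 5 /*blockSize*/, 3 /*ksize*/, 0.04 /*k*/);
+
+..
+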
+.. index:: gpu::cornerMinEigenVal
+
+cv::gpu::cornerMinEigenVal
+--------------------------
+
+`id=0.110930029829 Comments from the Wiki `__
+
+.. cfunction:: void cornerMinEigenVal(const GpuMat\& src, GpuMat\& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101)
+
+    Computes the minimum eigenvalue of the 2x2 derivative covariance matrix at each pixel (the cornerness criteria).
+
+    :param src: Source image. Only ``CV_8UC1`` and ``CV_32FC1`` images are supported for now.
+
+    :param dst: Destination image. Will have the same size and ``CV_32FC1`` type and contain cornerness values.
+
+    :param blockSize: Neighborhood size.
+
+    :param ksize: Aperture parameter for the Sobel operator.
+
+    :param borderType: Pixel extrapolation method. Only ``BORDER_REFLECT101`` and ``BORDER_REPLICATE`` are supported for now.
+
+See also: :func:`cornerMinEigenVal`.
+
+
+.. index:: gpu::mulSpectrums
+
+cv::gpu::mulSpectrums
+---------------------
+
+`id=0.729621548876 Comments from the Wiki `__
+
+.. cfunction:: void mulSpectrums(const GpuMat\& a, const GpuMat\& b, GpuMat\& c, int flags, bool conjB=false)
+
+    Performs per-element multiplication of two Fourier spectrums.
+
+    :param a: First spectrum.
+
+    :param b: Second spectrum. Must have the same size and type as ``a`` .
+
+    :param c: Destination spectrum.
+
+    :param flags: Mock parameter kept for CPU/GPU interface similarity.
+
+    :param conjB: Optional flag which indicates that the second spectrum must be conjugated before the multiplication.
+
+Only full (i.e. not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
+
+See also: :func:`mulSpectrums`.
+
+
+.. index:: gpu::mulAndScaleSpectrums
+
+cv::gpu::mulAndScaleSpectrums
+-----------------------------
+
+`id=0.236710779882 Comments from the Wiki `__
+
+.. cfunction:: void mulAndScaleSpectrums(const GpuMat\& a, const GpuMat\& b, GpuMat\& c, int flags, float scale, bool conjB=false)
+
+    Performs per-element multiplication of two Fourier spectrums and scales the result.
+
+    :param a: First spectrum.
+
+    :param b: Second spectrum. Must have the same size and type as ``a`` .
+
+    :param c: Destination spectrum.
+
+    :param flags: Mock parameter kept for CPU/GPU interface similarity.
+
+    :param scale: Scale constant.
+
+    :param conjB: Optional flag which indicates that the second spectrum must be conjugated before the multiplication.
+
+Only full (i.e. not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
+
+See also: :func:`mulSpectrums`.
+
+
+.. index:: gpu::dft
+
+cv::gpu::dft
+------------
+
+`id=0.803545808688 Comments from the Wiki `__
+
+.. cfunction:: void dft(const GpuMat\& src, GpuMat\& dst, Size dft_size, int flags=0)
+
+    Performs a forward or inverse discrete Fourier transform (1D or 2D) of a floating-point matrix. Can handle real matrices ( ``CV_32FC1`` ) and complex matrices in the interleaved format ( ``CV_32FC2`` ).
+
+    :param src: Source matrix (real or complex).
+
+    :param dst: Destination matrix (real or complex).
+
+    :param dft_size: Size of the discrete Fourier transform.
+
+    :param flags: Optional flags:
+
+        * **DFT_ROWS** Transform each individual row of the source matrix.
+
+        * **DFT_SCALE** Scale the result: divide it by the number of elements in the transform (it's obtained from ``dft_size`` ).
+
+        * **DFT_INVERSE** Inverse DFT must be performed for the complex-complex case (the real-complex and complex-real cases are always forward and inverse, respectively).
+
+        * **DFT_REAL_OUTPUT** The source matrix is the result of a real-complex transform, so the destination matrix must be real.
+
+The source matrix should be continuous, otherwise reallocation and data copying will be performed.
Function chooses the operation mode depending on the flags, size and channel count of the source matrix: + + + + +* + If the source matrix is complex and the output isn't specified as real then the destination matrix will be complex, will have + ``dft_size`` + size and + ``CV_32FC2`` + type. It will contain full result of the DFT (forward or inverse). + + + +* + If the source matrix is complex and the output is specified as real then function assumes that its input is the result of the forward transform (see next item). The destionation matrix will have + ``dft_size`` + size and + ``CV_32FC1`` + type. It will contain result of the inverse DFT. + + + +* + If the source matrix is real (i.e. its type is + ``CV_32FC1`` + ) then forward DFT will be performed. The result of the DFT will be packed into complex ( + ``CV_32FC2`` + ) matrix so its width will be + ``dft_size.width / 2 + 1`` + , but if the source is a single column then height will be reduced instead of width. + + +See also: +:func:`dft` +. + + + +.. index:: gpu::convolve + + +cv::gpu::convolve +----------------- + +`id=0.0322924265809 Comments from the Wiki `__ + + + + +.. cfunction:: void convolve(const GpuMat\& image, const GpuMat\& templ, GpuMat\& result, bool ccorr=false) + + + +.. cfunction:: void convolve(const GpuMat\& image, const GpuMat\& templ, GpuMat\& result, bool ccorr, ConvolveBuf\& buf) + + Computes convolution (or cross-correlation) of two images. + + + + + + + :param image: Source image. Only ``CV_32FC1`` images are supported for now. + + + :param templ: Template image. Must have size not greater then ``image`` size and be the same type as ``image`` . + + + :param result: Result image. Will have the same size and type as ``image`` . + + + :param ccorr: Flags which indicates cross-correlation must be evaluated instead of convolution. + + + :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). + + + + +.. index:: gpu::ConvolveBuf + +.. _gpu::ConvolveBuf: + +gpu::ConvolveBuf +---------------- + +`id=0.79486121673 Comments from the Wiki `__ + +.. ctype:: gpu::ConvolveBuf + + + +Memory buffer for the +:func:`gpu::convolve` +function. + + + + +:: + + + + struct CV_EXPORTS ConvolveBuf + { + ConvolveBuf() {} + ConvolveBuf(Size image_size, Size templ_size) + { create(image_size, templ_size); } + void create(Size image_size, Size templ_size); + + private: + // Hidden + }; + + +.. + + +.. index:: gpu::ConvolveBuf::ConvolveBuf + + +cv::gpu::ConvolveBuf::ConvolveBuf +--------------------------------- + +`id=0.0623603184536 Comments from the Wiki `__ + + + + +.. cfunction:: ConvolveBuf::ConvolveBuf() + + + +Constructs an empty buffer which will be properly resized after first call of the convolve function. + + + +.. cfunction:: ConvolveBuf::ConvolveBuf(Size image_size, Size templ_size) + + + +Constructs a buffer for the convolve function with respectively arguments. + + + +.. index:: gpu::matchTemplate + + +cv::gpu::matchTemplate +---------------------- + +`id=0.238057283546 Comments from the Wiki `__ + + + + +.. cfunction:: void matchTemplate(const GpuMat\& image, const GpuMat\& templ, GpuMat\& result, int method) + + Computes a proximity map for a raster template and an image where the template is searched for. + + + + + + + :param image: Source image. ``CV_32F`` and ``CV_8U`` depth images (1..4 channels) are supported for now. + + + :param templ: Template image. Must have the same size and type as ``image`` . + + + :param result: Map containing comparison results ( ``CV_32FC1`` ). 
If ``image`` is :math:`W \times H` and ``templ`` is :math:`w \times h` then ``result`` must be :math:`(W-w+1) \times (H-h+1)` .
+
+    :param method: Specifies the way the template must be compared with the image.
+
+The following methods are supported for ``CV_8U`` depth images for now:
+
+* ``CV_TM_SQDIFF``
+* ``CV_TM_SQDIFF_NORMED``
+* ``CV_TM_CCORR``
+* ``CV_TM_CCORR_NORMED``
+* ``CV_TM_CCOEFF``
+* ``CV_TM_CCOEFF_NORMED``
+
+The following methods are supported for ``CV_32F`` images for now:
+
+* ``CV_TM_SQDIFF``
+* ``CV_TM_CCORR``
+
+See also: :func:`matchTemplate`.
+
+
+.. index:: gpu::remap
+
+cv::gpu::remap
+--------------
+
+`id=0.530209785207 Comments from the Wiki `__
+
+.. cfunction:: void remap(const GpuMat\& src, GpuMat\& dst, const GpuMat\& xmap, const GpuMat\& ymap)
+
+    Applies a generic geometrical transformation to an image.
+
+    :param src: Source image. Only ``CV_8UC1`` and ``CV_8UC3`` source types are supported.
+
+    :param dst: Destination image. It will have the same size as ``xmap`` and the same type as ``src`` .
+
+    :param xmap: X values. Only ``CV_32FC1`` type is supported.
+
+    :param ymap: Y values. Only ``CV_32FC1`` type is supported.
+
+The function transforms the source image using the specified map:
+
+.. math::
+
+    \texttt{dst} (x,y) = \texttt{src} (xmap(x,y), ymap(x,y))
+
+Values of pixels with non-integer coordinates are computed using bilinear interpolation.
+
+See also: :func:`remap`.
+
+
+.. index:: gpu::cvtColor
+
+cv::gpu::cvtColor
+-----------------
+
+`id=0.776821974009 Comments from the Wiki `__
+
+.. cfunction:: void cvtColor(const GpuMat\& src, GpuMat\& dst, int code, int dcn = 0)
+
+.. cfunction:: void cvtColor(const GpuMat\& src, GpuMat\& dst, int code, int dcn, const Stream\& stream)
+
+    Converts an image from one color space to another.
+
+    :param src: Source image with ``CV_8U`` , ``CV_16U`` or ``CV_32F`` depth and 1, 3 or 4 channels.
+
+    :param dst: Destination image; will have the same size and the same depth as ``src`` .
+
+    :param code: Color space conversion code. For details, see :func:`cvtColor` . Conversion to/from the Luv and Bayer color spaces is not supported.
+
+    :param dcn: Number of channels in the destination image; if the parameter is 0, the number of channels will be derived automatically from ``src`` and ``code`` .
+
+    :param stream: Stream for the asynchronous version.
+
+3-channel color spaces (like ``HSV`` , ``XYZ`` , etc) can be stored in a 4-channel image for better performance.
+
+See also: :func:`cvtColor`.
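+
+A short sketch combining ``gpu::cvtColor`` with ``gpu::threshold`` (documented next); ``bgrCpu`` is an assumed ``CV_8UC3`` host image and the threshold values are illustrative:
+
+::
+
+    gpu::GpuMat bgr(bgrCpu), gray, bin;
+    gpu::cvtColor(bgr, gray, CV_BGR2GRAY);           // grayscale on the device
+    gpu::threshold(gray, bin, 128.0, 255.0, THRESH_BINARY);
+
+..
+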
+
+.. index:: gpu::threshold
+
+cv::gpu::threshold
+------------------
+
+.. cfunction:: double threshold(const GpuMat\& src, GpuMat\& dst, double thresh, double maxval, int type)
+
+.. cfunction:: double threshold(const GpuMat\& src, GpuMat\& dst, double thresh, double maxval, int type, const Stream\& stream)
+
+    Applies a fixed-level threshold to each array element.
+
+    :param src: Source array (single-channel; ``CV_64F`` depth is not supported).
+
+    :param dst: Destination array; will have the same size and the same type as ``src``.
+
+    :param thresh: Threshold value.
+
+    :param maxval: Maximum value to use with ``THRESH_BINARY`` and ``THRESH_BINARY_INV`` thresholding types.
+
+    :param type: Thresholding type. For details see :func:`threshold`. The ``THRESH_OTSU`` thresholding type is not supported.
+
+    :param stream: Stream for the asynchronous version.
+
+See also: :func:`threshold`.
+
+
+.. index:: gpu::resize
+
+cv::gpu::resize
+---------------
+
+.. cfunction:: void resize(const GpuMat\& src, GpuMat\& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR)
+
+    Resizes an image.
+
+    :param src: Source image. Supports ``CV_8UC1`` and ``CV_8UC4`` types.
+
+    :param dst: Destination image. It will have size ``dsize`` (when it is non-zero) or the size computed from ``src.size()``, ``fx`` and ``fy``. The type of ``dst`` will be the same as of ``src``.
+
+    :param dsize: Destination image size. If it is zero, then it is computed as:
+
+        .. math::
+
+            \texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}
+
+        Either ``dsize`` or both ``fx`` and ``fy`` must be non-zero.
+
+    :param fx: Scale factor along the horizontal axis. When 0, it is computed as
+
+        .. math::
+
+            \texttt{(double)dsize.width/src.cols}
+
+    :param fy: Scale factor along the vertical axis. When 0, it is computed as
+
+        .. math::
+
+            \texttt{(double)dsize.height/src.rows}
+
+    :param interpolation: Interpolation method. Supports only ``INTER_NEAREST`` and ``INTER_LINEAR``.
+
+See also: :func:`resize`.
+
+
+.. index:: gpu::warpAffine
+
+cv::gpu::warpAffine
+-------------------
+
+.. cfunction:: void warpAffine(const GpuMat\& src, GpuMat\& dst, const Mat\& M, Size dsize, int flags = INTER_LINEAR)
+
+    Applies an affine transformation to an image.
+
+    :param src: Source image. Supports ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth and 1, 3 or 4 channels.
+
+    :param dst: Destination image; will have size ``dsize`` and the same type as ``src``.
+
+    :param M: :math:`2 \times 3` transformation matrix.
+
+    :param dsize: Size of the destination image.
+
+    :param flags: Combination of interpolation methods, see :func:`resize`, and the optional flag ``WARP_INVERSE_MAP`` that means that ``M`` is the inverse transformation (:math:`\texttt{dst}\rightarrow\texttt{src}`). Supports only ``INTER_NEAREST``, ``INTER_LINEAR`` and ``INTER_CUBIC`` interpolation methods.
+
+See also: :func:`warpAffine`.
+
+
+.. index:: gpu::warpPerspective
+
+cv::gpu::warpPerspective
+------------------------
+
+.. cfunction:: void warpPerspective(const GpuMat\& src, GpuMat\& dst, const Mat\& M, Size dsize, int flags = INTER_LINEAR)
+
+    Applies a perspective transformation to an image.
+
+    :param src: Source image. Supports ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth and 1, 3 or 4 channels.
+
+    :param dst: Destination image; will have size ``dsize`` and the same type as ``src``.
+
+    :param M: :math:`3 \times 3` transformation matrix.
+
+    :param dsize: Size of the destination image.
+
+    :param flags: Combination of interpolation methods, see :func:`resize`, and the optional flag ``WARP_INVERSE_MAP`` that means that ``M`` is the inverse transformation (:math:`\texttt{dst}\rightarrow\texttt{src}`). Supports only ``INTER_NEAREST``, ``INTER_LINEAR`` and ``INTER_CUBIC`` interpolation methods.
+
+See also: :func:`warpPerspective`.
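+
+As a quick illustration, the following minimal sketch rotates an image on the GPU with ``gpu::warpAffine``; the input file name is a placeholder and the 30-degree angle is an arbitrary choice:
+
+::
+
+    #include <opencv2/opencv.hpp>
+    #include <opencv2/gpu/gpu.hpp>
+
+    int main()
+    {
+        cv::Mat frame = cv::imread("frame.png"); // placeholder 8-bit BGR input
+        cv::gpu::GpuMat src(frame), dst;
+
+        // 2x3 affine matrix: rotate 30 degrees around the image center.
+        cv::Mat M = cv::getRotationMatrix2D(
+            cv::Point2f(frame.cols / 2.f, frame.rows / 2.f), 30.0, 1.0);
+
+        cv::gpu::warpAffine(src, dst, M, src.size(), cv::INTER_LINEAR);
+
+        cv::Mat out;
+        dst.download(out);
+        cv::imwrite("rotated.png", out);
+        return 0;
+    }
+
+..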
+
+.. index:: gpu::rotate
+
+cv::gpu::rotate
+---------------
+
+.. cfunction:: void rotate(const GpuMat\& src, GpuMat\& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR)
+
+    Rotates an image around the origin (0,0) and then shifts it.
+
+    :param src: Source image. Supports ``CV_8UC1`` and ``CV_8UC4`` types.
+
+    :param dst: Destination image; will have size ``dsize`` and the same type as ``src``.
+
+    :param dsize: Size of the destination image.
+
+    :param angle: Angle of rotation in degrees.
+
+    :param xShift: Shift along the horizontal axis.
+
+    :param yShift: Shift along the vertical axis.
+
+    :param interpolation: Interpolation method. Supports only ``INTER_NEAREST``, ``INTER_LINEAR`` and ``INTER_CUBIC``.
+
+See also: :func:`gpu::warpAffine`.
+
+
+.. index:: gpu::copyMakeBorder
+
+cv::gpu::copyMakeBorder
+-----------------------
+
+.. cfunction:: void copyMakeBorder(const GpuMat\& src, GpuMat\& dst, int top, int bottom, int left, int right, const Scalar\& value = Scalar())
+
+    Copies a 2D array to a larger destination array and pads the borders with the given constant.
+
+    :param src: Source image. Supports ``CV_8UC1``, ``CV_8UC4``, ``CV_32SC1`` and ``CV_32FC1`` types.
+
+    :param dst: Destination image; will have the same type as ``src`` and the size ``Size(src.cols+left+right, src.rows+top+bottom)``.
+
+    :param top, bottom, left, right: Specify how many pixels in each direction from the source image rectangle to extrapolate, e.g. ``top=1, bottom=1, left=1, right=1`` means that a 1 pixel-wide border needs to be built.
+
+    :param value: Border value.
+
+See also: :func:`copyMakeBorder`.
+
+
+.. index:: gpu::rectStdDev
+
+cv::gpu::rectStdDev
+-------------------
+
+.. cfunction:: void rectStdDev(const GpuMat\& src, const GpuMat\& sqr, GpuMat\& dst, const Rect\& rect)
+
+    Computes the standard deviation from integral images, over the given rectangular window.
+
+    :param src: Source image. Supports only ``CV_32SC1`` type.
+
+    :param sqr: Squared source image. Supports only ``CV_32FC1`` type.
+
+    :param dst: Destination image; will have the same type and the same size as ``src``.
+
+    :param rect: Rectangular window.
+
+
+.. index:: gpu::evenLevels
+
+cv::gpu::evenLevels
+-------------------
+
+.. cfunction:: void evenLevels(GpuMat\& levels, int nLevels, int lowerLevel, int upperLevel)
+
+    Computes levels with even distribution.
+
+    :param levels: Destination array. ``levels`` will have 1 row, ``nLevels`` cols and ``CV_32SC1`` type.
+
+    :param nLevels: Number of levels being computed. ``nLevels`` must be at least 2.
+
+    :param lowerLevel: Lower boundary value of the lowest level.
+
+    :param upperLevel: Upper boundary value of the greatest level.
+
+
+.. index:: gpu::histEven
+
+cv::gpu::histEven
+-----------------
+
+.. cfunction:: void histEven(const GpuMat\& src, GpuMat\& hist, int histSize, int lowerLevel, int upperLevel)
+
+.. cfunction:: void histEven(const GpuMat\& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4])
+
+    Calculates a histogram with evenly distributed bins.
+
+    :param src: Source image. Supports ``CV_8U``, ``CV_16U`` or ``CV_16S`` depth and 1 or 4 channels. For a four-channel image all channels are processed separately.
+
+    :param hist: Destination histogram. Will have one row, ``histSize`` cols and ``CV_32S`` type.
+
+    :param histSize: Size of the histogram (number of bins).
+
+    :param lowerLevel: Lower boundary of the lowest level bin.
+
+    :param upperLevel: Upper boundary of the highest level bin.
+
+
+.. index:: gpu::histRange
+
+cv::gpu::histRange
+------------------
+
+.. cfunction:: void histRange(const GpuMat\& src, GpuMat\& hist, const GpuMat\& levels)
+
+.. cfunction:: void histRange(const GpuMat\& src, GpuMat hist[4], const GpuMat levels[4])
+
+    Calculates a histogram with bins determined by the levels array.
+
+    :param src: Source image. Supports ``CV_8U``, ``CV_16U`` or ``CV_16S`` depth and 1 or 4 channels. For a four-channel image all channels are processed separately.
+
+    :param hist: Destination histogram. Will have one row, ``(levels.cols-1)`` cols and ``CV_32SC1`` type.
+
+    :param levels: Array of histogram level boundaries.
diff --git a/modules/gpu/doc/initalization_and_information.rst b/modules/gpu/doc/initalization_and_information.rst
new file mode 100644
index 000000000..f80e7279b
--- /dev/null
+++ b/modules/gpu/doc/initalization_and_information.rst
@@ -0,0 +1,402 @@
+Initialization and Information
+==============================
+
+.. highlight:: cpp
+
+
+.. index:: gpu::getCudaEnabledDeviceCount
+
+cv::gpu::getCudaEnabledDeviceCount
+----------------------------------
+
+.. cfunction:: int getCudaEnabledDeviceCount()
+
+    Returns the number of CUDA-enabled devices installed. It is to be used before any other GPU function calls. If OpenCV is compiled without GPU support, this function returns 0.
+
+
+.. index:: gpu::setDevice
+
+cv::gpu::setDevice
+------------------
+
+.. cfunction:: void setDevice(int device)
+
+    Sets a device and initializes it for the current thread. The call of this function can be omitted, but in that case a default device is initialized on the first GPU usage.
+
+    :param device: Index of the GPU device in the system, starting with 0.
+
+
+.. index:: gpu::getDevice
+
+cv::gpu::getDevice
+------------------
+
+.. cfunction:: int getDevice()
+
+    Returns the current device index that was set by :func:`gpu::setDevice` or initialized by default.
+
+
+.. index:: gpu::GpuFeature
+
+.. _gpu::GpuFeature:
+
+gpu::GpuFeature
+---------------
+
+.. ctype:: gpu::GpuFeature
+
+GPU compute features.
+
+::
+
+    enum GpuFeature
+    {
+        COMPUTE_10, COMPUTE_11,
+        COMPUTE_12, COMPUTE_13,
+        COMPUTE_20, COMPUTE_21,
+        ATOMICS, NATIVE_DOUBLE
+    };
+
+..
+
+
+.. index:: gpu::DeviceInfo
+
+.. _gpu::DeviceInfo:
+
+gpu::DeviceInfo
+---------------
+
+.. ctype:: gpu::DeviceInfo
+
+This class provides functionality for querying the specified GPU properties.
+
+::
+
+    class CV_EXPORTS DeviceInfo
+    {
+    public:
+        DeviceInfo();
+        DeviceInfo(int device_id);
+
+        string name() const;
+
+        int majorVersion() const;
+        int minorVersion() const;
+
+        int multiProcessorCount() const;
+
+        size_t freeMemory() const;
+        size_t totalMemory() const;
+
+        bool supports(GpuFeature feature) const;
+        bool isCompatible() const;
+    };
+
+..
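+
+A short sketch, built only on the interface above, that enumerates the installed CUDA devices and prints the properties this class can query (the individual methods are documented below):
+
+::
+
+    #include <iostream>
+    #include <opencv2/gpu/gpu.hpp>
+
+    int main()
+    {
+        int num_devices = cv::gpu::getCudaEnabledDeviceCount();
+        for (int i = 0; i < num_devices; ++i)
+        {
+            cv::gpu::DeviceInfo info(i);
+            std::cout << i << ": " << info.name()
+                      << ", compute capability " << info.majorVersion()
+                      << "." << info.minorVersion()
+                      << ", " << info.multiProcessorCount() << " multiprocessors"
+                      << (info.isCompatible() ? "" : " (incompatible with this build)")
+                      << std::endl;
+        }
+        return 0;
+    }
+
+..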
+
+
+.. index:: gpu::DeviceInfo::DeviceInfo
+
+cv::gpu::DeviceInfo::DeviceInfo
+-------------------------------
+
+.. cfunction:: DeviceInfo::DeviceInfo()
+
+.. cfunction:: DeviceInfo::DeviceInfo(int device_id)
+
+    Constructs a DeviceInfo object for the specified device. If the ``device_id`` parameter is omitted, it constructs an object for the current device.
+
+    :param device_id: Index of the GPU device in the system, starting with 0.
+
+
+.. index:: gpu::DeviceInfo::name
+
+cv::gpu::DeviceInfo::name
+-------------------------
+
+.. cfunction:: string DeviceInfo::name()
+
+    Returns the device name.
+
+
+.. index:: gpu::DeviceInfo::majorVersion
+
+cv::gpu::DeviceInfo::majorVersion
+---------------------------------
+
+.. cfunction:: int DeviceInfo::majorVersion()
+
+    Returns the major compute capability version.
+
+
+.. index:: gpu::DeviceInfo::minorVersion
+
+cv::gpu::DeviceInfo::minorVersion
+---------------------------------
+
+.. cfunction:: int DeviceInfo::minorVersion()
+
+    Returns the minor compute capability version.
+
+
+.. index:: gpu::DeviceInfo::multiProcessorCount
+
+cv::gpu::DeviceInfo::multiProcessorCount
+----------------------------------------
+
+.. cfunction:: int DeviceInfo::multiProcessorCount()
+
+    Returns the number of streaming multiprocessors.
+
+
+.. index:: gpu::DeviceInfo::freeMemory
+
+cv::gpu::DeviceInfo::freeMemory
+-------------------------------
+
+.. cfunction:: size_t DeviceInfo::freeMemory()
+
+    Returns the amount of free memory in bytes.
+
+
+.. index:: gpu::DeviceInfo::totalMemory
+
+cv::gpu::DeviceInfo::totalMemory
+--------------------------------
+
+.. cfunction:: size_t DeviceInfo::totalMemory()
+
+    Returns the amount of total memory in bytes.
+
+
+.. index:: gpu::DeviceInfo::supports
+
+cv::gpu::DeviceInfo::supports
+-----------------------------
+
+.. cfunction:: bool DeviceInfo::supports(GpuFeature feature)
+
+    Returns true if the device has the given GPU feature, otherwise false.
+
+    :param feature: Feature to be checked. See :ref:`gpu::GpuFeature`.
+
+
+.. index:: gpu::DeviceInfo::isCompatible
+
+cv::gpu::DeviceInfo::isCompatible
+---------------------------------
+
+.. cfunction:: bool DeviceInfo::isCompatible()
+
+    Returns true if the GPU module can be run on the specified device, otherwise false.
+
+
+.. index:: gpu::TargetArchs
+
+.. _gpu::TargetArchs:
+
+gpu::TargetArchs
+----------------
+
+.. ctype:: gpu::TargetArchs
+
+This class provides functionality (as a set of static methods) for checking which NVIDIA card architectures the GPU module was built for.
+
+The following method checks whether the module was built with the support of the given feature:
+
+.. cfunction:: static bool builtWith(GpuFeature feature)
+
+    :param feature: Feature to be checked. See :ref:`gpu::GpuFeature`.
+
+There is a set of methods for checking whether the module contains intermediate (PTX) or binary GPU code for the given architecture(s):
+
+.. cfunction:: static bool has(int major, int minor)
+
+.. cfunction:: static bool hasPtx(int major, int minor)
+
+.. cfunction:: static bool hasBin(int major, int minor)
+
+.. cfunction:: static bool hasEqualOrLessPtx(int major, int minor)
+
+.. cfunction:: static bool hasEqualOrGreater(int major, int minor)
+
+.. cfunction:: static bool hasEqualOrGreaterPtx(int major, int minor)
+
+.. cfunction:: static bool hasEqualOrGreaterBin(int major, int minor)
+
+    * **major** Major compute capability version.
+
+    * **minor** Minor compute capability version.
+
+According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute capability can always be compiled to binary code of greater or equal compute capability".
diff --git a/modules/gpu/doc/introduction.rst b/modules/gpu/doc/introduction.rst
new file mode 100644
index 000000000..765a2f8dd
--- /dev/null
+++ b/modules/gpu/doc/introduction.rst
@@ -0,0 +1,122 @@
+GPU module introduction
+=======================
+
+.. highlight:: cpp
+
+
+General information
+-------------------
+
+The OpenCV GPU module is a set of classes and functions to utilize GPU computational capabilities. It is implemented using the NVidia CUDA Runtime API, so only NVidia GPUs are supported. It includes utility functions, low-level vision primitives as well as high-level algorithms. The utility functions and low-level primitives provide a powerful infrastructure for developing fast vision algorithms that take advantage of the GPU, whereas the high-level functionality includes some state-of-the-art algorithms (such as stereo correspondence, face and people detectors, etc.) ready to be used by application developers.
+
+The GPU module is designed as a host-level API, i.e. if a user has pre-compiled OpenCV GPU binaries, it is not necessary to have the Cuda Toolkit installed or to write any extra code to make use of the GPU.
+
+The GPU module depends on the Cuda Toolkit and the NVidia Performance Primitives library (NPP). Make sure you have the latest versions of those. The two libraries can be downloaded from the NVidia site for all supported platforms. To compile the OpenCV GPU module you will need a compiler compatible with the Cuda Runtime Toolkit.
+
+The OpenCV GPU module is designed for ease of use and does not require any knowledge of Cuda. Such knowledge will certainly be useful in non-trivial cases, though, or when you want to get the highest performance: it helps to understand the costs of various operations, what the GPU does, what the preferred data formats are, etc. The GPU module is an effective instrument for quick implementation of GPU-accelerated computer vision algorithms. However, if your algorithm involves many simple operations, then, for the best possible performance, you may still need to write your own kernels to avoid extra write and read operations on the intermediate results.
+
+To enable CUDA support, configure OpenCV using CMake with ``WITH_CUDA=ON``. When the flag is set and CUDA is installed, the full-featured OpenCV GPU module is built. Otherwise, the module is still built, but at runtime all functions from it throw :func:`Exception` with the ``CV_GpuNotSupported`` error code, except for :func:`gpu::getCudaEnabledDeviceCount()`. The latter function returns a zero GPU count in this case. Building OpenCV without CUDA support does not perform device code compilation, so it does not require the Cuda Toolkit to be installed. Therefore, using the :func:`gpu::getCudaEnabledDeviceCount()` function it is possible to implement a high-level algorithm that detects GPU presence at runtime and chooses the appropriate implementation (CPU or GPU) accordingly.
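+
+A minimal sketch of that dispatch pattern follows; the threshold call merely stands in for real work, and the synthetic single-channel input is an assumption of this example:
+
+::
+
+    #include <opencv2/opencv.hpp>
+    #include <opencv2/gpu/gpu.hpp>
+
+    void processOnGpu(const cv::Mat& frame)
+    {
+        cv::gpu::GpuMat src(frame), dst;
+        cv::gpu::threshold(src, dst, 128, 255, cv::THRESH_BINARY);
+    }
+
+    void processOnCpu(const cv::Mat& frame)
+    {
+        cv::Mat dst;
+        cv::threshold(frame, dst, 128, 255, cv::THRESH_BINARY);
+    }
+
+    int main()
+    {
+        cv::Mat frame(480, 640, CV_8UC1, cv::Scalar(100)); // synthetic input
+
+        // Returns 0 both when OpenCV was built without CUDA support and
+        // when no CUDA-enabled device is present, so the check is always safe.
+        if (cv::gpu::getCudaEnabledDeviceCount() > 0)
+            processOnGpu(frame);
+        else
+            processOnCpu(frame);
+        return 0;
+    }
+
+..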
+
+
+Compilation for different NVidia platforms
+------------------------------------------
+
+The NVidia compiler allows generating binary code (cubin and fatbin) and intermediate code (PTX). Binary code often implies a specific GPU architecture and generation, so the compatibility with other GPUs is not guaranteed. PTX is targeted for a virtual platform, which is defined entirely by a set of capabilities, or features. Depending on the virtual platform chosen, some of the instructions are emulated or disabled, even if the real hardware supports all the features.
+
+On the first call, the PTX code is compiled to binary code for the particular GPU using a JIT compiler. When the target GPU has a lower compute capability (CC) than the PTX code, JIT fails.
+
+By default, the OpenCV GPU module includes:
+
+* Binaries for compute capabilities 1.3 and 2.0 (controlled by ``CUDA_ARCH_BIN`` in CMake)
+
+* PTX code for compute capabilities 1.1 and 1.3 (controlled by ``CUDA_ARCH_PTX`` in CMake)
+
+That means that for devices with CC 1.3 and 2.0 binary images are ready to run. For all newer platforms, the PTX code for 1.3 is JIT'ed to a binary image. For devices with CC 1.1 and 1.2, the PTX for 1.1 is JIT'ed. For devices with CC 1.0 no code is available and the functions throw :func:`Exception`. For platforms where JIT compilation is performed, the first run will be slow.
+
+If you happen to have a GPU with CC 1.0, the GPU module can still be compiled for it and most of the functions will run just fine on such a card. Simply add "1.0" to the list of binaries, for example, ``CUDA_ARCH_BIN="1.0 1.3 2.0"``. The functions that cannot run on CC 1.0 GPUs will throw an exception.
+
+You can always determine at runtime whether the OpenCV GPU binaries (or PTX code) were built for a configuration compatible with your GPU. The function :func:`gpu::DeviceInfo::isCompatible` returns the compatibility status (true/false).
+
+
+Threading and multi-threading
+-----------------------------
+
+The OpenCV GPU module follows the Cuda Runtime API conventions regarding multi-threaded programming. That is, on the first API call a Cuda context is created implicitly, attached to the current CPU thread, and then used as the thread's "current" context. All further operations, such as memory allocation and GPU code compilation, are associated with that context and that thread. Because no other thread is attached to this context, memory (and other resources) allocated in the first thread cannot be accessed by another thread; instead, for that other thread Cuda creates another context associated with it. In short, by default different threads do not share resources.
+
+This limitation can be removed using the Cuda Driver API (version 3.1 or later). The user can retrieve the context reference for one thread, attach it to another thread and make it "current" for that thread; then the threads can share memory and other resources. It is also possible to create a context explicitly, before calling any GPU code, and attach it to all the threads that should share the resources; the Cuda Runtime API (and OpenCV functions, respectively) will pick it up.
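+
+A minimal sketch of the one-context-per-thread pattern, which is also the basis of the multi-GPU usage described below. It assumes a machine with two CUDA devices and a C++11 compiler for ``std::thread``, neither of which the GPU module itself requires:
+
+::
+
+    #include <thread>
+    #include <opencv2/opencv.hpp>
+    #include <opencv2/gpu/gpu.hpp>
+
+    // Each CPU thread binds its own device; the first GPU call in the
+    // thread then creates a separate Cuda context for it.
+    void worker(int device_id, const cv::Mat& frame)
+    {
+        cv::gpu::setDevice(device_id);
+        cv::gpu::GpuMat src(frame), dst;
+        cv::gpu::threshold(src, dst, 128, 255, cv::THRESH_BINARY);
+    }
+
+    int main()
+    {
+        cv::Mat frame(480, 640, CV_8UC1, cv::Scalar(100)); // synthetic input
+        std::thread t0(worker, 0, std::cref(frame));
+        std::thread t1(worker, 1, std::cref(frame));
+        t0.join();
+        t1.join();
+        return 0;
+    }
+
+..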
+
+
+Multi-GPU
+---------
+
+In the current version each of the OpenCV GPU algorithms can use only a single GPU. So, to utilize multiple GPUs, the user has to distribute the work between the GPUs manually. Here are the two ways of utilizing multiple GPUs:
+
+* If you only use synchronous functions, first create several CPU threads (one per GPU) and from within each thread create a CUDA context for the corresponding GPU, using :func:`gpu::setDevice()` or the Driver API. That's it: each of the threads will now use the associated GPU.
+
+* In case of asynchronous functions, it is possible to create several Cuda contexts associated with different GPUs but attached to one CPU thread. This can be done only via the Driver API. Within the thread you can switch from one GPU to another by making the corresponding context "current". With non-blocking GPU calls, the managing algorithm remains clear.
+
+While developing algorithms for multiple GPUs, the data transfer overhead has to be taken into consideration. For primitive functions and for small images it can be significant and can eliminate all the advantages of having multiple GPUs. But for high-level algorithms, multi-GPU acceleration may be suitable. For example, the Stereo Block Matching algorithm has been successfully parallelized using the following scheme:
+
+* Each image of the stereo pair is split into two horizontal overlapping stripes.
+
+* Each pair of stripes (from the left and the right images) is processed on a separate Fermi GPU.
+
+* The results are merged into a single disparity map.
+
+With this scheme a dual GPU gave a 180% performance increase compared to a single Fermi GPU. The source code of the example is available at https://code.ros.org/svn/opencv/trunk/opencv/examples/gpu/
diff --git a/modules/gpu/doc/matrix_reductions.rst b/modules/gpu/doc/matrix_reductions.rst
new file mode 100644
index 000000000..9d3e388c9
--- /dev/null
+++ b/modules/gpu/doc/matrix_reductions.rst
@@ -0,0 +1,366 @@
+Matrix Reductions
+=================
+
+.. highlight:: cpp
+
+
+.. index:: gpu::meanStdDev
+
+cv::gpu::meanStdDev
+-------------------
+
+.. cfunction:: void meanStdDev(const GpuMat\& mtx, Scalar\& mean, Scalar\& stddev)
+
+    Computes the mean value and the standard deviation of matrix elements.
+
+    :param mtx: Source matrix. ``CV_8UC1`` matrices are supported for now.
+
+    :param mean: Mean value.
+
+    :param stddev: Standard deviation value.
+
+See also: :func:`meanStdDev`.
+
+
+.. index:: gpu::norm
+
+cv::gpu::norm
+-------------
+
+.. cfunction:: double norm(const GpuMat\& src, int normType=NORM_L2)
+
+    Returns the norm of a matrix (or of the difference of two matrices).
+
+    :param src: Source matrix. Any matrices except 64F are supported.
+
+    :param normType: Norm type. ``NORM_L1``, ``NORM_L2`` and ``NORM_INF`` are supported for now.
+
+.. cfunction:: double norm(const GpuMat\& src, int normType, GpuMat\& buf)
+
+    * **src** Source matrix. Any matrices except 64F are supported.
+
+    * **normType** Norm type. ``NORM_L1``, ``NORM_L2`` and ``NORM_INF`` are supported for now.
+
+    * **buf** Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+.. cfunction:: double norm(const GpuMat\& src1, const GpuMat\& src2, int normType=NORM_L2)
+
+    * **src1** First source matrix. ``CV_8UC1`` matrices are supported for now.
+
+    * **src2** Second source matrix. Must have the same size and type as ``src1``.
+
+    * **normType** Norm type. ``NORM_L1``, ``NORM_L2`` and ``NORM_INF`` are supported for now.
+
+See also: :func:`norm`.
+
+
+.. index:: gpu::sum
+
+cv::gpu::sum
+------------
+
+.. cfunction:: Scalar sum(const GpuMat\& src)
+
+.. cfunction:: Scalar sum(const GpuMat\& src, GpuMat\& buf)
+
+    Returns the sum of matrix elements.
+
+    :param src: Source image of any depth except ``CV_64F``.
+
+    :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+See also: :func:`sum`.
+
+
+.. index:: gpu::absSum
+
+cv::gpu::absSum
+---------------
+
+.. cfunction:: Scalar absSum(const GpuMat\& src)
+
+.. cfunction:: Scalar absSum(const GpuMat\& src, GpuMat\& buf)
+
+    Returns the sum of the absolute values of matrix elements.
+
+    :param src: Source image of any depth except ``CV_64F``.
+
+    :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+
+.. index:: gpu::sqrSum
+
+cv::gpu::sqrSum
+---------------
+
+.. cfunction:: Scalar sqrSum(const GpuMat\& src)
+
+.. cfunction:: Scalar sqrSum(const GpuMat\& src, GpuMat\& buf)
+
+    Returns the squared sum of matrix elements.
+
+    :param src: Source image of any depth except ``CV_64F``.
+
+    :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+
+.. index:: gpu::minMax
+
+cv::gpu::minMax
+---------------
+
+.. cfunction:: void minMax(const GpuMat\& src, double* minVal, double* maxVal=0, const GpuMat\& mask=GpuMat())
+
+.. cfunction:: void minMax(const GpuMat\& src, double* minVal, double* maxVal, const GpuMat\& mask, GpuMat\& buf)
+
+    Finds the global minimum and maximum matrix elements and returns their values.
+
+    :param src: Single-channel source image.
+
+    :param minVal: Pointer to the returned minimum value. ``NULL`` if not required.
+
+    :param maxVal: Pointer to the returned maximum value. ``NULL`` if not required.
+
+    :param mask: Optional mask to select a sub-matrix.
+
+    :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+The function does not work with ``CV_64F`` images on GPUs with compute capability :math:`<` 1.3.
+
+See also: :func:`minMaxLoc`.
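+
+A minimal sketch of the buffered pattern; the random test data is an assumption of the example:
+
+::
+
+    #include <opencv2/opencv.hpp>
+    #include <opencv2/gpu/gpu.hpp>
+
+    int main()
+    {
+        cv::Mat host(480, 640, CV_8UC1);
+        cv::randu(host, 0, 256);
+
+        cv::gpu::GpuMat src(host), buf;
+        double minVal, maxVal;
+
+        // The buffered overload reuses "buf" between calls, avoiding a GPU
+        // memory allocation on every invocation.
+        for (int i = 0; i < 10; ++i)
+            cv::gpu::minMax(src, &minVal, &maxVal, cv::gpu::GpuMat(), buf);
+        return 0;
+    }
+
+..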
+
+
+.. index:: gpu::minMaxLoc
+
+cv::gpu::minMaxLoc
+------------------
+
+.. cfunction:: void minMaxLoc(const GpuMat\& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, const GpuMat\& mask=GpuMat())
+
+.. cfunction:: void minMaxLoc(const GpuMat\& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat\& mask, GpuMat\& valbuf, GpuMat\& locbuf)
+
+    Finds the global minimum and maximum matrix elements and returns their values with locations.
+
+    :param src: Single-channel source image.
+
+    :param minVal: Pointer to the returned minimum value. ``NULL`` if not required.
+
+    :param maxVal: Pointer to the returned maximum value. ``NULL`` if not required.
+
+    :param minLoc: Pointer to the returned minimum location. ``NULL`` if not required.
+
+    :param maxLoc: Pointer to the returned maximum location. ``NULL`` if not required.
+
+    :param mask: Optional mask to select a sub-matrix.
+
+    :param valbuf: Optional values buffer to avoid extra memory allocations. It is resized automatically.
+
+    :param locbuf: Optional locations buffer to avoid extra memory allocations. It is resized automatically.
+
+The function does not work with ``CV_64F`` images on GPUs with compute capability :math:`<` 1.3.
+
+See also: :func:`minMaxLoc`.
+
+
+.. index:: gpu::countNonZero
+
+cv::gpu::countNonZero
+---------------------
+
+.. cfunction:: int countNonZero(const GpuMat\& src)
+
+.. cfunction:: int countNonZero(const GpuMat\& src, GpuMat\& buf)
+
+    Counts non-zero matrix elements.
+
+    :param src: Single-channel source image.
+
+    :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+The function does not work with ``CV_64F`` images on GPUs with compute capability :math:`<` 1.3.
+
+See also: :func:`countNonZero`.
diff --git a/modules/gpu/doc/object_detection.rst b/modules/gpu/doc/object_detection.rst
new file mode 100644
index 000000000..16245e237
--- /dev/null
+++ b/modules/gpu/doc/object_detection.rst
@@ -0,0 +1,575 @@
+Object Detection
+================
+
+.. highlight:: cpp
+
+
+.. index:: gpu::HOGDescriptor
+
+.. _gpu::HOGDescriptor:
+
+gpu::HOGDescriptor
+------------------
+
+.. ctype:: gpu::HOGDescriptor
+
+Histogram of Oriented Gradients (Dalal and Triggs, 2005) descriptor and detector.
+
+::
+
+    struct CV_EXPORTS HOGDescriptor
+    {
+        enum { DEFAULT_WIN_SIGMA = -1 };
+        enum { DEFAULT_NLEVELS = 64 };
+        enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+
+        HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
+                      Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
+                      int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
+                      double threshold_L2hys=0.2, bool gamma_correction=true,
+                      int nlevels=DEFAULT_NLEVELS);
+
+        size_t getDescriptorSize() const;
+        size_t getBlockHistogramSize() const;
+
+        void setSVMDetector(const vector<float>& detector);
+
+        static vector<float> getDefaultPeopleDetector();
+        static vector<float> getPeopleDetector48x96();
+        static vector<float> getPeopleDetector64x128();
+
+        void detect(const GpuMat& img, vector<Point>& found_locations,
+                    double hit_threshold=0, Size win_stride=Size(),
+                    Size padding=Size());
+
+        void detectMultiScale(const GpuMat& img, vector<Rect>& found_locations,
+                              double hit_threshold=0, Size win_stride=Size(),
+                              Size padding=Size(), double scale0=1.05,
+                              int group_threshold=2);
+
+        void getDescriptors(const GpuMat& img, Size win_stride,
+                            GpuMat& descriptors,
+                            int descr_format=DESCR_FORMAT_COL_BY_COL);
+
+        Size win_size;
+        Size block_size;
+        Size block_stride;
+        Size cell_size;
+        int nbins;
+        double win_sigma;
+        double threshold_L2hys;
+        bool gamma_correction;
+        int nlevels;
+
+    private:
+        // Hidden
+    };
+
+..
+
+Interfaces of all methods are kept as similar to the CPU HOG descriptor and detector analogues as possible.
+
+
+.. index:: gpu::HOGDescriptor::HOGDescriptor
+
+cv::gpu::HOGDescriptor::HOGDescriptor
+-------------------------------------
+
+.. cfunction:: HOGDescriptor::HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16), Size block_stride=Size(8, 8), Size cell_size=Size(8, 8), int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA, double threshold_L2hys=0.2, bool gamma_correction=true, int nlevels=DEFAULT_NLEVELS)
+
+    Creates the HOG descriptor and detector.
+
+    :param win_size: Detection window size. Must be aligned to block size and block stride.
+
+    :param block_size: Block size in pixels. Must be aligned to cell size. Only (16,16) is supported for now.
+
+    :param block_stride: Block stride. Must be a multiple of cell size.
+
+    :param cell_size: Cell size. Only (8, 8) is supported for now.
+
+    :param nbins: Number of bins. Only 9 bins per cell are supported for now.
+
+    :param win_sigma: Gaussian smoothing window parameter.
+
+    :param threshold_L2hys: L2-Hys normalization method shrinkage.
+
+    :param gamma_correction: Whether to do gamma correction preprocessing.
+
+    :param nlevels: Maximum number of detection window increases.
+
+
+.. index:: gpu::HOGDescriptor::getDescriptorSize
+
+cv::gpu::HOGDescriptor::getDescriptorSize
+-----------------------------------------
+
+.. cfunction:: size_t HOGDescriptor::getDescriptorSize() const
+
+    Returns the number of coefficients required for the classification.
+
+
+.. index:: gpu::HOGDescriptor::getBlockHistogramSize
+
+cv::gpu::HOGDescriptor::getBlockHistogramSize
+---------------------------------------------
+
+.. cfunction:: size_t HOGDescriptor::getBlockHistogramSize() const
+
+    Returns the block histogram size.
+
+
+.. index:: gpu::HOGDescriptor::setSVMDetector
+
+cv::gpu::HOGDescriptor::setSVMDetector
+--------------------------------------
+
+.. cfunction:: void HOGDescriptor::setSVMDetector(const vector<float>\& detector)
+
+    Sets coefficients for the linear SVM classifier.
+
+
+.. index:: gpu::HOGDescriptor::getDefaultPeopleDetector
+
+cv::gpu::HOGDescriptor::getDefaultPeopleDetector
+------------------------------------------------
+
+.. cfunction:: static vector<float> HOGDescriptor::getDefaultPeopleDetector()
+
+    Returns coefficients of the classifier trained for people detection (for the default window size).
+
+
+.. index:: gpu::HOGDescriptor::getPeopleDetector48x96
+
+cv::gpu::HOGDescriptor::getPeopleDetector48x96
+----------------------------------------------
+
+.. cfunction:: static vector<float> HOGDescriptor::getPeopleDetector48x96()
+
+    Returns coefficients of the classifier trained for people detection (for 48x96 windows).
+
+
+.. index:: gpu::HOGDescriptor::getPeopleDetector64x128
+
+cv::gpu::HOGDescriptor::getPeopleDetector64x128
+-----------------------------------------------
+
+.. cfunction:: static vector<float> HOGDescriptor::getPeopleDetector64x128()
+
+    Returns coefficients of the classifier trained for people detection (for 64x128 windows).
+
+
+.. index:: gpu::HOGDescriptor::detect
+
+cv::gpu::HOGDescriptor::detect
+------------------------------
+
+.. cfunction:: void HOGDescriptor::detect(const GpuMat\& img, vector<Point>\& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size())
+
+    Performs object detection without a multiscale window.
+
+    :param img: Source image. ``CV_8UC1`` and ``CV_8UC4`` types are supported for now.
+
+    :param found_locations: Will contain the left-top corner points of the detected objects' boundaries.
+
+    :param hit_threshold: Threshold for the distance between features and the SVM classifying plane. Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient), but if the free coefficient is omitted (which is allowed), you can specify it manually here.
+
+    :param win_stride: Window stride. Must be a multiple of block stride.
+
+    :param padding: Mock parameter to keep CPU interface compatibility. Must be (0,0).
+
+
+.. index:: gpu::HOGDescriptor::detectMultiScale
+
+cv::gpu::HOGDescriptor::detectMultiScale
+----------------------------------------
+
+.. cfunction:: void HOGDescriptor::detectMultiScale(const GpuMat\& img, vector<Rect>\& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size(), double scale0=1.05, int group_threshold=2)
+
+    Performs object detection with a multiscale window.
+
+    :param img: Source image. See :func:`gpu::HOGDescriptor::detect` for type limitations.
+
+    :param found_locations: Will contain the detected objects' boundaries.
+
+    :param hit_threshold: Threshold for the distance between features and the SVM classifying plane. See :func:`gpu::HOGDescriptor::detect` for details.
+
+    :param win_stride: Window stride. Must be a multiple of block stride.
+
+    :param padding: Mock parameter to keep CPU interface compatibility. Must be (0,0).
+
+    :param scale0: Coefficient of the detection window increase.
+
+    :param group_threshold: After detection, some objects could be covered by many rectangles; this coefficient regulates the similarity threshold. 0 means that no grouping is performed. See :func:`groupRectangles`.
+
+
+.. index:: gpu::HOGDescriptor::getDescriptors
+
+cv::gpu::HOGDescriptor::getDescriptors
+--------------------------------------
+
+.. cfunction:: void HOGDescriptor::getDescriptors(const GpuMat\& img, Size win_stride, GpuMat\& descriptors, int descr_format=DESCR_FORMAT_COL_BY_COL)
+
+    Returns block descriptors computed for the whole image. This is mainly used for classifier learning purposes.
+
+    :param img: Source image. See :func:`gpu::HOGDescriptor::detect` for type limitations.
+
+    :param win_stride: Window stride. Must be a multiple of block stride.
+
+    :param descriptors: 2D array of descriptors.
+
+    :param descr_format: Descriptor storage format:
+
+        * **DESCR_FORMAT_ROW_BY_ROW** Row-major order.
+
+        * **DESCR_FORMAT_COL_BY_COL** Column-major order.
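+
+An end-to-end sketch of people detection with the descriptor documented above; the input file name is a placeholder, and the grayscale conversion is just one way to satisfy the ``CV_8UC1``/``CV_8UC4`` input requirement:
+
+::
+
+    #include <vector>
+    #include <opencv2/opencv.hpp>
+    #include <opencv2/gpu/gpu.hpp>
+
+    int main()
+    {
+        cv::Mat frame = cv::imread("pedestrians.png"); // placeholder input
+
+        // detect()/detectMultiScale() accept CV_8UC1 or CV_8UC4 images,
+        // so convert the usual BGR input to grayscale on the GPU first.
+        cv::gpu::GpuMat src(frame), gray;
+        cv::gpu::cvtColor(src, gray, CV_BGR2GRAY);
+
+        cv::gpu::HOGDescriptor hog; // default 64x128 people-detection window
+        hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
+
+        std::vector<cv::Rect> found;
+        hog.detectMultiScale(gray, found);
+
+        for (size_t i = 0; i < found.size(); ++i)
+            cv::rectangle(frame, found[i], cv::Scalar(0, 255, 0), 2);
+        cv::imwrite("detections.png", frame);
+        return 0;
+    }
+
+..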
+
+
+.. index:: gpu::CascadeClassifier_GPU
+
+.. _gpu::CascadeClassifier_GPU:
+
+gpu::CascadeClassifier_GPU
+--------------------------
+
+.. ctype:: gpu::CascadeClassifier_GPU
+
+The cascade classifier class for object detection.
+
+::
+
+    class CV_EXPORTS CascadeClassifier_GPU
+    {
+    public:
+        CascadeClassifier_GPU();
+        CascadeClassifier_GPU(const string& filename);
+        ~CascadeClassifier_GPU();
+
+        bool empty() const;
+        bool load(const string& filename);
+        void release();
+
+        /* Returns the number of detected objects */
+        int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size());
+
+        /* Finds only the largest object. Special mode for training needs. */
+        bool findLargestObject;
+
+        /* Draws rectangles in the input image */
+        bool visualizeInPlace;
+
+        Size getClassifierSize() const;
+    };
+
+..
+
+
+.. index:: cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU
+
+.. _cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU:
+
+cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU
+-----------------------------------------------------
+
+.. cfunction:: cv::CascadeClassifier_GPU(const string\& filename)
+
+    Loads the classifier from a file.
+
+    :param filename: Name of the file from which the classifier is loaded. Only the old haar classifier (trained by the haartraining application) and NVidia's nvbin are supported.
+
+
+.. index:: cv::gpu::CascadeClassifier_GPU::empty
+
+.. _cv::gpu::CascadeClassifier_GPU::empty:
+
+cv::gpu::CascadeClassifier_GPU::empty
+-------------------------------------
+
+.. cfunction:: bool CascadeClassifier_GPU::empty() const
+
+    Checks whether the classifier has been loaded or not.
+
+
+.. index:: cv::gpu::CascadeClassifier_GPU::load
+
+.. _cv::gpu::CascadeClassifier_GPU::load:
+
+cv::gpu::CascadeClassifier_GPU::load
+------------------------------------
+
+.. cfunction:: bool CascadeClassifier_GPU::load(const string\& filename)
+
+    Loads the classifier from a file. The previous content is destroyed.
+
+    :param filename: Name of the file from which the classifier is loaded. Only the old haar classifier (trained by the haartraining application) and NVidia's nvbin are supported.
+
+
+.. index:: cv::gpu::CascadeClassifier_GPU::release
+
+.. _cv::gpu::CascadeClassifier_GPU::release:
+
+cv::gpu::CascadeClassifier_GPU::release
+---------------------------------------
+
+.. cfunction:: void CascadeClassifier_GPU::release()
+
+    Destroys the loaded classifier.
+
+
+.. index:: cv::gpu::CascadeClassifier_GPU::detectMultiScale
+
+.. _cv::gpu::CascadeClassifier_GPU::detectMultiScale:
+
+cv::gpu::CascadeClassifier_GPU::detectMultiScale
+------------------------------------------------
+
+.. cfunction:: int CascadeClassifier_GPU::detectMultiScale(const GpuMat\& image, GpuMat\& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size())
+
+    Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles.
+
+    :param image: Matrix of type ``CV_8U`` containing the image in which to detect objects.
+
+    :param objectsBuf: Buffer to store the detected objects (rectangles). If it is empty, it is allocated with the default size. If not empty, the function searches for not more than N objects, where N = sizeof(the buffer's data)/sizeof(cv::Rect).
+
+    :param scaleFactor: Specifies how much the image size is reduced at each image scale.
+
+    :param minNeighbors: Specifies how many neighbors each candidate rectangle should have to retain it.
+
+    :param minSize: The minimum possible object size. Objects smaller than that are ignored.
+
+The function returns the number of detected objects, so you can retrieve them as in the following example:
+
+::
+
+    cv::gpu::CascadeClassifier_GPU cascade_gpu(...);
+
+    Mat image_cpu = imread(...);
+    GpuMat image_gpu(image_cpu);
+
+    GpuMat objbuf;
+    int detections_num = cascade_gpu.detectMultiScale(image_gpu,
+        objbuf, 1.2, minNeighbors);
+
+    Mat obj_host;
+    // download only the detected number of rectangles
+    objbuf.colRange(0, detections_num).download(obj_host);
+
+    Rect* faces = obj_host.ptr<Rect>();
+    for (int i = 0; i < detections_num; ++i)
+        cv::rectangle(image_cpu, faces[i], Scalar(255));
+
+    imshow("Faces", image_cpu);
+
+..
+
+See also: :func:`CascadeClassifier::detectMultiScale`.
diff --git a/modules/gpu/doc/operations_on_matrices.rst b/modules/gpu/doc/operations_on_matrices.rst
new file mode 100644
index 000000000..83f3317d6
--- /dev/null
+++ b/modules/gpu/doc/operations_on_matrices.rst
@@ -0,0 +1,509 @@
+Operations on Matrices
+======================
+
+.. highlight:: cpp
+
+
+.. index:: gpu::transpose
+
+cv::gpu::transpose
+------------------
+
+.. cfunction:: void transpose(const GpuMat\& src, GpuMat\& dst)
+
+    Transposes a matrix.
+
+    :param src: Source matrix. 1-, 4- and 8-byte element sizes are supported for now.
+
+    :param dst: Destination matrix.
+
+See also: :func:`transpose`.
+
+
+.. index:: gpu::flip
+
+cv::gpu::flip
+-------------
+
+.. cfunction:: void flip(const GpuMat\& a, GpuMat\& b, int flipCode)
+
+    Flips a 2D matrix around the vertical, the horizontal or both axes.
+
+    :param a: Source matrix. Only ``CV_8UC1`` and ``CV_8UC4`` matrices are supported for now.
+
+    :param b: Destination matrix.
+
+    :param flipCode: Specifies how to flip the source:
+
+        * **0** Flip around the x-axis.
+
+        * **:math:`>`0** Flip around the y-axis.
+
+        * **:math:`<`0** Flip around both axes.
+
+See also: :func:`flip`.
+
+
+.. index:: gpu::LUT
+
+cv::gpu::LUT
+------------
+
+.. cfunction:: void LUT(const GpuMat\& src, const Mat\& lut, GpuMat\& dst)
+
+    Transforms the source matrix into the destination matrix using the given look-up table:
+
+    .. math::
+
+        dst(I) = lut(src(I))
+
+    :param src: Source matrix. ``CV_8UC1`` and ``CV_8UC3`` matrices are supported for now.
+
+    :param lut: Look-up table. Must be a continuous, ``CV_8U`` depth matrix. Its area must satisfy the condition ``lut.rows`` :math:`\times` ``lut.cols`` = 256.
+
+    :param dst: Destination matrix. Will have the same depth as ``lut`` and the same number of channels as ``src``.
+
+See also: :func:`LUT`.
+
+
+.. index:: gpu::merge
+
+cv::gpu::merge
+--------------
+
+.. cfunction:: void merge(const GpuMat* src, size_t n, GpuMat\& dst)
+
+.. cfunction:: void merge(const GpuMat* src, size_t n, GpuMat\& dst, const Stream\& stream)
+
+    Makes a multi-channel matrix out of several single-channel matrices.
+
+    :param src: Pointer to the array of source matrices.
+
+    :param n: Number of source matrices.
+
+    :param dst: Destination matrix.
+
+    :param stream: Stream for the asynchronous version.
+
+.. cfunction:: void merge(const vector<GpuMat>\& src, GpuMat\& dst)
+
+.. cfunction:: void merge(const vector<GpuMat>\& src, GpuMat\& dst, const Stream\& stream)
+
+    * **src** Vector of the source matrices.
+
+    * **dst** Destination matrix.
+
+    * **stream** Stream for the asynchronous version.
+
+See also: :func:`merge`.
+
+
+.. index:: gpu::split
+
+cv::gpu::split
+--------------
+
+.. cfunction:: void split(const GpuMat\& src, GpuMat* dst)
+
+.. cfunction:: void split(const GpuMat\& src, GpuMat* dst, const Stream\& stream)
+
+    Copies each plane of a multi-channel matrix into an array.
+
+    :param src: Source matrix.
+
+    :param dst: Pointer to the array of single-channel matrices.
+
+    :param stream: Stream for the asynchronous version.
+
+.. cfunction:: void split(const GpuMat\& src, vector<GpuMat>\& dst)
+
+.. cfunction:: void split(const GpuMat\& src, vector<GpuMat>\& dst, const Stream\& stream)
+
+    * **src** Source matrix.
+
+    * **dst** Destination vector of single-channel matrices.
+
+    * **stream** Stream for the asynchronous version.
+
+See also: :func:`split`.
+
+
+.. index:: gpu::magnitude
+
+cv::gpu::magnitude
+------------------
+
+.. cfunction:: void magnitude(const GpuMat\& x, GpuMat\& magnitude)
+
+    Computes magnitudes of complex matrix elements.
+
+    :param x: Source complex matrix in the interleaved format (``CV_32FC2``).
+
+    :param magnitude: Destination matrix of float magnitudes (``CV_32FC1``).
+
+.. cfunction:: void magnitude(const GpuMat\& x, const GpuMat\& y, GpuMat\& magnitude)
+
+.. cfunction:: void magnitude(const GpuMat\& x, const GpuMat\& y, GpuMat\& magnitude, const Stream\& stream)
+
+    * **x** Source matrix containing real components (``CV_32FC1``).
+
+    * **y** Source matrix containing imaginary components (``CV_32FC1``).
+
+    * **magnitude** Destination matrix of float magnitudes (``CV_32FC1``).
+
+    * **stream** Stream for the asynchronous version.
+
+See also: :func:`magnitude`.
+
+
+.. index:: gpu::magnitudeSqr
+
+cv::gpu::magnitudeSqr
+---------------------
+
+.. cfunction:: void magnitudeSqr(const GpuMat\& x, GpuMat\& magnitude)
+
+    Computes squared magnitudes of complex matrix elements.
+
+    :param x: Source complex matrix in the interleaved format (``CV_32FC2``).
+
+    :param magnitude: Destination matrix of float magnitude squares (``CV_32FC1``).
+
+.. cfunction:: void magnitudeSqr(const GpuMat\& x, const GpuMat\& y, GpuMat\& magnitude)
+
+.. cfunction:: void magnitudeSqr(const GpuMat\& x, const GpuMat\& y, GpuMat\& magnitude, const Stream\& stream)
+
+    * **x** Source matrix containing real components (``CV_32FC1``).
+
+    * **y** Source matrix containing imaginary components (``CV_32FC1``).
+
+    * **magnitude** Destination matrix of float magnitude squares (``CV_32FC1``).
+
+    * **stream** Stream for the asynchronous version.
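+
+A short sketch showing both ``gpu::magnitude`` overloads together with ``gpu::merge``; the sizes and the random test data are assumptions of the example:
+
+::
+
+    #include <opencv2/opencv.hpp>
+    #include <opencv2/gpu/gpu.hpp>
+
+    int main()
+    {
+        cv::Mat x_cpu(256, 256, CV_32FC1), y_cpu(256, 256, CV_32FC1);
+        cv::randu(x_cpu, -1, 1);
+        cv::randu(y_cpu, -1, 1);
+
+        // Real and imaginary parts as separate CV_32FC1 matrices.
+        cv::gpu::GpuMat x(x_cpu), y(y_cpu), mag;
+        cv::gpu::magnitude(x, y, mag);
+
+        // The interleaved CV_32FC2 overload: merge the two planes first.
+        cv::gpu::GpuMat planes[] = { x, y };
+        cv::gpu::GpuMat xy, mag2;
+        cv::gpu::merge(planes, 2, xy);
+        cv::gpu::magnitude(xy, mag2);
+        return 0;
+    }
+
+..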
+
+
+.. index:: gpu::phase
+
+cv::gpu::phase
+--------------
+
+.. cfunction:: void phase(const GpuMat\& x, const GpuMat\& y, GpuMat\& angle, bool angleInDegrees=false)
+
+.. cfunction:: void phase(const GpuMat\& x, const GpuMat\& y, GpuMat\& angle, bool angleInDegrees, const Stream\& stream)
+
+    Computes polar angles of complex matrix elements.
+
+    :param x: Source matrix containing real components (``CV_32FC1``).
+
+    :param y: Source matrix containing imaginary components (``CV_32FC1``).
+
+    :param angle: Destination matrix of angles (``CV_32FC1``).
+
+    :param angleInDegrees: Flag which indicates that the angles must be evaluated in degrees.
+
+    :param stream: Stream for the asynchronous version.
+
+See also: :func:`phase`.
+
+
+.. index:: gpu::cartToPolar
+
+cv::gpu::cartToPolar
+--------------------
+
+.. cfunction:: void cartToPolar(const GpuMat\& x, const GpuMat\& y, GpuMat\& magnitude, GpuMat\& angle, bool angleInDegrees=false)
+
+.. cfunction:: void cartToPolar(const GpuMat\& x, const GpuMat\& y, GpuMat\& magnitude, GpuMat\& angle, bool angleInDegrees, const Stream\& stream)
+
+    Converts Cartesian coordinates into polar.
+
+    :param x: Source matrix containing real components (``CV_32FC1``).
+
+    :param y: Source matrix containing imaginary components (``CV_32FC1``).
+
+    :param magnitude: Destination matrix of float magnitudes (``CV_32FC1``).
+
+    :param angle: Destination matrix of angles (``CV_32FC1``).
+
+    :param angleInDegrees: Flag which indicates that the angles must be evaluated in degrees.
+
+    :param stream: Stream for the asynchronous version.
+
+See also: :func:`cartToPolar`.
+
+
+.. index:: gpu::polarToCart
+
+cv::gpu::polarToCart
+--------------------
+
+.. cfunction:: void polarToCart(const GpuMat\& magnitude, const GpuMat\& angle, GpuMat\& x, GpuMat\& y, bool angleInDegrees=false)
+
+.. cfunction:: void polarToCart(const GpuMat\& magnitude, const GpuMat\& angle, GpuMat\& x, GpuMat\& y, bool angleInDegrees, const Stream\& stream)
+
+    Converts polar coordinates into Cartesian.
+
+    :param magnitude: Source matrix containing magnitudes (``CV_32FC1``).
+
+    :param angle: Source matrix containing angles (``CV_32FC1``).
+
+    :param x: Destination matrix of real components (``CV_32FC1``).
+
+    :param y: Destination matrix of imaginary components (``CV_32FC1``).
+
+    :param angleInDegrees: Flag which indicates that the angles are in degrees.
+
+    :param stream: Stream for the asynchronous version.
+
+See also: :func:`polarToCart`.
diff --git a/modules/gpu/doc/per_element_operations..rst b/modules/gpu/doc/per_element_operations..rst
new file mode 100644
index 000000000..a2255d641
--- /dev/null
+++ b/modules/gpu/doc/per_element_operations..rst
@@ -0,0 +1,721 @@
+Per-element Operations
+======================
+
+.. highlight:: cpp
+
+
+.. index:: gpu::add
+
+cv::gpu::add
+------------
+
+.. cfunction:: void add(const GpuMat\& a, const GpuMat\& b, GpuMat\& c)
+
+    Computes a matrix-matrix or matrix-scalar sum.
+
+    :param a: First source matrix. ``CV_8UC1``, ``CV_8UC4``, ``CV_32SC1`` and ``CV_32FC1`` matrices are supported for now.
+
+    :param b: Second source matrix. Must have the same size and type as ``a``.
+
+    :param c: Destination matrix. Will have the same size and type as ``a``.
+
+.. cfunction:: void add(const GpuMat\& a, const Scalar\& sc, GpuMat\& c)
+
+    * **a** Source matrix. ``CV_32FC1`` and ``CV_32FC2`` matrices are supported for now.
+
+    * **sc** Source scalar to be added to the source matrix.
+
+    * **c** Destination matrix. Will have the same size and type as ``a``.
+
+See also: :func:`add`.
+
+
+.. index:: gpu::subtract
+
+cv::gpu::subtract
+-----------------
+
+.. cfunction:: void subtract(const GpuMat\& a, const GpuMat\& b, GpuMat\& c)
+
+    Subtracts a matrix from another matrix (or a scalar from a matrix).
+
+    :param a: First source matrix. ``CV_8UC1``, ``CV_8UC4``, ``CV_32SC1`` and ``CV_32FC1`` matrices are supported for now.
+
+    :param b: Second source matrix. Must have the same size and type as ``a``.
+
+    :param c: Destination matrix. Will have the same size and type as ``a``.
+
+.. cfunction:: void subtract(const GpuMat\& a, const Scalar\& sc, GpuMat\& c)
+
+    * **a** Source matrix. ``CV_32FC1`` and ``CV_32FC2`` matrices are supported for now.
+
+    * **sc** Scalar to be subtracted from the source matrix elements.
+
+    * **c** Destination matrix. Will have the same size and type as ``a``.
+
+See also: :func:`subtract`.
+
+
+.. index:: gpu::multiply
+
+cv::gpu::multiply
+-----------------
+
+.. cfunction:: void multiply(const GpuMat\& a, const GpuMat\& b, GpuMat\& c)
+
+    Computes the per-element product of two matrices (or of a matrix and a scalar).
+
+    :param a: First source matrix. ``CV_8UC1``, ``CV_8UC4``, ``CV_32SC1`` and ``CV_32FC1`` matrices are supported for now.
+
+    :param b: Second source matrix. Must have the same size and type as ``a``.
+
+    :param c: Destination matrix. Will have the same size and type as ``a``.
+
+.. cfunction:: void multiply(const GpuMat\& a, const Scalar\& sc, GpuMat\& c)
+
+    * **a** Source matrix. ``CV_32FC1`` and ``CV_32FC2`` matrices are supported for now.
+
+    * **sc** Scalar to be multiplied by.
+
+    * **c** Destination matrix. Will have the same size and type as ``a``.
+
+See also: :func:`multiply`.
+
+
+.. index:: gpu::divide
+
+cv::gpu::divide
+---------------
+
+.. cfunction:: void divide(const GpuMat\& a, const GpuMat\& b, GpuMat\& c)
+
+    Performs per-element division of two matrices (or division of a matrix by a scalar).
+
+    :param a: First source matrix. ``CV_8UC1``, ``CV_8UC4``, ``CV_32SC1`` and ``CV_32FC1`` matrices are supported for now.
+
+    :param b: Second source matrix. Must have the same size and type as ``a``.
+
+    :param c: Destination matrix. Will have the same size and type as ``a``.
+
+.. cfunction:: void divide(const GpuMat\& a, const Scalar\& sc, GpuMat\& c)
+
+    * **a** Source matrix. ``CV_32FC1`` and ``CV_32FC2`` matrices are supported for now.
+
+    * **sc** Scalar to be divided by.
+
+    * **c** Destination matrix. Will have the same size and type as ``a``.
+
+In contrast to :func:`divide`, this function uses the round-down rounding mode.
+
+See also: :func:`divide`.
+
+
+.. index:: gpu::exp
+
+cv::gpu::exp
+------------
+
+.. cfunction:: void exp(const GpuMat\& a, GpuMat\& b)
+
+    Computes the exponent of each matrix element.
+
+    :param a: Source matrix. ``CV_32FC1`` matrices are supported for now.
+
+    :param b: Destination matrix. Will have the same size and type as ``a``.
+
+See also: :func:`exp`.
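+
+A minimal sketch combining several of the per-element operations above; the constant-filled ``CV_32FC1`` inputs are assumptions of the example:
+
+::
+
+    #include <opencv2/opencv.hpp>
+    #include <opencv2/gpu/gpu.hpp>
+
+    int main()
+    {
+        cv::Mat a_cpu(480, 640, CV_32FC1, cv::Scalar(2)),
+                b_cpu(480, 640, CV_32FC1, cv::Scalar(3));
+
+        cv::gpu::GpuMat a(a_cpu), b(b_cpu), sum, prod, e;
+
+        cv::gpu::add(a, b, sum);        // per-element sum
+        cv::gpu::multiply(a, b, prod);  // per-element product
+        cv::gpu::exp(a, e);             // per-element exponent
+
+        cv::Mat result;
+        sum.download(result); // bring one result back to the host
+        return 0;
+    }
+
+..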
+
+
+.. index:: gpu::log
+
+cv::gpu::log
+------------
+
+.. cfunction:: void log(const GpuMat\& a, GpuMat\& b)
+
+    Computes the natural logarithm of the absolute value of each matrix element.
+
+    :param a: Source matrix. ``CV_32FC1`` matrices are supported for now.
+
+    :param b: Destination matrix. Will have the same size and type as ``a``.
+
+See also: :func:`log`.
+
+
+.. index:: gpu::absdiff
+
+cv::gpu::absdiff
+----------------
+
+.. cfunction:: void absdiff(const GpuMat\& a, const GpuMat\& b, GpuMat\& c)
+
+    Computes the per-element absolute difference of two matrices (or of a matrix and a scalar).
+
+    :param a: First source matrix. ``CV_8UC1``, ``CV_8UC4``, ``CV_32SC1`` and ``CV_32FC1`` matrices are supported for now.
+
+    :param b: Second source matrix. Must have the same size and type as ``a``.
+
+    :param c: Destination matrix. Will have the same size and type as ``a``.
+
+.. cfunction:: void absdiff(const GpuMat\& a, const Scalar\& s, GpuMat\& c)
+
+    * **a** Source matrix. ``CV_32FC1`` matrices are supported for now.
+
+    * **s** Scalar to be subtracted from the source matrix elements.
+
+    * **c** Destination matrix. Will have the same size and type as ``a``.
+
+See also: :func:`absdiff`.
+
+
+.. index:: gpu::compare
+
+cv::gpu::compare
+----------------
+
+.. cfunction:: void compare(const GpuMat\& a, const GpuMat\& b, GpuMat\& c, int cmpop)
+
+    Compares elements of two matrices.
+
+    :param a: First source matrix. ``CV_8UC4`` and ``CV_32FC1`` matrices are supported for now.
+
+    :param b: Second source matrix. Must have the same size and type as ``a``.
+
+    :param c: Destination matrix. Will have the same size as ``a`` and ``CV_8UC1`` type.
+
+    :param cmpop: Flag specifying the relation between the elements to be checked:
+
+        * **CMP_EQ** :math:`=`
+
+        * **CMP_GT** :math:`>`
+
+        * **CMP_GE** :math:`\ge`
+
+        * **CMP_LT** :math:`<`
+
+        * **CMP_LE** :math:`\le`
+
+        * **CMP_NE** :math:`\ne`
+
+See also: :func:`compare`.
+
+
+.. index:: cv::gpu::bitwise_not
+
+.. _cv::gpu::bitwise_not:
+
+cv::gpu::bitwise_not
+--------------------
+
+.. cfunction:: void bitwise_not(const GpuMat\& src, GpuMat\& dst, const GpuMat\& mask=GpuMat())
+
+.. cfunction:: void bitwise_not(const GpuMat\& src, GpuMat\& dst, const GpuMat\& mask, const Stream\& stream)
+
+    Performs per-element bitwise inversion.
+
+    :param src: Source matrix.
+
+    :param dst: Destination matrix. Will have the same size and type as ``src``.
+
+    :param mask: Optional operation mask. 8-bit single channel image.
+
+    :param stream: Stream for the asynchronous version.
+ + + :param dst: Destination matrix. Will have the same size and type as ``src1`` . + + + :param mask: Optional operation mask. 8-bit single channel image. + + + :param stream: Stream for the asynchronous version. + + + +See also: +. + + + +.. index:: cv::gpu::bitwise_and + +.. _cv::gpu::bitwise_and: + +cv::gpu::bitwise_and +-------------------- + +`id=0.621591376205 Comments from the Wiki `__ + + + + +.. cfunction:: void bitwise_and(const GpuMat\& src1, const GpuMat\& src2, GpuMat\& dst, const GpuMat\& mask=GpuMat()) + + + +.. cfunction:: void bitwise_and(const GpuMat\& src1, const GpuMat\& src2, GpuMat\& dst, const GpuMat\& mask, const Stream\& stream) + + Performs per-element bitwise conjunction of two matrices. + + + + + + + :param src1: First source matrix. + + + :param src2: Second source matrix. It must have the same size and type as ``src1`` . + + + :param dst: Destination matrix. Will have the same size and type as ``src1`` . + + + :param mask: Optional operation mask. 8-bit single channel image. + + + :param stream: Stream for the asynchronous version. + + + +See also: +. + + + +.. index:: cv::gpu::bitwise_xor + +.. _cv::gpu::bitwise_xor: + +cv::gpu::bitwise_xor +-------------------- + +`id=0.684217951074 Comments from the Wiki `__ + + + + +.. cfunction:: void bitwise_xor(const GpuMat\& src1, const GpuMat\& src2, GpuMat\& dst, const GpuMat\& mask=GpuMat()) + + + +.. cfunction:: void bitwise_xor(const GpuMat\& src1, const GpuMat\& src2, GpuMat\& dst, const GpuMat\& mask, const Stream\& stream) + + Performs per-element bitwise "exclusive or" of two matrices. + + + + + + + :param src1: First source matrix. + + + :param src2: Second source matrix. It must have the same size and type as ``src1`` . + + + :param dst: Destination matrix. Will have the same size and type as ``src1`` . + + + :param mask: Optional operation mask. 8-bit single channel image. + + + :param stream: Stream for the asynchronous version. + + + +See also: +. + + + +.. index:: gpu::min + + +cv::gpu::min +------------ + +`id=0.276176266158 Comments from the Wiki `__ + + + + +.. cfunction:: void min(const GpuMat\& src1, const GpuMat\& src2, GpuMat\& dst) + + + +.. cfunction:: void min(const GpuMat\& src1, const GpuMat\& src2, GpuMat\& dst, const Stream\& stream) + + Computes per-element minimum of two matrices (or of matrix and scalar). + + + + + + + :param src1: First source matrix. + + + :param src2: Second source matrix. + + + :param dst: Destination matrix. Will have the same size and type as ``src1`` . + + + :param stream: Stream for the asynchronous version. + + + + + +.. cfunction:: void min(const GpuMat\& src1, double src2, GpuMat\& dst) + + + +.. cfunction:: void min(const GpuMat\& src1, double src2, GpuMat\& dst, const Stream\& stream) + + + + + + + * **src1** Source matrix. + + + * **src2** Scalar to be compared with. + + + * **dst** Destination matrix. Will have the same size and type as ``src1`` . + + + * **stream** Stream for the asynchronous version. + + + +See also: +:func:`min` +. + + + +.. index:: gpu::max + + +cv::gpu::max +------------ + +`id=0.175554622377 Comments from the Wiki `__ + + + + +.. cfunction:: void max(const GpuMat\& src1, const GpuMat\& src2, GpuMat\& dst) + + + +.. cfunction:: void max(const GpuMat\& src1, const GpuMat\& src2, GpuMat\& dst, const Stream\& stream) + + Computes per-element maximum of two matrices (or of matrix and scalar). + + + + + + + :param src1: First source matrix. + + + :param src2: Second source matrix. + + + :param dst: Destination matrix. 
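+A minimal usage sketch combining the per-element operations above (illustrative only; it assumes OpenCV was built with CUDA support and a compatible device is present)::
+
+    #include "opencv2/opencv.hpp"
+    #include "opencv2/gpu/gpu.hpp"
+
+    using namespace cv;
+
+    int main()
+    {
+        Mat a(256, 256, CV_32FC1, Scalar(1)), b(256, 256, CV_32FC1, Scalar(2)), c;
+        gpu::GpuMat ga(a), gb(b), gc, gd;   // constructing from Mat uploads the data
+        gpu::add(ga, gb, gc);               // per-element sum, computed on the GPU
+        gpu::multiply(gc, Scalar(0.5), gd); // per-element scaling by a scalar
+        gd.download(c);                     // bring the result back to the host
+        return 0;
+    }
+
+..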
diff --git a/modules/highgui/doc/highgui.rst b/modules/highgui/doc/highgui.rst
new file mode 100644
index 000000000..5c6b94fd3
--- /dev/null
+++ b/modules/highgui/doc/highgui.rst
@@ -0,0 +1,3 @@
+****************************
+High-level GUI and Media I/O
+****************************
diff --git a/modules/highgui/doc/highgui__high-level_gui_and_media_i_o.rst b/modules/highgui/doc/highgui__high-level_gui_and_media_i_o.rst
new file mode 100644
index 000000000..06417b3cf
--- /dev/null
+++ b/modules/highgui/doc/highgui__high-level_gui_and_media_i_o.rst
@@ -0,0 +1,39 @@
+*************************************
+highgui. High-level GUI and Media I/O
+*************************************
+
+While OpenCV was designed for use in full-scale applications and can be used within functionally rich UI frameworks (such as Qt, WinForms or Cocoa) or without any UI at all, sometimes there is a need to try some functionality quickly and visualize the results. This is what the HighGUI module has been designed for.
+
+It provides an easy interface to:
+
+* create and manipulate windows that can display images and "remember" their content (no need to handle repaint events from the OS)
+
+* add trackbars to the windows, and handle simple mouse events as well as keyboard commands
+
+* read and write images to/from disk or memory
+
+* read video from a camera or file and write video to a file
+
+.. toctree::
+    :maxdepth: 2
+
+    highgui_user_interface
+    highgui_reading_and_writing_images_and_video
+    highgui_qt_new_functions
diff --git a/modules/highgui/doc/qt_new_functions.rst b/modules/highgui/doc/qt_new_functions.rst
new file mode 100644
index 000000000..1a1f25570
--- /dev/null
+++ b/modules/highgui/doc/qt_new_functions.rst
@@ -0,0 +1,607 @@
+Qt new functions
+================
+
+.. highlight:: cpp
+
+.. image:: ../../pics/Qt_GUI.png
+
+This figure explains the new functionality implemented with the Qt GUI. As we can see, the new GUI provides a statusbar, a toolbar, and a control panel. The control panel can have trackbars and buttonbars attached to it.
+
+* To attach a trackbar, the ``window_name`` parameter must be NULL.
+
+* To attach a buttonbar, a button must be created. If the last bar attached to the control panel is a buttonbar, the new button is added to the right of the last button. If the last bar attached to the control panel is a trackbar, or the control panel is empty, a new buttonbar is created. Then a new button is attached to it.
+
+The following code is an example used to generate the figure.
+::
+
+    int main(int argc, char *argv[])
+    {
+        int value = 50;
+        int value2 = 0;
+
+        cvNamedWindow("main1",CV_WINDOW_NORMAL);
+        cvNamedWindow("main2",CV_WINDOW_AUTOSIZE | CV_GUI_NORMAL);
+
+        cvCreateTrackbar( "track1", "main1", &value, 255, NULL);//OK tested
+        char* nameb1 = "button1";
+        char* nameb2 = "button2";
+        cvCreateButton(nameb1,callbackButton,nameb1,CV_CHECKBOX,1);
+
+        cvCreateButton(nameb2,callbackButton,nameb2,CV_CHECKBOX,0);
+        cvCreateTrackbar( "track2", NULL, &value2, 255, NULL);
+        cvCreateButton("button5",callbackButton1,NULL,CV_RADIOBOX,0);
+        cvCreateButton("button6",callbackButton2,NULL,CV_RADIOBOX,1);
+
+        cvSetMouseCallback( "main2",on_mouse,NULL );
+
+        IplImage* img1 = cvLoadImage("files/flower.jpg");
+        IplImage* img2 = cvCreateImage(cvGetSize(img1),8,3);
+        CvCapture* video = cvCaptureFromFile("files/hockey.avi");
+        IplImage* img3 = cvCreateImage(cvGetSize(cvQueryFrame(video)),8,3);
+
+        while(cvWaitKey(33) != 27)
+        {
+            cvAddS(img1,cvScalarAll(value),img2);
+            cvAddS(cvQueryFrame(video),cvScalarAll(value2),img3);
+            cvShowImage("main1",img2);
+            cvShowImage("main2",img3);
+        }
+
+        cvDestroyAllWindows();
+        cvReleaseImage(&img1);
+        cvReleaseImage(&img2);
+        cvReleaseImage(&img3);
+        cvReleaseCapture(&video);
+        return 0;
+    }
+
+..
+
+.. index:: setWindowProperty
+
+cv::setWindowProperty
+---------------------
+
+.. cfunction:: void setWindowProperty(const string\& name, int prop_id, double prop_value)
+
+    Changes the parameters of the window dynamically.
+
+    :param name: Name of the window.
+
+    :param prop_id: Window property to edit. The operation flags:
+
+        * **CV_WND_PROP_FULLSCREEN** Change if the window is fullscreen ( ``CV_WINDOW_NORMAL`` or ``CV_WINDOW_FULLSCREEN`` ).
+
+        * **CV_WND_PROP_AUTOSIZE** Change if the user can resize the window ( ``CV_WINDOW_NORMAL`` or ``CV_WINDOW_AUTOSIZE`` ).
+
+        * **CV_WND_PROP_ASPECTRATIO** Change if the image's aspect ratio is preserved ( ``CV_WINDOW_FREERATIO`` or ``CV_WINDOW_KEEPRATIO`` ).
+
+    :param prop_value: New value of the window property. The operation flags:
+
+        * **CV_WINDOW_NORMAL** Change the window to normal size, or make the window resizable.
+
+        * **CV_WINDOW_AUTOSIZE** The user cannot resize the window; the size is constrained by the displayed image.
+
+        * **CV_WINDOW_FULLSCREEN** Change the window to fullscreen.
+
+        * **CV_WINDOW_FREERATIO** The image expands as much as it can (no ratio constraint).
+
+        * **CV_WINDOW_KEEPRATIO** The image ratio is respected.
+
+The function ``setWindowProperty`` allows changing the window's properties.
+
+.. index:: getWindowProperty
+
+cv::getWindowProperty
+---------------------
+
+.. cfunction:: double getWindowProperty(const string\& name, int prop_id)
+
+    Gets the parameters of the window.
+
+    :param name: Name of the window.
+
+    :param prop_id: Window property to retrieve. The operation flags:
+
+        * **CV_WND_PROP_FULLSCREEN** Whether the window is fullscreen ( ``CV_WINDOW_NORMAL`` or ``CV_WINDOW_FULLSCREEN`` ).
+
+        * **CV_WND_PROP_AUTOSIZE** Whether the user can resize the window ( ``CV_WINDOW_NORMAL`` or ``CV_WINDOW_AUTOSIZE`` ).
+
+        * **CV_WND_PROP_ASPECTRATIO** Whether the image's aspect ratio is preserved ( ``CV_WINDOW_FREERATIO`` or ``CV_WINDOW_KEEPRATIO`` ).
+
+See :ref:`setWindowProperty` to know the meaning of the returned values.
+
+The function ``getWindowProperty`` returns the window's properties.
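+For instance, a small illustrative sketch (the window name is hypothetical)::
+
+    namedWindow("view", CV_WINDOW_NORMAL);
+    setWindowProperty("view", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
+    // read the property back; a non-zero value means the window is now fullscreen
+    double isFullscreen = getWindowProperty("view", CV_WND_PROP_FULLSCREEN);
+
+..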
+.. index:: fontQt
+
+cv::fontQt
+----------
+
+.. cfunction:: CvFont fontQt(const string\& nameFont, int pointSize = -1, Scalar color = Scalar::all(0), int weight = CV_FONT_NORMAL, int style = CV_STYLE_NORMAL, int spacing = 0)
+
+    Creates the font to be used to draw text on an image.
+
+    :param nameFont: Name of the font. The name should match the name of a system font (such as ``Times`` ). If the font is not found, a default one will be used.
+
+    :param pointSize: Size of the font. If not specified, equal to zero or negative, the point size of the font is set to a system-dependent default value. Generally, this is 12 points.
+
+    :param color: Color of the font in BGRA -- A = 255 is fully transparent. Use the macro ``CV_RGB`` for simplicity.
+
+    :param weight: The operation flags:
+
+        * **CV_FONT_LIGHT** Weight of 25
+
+        * **CV_FONT_NORMAL** Weight of 50
+
+        * **CV_FONT_DEMIBOLD** Weight of 63
+
+        * **CV_FONT_BOLD** Weight of 75
+
+        * **CV_FONT_BLACK** Weight of 87
+
+        You can also specify a positive integer for more control.
+
+    :param style: The operation flags:
+
+        * **CV_STYLE_NORMAL** Font is normal
+
+        * **CV_STYLE_ITALIC** Font is in italic
+
+        * **CV_STYLE_OBLIQUE** Font is oblique
+
+    :param spacing: Spacing between characters. Can be negative or positive.
+
+The function ``fontQt`` creates a CvFont object. This CvFont is not compatible with putText.
+
+A basic usage of this function is:
+
+::
+
+    CvFont font = fontQt("Times");
+    addText( img1, "Hello World !", Point(50,50), font);
+
+..
+
+.. index:: addText
+
+cv::addText
+-----------
+
+.. cfunction:: void addText(const Mat\& img, const string\& text, Point location, CvFont *font)
+
+    Draws text on an image using a specific Qt font.
+
+    :param img: Image where the text should be drawn.
+
+    :param text: Text to write on the image.
+
+    :param location: Point(x,y) where the text should start on the image.
+
+    :param font: Font to use to draw the text.
+
+The function ``addText`` draws *text* on the image *img* using a specific font *font* (see the example in :ref:`fontQt` ).
+
+.. index:: displayOverlay
+
+cv::displayOverlay
+------------------
+
+.. cfunction:: void displayOverlay(const string\& name, const string\& text, int delay)
+
+    Displays text on the window's image as an overlay for delay milliseconds. This does not modify the image's data; the text is displayed on top of the image.
+
+    :param name: Name of the window.
+
+    :param text: Overlay text to write on the window's image.
+
+    :param delay: Delay to display the overlay text. If this function is called before the previous overlay text times out, the timer is restarted and the text updated. If this value is zero, the text never disappears.
+
+The function ``displayOverlay`` aims at displaying useful information/tips on the window for a certain amount of time *delay* . This information is displayed on the top of the window.
+
+.. index:: displayStatusBar
+
+cv::displayStatusBar
+--------------------
+
+.. cfunction:: void displayStatusBar(const string\& name, const string\& text, int delayms)
+
+    Displays text on the window's statusbar for delay milliseconds.
+    :param name: Name of the window.
+
+    :param text: Text to write on the window's statusbar.
+
+    :param delayms: Delay to display the text. If this function is called before the previous text times out, the timer is restarted and the text updated. If this value is zero, the text never disappears.
+
+The function ``displayStatusBar`` aims at displaying useful information/tips on the window for a certain amount of time *delayms* . This information is displayed on the window's statusbar (the window must be created with the ``CV_GUI_EXPANDED`` flag).
+
+.. index:: createOpenGLCallback
+
+cv::createOpenGLCallback
+------------------------
+
+.. cfunction:: void createOpenGLCallback( const string\& window_name, OpenGLCallback callbackOpenGL, void* userdata CV_DEFAULT(NULL), double angle CV_DEFAULT(-1), double zmin CV_DEFAULT(-1), double zmax CV_DEFAULT(-1))
+
+    Creates a callback function called to draw OpenGL on top of the image displayed in the window *window_name*.
+
+    :param window_name: Name of the window.
+
+    :param callbackOpenGL: Pointer to the function to be called every frame. This function should be prototyped as ``void Foo(void*);`` .
+
+    :param userdata: Pointer passed to the callback function. *(Optional)*
+
+    :param angle: Specifies the field of view angle, in degrees, in the y direction. *(Optional - Default 45 degrees)*
+
+    :param zmin: Specifies the distance from the viewer to the near clipping plane (always positive). *(Optional - Default 0.01)*
+
+    :param zmax: Specifies the distance from the viewer to the far clipping plane (always positive). *(Optional - Default 1000)*
+
+The function ``createOpenGLCallback`` can be used to draw 3D data on the window. An example of a callback could be:
+
+::
+
+    void on_opengl(void* param)
+    {
+        glLoadIdentity();
+
+        glTranslated(0.0, 0.0, -1.0);
+
+        glRotatef( 55, 1, 0, 0 );
+        glRotatef( 45, 0, 1, 0 );
+        glRotatef( 0, 0, 0, 1 );
+
+        static const int coords[6][4][3] = {
+            { { +1, -1, -1 }, { -1, -1, -1 }, { -1, +1, -1 }, { +1, +1, -1 } },
+            { { +1, +1, -1 }, { -1, +1, -1 }, { -1, +1, +1 }, { +1, +1, +1 } },
+            { { +1, -1, +1 }, { +1, -1, -1 }, { +1, +1, -1 }, { +1, +1, +1 } },
+            { { -1, -1, -1 }, { -1, -1, +1 }, { -1, +1, +1 }, { -1, +1, -1 } },
+            { { +1, -1, +1 }, { -1, -1, +1 }, { -1, -1, -1 }, { +1, -1, -1 } },
+            { { -1, -1, +1 }, { +1, -1, +1 }, { +1, +1, +1 }, { -1, +1, +1 } }
+        };
+
+        for (int i = 0; i < 6; ++i) {
+            glColor3ub( i*20, 100+i*10, i*42 );
+            glBegin(GL_QUADS);
+            for (int j = 0; j < 4; ++j) {
+                glVertex3d(0.2 * coords[i][j][0], 0.2 * coords[i][j][1], 0.2 * coords[i][j][2]);
+            }
+            glEnd();
+        }
+    }
+
+..
+
+.. index:: saveWindowParameters
+
+cv::saveWindowParameters
+------------------------
+
+.. cfunction:: void saveWindowParameters(const string\& name)
+
+    Saves the parameters of the window *name*.
+
+    :param name: Name of the window.
+
+The function ``saveWindowParameters`` saves the size, location, flags, trackbars' values, zoom and panning location of the window *name*.
+
+.. index:: loadWindowParameters
+
+cv::loadWindowParameters
+------------------------
+
+.. cfunction:: void loadWindowParameters(const string\& name)
+
+    Loads the parameters of the window *name*.
+
+    :param name: Name of the window.
+
+The function ``loadWindowParameters`` loads the size, location, flags, trackbars' values, zoom and panning location of the window *name*.
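+A small illustrative sketch of the save/load pair (the window name is hypothetical)::
+
+    namedWindow("settings", CV_GUI_EXPANDED);
+    // ... the user resizes the window and adjusts its trackbars ...
+    saveWindowParameters("settings");   // persist the window state
+    // in a later session, restore it:
+    loadWindowParameters("settings");
+
+..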
+.. index:: createButton
+
+cv::createButton
+----------------
+
+.. cfunction:: int createButton( const string\& button_name CV_DEFAULT(NULL), ButtonCallback on_change CV_DEFAULT(NULL), void* userdata CV_DEFAULT(NULL), int button_type CV_DEFAULT(CV_PUSH_BUTTON), int initial_button_state CV_DEFAULT(0))
+
+    Attaches a button to the control panel.
+
+    :param button_name: Name of the button *( if NULL, the name will be "button ")*
+
+    :param on_change: Pointer to the function to be called every time the button changes its state. This function should be prototyped as ``void Foo(int state, void*);`` . *state* is the current state of the button. It could be -1 for a push button, 0 or 1 for a check/radio box button.
+
+    :param userdata: Pointer passed to the callback function. *(Optional)*
+
+The ``button_type`` parameter can be: *(Optional -- a push button by default.)*
+
+    * **CV_PUSH_BUTTON** The button will be a push button.
+
+    * **CV_CHECKBOX** The button will be a checkbox button.
+
+    * **CV_RADIOBOX** The button will be a radiobox button. The radioboxes on the same buttonbar (same line) are exclusive; only one can be selected at a time.
+
+    * **initial_button_state** Default state of the button. Used for checkbox and radiobox buttons; its value could be 0 or 1. *(Optional)*
+
+The function ``createButton`` attaches a button to the control panel. Each button is added to a buttonbar to the right of the last button. A new buttonbar is created if nothing was attached to the control panel before, or if the last element attached to the control panel was a trackbar.
+
+Here are various examples of the ``createButton`` function call:
+
+::
+
+    createButton(NULL,callbackButton);//create a push button "button 0", that will call callbackButton.
+    createButton("button2",callbackButton,NULL,CV_CHECKBOX,0);
+    createButton("button3",callbackButton,&value);
+    createButton("button5",callbackButton1,NULL,CV_RADIOBOX);
+    createButton("button6",callbackButton2,NULL,CV_PUSH_BUTTON,1);
+
+..
diff --git a/modules/highgui/doc/reading_and_writing_images_and_video.rst b/modules/highgui/doc/reading_and_writing_images_and_video.rst
new file mode 100644
index 000000000..8e48c0528
--- /dev/null
+++ b/modules/highgui/doc/reading_and_writing_images_and_video.rst
@@ -0,0 +1,624 @@
+Reading and Writing Images and Video
+====================================
+
+.. highlight:: cpp
+
+.. index:: imdecode
+
+cv::imdecode
+------------
+
+.. cfunction:: Mat imdecode( const Mat\& buf, int flags )
+
+    Reads an image from a buffer in memory.
+
+    :param buf: The input array or vector of bytes
+
+    :param flags: The same flags as in :ref:`imread`
+
+The function reads an image from the specified buffer in memory. If the buffer is too short or contains invalid data, an empty matrix is returned.
+
+See :ref:`imread` for the list of supported formats and the flags description.
+
+.. index:: imencode
+
+cv::imencode
+------------
+
+.. cfunction:: bool imencode( const string\& ext, const Mat\& img, vector<uchar>\& buf, const vector<int>\& params=vector<int>())
+
+    Encodes an image into a memory buffer.
+
+    :param ext: The file extension that defines the output format
+
+    :param img: The image to be written
+
+    :param buf: The output buffer; resized to fit the compressed image
+
+    :param params: The format-specific parameters; see :ref:`imwrite`
+
+The function compresses the image and stores it in the memory buffer, which is resized to fit the result. See :ref:`imwrite` for the list of supported formats and the flags description.
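+Together, the two functions allow an in-memory round trip; for instance (an illustrative sketch, assuming ``img`` is a valid ``Mat``)::
+
+    vector<uchar> buf;
+    imencode(".png", img, buf);          // compress into the memory buffer
+    Mat img2 = imdecode(Mat(buf), 1);    // decode it back as a color image
+
+..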
+.. index:: imread
+
+cv::imread
+----------
+
+.. cfunction:: Mat imread( const string\& filename, int flags=1 )
+
+    Loads an image from a file.
+
+    :param filename: Name of the file to be loaded.
+
+    :param flags: Specifies the color type of the loaded image:
+
+        * **>0** the loaded image is forced to be a 3-channel color image
+
+        * **=0** the loaded image is forced to be grayscale
+
+        * **<0** the loaded image will be loaded as-is (note that in the current implementation the alpha channel, if any, is stripped from the output image, e.g. a 4-channel RGBA image will be loaded as RGB if :math:`flags\ge0` ).
+
+The function ``imread`` loads an image from the specified file and returns it. If the image can not be read (because of a missing file, improper permissions, or an unsupported or invalid format), the function returns an empty matrix ( ``Mat::data==NULL`` ). Currently, the following file formats are supported:
+
+* Windows bitmaps - ``*.bmp, *.dib`` (always supported)
+
+* JPEG files - ``*.jpeg, *.jpg, *.jpe`` (see **Note2** )
+
+* JPEG 2000 files - ``*.jp2`` (see **Note2** )
+
+* Portable Network Graphics - ``*.png`` (see **Note2** )
+
+* Portable image format - ``*.pbm, *.pgm, *.ppm`` (always supported)
+
+* Sun rasters - ``*.sr, *.ras`` (always supported)
+
+* TIFF files - ``*.tiff, *.tif`` (see **Note2** )
+
+**Note1** : The function determines the type of the image by the content, not by the file extension.
+
+**Note2** : On Windows and MacOSX the image codecs shipped with OpenCV (libjpeg, libpng, libtiff and libjasper) are used by default, so OpenCV can always read JPEGs, PNGs and TIFFs. On MacOSX there is also the option to use native MacOSX image readers. But beware that currently these native image loaders give images with somewhat different pixel values, because of the color management embedded into MacOSX.
+
+On Linux, BSD flavors and other Unix-like open-source operating systems OpenCV looks for the image codecs supplied with the OS. Please install the relevant packages (do not forget the development files, e.g. "libjpeg-dev" etc. in Debian and Ubuntu) in order to get the codec support, or turn on the ``OPENCV_BUILD_3RDPARTY_LIBS`` flag in CMake.
+
+.. index:: imwrite
+
+cv::imwrite
+-----------
+
+.. cfunction:: bool imwrite( const string\& filename, const Mat\& img, const vector<int>\& params=vector<int>())
+
+    Saves an image to a specified file.
+
+    :param filename: Name of the file.
+
+    :param img: The image to be saved.
+
+    :param params: The format-specific save parameters, encoded as pairs ``paramId_1, paramValue_1, paramId_2, paramValue_2, ...`` .
The following parameters are currently supported: + + + + * In the case of JPEG it can be a quality ( ``CV_IMWRITE_JPEG_QUALITY`` ), from 0 to 100 (the higher is the better), 95 by default. + + + * In the case of PNG it can be the compression level ( ``CV_IMWRITE_PNG_COMPRESSION`` ), from 0 to 9 (the higher value means smaller size and longer compression time), 3 by default. + + + * In the case of PPM, PGM or PBM it can a binary format flag ( ``CV_IMWRITE_PXM_BINARY`` ), 0 or 1, 1 by default. + + + + + +The function +``imwrite`` +saves the image to the specified file. The image format is chosen based on the +``filename`` +extension, see +:ref:`imread` +for the list of extensions. Only 8-bit (or 16-bit in the case of PNG, JPEG 2000 and TIFF) single-channel or 3-channel (with 'BGR' channel order) images can be saved using this function. If the format, depth or channel order is different, use +:ref:`Mat::convertTo` +, and +:ref:`cvtColor` +to convert it before saving, or use the universal XML I/O functions to save the image to XML or YAML format. + + + +.. index:: VideoCapture + +.. _VideoCapture: + +VideoCapture +------------ + +`id=0.267295181599 Comments from the Wiki `__ + +.. ctype:: VideoCapture + + + +Class for video capturing from video files or cameras + + + + +:: + + + + class VideoCapture + { + public: + // the default constructor + VideoCapture(); + // the constructor that opens video file + VideoCapture(const string& filename); + // the constructor that starts streaming from the camera + VideoCapture(int device); + + // the destructor + virtual ~VideoCapture(); + + // opens the specified video file + virtual bool open(const string& filename); + + // starts streaming from the specified camera by its id + virtual bool open(int device); + + // returns true if the file was open successfully or if the camera + // has been initialized succesfully + virtual bool isOpened() const; + + // closes the camera stream or the video file + // (automatically called by the destructor) + virtual void release(); + + // grab the next frame or a set of frames from a multi-head camera; + // returns false if there are no more frames + virtual bool grab(); + // reads the frame from the specified video stream + // (non-zero channel is only valid for multi-head camera live streams) + virtual bool retrieve(Mat& image, int channel=0); + // equivalent to grab() + retrieve(image, 0); + virtual VideoCapture& operator >> (Mat& image); + + // sets the specified property propId to the specified value + virtual bool set(int propId, double value); + // retrieves value of the specified property + virtual double get(int propId); + + protected: + ... + }; + + +.. + +The class provides C++ video capturing API. Here is how the class can be used: + + + + +:: + + + + #include "cv.h" + #include "highgui.h" + + using namespace cv; + + int main(int, char**) + { + VideoCapture cap(0); // open the default camera + if(!cap.isOpened()) // check if we succeeded + return -1; + + Mat edges; + namedWindow("edges",1); + for(;;) + { + Mat frame; + cap >> frame; // get a new frame from camera + cvtColor(frame, edges, CV_BGR2GRAY); + GaussianBlur(edges, edges, Size(7,7), 1.5, 1.5); + Canny(edges, edges, 0, 30, 3); + imshow("edges", edges); + if(waitKey(30) >= 0) break; + } + // the camera will be deinitialized automatically in VideoCapture destructor + return 0; + } + + +.. + + +.. 
index:: VideoCapture::VideoCapture + + +cv::VideoCapture::VideoCapture +------------------------------ + +`id=0.788880569149 Comments from the Wiki `__ + + + + +.. cfunction:: VideoCapture::VideoCapture() + + + +.. cfunction:: VideoCapture::VideoCapture(const string\& filename) + + + +.. cfunction:: VideoCapture::VideoCapture(int device) + + + + + + + :param filename: TOWRITE + + + :param device: TOWRITE + + + +VideoCapture constructors. + + +.. index:: VideoCapture::get + + +cv::VideoCapture::get +--------------------- + +`id=0.977076859044 Comments from the Wiki `__ + + + + +.. cfunction:: double VideoCapture::get(int property_id) + + + + + + + :param property_id: Property identifier. Can be one of the following: + + + + + * **CV_CAP_PROP_POS_MSEC** Film current position in milliseconds or video capture timestamp + + + * **CV_CAP_PROP_POS_FRAMES** 0-based index of the frame to be decoded/captured next + + + * **CV_CAP_PROP_POS_AVI_RATIO** Relative position of the video file (0 - start of the film, 1 - end of the film) + + + * **CV_CAP_PROP_FRAME_WIDTH** Width of the frames in the video stream + + + * **CV_CAP_PROP_FRAME_HEIGHT** Height of the frames in the video stream + + + * **CV_CAP_PROP_FPS** Frame rate + + + * **CV_CAP_PROP_FOURCC** 4-character code of codec + + + * **CV_CAP_PROP_FRAME_COUNT** Number of frames in the video file + + + * **CV_CAP_PROP_FORMAT** The format of the Mat objects returned by retrieve() + + + * **CV_CAP_PROP_MODE** A backend-specific value indicating the current capture mode + + + * **CV_CAP_PROP_BRIGHTNESS** Brightness of the image (only for cameras) + + + * **CV_CAP_PROP_CONTRAST** Contrast of the image (only for cameras) + + + * **CV_CAP_PROP_SATURATION** Saturation of the image (only for cameras) + + + * **CV_CAP_PROP_HUE** Hue of the image (only for cameras) + + + * **CV_CAP_PROP_GAIN** Gain of the image (only for cameras) + + + * **CV_CAP_PROP_EXPOSURE** Exposure (only for cameras) + + + * **CV_CAP_PROP_CONVERT_RGB** Boolean flags indicating whether images should be converted to RGB + + + * **CV_CAP_PROP_WHITE_BALANCE** Currently unsupported + + + * **CV_CAP_PROP_RECTIFICATION** TOWRITE (note: only supported by DC1394 v 2.x backend currently) + + + + + +Note that when querying a property which is unsupported by the backend used by the VideoCapture class, the value 0 is returned. + + +.. index:: VideoCapture::set + + +cv::VideoCapture::set +--------------------- + +`id=0.845027627213 Comments from the Wiki `__ + + + + +.. cfunction:: bool VideoCapture::set(int property_id, double value) + + + + + + + :param property_id: Property identifier. 
Can be one of the following: + + + + + * **CV_CAP_PROP_POS_MSEC** Film current position in milliseconds or video capture timestamp + + + * **CV_CAP_PROP_POS_FRAMES** 0-based index of the frame to be decoded/captured next + + + * **CV_CAP_PROP_POS_AVI_RATIO** Relative position of the video file (0 - start of the film, 1 - end of the film) + + + * **CV_CAP_PROP_FRAME_WIDTH** Width of the frames in the video stream + + + * **CV_CAP_PROP_FRAME_HEIGHT** Height of the frames in the video stream + + + * **CV_CAP_PROP_FPS** Frame rate + + + * **CV_CAP_PROP_FOURCC** 4-character code of codec + + + * **CV_CAP_PROP_FRAME_COUNT** Number of frames in the video file + + + * **CV_CAP_PROP_FORMAT** The format of the Mat objects returned by retrieve() + + + * **CV_CAP_PROP_MODE** A backend-specific value indicating the current capture mode + + + * **CV_CAP_PROP_BRIGHTNESS** Brightness of the image (only for cameras) + + + * **CV_CAP_PROP_CONTRAST** Contrast of the image (only for cameras) + + + * **CV_CAP_PROP_SATURATION** Saturation of the image (only for cameras) + + + * **CV_CAP_PROP_HUE** Hue of the image (only for cameras) + + + * **CV_CAP_PROP_GAIN** Gain of the image (only for cameras) + + + * **CV_CAP_PROP_EXPOSURE** Exposure (only for cameras) + + + * **CV_CAP_PROP_CONVERT_RGB** Boolean flags indicating whether images should be converted to RGB + + + * **CV_CAP_PROP_WHITE_BALANCE** Currently unsupported + + + * **CV_CAP_PROP_RECTIFICATION** TOWRITE (note: only supported by DC1394 v 2.x backend currently) + + + + + :param value: value of the property. + + + +Sets a property in the VideoCapture backend. + + + +.. index:: VideoWriter + +.. _VideoWriter: + +VideoWriter +----------- + +`id=0.234127975013 Comments from the Wiki `__ + +.. ctype:: VideoWriter + + + +Video writer class + + + + +:: + + + + class VideoWriter + { + public: + // default constructor + VideoWriter(); + // constructor that calls open + VideoWriter(const string& filename, int fourcc, + double fps, Size frameSize, bool isColor=true); + + // the destructor + virtual ~VideoWriter(); + + // opens the file and initializes the video writer. + // filename - the output file name. + // fourcc - the codec + // fps - the number of frames per second + // frameSize - the video frame size + // isColor - specifies whether the video stream is color or grayscale + virtual bool open(const string& filename, int fourcc, + double fps, Size frameSize, bool isColor=true); + + // returns true if the writer has been initialized successfully + virtual bool isOpened() const; + + // writes the next video frame to the stream + virtual VideoWriter& operator << (const Mat& image); + + protected: + ... + }; + + +.. + diff --git a/modules/highgui/doc/user_interface.rst b/modules/highgui/doc/user_interface.rst new file mode 100644 index 000000000..65dde4976 --- /dev/null +++ b/modules/highgui/doc/user_interface.rst @@ -0,0 +1,315 @@ +User Interface +============== + +.. highlight:: cpp + + + +.. index:: createTrackbar + + +cv::createTrackbar +------------------ + +`id=0.122963695249 Comments from the Wiki `__ + + + + +.. cfunction:: int createTrackbar( const string\& trackbarname, const string\& winname, int* value, int count, TrackbarCallback onChange CV_DEFAULT(0), void* userdata CV_DEFAULT(0)) + + Creates a trackbar and attaches it to the specified window + + + + + + + :param trackbarname: Name of the created trackbar. + + + :param winname: Name of the window which will be used as a parent of the created trackbar. 
+    :param value: The optional pointer to an integer variable, whose value will reflect the position of the slider. Upon creation, the slider position is defined by this variable.
+
+    :param count: The maximal position of the slider. The minimal position is always 0.
+
+    :param onChange: Pointer to the function to be called every time the slider changes position. This function should be prototyped as ``void Foo(int,void*);`` , where the first parameter is the trackbar position and the second parameter is the user data (see the next parameter). If the callback is the NULL pointer, no callbacks are called, but only ``value`` is updated.
+
+    :param userdata: The user data that is passed as-is to the callback; it can be used to handle trackbar events without using global variables.
+
+The function ``createTrackbar`` creates a trackbar (a.k.a. slider or range control) with the specified name and range, assigns a variable ``value`` to be synchronized with the trackbar position and specifies a callback function ``onChange`` to be called on the trackbar position change. The created trackbar is displayed on the top of the given window.
+
+**[Qt Backend Only]** Qt-specific details:
+
+    * **winname** Name of the window which will be used as a parent for the created trackbar. Can be NULL if the trackbar should be attached to the control panel.
+
+The created trackbar is displayed at the bottom of the given window if *winname* is correctly provided, or displayed on the control panel if *winname* is NULL.
+
+By clicking on the label of each trackbar, it is possible to edit the trackbar's value manually for more accurate control of it.
+
+.. index:: getTrackbarPos
+
+cv::getTrackbarPos
+------------------
+
+.. cfunction:: int getTrackbarPos( const string\& trackbarname, const string\& winname )
+
+    Returns the trackbar position.
+
+    :param trackbarname: Name of the trackbar.
+
+    :param winname: Name of the window which is the parent of the trackbar.
+
+The function returns the current position of the specified trackbar.
+
+**[Qt Backend Only]** Qt-specific details:
+
+    * **winname** Name of the window which is the parent of the trackbar. Can be NULL if the trackbar is attached to the control panel.
+
+.. index:: imshow
+
+cv::imshow
+----------
+
+.. cfunction:: void imshow( const string\& winname, const Mat\& image )
+
+    Displays the image in the specified window.
+
+    :param winname: Name of the window.
+
+    :param image: Image to be shown.
+
+The function ``imshow`` displays the image in the specified window. If the window was created with the ``CV_WINDOW_AUTOSIZE`` flag, the image is shown with its original size; otherwise the image is scaled to fit in the window. The function may scale the image, depending on its depth:
+
+* If the image is 8-bit unsigned, it is displayed as is.
+
+* If the image is 16-bit unsigned or 32-bit integer, the pixels are divided by 256. That is, the value range [0,255*256] is mapped to [0,255].
+
+* If the image is 32-bit floating-point, the pixel values are multiplied by 255. That is, the value range [0,1] is mapped to [0,255].
+
+.. index:: namedWindow
+
+cv::namedWindow
+---------------
+
+.. cfunction:: void namedWindow( const string\& winname, int flags )
+
+    Creates a window.
+    :param name: Name of the window in the window caption that may be used as a window identifier.
+
+    :param flags: Flags of the window. Currently the only supported flag is ``CV_WINDOW_AUTOSIZE`` . If this is set, the window size is automatically adjusted to fit the displayed image (see :ref:`imshow` ), and the user cannot change the window size manually.
+
+The function ``namedWindow`` creates a window which can be used as a placeholder for images and trackbars. Created windows are referred to by their names.
+
+If a window with the same name already exists, the function does nothing.
+
+**[Qt Backend Only]** Qt-specific details:
+
+    * **flags** Flags of the window. Currently the supported flags are:
+
+        * **CV_WINDOW_NORMAL or CV_WINDOW_AUTOSIZE:** ``CV_WINDOW_NORMAL`` lets the user resize the window, whereas ``CV_WINDOW_AUTOSIZE`` automatically adjusts the window's size to fit the displayed image (see :ref:`ShowImage` ), and the user cannot change the window size manually.
+
+        * **CV_WINDOW_FREERATIO or CV_WINDOW_KEEPRATIO:** ``CV_WINDOW_FREERATIO`` adjusts the image with no respect to its ratio, whereas ``CV_WINDOW_KEEPRATIO`` keeps the image ratio.
+
+        * **CV_GUI_NORMAL or CV_GUI_EXPANDED:** ``CV_GUI_NORMAL`` is the old way to draw the window without statusbar and toolbar, whereas ``CV_GUI_EXPANDED`` is the new enhanced GUI.
+
+        This parameter is optional. The default flags set for a new window are ``CV_WINDOW_AUTOSIZE`` , ``CV_WINDOW_KEEPRATIO`` , and ``CV_GUI_EXPANDED`` .
+
+        However, if you want to modify the flags, you can combine them using the OR operator, e.g.:
+
+        ::
+
+            namedWindow("myWindow", CV_WINDOW_NORMAL | CV_GUI_NORMAL);
+
+        ..
+
+.. index:: setTrackbarPos
+
+cv::setTrackbarPos
+------------------
+
+.. cfunction:: void setTrackbarPos( const string\& trackbarname, const string\& winname, int pos )
+
+    Sets the trackbar position.
+
+    :param trackbarname: Name of the trackbar.
+
+    :param winname: Name of the window which is the parent of the trackbar.
+
+    :param pos: The new position.
+
+The function sets the position of the specified trackbar in the specified window.
+
+**[Qt Backend Only]** Qt-specific details:
+
+    * **winname** Name of the window which is the parent of the trackbar. Can be NULL if the trackbar is attached to the control panel.
+
+.. index:: waitKey
+
+cv::waitKey
+-----------
+
+.. cfunction:: int waitKey(int delay=0)
+
+    Waits for a pressed key.
+
+    :param delay: Delay in milliseconds. 0 is the special value that means "forever".
+
+The function ``waitKey`` waits for a key event infinitely (when :math:`\texttt{delay}\leq 0` ) or for ``delay`` milliseconds, when it is positive. It returns the code of the pressed key or -1 if no key was pressed before the specified time had elapsed.
+
+**Note:** This function is the only method in HighGUI that can fetch and handle events, so it needs to be called periodically for normal event processing, unless HighGUI is used within some environment that takes care of event processing.
+
+**Note 2:** The function only works if there is at least one HighGUI window created and the window is active. If there are several HighGUI windows, any of them can be active.
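+A compact sketch pulling these pieces together (illustrative only; the file name is hypothetical)::
+
+    int thresh = 128;
+    Mat img = imread("lena.jpg", 0), view;   // load as grayscale
+    namedWindow("demo", CV_WINDOW_AUTOSIZE);
+    // no callback: the slider simply keeps `thresh` up to date
+    createTrackbar("thresh", "demo", &thresh, 255, 0);
+    for(;;)
+    {
+        threshold(img, view, thresh, 255, THRESH_BINARY);
+        imshow("demo", view);
+        if(waitKey(30) >= 0) break;          // also pumps the GUI events
+    }
+
+..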
+ diff --git a/modules/imgproc/doc/feature_detection.rst b/modules/imgproc/doc/feature_detection.rst new file mode 100644 index 000000000..69e3cdba2 --- /dev/null +++ b/modules/imgproc/doc/feature_detection.rst @@ -0,0 +1,829 @@ +Feature Detection +================= + +.. highlight:: cpp + + + +.. index:: Canny + + +cv::Canny +--------- + +`id=0.626295418243 Comments from the Wiki `__ + + + + +.. cfunction:: void Canny( const Mat\& image, Mat\& edges, double threshold1, double threshold2, int apertureSize=3, bool L2gradient=false ) + + Finds edges in an image using Canny algorithm. + + + + + + + :param image: Single-channel 8-bit input image + + + :param edges: The output edge map. It will have the same size and the same type as ``image`` + + + :param threshold1: The first threshold for the hysteresis procedure + + + :param threshold2: The second threshold for the hysteresis procedure + + + :param apertureSize: Aperture size for the :func:`Sobel` operator + + + :param L2gradient: Indicates, whether the more accurate :math:`L_2` norm :math:`=\sqrt{(dI/dx)^2 + (dI/dy)^2}` should be used to compute the image gradient magnitude ( ``L2gradient=true`` ), or a faster default :math:`L_1` norm :math:`=|dI/dx|+|dI/dy|` is enough ( ``L2gradient=false`` ) + + + +The function finds edges in the input image +``image`` +and marks them in the output map +``edges`` +using the Canny algorithm. The smallest value between +``threshold1`` +and +``threshold2`` +is used for edge linking, the largest value is used to find the initial segments of strong edges, see +http://en.wikipedia.org/wiki/Canny_edge_detector + +.. index:: cornerEigenValsAndVecs + + +cv::cornerEigenValsAndVecs +-------------------------- + +`id=0.211221916008 Comments from the Wiki `__ + + + + +.. cfunction:: void cornerEigenValsAndVecs( const Mat\& src, Mat\& dst, int blockSize, int apertureSize, int borderType=BORDER_DEFAULT ) + + Calculates eigenvalues and eigenvectors of image blocks for corner detection. + + + + + + + :param src: Input single-channel 8-bit or floating-point image + + + :param dst: Image to store the results. It will have the same size as ``src`` and the type ``CV_32FC(6)`` + + + :param blockSize: Neighborhood size (see discussion) + + + :param apertureSize: Aperture parameter for the :func:`Sobel` operator + + + :param boderType: Pixel extrapolation method; see :func:`borderInterpolate` + + + +For every pixel +:math:`p` +, the function +``cornerEigenValsAndVecs`` +considers a +``blockSize`` +:math:`\times` +``blockSize`` +neigborhood +:math:`S(p)` +. It calculates the covariation matrix of derivatives over the neighborhood as: + + + +.. math:: + + M = \begin{bmatrix} \sum _{S(p)}(dI/dx)^2 & \sum _{S(p)}(dI/dx dI/dy)^2 \\ \sum _{S(p)}(dI/dx dI/dy)^2 & \sum _{S(p)}(dI/dy)^2 \end{bmatrix} + + +Where the derivatives are computed using +:func:`Sobel` +operator. + +After that it finds eigenvectors and eigenvalues of +:math:`M` +and stores them into destination image in the form +:math:`(\lambda_1, \lambda_2, x_1, y_1, x_2, y_2)` +where + + + + +* :math:`\lambda_1, \lambda_2` + are the eigenvalues of + :math:`M` + ; not sorted + + +* :math:`x_1, y_1` + are the eigenvectors corresponding to + :math:`\lambda_1` + + +* :math:`x_2, y_2` + are the eigenvectors corresponding to + :math:`\lambda_2` + + +The output of the function can be used for robust edge or corner detection. + +See also: +:func:`cornerMinEigenVal` +, +:func:`cornerHarris` +, +:func:`preCornerDetect` + +.. 
index:: cornerHarris + + +cv::cornerHarris +---------------- + +`id=0.781956530281 Comments from the Wiki `__ + + + + +.. cfunction:: void cornerHarris( const Mat\& src, Mat\& dst, int blockSize, int apertureSize, double k, int borderType=BORDER_DEFAULT ) + + Harris edge detector. + + + + + + + :param src: Input single-channel 8-bit or floating-point image + + + :param dst: Image to store the Harris detector responses; will have type ``CV_32FC1`` and the same size as ``src`` + + + :param blockSize: Neighborhood size (see the discussion of :func:`cornerEigenValsAndVecs` ) + + + :param apertureSize: Aperture parameter for the :func:`Sobel` operator + + + :param k: Harris detector free parameter. See the formula below + + + :param boderType: Pixel extrapolation method; see :func:`borderInterpolate` + + + +The function runs the Harris edge detector on the image. Similarly to +:func:`cornerMinEigenVal` +and +:func:`cornerEigenValsAndVecs` +, for each pixel +:math:`(x, y)` +it calculates a +:math:`2\times2` +gradient covariation matrix +:math:`M^{(x,y)}` +over a +:math:`\texttt{blockSize} \times \texttt{blockSize}` +neighborhood. Then, it computes the following characteristic: + + + +.. math:: + + \texttt{dst} (x,y) = \mathrm{det} M^{(x,y)} - k \cdot \left ( \mathrm{tr} M^{(x,y)} \right )^2 + + +Corners in the image can be found as the local maxima of this response map. + + +.. index:: cornerMinEigenVal + + +cv::cornerMinEigenVal +--------------------- + +`id=0.604155117868 Comments from the Wiki `__ + + + + +.. cfunction:: void cornerMinEigenVal( const Mat\& src, Mat\& dst, int blockSize, int apertureSize=3, int borderType=BORDER_DEFAULT ) + + Calculates the minimal eigenvalue of gradient matrices for corner detection. + + + + + + + :param src: Input single-channel 8-bit or floating-point image + + + :param dst: Image to store the minimal eigenvalues; will have type ``CV_32FC1`` and the same size as ``src`` + + + :param blockSize: Neighborhood size (see the discussion of :func:`cornerEigenValsAndVecs` ) + + + :param apertureSize: Aperture parameter for the :func:`Sobel` operator + + + :param boderType: Pixel extrapolation method; see :func:`borderInterpolate` + + + +The function is similar to +:func:`cornerEigenValsAndVecs` +but it calculates and stores only the minimal eigenvalue of the covariation matrix of derivatives, i.e. +:math:`\min(\lambda_1, \lambda_2)` +in terms of the formulae in +:func:`cornerEigenValsAndVecs` +description. + + +.. index:: cornerSubPix + + +cv::cornerSubPix +---------------- + +`id=0.0211213978919 Comments from the Wiki `__ + + + + +.. cfunction:: void cornerSubPix( const Mat\& image, vector\& corners, Size winSize, Size zeroZone, TermCriteria criteria ) + + Refines the corner locations. + + + + + + + :param image: Input image + + + :param corners: Initial coordinates of the input corners; refined coordinates on output + + + :param winSize: Half of the side length of the search window. For example, if ``winSize=Size(5,5)`` , then a :math:`5*2+1 \times 5*2+1 = 11 \times 11` search window would be used + + + :param zeroZone: Half of the size of the dead region in the middle of the search zone over which the summation in the formula below is not done. It is used sometimes to avoid possible singularities of the autocorrelation matrix. The value of (-1,-1) indicates that there is no such size + + + :param criteria: Criteria for termination of the iterative process of corner refinement. 
That is, the process of corner position refinement stops either after a certain number of iterations or when a required accuracy is achieved. The ``criteria`` may specify either of or both the maximum number of iterations and the required accuracy.
+
+The function iterates to find the sub-pixel accurate location of corners, or radial saddle points, as shown in the picture below.
+
+.. image:: ../../pics/cornersubpix.png
+
+The sub-pixel accurate corner locator is based on the observation that every vector from the center :math:`q` to a point :math:`p` located within a neighborhood of :math:`q` is orthogonal to the image gradient at :math:`p` subject to image and measurement noise. Consider the expression:
+
+.. math::
+
+    \epsilon _i = {DI_{p_i}}^T  \cdot (q - p_i)
+
+where :math:`{DI_{p_i}}` is the image gradient at one of the points :math:`p_i` in a neighborhood of :math:`q` . The value of :math:`q` is to be found such that :math:`\epsilon_i` is minimized. A system of equations may be set up with :math:`\epsilon_i` set to zero:
+
+.. math::
+
+    \sum _i(DI_{p_i}  \cdot {DI_{p_i}}^T) \cdot q - \sum _i(DI_{p_i}  \cdot {DI_{p_i}}^T  \cdot p_i) = 0
+
+where the gradients are summed within a neighborhood ("search window") of :math:`q` . Calling the first gradient term :math:`G` and the second gradient term :math:`b` gives:
+
+.. math::
+
+    q = G^{-1}  \cdot b
+
+The algorithm sets the center of the neighborhood window at this new center :math:`q` and then iterates until the center stays within a set threshold.
+
+.. index:: goodFeaturesToTrack
+
+cv::goodFeaturesToTrack
+-----------------------
+
+.. cfunction:: void goodFeaturesToTrack( const Mat\& image, vector<Point2f>\& corners, int maxCorners, double qualityLevel, double minDistance, const Mat\& mask=Mat(), int blockSize=3, bool useHarrisDetector=false, double k=0.04 )
+
+    Determines strong corners on an image.
+
+    :param image: The input 8-bit or floating-point 32-bit, single-channel image
+
+    :param corners: The output vector of detected corners
+
+    :param maxCorners: The maximum number of corners to return. If more corners than that are found, the strongest of them are returned
+
+    :param qualityLevel: Characterizes the minimal accepted quality of image corners; the value of the parameter is multiplied by the best corner quality measure (which is the min eigenvalue, see :func:`cornerMinEigenVal` , or the Harris function response, see :func:`cornerHarris` ). The corners whose quality measure is less than the product will be rejected. For example, if the best corner has the quality measure = 1500, and ``qualityLevel=0.01`` , then all the corners whose quality measure is less than 15 will be rejected.
+
+    :param minDistance: The minimum possible Euclidean distance between the returned corners
+
+    :param mask: The optional region of interest.
+        If the image is not empty (it then needs to have the type ``CV_8UC1`` and the same size as ``image`` ), it specifies the region in which the corners are detected.
+
+    :param blockSize: Size of the averaging block for computing the derivative covariation matrix over each pixel neighborhood, see :func:`cornerEigenValsAndVecs`
+
+    :param useHarrisDetector: Indicates whether to use the Harris operator ( :func:`cornerHarris` ) or :func:`cornerMinEigenVal`
+
+    :param k: Free parameter of the Harris detector
+
+The function finds the most prominent corners in the image or in the specified image region, as described in Shi94:
+
+#. the function first calculates the corner quality measure at every source image pixel using :func:`cornerMinEigenVal` or :func:`cornerHarris` ;
+
+#. then it performs non-maxima suppression (only the local maxima in a :math:`3\times 3` neighborhood are retained);
+
+#. the next step rejects the corners with the minimal eigenvalue less than :math:`\texttt{qualityLevel} \cdot \max_{x,y} qualityMeasureMap(x,y)` ;
+
+#. the remaining corners are then sorted by the quality measure in descending order;
+
+#. finally, the function throws away each corner :math:`pt_j` if there is a stronger corner :math:`pt_i` ( :math:`i < j` ) such that the distance between them is less than ``minDistance`` .
+
+The function can be used to initialize a point-based tracker of an object; see the sketch below.
+
+Note that if the function is called with different values ``A`` and ``B`` of the parameter ``qualityLevel`` , and ``A`` > ``B`` , the vector of returned corners with ``qualityLevel=A`` will be a prefix of the output vector with ``qualityLevel=B`` .
+
+See also: :func:`cornerMinEigenVal` , :func:`cornerHarris` , :func:`calcOpticalFlowPyrLK` , :func:`estimateRigidMotion` , :func:`PlanarObjectDetector` , :func:`OneWayDescriptor`
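+For instance, seeding a sparse tracker might look like this (an illustrative sketch; ``gray`` is assumed to be an 8-bit single-channel image)::
+
+    vector<Point2f> corners;
+    // keep at most 100 corners, at least 10 pixels apart
+    goodFeaturesToTrack(gray, corners, 100, 0.01, 10);
+
+..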
+.. index:: HoughCircles
+
+cv::HoughCircles
+----------------
+
+.. cfunction:: void HoughCircles( Mat\& image, vector<Vec3f>\& circles, int method, double dp, double minDist, double param1=100, double param2=100, int minRadius=0, int maxRadius=0 )
+
+    Finds circles in a grayscale image using a Hough transform.
+
+    :param image: The 8-bit, single-channel, grayscale input image
+
+    :param circles: The output vector of found circles. Each vector is encoded as a 3-element floating-point vector :math:`(x, y, radius)`
+
+    :param method: Currently, the only implemented method is ``CV_HOUGH_GRADIENT`` , which is basically *21HT* , described in Yuen90 .
+
+    :param dp: The inverse ratio of the accumulator resolution to the image resolution. For example, if ``dp=1`` , the accumulator will have the same resolution as the input image; if ``dp=2`` , the accumulator will have half as big width and height, etc.
+
+    :param minDist: Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed.
+
+    :param param1: The first method-specific parameter. In the case of ``CV_HOUGH_GRADIENT`` it is the higher threshold of the two passed to the :func:`Canny` edge detector (the lower one will be twice smaller).
+
+    :param param2: The second method-specific parameter. In the case of ``CV_HOUGH_GRADIENT`` it is the accumulator threshold at the center detection stage. The smaller it is, the more false circles may be detected. Circles corresponding to the larger accumulator values will be returned first.
+
+    :param minRadius: Minimum circle radius
+
+    :param maxRadius: Maximum circle radius
+
+The function finds circles in a grayscale image using some modification of the Hough transform. Here is a short usage example:
+
+::
+
+    #include <cv.h>
+    #include <highgui.h>
+    #include <math.h>
+
+    using namespace cv;
+
+    int main(int argc, char** argv)
+    {
+        Mat img, gray;
+        if( argc != 2 || !(img=imread(argv[1], 1)).data)
+            return -1;
+        cvtColor(img, gray, CV_BGR2GRAY);
+        // smooth it, otherwise a lot of false circles may be detected
+        GaussianBlur( gray, gray, Size(9, 9), 2, 2 );
+        vector<Vec3f> circles;
+        HoughCircles(gray, circles, CV_HOUGH_GRADIENT,
+                     2, gray.rows/4, 200, 100 );
+        for( size_t i = 0; i < circles.size(); i++ )
+        {
+            Point center(cvRound(circles[i][0]), cvRound(circles[i][1]));
+            int radius = cvRound(circles[i][2]);
+            // draw the circle center
+            circle( img, center, 3, Scalar(0,255,0), -1, 8, 0 );
+            // draw the circle outline
+            circle( img, center, radius, Scalar(0,0,255), 3, 8, 0 );
+        }
+        namedWindow( "circles", 1 );
+        imshow( "circles", img );
+        waitKey(0);
+        return 0;
+    }
+
+..
+
+Note that usually the function detects the circles' centers well, but it may fail to find the correct radii. You can assist the function by specifying the radius range ( ``minRadius`` and ``maxRadius`` ) if you know it, or you may ignore the returned radius, use only the center and find the correct radius using some additional procedure.
+
+See also: :func:`fitEllipse` , :func:`minEnclosingCircle`
+
+.. index:: HoughLines
+
+cv::HoughLines
+--------------
+
+.. cfunction:: void HoughLines( Mat\& image, vector<Vec2f>\& lines, double rho, double theta, int threshold, double srn=0, double stn=0 )
+
+    Finds lines in a binary image using the standard Hough transform.
+
+    :param image: The 8-bit, single-channel, binary source image. The image may be modified by the function
+
+    :param lines: The output vector of lines. Each line is represented by a two-element vector :math:`(\rho, \theta)` . :math:`\rho` is the distance from the coordinate origin :math:`(0,0)` (top-left corner of the image) and :math:`\theta` is the line rotation angle in radians ( :math:`0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}` )
+
+    :param rho: Distance resolution of the accumulator in pixels
+
+    :param theta: Angle resolution of the accumulator in radians
+
+    :param threshold: The accumulator threshold parameter. Only those lines are returned that get enough votes ( :math:`>\texttt{threshold}` )
+
+    :param srn: For the multi-scale Hough transform it is the divisor for the distance resolution ``rho`` . The coarse accumulator distance resolution will be ``rho`` and the accurate accumulator resolution will be ``rho/srn`` . If both ``srn=0`` and ``stn=0`` , the classical Hough transform is used; otherwise both these parameters should be positive.
+
+    :param stn: For the multi-scale Hough transform it is the divisor for the angle resolution ``theta``
+
+The function implements the standard or standard multi-scale Hough transform algorithm for line detection. See :func:`HoughLinesP` for the code example.
cfunction:: void HoughLinesP( Mat\& image, vector<Vec4i>\& lines, double rho, double theta, int threshold, double minLineLength=0, double maxLineGap=0 ) + + Finds line segments in a binary image using the probabilistic Hough transform. + + + :param image: The 8-bit, single-channel, binary source image. The image may be modified by the function + + + :param lines: The output vector of lines. Each line is represented by a 4-element vector :math:`(x_1, y_1, x_2, y_2)` , where :math:`(x_1,y_1)` and :math:`(x_2, y_2)` are the ending points of each detected line segment. + + + :param rho: Distance resolution of the accumulator in pixels + + + :param theta: Angle resolution of the accumulator in radians + + + :param threshold: The accumulator threshold parameter. Only those lines are returned that get enough votes ( :math:`>\texttt{threshold}` ) + + + :param minLineLength: The minimum line length. Line segments shorter than that will be rejected + + + :param maxLineGap: The maximum allowed gap between points on the same line to link them. + + + +The function implements the probabilistic Hough transform algorithm for line detection, described in Matas00. Below is a line detection example: + + +:: + + + /* This is a standalone program. Pass an image name as a first parameter + of the program. Switch between standard and probabilistic Hough transform + by changing "#if 1" to "#if 0" and back */ + #include <cv.h> + #include <highgui.h> + #include <math.h> + + using namespace cv; + + int main(int argc, char** argv) + { + Mat src, dst, color_dst; + if( argc != 2 || !(src=imread(argv[1], 0)).data) + return -1; + + Canny( src, dst, 50, 200, 3 ); + cvtColor( dst, color_dst, CV_GRAY2BGR ); + + #if 0 + vector<Vec2f> lines; + HoughLines( dst, lines, 1, CV_PI/180, 100 ); + + for( size_t i = 0; i < lines.size(); i++ ) + { + float rho = lines[i][0]; + float theta = lines[i][1]; + double a = cos(theta), b = sin(theta); + double x0 = a*rho, y0 = b*rho; + Point pt1(cvRound(x0 + 1000*(-b)), + cvRound(y0 + 1000*(a))); + Point pt2(cvRound(x0 - 1000*(-b)), + cvRound(y0 - 1000*(a))); + line( color_dst, pt1, pt2, Scalar(0,0,255), 3, 8 ); + } + #else + vector<Vec4i> lines; + HoughLinesP( dst, lines, 1, CV_PI/180, 80, 30, 10 ); + for( size_t i = 0; i < lines.size(); i++ ) + { + line( color_dst, Point(lines[i][0], lines[i][1]), + Point(lines[i][2], lines[i][3]), Scalar(0,0,255), 3, 8 ); + } + #endif + namedWindow( "Source", 1 ); + imshow( "Source", src ); + + namedWindow( "Detected Lines", 1 ); + imshow( "Detected Lines", color_dst ); + + waitKey(0); + return 0; + } + + +.. + +This is the sample picture the function parameters have been tuned for: + + +.. image:: ../../pics/building.jpg + + +And this is the output of the above program in the case of the probabilistic Hough transform: + + +.. image:: ../../pics/houghp.png + + + +.. index:: preCornerDetect + + +cv::preCornerDetect +------------------- + + +.. cfunction:: void preCornerDetect( const Mat\& src, Mat\& dst, int apertureSize, int borderType=BORDER_DEFAULT ) + + Calculates the feature map for corner detection + + + :param src: The source single-channel 8-bit or floating-point image + + + :param dst: The output image; will have type ``CV_32F`` and the same size as ``src`` + + + :param apertureSize: Aperture size of :func:`Sobel` + + + :param borderType: The pixel extrapolation method; see :func:`borderInterpolate` + + + +The function calculates the complex spatial derivative-based function of the source image + + +.. 
math:: + + \texttt{dst} = (D_x \texttt{src} )^2 \cdot D_{yy} \texttt{src} + (D_y \texttt{src} )^2 \cdot D_{xx} \texttt{src} - 2 D_x \texttt{src} \cdot D_y \texttt{src} \cdot D_{xy} \texttt{src} + + +where :math:`D_x` , :math:`D_y` are the first image derivatives, :math:`D_{xx}` , :math:`D_{yy}` are the second image derivatives and :math:`D_{xy}` is the mixed derivative. + +The corners can be found as local maxima of the function, as shown below: + + +:: + + + Mat corners, dilated_corners; + preCornerDetect(image, corners, 3); + // dilation with the default 3x3 rectangular structuring element + dilate(corners, dilated_corners, Mat()); + Mat corner_mask = corners == dilated_corners; + + +.. + + diff --git a/modules/imgproc/doc/filtering.rst b/modules/imgproc/doc/filtering.rst new file mode 100644 index 000000000..f548898be --- /dev/null +++ b/modules/imgproc/doc/filtering.rst @@ -0,0 +1,2447 @@ +Image Filtering +=============== + +.. highlight:: cpp + + +Functions and classes described in this section are used to perform various linear or non-linear filtering operations on 2D images (represented as :func:`Mat` 's): for each pixel location :math:`(x,y)` in the source image, some neighborhood of it (normally rectangular) is considered and used to compute the response. In the case of a linear filter the response is a weighted sum of pixel values; in the case of morphological operations it is the minimum or maximum, etc. The computed response is stored in the destination image at the same location :math:`(x,y)` . This means that the output image will be of the same size as the input image. Normally, the functions support multi-channel arrays, in which case every channel is processed independently, so the output image will also have the same number of channels as the input one. + +Another common feature of the functions and classes described in this section is that, unlike simple arithmetic functions, they need to extrapolate values of some non-existing pixels. For example, if we want to smooth an image using a Gaussian :math:`3 \times 3` filter, then during the processing of the left-most pixels in each row we need pixels to the left of them, i.e. outside of the image. We can let those pixels be the same as the left-most image pixels (the "replicated border" extrapolation method), or assume that all the non-existing pixels are zeros (the "constant border" extrapolation method), etc. OpenCV lets the user specify the extrapolation method; see the function :func:`borderInterpolate` and the discussion of the ``borderType`` parameter in various functions below. + +.. index:: BaseColumnFilter + +.. _BaseColumnFilter: + +BaseColumnFilter +---------------- + +.. ctype:: BaseColumnFilter + + +Base class for filters with single-column kernels + + +:: + + + class BaseColumnFilter + { + public: + virtual ~BaseColumnFilter(); + + // To be overriden by the user. + // + // runs filtering operation on the set of rows, + // "dstcount + ksize - 1" rows on input, + // "dstcount" rows on output, + // each input and output row has "width" elements + // the filtered rows are written into "dst" buffer. + virtual void operator()(const uchar** src, uchar* dst, int dststep, + int dstcount, int width) = 0; + // resets the filter state (may be needed for IIR filters) + virtual void reset(); + + int ksize; // the aperture size + int anchor; // position of the anchor point, + // normally not used during the processing + }; + + +.. 
+ +The class ``BaseColumnFilter`` is the base class for filtering data using single-column kernels. The filtering does not have to be a linear operation. In general, it could be written as follows: + + +.. math:: + + \texttt{dst} (x,y) = F( \texttt{src} [y](x), \; \texttt{src} [y+1](x), \; ..., \; \texttt{src} [y+ \texttt{ksize} -1](x)) + + +where :math:`F` is the filtering function but, as it is represented as a class, it can produce any side effects, memorize previously processed data, etc. The class only defines the interface and is not used directly. Instead, there are several functions in OpenCV (and you can add more) that return pointers to the derived classes that implement specific filtering operations. Those pointers are then passed to the :func:`FilterEngine` constructor. While the filtering operation interface uses the ``uchar`` type, a particular implementation is not limited to 8-bit data. + +See also: :func:`BaseRowFilter` , :func:`BaseFilter` , :func:`FilterEngine` , + :func:`getColumnSumFilter` , :func:`getLinearColumnFilter` , :func:`getMorphologyColumnFilter` + +.. index:: BaseFilter + +.. _BaseFilter: + +BaseFilter +---------- + +.. ctype:: BaseFilter + + +Base class for 2D image filters + + +:: + + + class BaseFilter + { + public: + virtual ~BaseFilter(); + + // To be overriden by the user. + // + // runs filtering operation on the set of rows, + // "dstcount + ksize.height - 1" rows on input, + // "dstcount" rows on output, + // each input row has "(width + ksize.width-1)*cn" elements + // each output row has "width*cn" elements. + // the filtered rows are written into "dst" buffer. + virtual void operator()(const uchar** src, uchar* dst, int dststep, + int dstcount, int width, int cn) = 0; + // resets the filter state (may be needed for IIR filters) + virtual void reset(); + Size ksize; + Point anchor; + }; + + +.. + +The class ``BaseFilter`` is the base class for filtering data using 2D kernels. The filtering does not have to be a linear operation. In general, it could be written as follows: + + +.. math:: + + \begin{array}{l} \texttt{dst} (x,y) = F( \texttt{src} [y](x), \; \texttt{src} [y](x+1), \; ..., \; \texttt{src} [y](x+ \texttt{ksize.width} -1), \\ \texttt{src} [y+1](x), \; \texttt{src} [y+1](x+1), \; ..., \; \texttt{src} [y+1](x+ \texttt{ksize.width} -1), \\ ......................................................................................... \\ \texttt{src} [y+ \texttt{ksize.height-1} ](x), \; \texttt{src} [y+ \texttt{ksize.height-1} ](x+1), \; ..., \; \texttt{src} [y+ \texttt{ksize.height-1} ](x+ \texttt{ksize.width} -1)) \end{array} + + +where :math:`F` is the filtering function. The class only defines the interface and is not used directly. Instead, there are several functions in OpenCV (and you can add more) that return pointers to the derived classes that implement specific filtering operations. Those pointers are then passed to the :func:`FilterEngine` constructor. While the filtering operation interface uses the ``uchar`` type, a particular implementation is not limited to 8-bit data. + +See also: :func:`BaseColumnFilter` , :func:`BaseRowFilter` , :func:`FilterEngine` , + :func:`getLinearFilter` , :func:`getMorphologyFilter` 
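+ +For illustration, below is a sketch of a custom 2D filter built on this interface: a :math:`3\times 3` maximum filter for 8-bit data (a hypothetical class, not part of OpenCV; it relies only on the buffer layout documented in the comments above): + + +:: + + + class MaxFilter3x3 : public BaseFilter + { + public: + MaxFilter3x3() { ksize = Size(3, 3); anchor = Point(1, 1); } + + // src[k] points to the k-th padded input row of (width + 2)*cn + // elements; each output row has width*cn elements + virtual void operator()(const uchar** src, uchar* dst, int dststep, + int dstcount, int width, int cn) + { + width *= cn; // process all channels uniformly + for( int i = 0; i < dstcount; i++, dst += dststep ) + for( int j = 0; j < width; j++ ) + { + uchar m = 0; + for( int k = 0; k < 3; k++ ) + { + // 3 horizontal neighbors of the j-th output element + const uchar* sptr = src[i+k] + j; + m = std::max(m, std::max(sptr[0], + std::max(sptr[cn], sptr[cn*2]))); + } + dst[j] = m; + } + } + }; + + +.. + +Such a filter could then be passed to the :func:`FilterEngine` constructor described below. 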
.. index:: BaseRowFilter + +.. _BaseRowFilter: + +BaseRowFilter +------------- + +.. ctype:: BaseRowFilter + + +Base class for filters with single-row kernels + + +:: + + + class BaseRowFilter + { + public: + virtual ~BaseRowFilter(); + + // To be overriden by the user. + // + // runs filtering operation on the single input row + // of "width" elements, each element has "cn" channels. + // the filtered row is written into "dst" buffer. + virtual void operator()(const uchar* src, uchar* dst, + int width, int cn) = 0; + int ksize, anchor; + }; + + +.. + +The class ``BaseRowFilter`` is the base class for filtering data using single-row kernels. The filtering does not have to be a linear operation. In general, it could be written as follows: + + +.. math:: + + \texttt{dst} (x,y) = F( \texttt{src} [y](x), \; \texttt{src} [y](x+1), \; ..., \; \texttt{src} [y](x+ \texttt{ksize.width} -1)) + + +where :math:`F` is the filtering function. The class only defines the interface and is not used directly. Instead, there are several functions in OpenCV (and you can add more) that return pointers to the derived classes that implement specific filtering operations. Those pointers are then passed to the :func:`FilterEngine` constructor. While the filtering operation interface uses the ``uchar`` type, a particular implementation is not limited to 8-bit data. + +See also: :func:`BaseColumnFilter` , :func:`BaseFilter` , :func:`FilterEngine` , + :func:`getLinearRowFilter` , :func:`getMorphologyRowFilter` , :func:`getRowSumFilter` + +.. index:: FilterEngine + +.. _FilterEngine: + +FilterEngine +------------ + +.. ctype:: FilterEngine + + +Generic image filtering class + + +:: + + + class FilterEngine + { + public: + // empty constructor + FilterEngine(); + // builds a 2D non-separable filter (!_filter2D.empty()) or + // a separable filter (!_rowFilter.empty() && !_columnFilter.empty()) + // the input data type will be "srcType", the output data type will be "dstType", + // the intermediate data type is "bufType". + // _rowBorderType and _columnBorderType determine how the image + // will be extrapolated beyond the image boundaries. + // _borderValue is only used when _rowBorderType and/or _columnBorderType + // == cv::BORDER_CONSTANT + FilterEngine(const Ptr<BaseFilter>& _filter2D, + const Ptr<BaseRowFilter>& _rowFilter, + const Ptr<BaseColumnFilter>& _columnFilter, + int srcType, int dstType, int bufType, + int _rowBorderType=BORDER_REPLICATE, + int _columnBorderType=-1, // use _rowBorderType by default + const Scalar& _borderValue=Scalar()); + virtual ~FilterEngine(); + // separate function for the engine initialization + void init(const Ptr<BaseFilter>& _filter2D, + const Ptr<BaseRowFilter>& _rowFilter, + const Ptr<BaseColumnFilter>& _columnFilter, + int srcType, int dstType, int bufType, + int _rowBorderType=BORDER_REPLICATE, int _columnBorderType=-1, + const Scalar& _borderValue=Scalar()); + // starts filtering of the ROI in an image of size "wholeSize". + // returns the starting y-position in the source image. + virtual int start(Size wholeSize, Rect roi, int maxBufRows=-1); + // alternative form of start that takes the image + // itself instead of "wholeSize". Set isolated to true to pretend that + // there are no real pixels outside of the ROI + // (so that the pixels will be extrapolated using the specified border modes) + virtual int start(const Mat& src, const Rect& srcRoi=Rect(0,0,-1,-1), + bool isolated=false, int maxBufRows=-1); + // processes the next portion of the source image, + // "srcCount" rows starting from "src" and + // stores the results to "dst".
+ // returns the number of produced rows + virtual int proceed(const uchar* src, int srcStep, int srcCount, + uchar* dst, int dstStep); + // higher-level function that processes the whole + // ROI or the whole image with a single call + virtual void apply( const Mat& src, Mat& dst, + const Rect& srcRoi=Rect(0,0,-1,-1), + Point dstOfs=Point(0,0), + bool isolated=false); + bool isSeparable() const { return filter2D.empty(); } + // how many rows from the input image are not yet processed + int remainingInputRows() const; + // how many output rows are not yet produced + int remainingOutputRows() const; + ... + // the starting and the ending rows in the source image + int startY, endY; + + // pointers to the filters + Ptr<BaseFilter> filter2D; + Ptr<BaseRowFilter> rowFilter; + Ptr<BaseColumnFilter> columnFilter; + }; + + +.. + +The class ``FilterEngine`` can be used to apply an arbitrary filtering operation to an image. It contains all the necessary intermediate buffers, it computes extrapolated values of the "virtual" pixels outside of the image, etc. Pointers to the initialized ``FilterEngine`` instances are returned by various ``create*Filter`` functions, see below, and they are used inside high-level functions such as :func:`filter2D` , :func:`erode` , :func:`dilate` , etc., that is, the class is the workhorse in many of OpenCV filtering functions. + +This class makes it easier (though, maybe not very easy yet) to combine filtering operations with other operations, such as color space conversions, thresholding, arithmetic operations, etc. By combining several operations together you can get much better performance because your data will stay in cache. For example, below is an implementation of the Laplace operator for floating-point images, which is a simplified implementation of :func:`Laplacian` : + + +:: + + + void laplace_f(const Mat& src, Mat& dst) + { + CV_Assert( src.type() == CV_32F ); + // filtering parameters, fixed here for simplicity + const int ksize = 5, ktype = CV_32F, borderType = BORDER_DEFAULT; + dst.create(src.size(), src.type()); + + // get the derivative and smooth kernels for d2I/dx2. + // for d2I/dy2 we could use the same kernels, just swapped + Mat kd, ks; + getSobelKernels( kd, ks, 2, 0, ksize, false, ktype ); + + // let's process 10 source rows at once + int DELTA = std::min(10, src.rows); + Ptr<FilterEngine> Fxx = createSeparableLinearFilter(src.type(), + dst.type(), kd, ks, Point(-1,-1), 0, borderType, borderType, Scalar() ); + Ptr<FilterEngine> Fyy = createSeparableLinearFilter(src.type(), + dst.type(), ks, kd, Point(-1,-1), 0, borderType, borderType, Scalar() ); + + int y = Fxx->start(src), dsty = 0, dy = 0; + Fyy->start(src); + const uchar* sptr = src.data + y*src.step; + + // allocate the buffers for the spatial image derivatives; + // the buffers need to have more than DELTA rows, because at the + // last iteration the output may take max(kd.rows-1,ks.rows-1) + // rows more than the input. + Mat Ixx( DELTA + kd.rows - 1, src.cols, dst.type() ); + Mat Iyy( DELTA + kd.rows - 1, src.cols, dst.type() ); + + // inside the loop we always pass DELTA rows to the filter + // (note that the "proceed" method takes care of possible overflow, since + // it was given the actual image height in the "start" method) + // on output we can get: + // * < DELTA rows (the initial buffer accumulation stage) + // * = DELTA rows (settled state in the middle) + // * > DELTA rows (then the input image is over, but we generate + // "virtual" rows using the border mode and filter them) + // this variable number of output rows is dy. + // dsty is the current output row.
+ // sptr is the pointer to the first input row in the portion to process + for( ; dsty < dst.rows; sptr += DELTA*src.step, dsty += dy ) + { + Fxx->proceed( sptr, (int)src.step, DELTA, Ixx.data, (int)Ixx.step ); + dy = Fyy->proceed( sptr, (int)src.step, DELTA, Iyy.data, (int)Iyy.step ); + if( dy > 0 ) + { + Mat dstripe = dst.rowRange(dsty, dsty + dy); + add(Ixx.rowRange(0, dy), Iyy.rowRange(0, dy), dstripe); + } + } + } + + +.. + +If you do not need that much control of the filtering process, you can simply use the ``FilterEngine::apply`` method. Here is how the method is actually implemented: + + +:: + + + void FilterEngine::apply(const Mat& src, Mat& dst, + const Rect& srcRoi, Point dstOfs, bool isolated) + { + // check matrix types + CV_Assert( src.type() == srcType && dst.type() == dstType ); + + // handle the "whole image" case + Rect _srcRoi = srcRoi; + if( _srcRoi == Rect(0,0,-1,-1) ) + _srcRoi = Rect(0,0,src.cols,src.rows); + + // check if the destination ROI is inside dst. + // and FilterEngine::start will check if the source ROI is inside src. + CV_Assert( dstOfs.x >= 0 && dstOfs.y >= 0 && + dstOfs.x + _srcRoi.width <= dst.cols && + dstOfs.y + _srcRoi.height <= dst.rows ); + + // start filtering + int y = start(src, _srcRoi, isolated); + + // process the whole ROI. Note that "endY - startY" is the total number + // of the source rows to process + // (including the possible rows outside of srcRoi but inside the source image) + proceed( src.data + y*src.step, + (int)src.step, endY - startY, + dst.data + dstOfs.y*dst.step + + dstOfs.x*dst.elemSize(), (int)dst.step ); + } + + +.. + +Unlike the earlier versions of OpenCV, the filtering operations now fully support the notion of image ROI, that is, pixels outside of the ROI but inside the image can be used in the filtering operations. For example, you can take a ROI of a single pixel and filter it - that will be the filter response at that particular pixel (however, it is possible to emulate the old behavior by passing ``isolated=true`` to ``FilterEngine::start`` or ``FilterEngine::apply`` ). You can pass the ROI explicitly to ``FilterEngine::apply`` , or construct new matrix headers: + + +:: + + + // compute dI/dx derivative at src(x,y) + + // method 1: + // form a matrix header for a single value + float val1 = 0; + Mat dst1(1,1,CV_32F,&val1); + + Ptr<FilterEngine> Fx = createDerivFilter(CV_32F, CV_32F, + 1, 0, 3, BORDER_REFLECT_101); + Fx->apply(src, dst1, Rect(x,y,1,1), Point()); + + // method 2: + // form a matrix header for a single value + float val2 = 0; + Mat dst2(1,1,CV_32F,&val2); + + Mat pix_roi(src, Rect(x,y,1,1)); + Sobel(pix_roi, dst2, dst2.type(), 1, 0, 3, 1, 0, BORDER_REFLECT_101); + + printf("method1 = %g, method2 = %g\n", val1, val2); + + +.. + +A note on the data types. As it was mentioned in the :func:`BaseFilter` description, the specific filters can process data of any type, despite that ``Base*Filter::operator()`` only takes ``uchar`` pointers and no information about the actual types. To make it all work, the following rules are used: + + +* + in the case of separable filtering, ``FilterEngine::rowFilter`` is applied first. It transforms the input image data (of type ``srcType`` ) to the intermediate results stored in the internal buffers (of type ``bufType`` ). Then these intermediate results are processed *as single-channel data* with ``FilterEngine::columnFilter`` and stored in the output image (of type ``dstType`` ).
Thus, the input type for ``rowFilter`` is ``srcType`` and the output type is ``bufType`` ; the input type for ``columnFilter`` is ``CV_MAT_DEPTH(bufType)`` and the output type is ``CV_MAT_DEPTH(dstType)`` . + + +* + in the case of non-separable filtering, ``bufType`` must be the same as ``srcType`` . The source data is copied to the temporary buffer if needed and then just passed to ``FilterEngine::filter2D`` . That is, the input type for ``filter2D`` is ``srcType`` (= ``bufType`` ) and the output type is ``dstType`` . + + +See also: :func:`BaseColumnFilter` , :func:`BaseFilter` , :func:`BaseRowFilter` , :func:`createBoxFilter` , :func:`createDerivFilter` , :func:`createGaussianFilter` , :func:`createLinearFilter` , :func:`createMorphologyFilter` , :func:`createSeparableLinearFilter` + +.. index:: bilateralFilter + + +cv::bilateralFilter +------------------- + + +.. cfunction:: void bilateralFilter( const Mat\& src, Mat\& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT ) + + Applies the bilateral filter to an image + + + :param src: The source 8-bit or floating-point, 1-channel or 3-channel image + + + :param dst: The destination image; will have the same size and the same type as ``src`` + + + :param d: The diameter of each pixel neighborhood that is used during filtering. If it is non-positive, it is computed from ``sigmaSpace`` + + + :param sigmaColor: Filter sigma in the color space. A larger value of the parameter means that farther colors within the pixel neighborhood (see ``sigmaSpace`` ) will be mixed together, resulting in larger areas of semi-equal color + + + :param sigmaSpace: Filter sigma in the coordinate space. A larger value of the parameter means that farther pixels will influence each other (as long as their colors are close enough; see ``sigmaColor`` ). When ``d>0`` , it specifies the neighborhood size regardless of ``sigmaSpace`` ; otherwise ``d`` is proportional to ``sigmaSpace`` + + + +The function applies bilateral filtering to the input image, as described in http://www.dai.ed.ac.uk/CVonline/LOCAL\_COPIES/MANDUCHI1/Bilateral\_Filtering.html + +.. index:: blur + + +cv::blur +-------- + + +.. cfunction:: void blur( const Mat\& src, Mat\& dst, Size ksize, Point anchor=Point(-1,-1), int borderType=BORDER_DEFAULT ) + + Smoothes an image using the normalized box filter + + + :param src: The source image + + + :param dst: The destination image; will have the same size and the same type as ``src`` + + + :param ksize: The smoothing kernel size + + + :param anchor: The anchor point. The default value ``Point(-1,-1)`` means that the anchor is at the kernel center + + + :param borderType: The border mode used to extrapolate pixels outside of the image + + + +The function smoothes the image using the kernel: + + +.. math:: + + \texttt{K} = \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \end{bmatrix} + + +The call ``blur(src, dst, ksize, anchor, borderType)`` is equivalent to ``boxFilter(src, dst, src.type(), ksize, anchor, true, borderType)`` . + +See also: :func:`boxFilter` , :func:`bilateralFilter` , :func:`GaussianBlur` , :func:`medianBlur` .
.. index:: borderInterpolate + + +cv::borderInterpolate +--------------------- + + +.. cfunction:: int borderInterpolate( int p, int len, int borderType ) + + Computes the source location of an extrapolated pixel + + + :param p: 0-based coordinate of the extrapolated pixel along one of the axes, likely <0 or >= ``len`` + + + :param len: length of the array along the corresponding axis + + + :param borderType: the border type, one of the ``BORDER_*`` , except for ``BORDER_TRANSPARENT`` and ``BORDER_ISOLATED`` . When ``borderType==BORDER_CONSTANT`` the function always returns -1, regardless of ``p`` and ``len`` + + + +The function computes and returns the coordinate of the donor pixel corresponding to the specified extrapolated pixel when using the specified extrapolation border mode. For example, if we use ``BORDER_WRAP`` mode in the horizontal direction, ``BORDER_REFLECT_101`` in the vertical direction and want to compute the value of the "virtual" pixel ``Point(-5, 100)`` in a floating-point image ``img`` , it will be + + +:: + + + float val = img.at<float>(borderInterpolate(100, img.rows, BORDER_REFLECT_101), + borderInterpolate(-5, img.cols, BORDER_WRAP)); + + +.. + +Normally, the function is not called directly; it is used inside :func:`FilterEngine` and :func:`copyMakeBorder` to compute tables for quick extrapolation. + +See also: :func:`FilterEngine` , :func:`copyMakeBorder` + +.. index:: boxFilter + + +cv::boxFilter +------------- + + +.. cfunction:: void boxFilter( const Mat\& src, Mat\& dst, int ddepth, Size ksize, Point anchor=Point(-1,-1), bool normalize=true, int borderType=BORDER_DEFAULT ) + + Smoothes an image using the box filter + + + :param src: The source image + + + :param dst: The destination image; will have the same size and the same type as ``src`` + + + :param ksize: The smoothing kernel size + + + :param anchor: The anchor point. The default value ``Point(-1,-1)`` means that the anchor is at the kernel center + + + :param normalize: Indicates whether the kernel is normalized by its area or not + + + :param borderType: The border mode used to extrapolate pixels outside of the image + + + +The function smoothes the image using the kernel: + + +.. math:: + + \texttt{K} = \alpha \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \end{bmatrix} + + +where + + +.. math:: + + \alpha = \fork{\frac{1}{\texttt{ksize.width*ksize.height}}}{when \texttt{normalize=true}}{1}{otherwise} + + +The unnormalized box filter is useful for computing various integral characteristics over each pixel neighborhood, such as covariance matrices of image derivatives (used in dense optical flow algorithms, etc.). If you need to compute pixel sums over variable-size windows, use :func:`integral` . + +See also: :func:`blur` , :func:`bilateralFilter` , :func:`GaussianBlur` , :func:`medianBlur` , :func:`integral` .
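+ +For example, a small sketch (assuming an 8-bit single-channel image ``src`` ; the output depth ``CV_32S`` is chosen here to avoid overflow) that computes the sum over every :math:`5\times 5` neighborhood with an unnormalized box filter: + + +:: + + + // per-pixel sums over a 5x5 window; each dst pixel is the sum of + // the 25 src values around it (border pixels are extrapolated) + Mat sums; + boxFilter(src, sums, CV_32S, Size(5, 5), + Point(-1,-1), false, BORDER_DEFAULT); + + +.. 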
.. index:: buildPyramid + + +cv::buildPyramid +---------------- + + +.. cfunction:: void buildPyramid( const Mat\& src, vector<Mat>\& dst, int maxlevel ) + + Constructs the Gaussian pyramid for an image + + + :param src: The source image; check :func:`pyrDown` for the list of supported types + + + :param dst: The destination vector of ``maxlevel+1`` images of the same type as ``src`` ; ``dst[0]`` will be the same as ``src`` , ``dst[1]`` is the next pyramid layer, + a smoothed and down-sized ``src`` , etc. + + + :param maxlevel: The 0-based index of the last (i.e. the smallest) pyramid layer; it must be non-negative + + + +The function constructs a vector of images and builds the Gaussian pyramid by recursively applying :func:`pyrDown` to the previously built pyramid layers, starting from ``dst[0]==src`` . + + +.. index:: copyMakeBorder + + +cv::copyMakeBorder +------------------ + + +.. cfunction:: void copyMakeBorder( const Mat\& src, Mat\& dst, int top, int bottom, int left, int right, int borderType, const Scalar\& value=Scalar() ) + + Forms a border around the image + + + :param src: The source image + + + :param dst: The destination image; will have the same type as ``src`` and the size ``Size(src.cols+left+right, src.rows+top+bottom)`` + + + :param top, bottom, left, right: Specify how many pixels in each direction from the source image rectangle to extrapolate, e.g. ``top=1, bottom=1, left=1, right=1`` means that a 1 pixel-wide border needs to be built + + + :param borderType: The border type; see :func:`borderInterpolate` + + + :param value: The border value if ``borderType==BORDER_CONSTANT`` + + + +The function copies the source image into the middle of the destination image. The areas to the left, to the right, above and below the copied source image will be filled with extrapolated pixels. This is not what :func:`FilterEngine` or the filtering functions based on it do (they extrapolate pixels on the fly), but what other more complex functions, including your own, may do to simplify image boundary handling. + +The function supports the mode when ``src`` is already in the middle of ``dst`` . In this case the function does not copy ``src`` itself, but simply constructs the border, e.g.: + + +:: + + + // let border be the same in all directions + int border=2; + // constructs a larger image to fit both the image and the border + Mat gray_buf(rgb.rows + border*2, rgb.cols + border*2, rgb.depth()); + // select the middle part of it w/o copying data + Mat gray(gray_buf, Rect(border, border, rgb.cols, rgb.rows)); + // convert image from RGB to grayscale + cvtColor(rgb, gray, CV_RGB2GRAY); + // form a border in-place + copyMakeBorder(gray, gray_buf, border, border, + border, border, BORDER_REPLICATE); + // now do some custom filtering ... + ... + + +.. + +See also: :func:`borderInterpolate` + +.. index:: createBoxFilter + + +cv::createBoxFilter +------------------- + + +.. cfunction:: Ptr<FilterEngine> createBoxFilter( int srcType, int dstType, Size ksize, Point anchor=Point(-1,-1), bool normalize=true, int borderType=BORDER_DEFAULT) + + +.. cfunction:: Ptr<BaseRowFilter> getRowSumFilter(int srcType, int sumType, int ksize, int anchor=-1) + + +.. 
cfunction:: Ptr<BaseColumnFilter> getColumnSumFilter(int sumType, int dstType, int ksize, int anchor=-1, double scale=1) + + Returns a box filter engine + + + :param srcType: The source image type + + + :param sumType: The intermediate horizontal sum type; must have as many channels as ``srcType`` + + + :param dstType: The destination image type; must have as many channels as ``srcType`` + + + :param ksize: The aperture size + + + :param anchor: The anchor position within the kernel; negative values mean that the anchor is at the kernel center + + + :param normalize: Whether the sums are normalized or not; see :func:`boxFilter` + + + :param scale: Another way to specify normalization in the lower-level ``getColumnSumFilter`` + + + :param borderType: Which border type to use; see :func:`borderInterpolate` + + + +The function is a convenience function that retrieves the horizontal sum primitive filter with :func:`getRowSumFilter` , the vertical sum filter with :func:`getColumnSumFilter` , constructs a new :func:`FilterEngine` and passes both of the primitive filters there. The constructed filter engine can be used for image filtering with normalized or unnormalized box filters. + +The function itself is used by :func:`blur` and :func:`boxFilter` . + +See also: :func:`FilterEngine` , :func:`blur` , :func:`boxFilter` . + + +.. index:: createDerivFilter + + +cv::createDerivFilter +--------------------- + + +.. cfunction:: Ptr<FilterEngine> createDerivFilter( int srcType, int dstType, int dx, int dy, int ksize, int borderType=BORDER_DEFAULT ) + + Returns an engine for computing image derivatives + + + :param srcType: The source image type + + + :param dstType: The destination image type; must have as many channels as ``srcType`` + + + :param dx: The derivative order with respect to x + + + :param dy: The derivative order with respect to y + + + :param ksize: The aperture size; see :func:`getDerivKernels` + + + :param borderType: Which border type to use; see :func:`borderInterpolate` + + + +The function :func:`createDerivFilter` is a small convenience function that retrieves linear filter coefficients for computing image derivatives using :func:`getDerivKernels` and then creates a separable linear filter with :func:`createSeparableLinearFilter` . The function is used by :func:`Sobel` and :func:`Scharr` . + +See also: :func:`createSeparableLinearFilter` , :func:`getDerivKernels` , :func:`Scharr` , :func:`Sobel` . + + +.. index:: createGaussianFilter + + +cv::createGaussianFilter +------------------------ + + +.. cfunction:: Ptr<FilterEngine> createGaussianFilter( int type, Size ksize, double sigmaX, double sigmaY=0, int borderType=BORDER_DEFAULT) + + Returns an engine for smoothing images with a Gaussian filter + + + :param type: The source and the destination image type + + + :param ksize: The aperture size; see :func:`getGaussianKernel` + + + :param sigmaX: The Gaussian sigma in the horizontal direction; see :func:`getGaussianKernel` + + + :param sigmaY: The Gaussian sigma in the vertical direction; if 0, then :math:`\texttt{sigmaY}\leftarrow\texttt{sigmaX}` + + + :param borderType: Which border type to use; see :func:`borderInterpolate` + + + +The function :func:`createGaussianFilter` computes Gaussian kernel coefficients and then returns a separable linear filter for that kernel. The function is used by :func:`GaussianBlur` .
Note that while the function takes just one data type, both for input and output, you can bypass this limitation by calling :func:`getGaussianKernel` and then :func:`createSeparableLinearFilter` directly. + +See also: :func:`createSeparableLinearFilter` , :func:`getGaussianKernel` , :func:`GaussianBlur` . + + +.. index:: createLinearFilter + + +cv::createLinearFilter +---------------------- + + +.. cfunction:: Ptr<FilterEngine> createLinearFilter(int srcType, int dstType, const Mat\& kernel, Point _anchor=Point(-1,-1), double delta=0, int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1, const Scalar\& borderValue=Scalar()) + + +.. cfunction:: Ptr<BaseFilter> getLinearFilter(int srcType, int dstType, const Mat\& kernel, Point anchor=Point(-1,-1), double delta=0, int bits=0) + + Creates a non-separable linear filter engine + + + :param srcType: The source image type + + + :param dstType: The destination image type; must have as many channels as ``srcType`` + + + :param kernel: The 2D array of filter coefficients + + + :param anchor: The anchor point within the kernel; the special value ``Point(-1,-1)`` means that the anchor is at the kernel center + + + :param delta: The value added to the filtered results before storing them + + + :param bits: When the kernel is an integer matrix representing fixed-point filter coefficients, + the parameter specifies the number of the fractional bits + + + :param rowBorderType, columnBorderType: The pixel extrapolation methods in the horizontal and the vertical directions; see :func:`borderInterpolate` + + + :param borderValue: Used in case of a constant border + + + +The function returns a pointer to a 2D linear filter for the specified kernel, the source array type and the destination array type. The function is a higher-level function that calls ``getLinearFilter`` and passes the retrieved 2D filter to the :func:`FilterEngine` constructor. + +See also: :func:`createSeparableLinearFilter` , :func:`FilterEngine` , :func:`filter2D` + +.. index:: createMorphologyFilter + + +cv::createMorphologyFilter +-------------------------- + + +.. cfunction:: Ptr<FilterEngine> createMorphologyFilter(int op, int type, const Mat\& element, Point anchor=Point(-1,-1), int rowBorderType=BORDER_CONSTANT, int columnBorderType=-1, const Scalar\& borderValue=morphologyDefaultBorderValue()) + + +.. cfunction:: Ptr<BaseFilter> getMorphologyFilter(int op, int type, const Mat\& element, Point anchor=Point(-1,-1)) + + +.. cfunction:: Ptr<BaseRowFilter> getMorphologyRowFilter(int op, int type, int esize, int anchor=-1) + + +.. cfunction:: Ptr<BaseColumnFilter> getMorphologyColumnFilter(int op, int type, int esize, int anchor=-1) + + +.. cfunction:: static inline Scalar morphologyDefaultBorderValue() { return Scalar::all(DBL_MAX); } + + Creates an engine for non-separable morphological operations + + + :param op: The morphology operation id, ``MORPH_ERODE`` or ``MORPH_DILATE`` + + + :param type: The input/output image type + + + :param element: The 2D 8-bit structuring element for the morphological operation.
Non-zero elements indicate the pixels that belong to the element + + + :param esize: The horizontal or vertical structuring element size for separable morphological operations + + + :param anchor: The anchor position within the structuring element; negative values mean that the anchor is at the center + + + :param rowBorderType, columnBorderType: The pixel extrapolation methods in the horizontal and the vertical directions; see :func:`borderInterpolate` + + + :param borderValue: The border value in case of a constant border. The default value, \ ``morphologyDefaultBorderValue`` , has a special meaning: it is transformed to :math:`+\inf` for the erosion and to :math:`-\inf` for the dilation, which means that the minimum (maximum) is effectively computed only over the pixels that are inside the image. + + + +The functions construct primitive morphological filtering operations or a filter engine based on them. Normally it's enough to use :func:`createMorphologyFilter` or even the higher-level :func:`erode` , :func:`dilate` or :func:`morphologyEx` . Note that :func:`createMorphologyFilter` analyzes the structuring element shape and builds a separable morphological filter engine when the structuring element is square. + +See also: :func:`erode` , :func:`dilate` , :func:`morphologyEx` , :func:`FilterEngine` + +.. index:: createSeparableLinearFilter + + +cv::createSeparableLinearFilter +------------------------------- + + +.. cfunction:: Ptr<FilterEngine> createSeparableLinearFilter(int srcType, int dstType, const Mat\& rowKernel, const Mat\& columnKernel, Point anchor=Point(-1,-1), double delta=0, int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1, const Scalar\& borderValue=Scalar()) + + +.. cfunction:: Ptr<BaseColumnFilter> getLinearColumnFilter(int bufType, int dstType, const Mat\& columnKernel, int anchor, int symmetryType, double delta=0, int bits=0) + + +.. cfunction:: Ptr<BaseRowFilter> getLinearRowFilter(int srcType, int bufType, const Mat\& rowKernel, int anchor, int symmetryType) + + Creates an engine for a separable linear filter + + + :param srcType: The source array type + + + :param dstType: The destination image type; must have as many channels as ``srcType`` + + + :param bufType: The intermediate buffer type; must have as many channels as ``srcType`` + + + :param rowKernel: The coefficients for filtering each row + + + :param columnKernel: The coefficients for filtering each column + + + :param anchor: The anchor position within the kernel; negative values mean that the anchor is positioned at the aperture center + + + :param delta: The value added to the filtered results before storing them + + + :param bits: When the kernel is an integer matrix representing fixed-point filter coefficients, + the parameter specifies the number of the fractional bits + + + :param rowBorderType, columnBorderType: The pixel extrapolation methods in the horizontal and the vertical directions; see :func:`borderInterpolate` + + + :param borderValue: Used in case of a constant border + + + :param symmetryType: The type of each of the row and column kernel; see :func:`getKernelType` . + + + +The functions construct primitive separable linear filtering operations or a filter engine based on them. Normally it's enough to use :func:`createSeparableLinearFilter` or even the higher-level :func:`sepFilter2D` .
The function :func:`createSeparableLinearFilter` is smart enough to figure out the ``symmetryType`` for each of the two kernels, the intermediate ``bufType`` , and, if the filtering can be done in integer arithmetic, the number of ``bits`` to encode the filter coefficients. If it does not work for you, it's possible to call ``getLinearColumnFilter`` and ``getLinearRowFilter`` directly and then pass them to the :func:`FilterEngine` constructor. + +See also: :func:`sepFilter2D` , :func:`createLinearFilter` , :func:`FilterEngine` , :func:`getKernelType` + +.. index:: dilate + + +cv::dilate +---------- + + +.. cfunction:: void dilate( const Mat\& src, Mat\& dst, const Mat\& element, Point anchor=Point(-1,-1), int iterations=1, int borderType=BORDER_CONSTANT, const Scalar\& borderValue=morphologyDefaultBorderValue() ) + + Dilates an image by using a specific structuring element. + + + :param src: The source image + + + :param dst: The destination image. It will have the same size and the same type as ``src`` + + + :param element: The structuring element used for dilation. If ``element=Mat()`` , a :math:`3\times 3` rectangular structuring element is used + + + :param anchor: Position of the anchor within the element. The default value :math:`(-1, -1)` means that the anchor is at the element center + + + :param iterations: The number of times dilation is applied + + + :param borderType: The pixel extrapolation method; see :func:`borderInterpolate` + + + :param borderValue: The border value in case of a constant border. The default value has a special meaning, see :func:`createMorphologyFilter` + + + +The function dilates the source image using the specified structuring element that determines the shape of a pixel neighborhood over which the maximum is taken: + + +.. math:: + + \texttt{dst} (x,y) = \max _{(x',y'): \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y') + + +The function supports the in-place mode. Dilation can be applied several ( ``iterations`` ) times. In the case of multi-channel images each channel is processed independently. + +See also: :func:`erode` , :func:`morphologyEx` , :func:`createMorphologyFilter` + +.. index:: erode + + +cv::erode +--------- + + +.. cfunction:: void erode( const Mat\& src, Mat\& dst, const Mat\& element, Point anchor=Point(-1,-1), int iterations=1, int borderType=BORDER_CONSTANT, const Scalar\& borderValue=morphologyDefaultBorderValue() ) + + Erodes an image by using a specific structuring element. + + + :param src: The source image + + + :param dst: The destination image. It will have the same size and the same type as ``src`` + + + :param element: The structuring element used for erosion. If ``element=Mat()`` , a :math:`3\times 3` rectangular structuring element is used + + + :param anchor: Position of the anchor within the element. The default value :math:`(-1, -1)` means that the anchor is at the element center + + + :param iterations: The number of times erosion is applied + + + :param borderType: The pixel extrapolation method; see :func:`borderInterpolate` + + + :param borderValue: The border value in case of a constant border. The default value has a special meaning, see :func:`createMorphologyFilter` + + + +The function erodes the source image using the specified structuring element that determines the shape of a pixel neighborhood over which the minimum is taken: + + +.. 
math:: + + \texttt{dst} (x,y) = \min _{(x',y'): \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y') + + +The function supports the in-place mode. Erosion can be applied several ( ``iterations`` ) times. In the case of multi-channel images each channel is processed independently. + +See also: :func:`dilate` , :func:`morphologyEx` , :func:`createMorphologyFilter` + +.. index:: filter2D + + +cv::filter2D +------------ + + +.. cfunction:: void filter2D( const Mat\& src, Mat\& dst, int ddepth, const Mat\& kernel, Point anchor=Point(-1,-1), double delta=0, int borderType=BORDER_DEFAULT ) + + Convolves an image with the kernel + + + :param src: The source image + + + :param dst: The destination image. It will have the same size and the same number of channels as ``src`` + + + :param ddepth: The desired depth of the destination image. If it is negative, it will be the same as ``src.depth()`` + + + :param kernel: Convolution kernel (or rather a correlation kernel), a single-channel floating point matrix. If you want to apply different kernels to different channels, split the image into separate color planes using :func:`split` and process them individually + + + :param anchor: The anchor of the kernel that indicates the relative position of a filtered point within the kernel. The anchor should lie within the kernel. The special default value (-1,-1) means that the anchor is at the kernel center + + + :param delta: The optional value added to the filtered pixels before storing them in ``dst`` + + + :param borderType: The pixel extrapolation method; see :func:`borderInterpolate` + + + +The function applies an arbitrary linear filter to the image. In-place operation is supported. When the aperture is partially outside the image, the function interpolates outlier pixel values according to the specified border mode. + +The function actually computes correlation, not convolution: + + +.. math:: + + \texttt{dst} (x,y) = \sum _{ \stackrel{0\leq x' < \texttt{kernel.cols},}{0\leq y' < \texttt{kernel.rows}} } \texttt{kernel} (x',y')* \texttt{src} (x+x'- \texttt{anchor.x} ,y+y'- \texttt{anchor.y} ) + + +That is, the kernel is not mirrored around the anchor point. If you need a real convolution, flip the kernel using :func:`flip` and set the new anchor to ``(kernel.cols - anchor.x - 1, kernel.rows - anchor.y - 1)`` . + +The function uses the :func:`dft` -based algorithm in the case of sufficiently large kernels (~ :math:`11\times11` ) and the direct algorithm (that uses the engine retrieved by :func:`createLinearFilter` ) for small kernels. + +See also: :func:`sepFilter2D` , :func:`createLinearFilter` , :func:`dft` , :func:`matchTemplate` + +.. index:: GaussianBlur + + +cv::GaussianBlur +---------------- + + +.. cfunction:: void GaussianBlur( const Mat\& src, Mat\& dst, Size ksize, double sigmaX, double sigmaY=0, int borderType=BORDER_DEFAULT ) + + Smoothes an image using a Gaussian filter + + + :param src: The source image + + + :param dst: The destination image; will have the same size and the same type as ``src`` + + + :param ksize: The Gaussian kernel size; ``ksize.width`` and ``ksize.height`` can differ, but they both must be positive and odd. Or, they can be zeros, and then they are computed from ``sigma*`` + + + :param sigmaX, sigmaY: The Gaussian kernel standard deviations in the X and Y directions. If ``sigmaY`` is zero, it is set to be equal to ``sigmaX`` .
If they are both zeros, they are computed from ``ksize.width`` and ``ksize.height`` , respectively, see :func:`getGaussianKernel` . To fully control the result regardless of possible future modification of all this semantics, it is recommended to specify all of ``ksize`` , ``sigmaX`` and ``sigmaY`` + + + :param borderType: The pixel extrapolation method; see :func:`borderInterpolate` + + + +The function convolves the source image with the specified Gaussian kernel. In-place filtering is supported. + +See also: :func:`sepFilter2D` , :func:`filter2D` , :func:`blur` , :func:`boxFilter` , :func:`bilateralFilter` , :func:`medianBlur` + +.. index:: getDerivKernels + + +cv::getDerivKernels +------------------- + + +.. cfunction:: void getDerivKernels( Mat\& kx, Mat\& ky, int dx, int dy, int ksize, bool normalize=false, int ktype=CV_32F ) + + Returns filter coefficients for computing spatial image derivatives + + + :param kx: The output matrix of row filter coefficients; will have type ``ktype`` + + + :param ky: The output matrix of column filter coefficients; will have type ``ktype`` + + + :param dx: The derivative order with respect to x + + + :param dy: The derivative order with respect to y + + + :param ksize: The aperture size. It can be ``CV_SCHARR`` , 1, 3, 5 or 7 + + + :param normalize: Indicates whether to normalize (scale down) the filter coefficients or not. In theory the coefficients should have the denominator :math:`=2^{ksize*2-dx-dy-2}` . If you are going to filter floating-point images, you will likely want to use the normalized kernels. But if you compute derivatives of an 8-bit image, store the results in a 16-bit image and wish to preserve all the fractional bits, you may want to set ``normalize=false`` . + + + :param ktype: The type of filter coefficients. It can be ``CV_32F`` or ``CV_64F`` + + + +The function computes and returns the filter coefficients for spatial image derivatives. When ``ksize=CV_SCHARR`` , the Scharr :math:`3 \times 3` kernels are generated, see :func:`Scharr` . Otherwise, Sobel kernels are generated, see :func:`Sobel` . The filters are normally passed to :func:`sepFilter2D` or to :func:`createSeparableLinearFilter` . + + +.. index:: getGaussianKernel + + +cv::getGaussianKernel +--------------------- + + +.. cfunction:: Mat getGaussianKernel( int ksize, double sigma, int ktype=CV_64F ) + + Returns Gaussian filter coefficients + + + :param ksize: The aperture size. It should be odd ( :math:`\texttt{ksize} \mod 2 = 1` ) and positive. + + + :param sigma: The Gaussian standard deviation. If it is non-positive, it is computed from ``ksize`` as \ ``sigma = 0.3*(ksize/2 - 1) + 0.8`` + + + :param ktype: The type of filter coefficients. It can be ``CV_32F`` or ``CV_64F`` + + + +The function computes and returns the :math:`\texttt{ksize} \times 1` matrix of Gaussian filter coefficients: + + +.. math:: + + G_i= \alpha *e^{-(i-( \texttt{ksize} -1)/2)^2/(2* \texttt{sigma} ^2)}, + + +where :math:`i=0..\texttt{ksize}-1` and :math:`\alpha` is the scale factor chosen so that :math:`\sum_i G_i=1` . + +Two of such generated kernels can be passed to :func:`sepFilter2D` or to :func:`createSeparableLinearFilter` , which will automatically detect that these are smoothing kernels and handle them accordingly. You may also use the higher-level :func:`GaussianBlur` .
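+ +For example, a small sketch (assuming an 8-bit or floating-point image ``src`` ) of smoothing with an explicitly generated Gaussian kernel; this should be equivalent to calling ``GaussianBlur(src, dst, Size(7,7), 0)`` : + + +:: + + + // 7x1 Gaussian kernel; sigma<=0 means it is computed from ksize + Mat kernel = getGaussianKernel(7, -1, CV_32F); + Mat dst; + // the same 1D kernel filters the rows and the columns + sepFilter2D(src, dst, src.depth(), kernel, kernel); + + +.. 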
+ +See also: :func:`sepFilter2D` , :func:`createSeparableLinearFilter` , :func:`getDerivKernels` , :func:`getStructuringElement` , :func:`GaussianBlur` . + + +.. index:: getKernelType + + +cv::getKernelType +----------------- + + +.. cfunction:: int getKernelType(const Mat\& kernel, Point anchor) + + Returns the kernel type + + + :param kernel: 1D array of the kernel coefficients to analyze + + + :param anchor: The anchor position within the kernel + + + +The function analyzes the kernel coefficients and returns the corresponding kernel type: + + + * **KERNEL_GENERAL** Generic kernel - when the kernel has no symmetry or other special properties + + + * **KERNEL_SYMMETRICAL** The kernel is symmetrical: :math:`\texttt{kernel}_i == \texttt{kernel}_{ksize-i-1}` and the anchor is at the center + + + * **KERNEL_ASYMMETRICAL** The kernel is asymmetrical: :math:`\texttt{kernel}_i == -\texttt{kernel}_{ksize-i-1}` and the anchor is at the center + + + * **KERNEL_SMOOTH** All the kernel elements are non-negative and sum to 1. E.g. the Gaussian kernel is both smooth and symmetrical, so the function will return ``KERNEL_SMOOTH | KERNEL_SYMMETRICAL`` + + + * **KERNEL_INTEGER** All the kernel coefficients are integers. This flag can be combined with ``KERNEL_SYMMETRICAL`` or ``KERNEL_ASYMMETRICAL`` + + + +.. index:: getStructuringElement + + +cv::getStructuringElement +------------------------- + + +.. cfunction:: Mat getStructuringElement(int shape, Size esize, Point anchor=Point(-1,-1)) + + Returns the structuring element of the specified size and shape for morphological operations + + + :param shape: The element shape, one of: + + + * ``MORPH_RECT`` - rectangular structuring element + + .. math:: + + E_{ij}=1 + + + * ``MORPH_ELLIPSE`` - elliptic structuring element, i.e. a filled ellipse inscribed into the rectangle ``Rect(0, 0, esize.width, esize.height)`` + + + * ``MORPH_CROSS`` - cross-shaped structuring element: + + .. math:: + + E_{ij} = \fork{1}{if i=\texttt{anchor.y} or j=\texttt{anchor.x}}{0}{otherwise} + + + + :param esize: Size of the structuring element + + + :param anchor: The anchor position within the element. The default value :math:`(-1, -1)` means that the anchor is at the center. Note that only the shape of a cross-shaped element depends on the anchor position; in other cases the anchor just regulates by how much the result of the morphological operation is shifted + + + +The function constructs and returns the structuring element that can then be passed to :func:`createMorphologyFilter` , :func:`erode` , :func:`dilate` or :func:`morphologyEx` . You can also construct an arbitrary binary mask yourself and use it as the structuring element. + + +.. index:: medianBlur + + +cv::medianBlur +-------------- + + +.. cfunction:: void medianBlur( const Mat\& src, Mat\& dst, int ksize ) + + Smoothes an image using the median filter + + + :param src: The source 1-, 3- or 4-channel image. When ``ksize`` is 3 or 5, the image depth should be ``CV_8U`` , ``CV_16U`` or ``CV_32F`` . For larger aperture sizes it can only be ``CV_8U`` + + + :param dst: The destination array; will have the same size and the same type as ``src`` + + + :param ksize: The aperture linear size. It must be odd and more than 1, i.e. 3, 5, 7 ...
+ + +The function smoothes an image using the median filter with the :math:`\texttt{ksize} \times \texttt{ksize}` aperture. Each channel of a multi-channel image is processed independently. In-place operation is supported. + +See also: :func:`bilateralFilter` , :func:`blur` , :func:`boxFilter` , :func:`GaussianBlur` + +.. index:: morphologyEx + + +cv::morphologyEx +---------------- + + +.. cfunction:: void morphologyEx( const Mat\& src, Mat\& dst, int op, const Mat\& element, Point anchor=Point(-1,-1), int iterations=1, int borderType=BORDER_CONSTANT, const Scalar\& borderValue=morphologyDefaultBorderValue() ) + + Performs advanced morphological transformations + + + :param src: Source image + + + :param dst: Destination image. It will have the same size and the same type as ``src`` + + + :param element: Structuring element + + + :param op: Type of the morphological operation, one of the following: + + * **MORPH_OPEN** opening + + * **MORPH_CLOSE** closing + + * **MORPH_GRADIENT** morphological gradient + + * **MORPH_TOPHAT** "top hat" + + * **MORPH_BLACKHAT** "black hat" + + + :param iterations: Number of times erosion and dilation are applied + + + :param borderType: The pixel extrapolation method; see :func:`borderInterpolate` + + + :param borderValue: The border value in case of a constant border. The default value has a special meaning, see :func:`createMorphologyFilter` + + + +The function can perform advanced morphological transformations using erosion and dilation as basic operations. + +Opening: + + +.. math:: + + \texttt{dst} = \mathrm{open} ( \texttt{src} , \texttt{element} )= \mathrm{dilate} ( \mathrm{erode} ( \texttt{src} , \texttt{element} )) + + +Closing: + + +.. math:: + + \texttt{dst} = \mathrm{close} ( \texttt{src} , \texttt{element} )= \mathrm{erode} ( \mathrm{dilate} ( \texttt{src} , \texttt{element} )) + + +Morphological gradient: + + +.. math:: + + \texttt{dst} = \mathrm{morph\_grad} ( \texttt{src} , \texttt{element} )= \mathrm{dilate} ( \texttt{src} , \texttt{element} )- \mathrm{erode} ( \texttt{src} , \texttt{element} ) + + +"Top hat": + + +.. math:: + + \texttt{dst} = \mathrm{tophat} ( \texttt{src} , \texttt{element} )= \texttt{src} - \mathrm{open} ( \texttt{src} , \texttt{element} ) + + +"Black hat": + + +.. math:: + + \texttt{dst} = \mathrm{blackhat} ( \texttt{src} , \texttt{element} )= \mathrm{close} ( \texttt{src} , \texttt{element} )- \texttt{src} + + +Any of the operations can be done in-place. + +See also: :func:`dilate` , :func:`erode` , :func:`createMorphologyFilter` + +.. index:: Laplacian + + +cv::Laplacian +------------- + + +.. cfunction:: void Laplacian( const Mat\& src, Mat\& dst, int ddepth, int ksize=1, double scale=1, double delta=0, int borderType=BORDER_DEFAULT ) + + Calculates the Laplacian of an image + + + :param src: Source image + + + :param dst: Destination image; will have the same size and the same number of channels as ``src`` + + + :param ddepth: The desired depth of the destination image + + + :param ksize: The aperture size used to compute the second-derivative filters, see :func:`getDerivKernels` .
+.. index:: Laplacian
+
+cv::Laplacian
+-------------
+
+`id=0.865408874155 Comments from the Wiki `__
+
+.. cfunction:: void Laplacian( const Mat\& src, Mat\& dst, int ddepth, int ksize=1, double scale=1, double delta=0, int borderType=BORDER_DEFAULT )
+
+    Calculates the Laplacian of an image
+
+    :param src: Source image
+
+    :param dst: Destination image; will have the same size and the same number of channels as ``src``
+
+    :param ddepth: The desired depth of the destination image
+
+    :param ksize: The aperture size used to compute the second-derivative filters, see :func:`getDerivKernels`. It must be positive and odd
+
+    :param scale: The optional scale factor for the computed Laplacian values (by default, no scaling is applied, see :func:`getDerivKernels`)
+
+    :param delta: The optional delta value, added to the results prior to storing them in ``dst``
+
+    :param borderType: The pixel extrapolation method, see :func:`borderInterpolate`
+
+The function calculates the Laplacian of the source image by adding up the second x and y derivatives calculated using the Sobel operator:
+
+.. math::
+
+    \texttt{dst} = \Delta \texttt{src} = \frac{\partial^2 \texttt{src}}{\partial x^2} + \frac{\partial^2 \texttt{src}}{\partial y^2}
+
+This is done when ``ksize > 1``. When ``ksize == 1``, the Laplacian is computed by filtering the image with the following :math:`3 \times 3` aperture:
+
+.. math::
+
+    \vecthreethree {0}{1}{0}{1}{-4}{1}{0}{1}{0}
+
+See also: :func:`Sobel`, :func:`Scharr`
+
+.. index:: pyrDown
+
+cv::pyrDown
+-----------
+
+`id=0.613622119877 Comments from the Wiki `__
+
+.. cfunction:: void pyrDown( const Mat\& src, Mat\& dst, const Size\& dstsize=Size())
+
+    Smoothes an image and downsamples it.
+
+    :param src: The source image
+
+    :param dst: The destination image. It will have the specified size and the same type as ``src``
+
+    :param dstsize: Size of the destination image. By default it is computed as ``Size((src.cols+1)/2, (src.rows+1)/2)``. But in any case the following conditions should be satisfied:
+
+        .. math::
+
+            \begin{array}{l} | \texttt{dstsize.width} *2-src.cols| \leq 2 \\ | \texttt{dstsize.height} *2-src.rows| \leq 2 \end{array}
+
+The function performs the downsampling step of the Gaussian pyramid construction. First it convolves the source image with the kernel:
+
+.. math::
+
+    \frac{1}{16} \begin{bmatrix} 1 & 4 & 6 & 4 & 1 \\ 4 & 16 & 24 & 16 & 4 \\ 6 & 24 & 36 & 24 & 6 \\ 4 & 16 & 24 & 16 & 4 \\ 1 & 4 & 6 & 4 & 1 \end{bmatrix}
+
+and then downsamples the image by rejecting even rows and columns.
+
+.. index:: pyrUp
+
+cv::pyrUp
+---------
+
+`id=0.0770034459997 Comments from the Wiki `__
+
+.. cfunction:: void pyrUp( const Mat\& src, Mat\& dst, const Size\& dstsize=Size())
+
+    Upsamples an image and then smoothes it
+
+    :param src: The source image
+
+    :param dst: The destination image. It will have the specified size and the same type as ``src``
+
+    :param dstsize: Size of the destination image. By default it is computed as ``Size(src.cols*2, src.rows*2)``. But in any case the following conditions should be satisfied:
+
+        .. math::
+
+            \begin{array}{l} | \texttt{dstsize.width} -src.cols*2| \leq ( \texttt{dstsize.width} \mod 2) \\ | \texttt{dstsize.height} -src.rows*2| \leq ( \texttt{dstsize.height} \mod 2) \end{array}
+
+The function performs the upsampling step of the Gaussian pyramid construction (it can actually be used to construct the Laplacian pyramid). First it upsamples the source image by injecting even zero rows and columns and then convolves the result with the same kernel as in :func:`pyrDown`, multiplied by 4.
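+For example, a small sketch (``img`` is assumed to be loaded elsewhere) that builds a 3-level Gaussian pyramid and goes back up one level::
+
+    std::vector<Mat> pyramid(1, img);
+    for( int i = 0; i < 3; i++ )
+    {
+        Mat down;
+        pyrDown(pyramid.back(), down);
+        pyramid.push_back(down);
+    }
+    // go back up one level; the result is a smoothed version of
+    // pyramid[0], which is what a Laplacian pyramid is built from
+    Mat up;
+    pyrUp(pyramid[1], up, pyramid[0].size());
+
+..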
+.. index:: sepFilter2D
+
+cv::sepFilter2D
+---------------
+
+`id=0.347140352045 Comments from the Wiki `__
+
+.. cfunction:: void sepFilter2D( const Mat\& src, Mat\& dst, int ddepth, const Mat\& rowKernel, const Mat\& columnKernel, Point anchor=Point(-1,-1), double delta=0, int borderType=BORDER_DEFAULT )
+
+    Applies a separable linear filter to an image
+
+    :param src: The source image
+
+    :param dst: The destination image; will have the same size and the same number of channels as ``src``
+
+    :param ddepth: The destination image depth
+
+    :param rowKernel: The coefficients for filtering each row
+
+    :param columnKernel: The coefficients for filtering each column
+
+    :param anchor: The anchor position within the kernel; the default value :math:`(-1, -1)` means that the anchor is at the kernel center
+
+    :param delta: The value added to the filtered results before storing them
+
+    :param borderType: The pixel extrapolation method; see :func:`borderInterpolate`
+
+The function applies a separable linear filter to the image. That is, first, every row of ``src`` is filtered with the 1D kernel ``rowKernel``. Then, every column of the result is filtered with the 1D kernel ``columnKernel`` and the final result shifted by ``delta`` is stored in ``dst``.
+
+See also: :func:`createSeparableLinearFilter`, :func:`filter2D`, :func:`Sobel`, :func:`GaussianBlur`, :func:`boxFilter`, :func:`blur`.
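+For example, a short sketch (``img`` is assumed to be loaded elsewhere) that performs 5x5 Gaussian smoothing as two 1D passes; the result should match :func:`GaussianBlur` with a 5x5 kernel::
+
+    // 1D Gaussian kernel; sigma is derived from the kernel size
+    Mat kernel = getGaussianKernel(5, -1, CV_32F);
+    Mat dst;
+    // filter the rows, then the columns, with the same 1D kernel
+    sepFilter2D(img, dst, img.depth(), kernel, kernel);
+
+..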
+.. index:: Sobel
+
+cv::Sobel
+---------
+
+`id=0.368514989628 Comments from the Wiki `__
+
+.. cfunction:: void Sobel( const Mat\& src, Mat\& dst, int ddepth, int xorder, int yorder, int ksize=3, double scale=1, double delta=0, int borderType=BORDER_DEFAULT )
+
+    Calculates the first, second, third or mixed image derivatives using an extended Sobel operator
+
+    :param src: The source image
+
+    :param dst: The destination image; will have the same size and the same number of channels as ``src``
+
+    :param ddepth: The destination image depth
+
+    :param xorder: Order of the derivative x
+
+    :param yorder: Order of the derivative y
+
+    :param ksize: Size of the extended Sobel kernel, must be 1, 3, 5 or 7
+
+    :param scale: The optional scale factor for the computed derivative values (by default, no scaling is applied, see :func:`getDerivKernels`)
+
+    :param delta: The optional delta value, added to the results prior to storing them in ``dst``
+
+    :param borderType: The pixel extrapolation method, see :func:`borderInterpolate`
+
+In all cases except ``ksize = 1``, a :math:`\texttt{ksize} \times \texttt{ksize}` separable kernel will be used to calculate the derivative. When :math:`\texttt{ksize = 1}`, a :math:`3 \times 1` or :math:`1 \times 3` kernel will be used (i.e. no Gaussian smoothing is done). ``ksize = 1`` can only be used for the first or the second x- or y- derivatives.
+
+There is also the special value ``ksize = CV_SCHARR`` (-1) that corresponds to a :math:`3\times3` Scharr filter that may give more accurate results than a :math:`3\times3` Sobel. The Scharr aperture is
+
+.. math::
+
+    \vecthreethree{-3}{0}{3}{-10}{0}{10}{-3}{0}{3}
+
+for the x-derivative, or transposed for the y-derivative.
+
+The function calculates the image derivative by convolving the image with the appropriate kernel:
+
+.. math::
+
+    \texttt{dst} = \frac{\partial^{xorder+yorder} \texttt{src}}{\partial x^{xorder} \partial y^{yorder}}
+
+The Sobel operators combine Gaussian smoothing and differentiation, so the result is more or less resistant to noise. Most often, the function is called with ( ``xorder`` = 1, ``yorder`` = 0, ``ksize`` = 3) or ( ``xorder`` = 0, ``yorder`` = 1, ``ksize`` = 3) to calculate the first x- or y- image derivative. The first case corresponds to a kernel of:
+
+.. math::
+
+    \vecthreethree{-1}{0}{1}{-2}{0}{2}{-1}{0}{1}
+
+and the second one corresponds to a kernel of:
+
+.. math::
+
+    \vecthreethree{-1}{-2}{-1}{0}{0}{0}{1}{2}{1}
+
+See also: :func:`Scharr`, :func:`Laplacian`, :func:`sepFilter2D`, :func:`filter2D`, :func:`GaussianBlur`
+
+.. index:: Scharr
+
+cv::Scharr
+----------
+
+`id=0.127904577221 Comments from the Wiki `__
+
+.. cfunction:: void Scharr( const Mat\& src, Mat\& dst, int ddepth, int xorder, int yorder, double scale=1, double delta=0, int borderType=BORDER_DEFAULT )
+
+    Calculates the first x- or y- image derivative using the Scharr operator
+
+    :param src: The source image
+
+    :param dst: The destination image; will have the same size and the same number of channels as ``src``
+
+    :param ddepth: The destination image depth
+
+    :param xorder: Order of the derivative x
+
+    :param yorder: Order of the derivative y
+
+    :param scale: The optional scale factor for the computed derivative values (by default, no scaling is applied, see :func:`getDerivKernels`)
+
+    :param delta: The optional delta value, added to the results prior to storing them in ``dst``
+
+    :param borderType: The pixel extrapolation method, see :func:`borderInterpolate`
+
+The function computes the first x- or y- spatial image derivative using the Scharr operator. The call
+
+.. math::
+
+    \texttt{Scharr(src, dst, ddepth, xorder, yorder, scale, delta, borderType)}
+
+is equivalent to
+
+.. math::
+
+    \texttt{Sobel(src, dst, ddepth, xorder, yorder, CV\_SCHARR, scale, delta, borderType)} .
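+For example, a possible sketch (``gray`` is assumed to be an 8-bit grayscale image) that estimates the per-pixel gradient magnitude::
+
+    Mat gx, gy, mag;
+    // store the derivatives as 32-bit floats to keep the sign
+    Sobel(gray, gx, CV_32F, 1, 0, 3);
+    Sobel(gray, gy, CV_32F, 0, 1, 3);
+    magnitude(gx, gy, mag); // sqrt(gx^2 + gy^2), per pixel
+
+..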
diff --git a/modules/imgproc/doc/geometric_transformations.rst b/modules/imgproc/doc/geometric_transformations.rst
new file mode 100644
index 000000000..46b03d0e4
--- /dev/null
+++ b/modules/imgproc/doc/geometric_transformations.rst
@@ -0,0 +1,774 @@
+Geometric Image Transformations
+===============================
+
+.. highlight:: cpp
+
+The functions in this section perform various geometrical transformations of 2D images. That is, they do not change the image content, but deform the pixel grid, and map this deformed grid to the destination image. In fact, to avoid sampling artifacts, the mapping is done in the reverse order, from destination to the source. That is, for each pixel :math:`(x, y)` of the destination image, the functions compute coordinates of the corresponding "donor" pixel in the source image and copy the pixel value, that is:
+
+.. math::
+
+    \texttt{dst} (x,y)= \texttt{src} (f_x(x,y), f_y(x,y))
+
+In the case when the user specifies the forward mapping :math:`\left<g_x, g_y\right>: \texttt{src} \rightarrow \texttt{dst}`, the OpenCV functions first compute the corresponding inverse mapping :math:`\left<f_x, f_y\right>: \texttt{dst} \rightarrow \texttt{src}` and then use the above formula.
+
+The actual implementations of the geometrical transformations, from the most generic :ref:`Remap` to the simplest and the fastest :ref:`Resize`, need to solve 2 main problems with the above formula:
+
+#.
+    extrapolation of non-existing pixels. Similarly to the filtering functions, described in the previous section, for some :math:`(x,y)` one of :math:`f_x(x,y)` or :math:`f_y(x,y)`, or both of them, may fall outside of the image, in which case some extrapolation method needs to be used. OpenCV provides the same selection of extrapolation methods as in the filtering functions, plus an additional method ``BORDER_TRANSPARENT``, which means that the corresponding pixels in the destination image will not be modified at all.
+
+#.
+    interpolation of pixel values. Usually :math:`f_x(x,y)` and :math:`f_y(x,y)` are floating-point numbers (i.e. :math:`\left<f_x, f_y\right>` can be an affine or perspective transformation, or radial lens distortion correction etc.), so pixel values at fractional coordinates need to be retrieved. In the simplest case the coordinates can be just rounded to the nearest integer coordinates and the corresponding pixel used, which is called nearest-neighbor interpolation. However, a better result can be achieved by using more sophisticated `interpolation methods `_, where a polynomial function is fit into some neighborhood of the computed pixel :math:`(f_x(x,y), f_y(x,y))` and then the value of the polynomial at :math:`(f_x(x,y), f_y(x,y))` is taken as the interpolated pixel value. In OpenCV you can choose between several interpolation methods, see :ref:`Resize`.
+
+.. index:: convertMaps
+
+cv::convertMaps
+---------------
+
+`id=0.830076060616 Comments from the Wiki `__
+
+.. cfunction:: void convertMaps( const Mat\& map1, const Mat\& map2, Mat\& dstmap1, Mat\& dstmap2, int dstmap1type, bool nninterpolation=false )
+
+    Converts image transformation maps from one representation to another
+
+    :param map1: The first input map of type ``CV_16SC2`` or ``CV_32FC1`` or ``CV_32FC2``
+
+    :param map2: The second input map of type ``CV_16UC1`` or ``CV_32FC1`` or none (empty matrix), respectively
+
+    :param dstmap1: The first output map; will have type ``dstmap1type`` and the same size as ``src``
+
+    :param dstmap2: The second output map
+
+    :param dstmap1type: The type of the first output map; should be ``CV_16SC2``, ``CV_32FC1`` or ``CV_32FC2``
+
+    :param nninterpolation: Indicates whether the fixed-point maps will be used for nearest-neighbor or for more complex interpolation
+
+The function converts a pair of maps for :func:`remap` from one representation to another. The following options ( ``(map1.type(), map2.type())`` :math:`\rightarrow` ``(dstmap1.type(), dstmap2.type())`` ) are supported:
+
+#.
+    :math:`\texttt{(CV\_32FC1, CV\_32FC1)} \rightarrow \texttt{(CV\_16SC2, CV\_16UC1)}`. This is the most frequently used conversion operation, in which the original floating-point maps (see :func:`remap`) are converted to a more compact and much faster fixed-point representation. The first output array will contain the rounded coordinates and the second array (created only when ``nninterpolation=false``) will contain indices in the interpolation tables.
+
+#.
+    :math:`\texttt{(CV\_32FC2)} \rightarrow \texttt{(CV\_16SC2, CV\_16UC1)}`. The same as above, but the original maps are stored in one 2-channel matrix.
+
+#.
+    the reverse conversion. Obviously, the reconstructed floating-point maps will not be exactly the same as the originals.
+
+See also: :func:`remap`, :func:`undistort`, :func:`initUndistortRectifyMap`
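+For example, one possible sketch (the image name is arbitrary) that builds a horizontal-flip map, converts it to the fixed-point representation and uses it with :func:`remap`::
+
+    Mat src = imread("fruits.jpg");
+    Mat mapx(src.size(), CV_32FC1), mapy(src.size(), CV_32FC1);
+    for( int y = 0; y < src.rows; y++ )
+        for( int x = 0; x < src.cols; x++ )
+        {
+            mapx.at<float>(y, x) = (float)(src.cols - 1 - x); // flip horizontally
+            mapy.at<float>(y, x) = (float)y;
+        }
+    Mat map1, map2, dst;
+    // convert once to the compact fixed-point representation ...
+    convertMaps(mapx, mapy, map1, map2, CV_16SC2, false);
+    // ... and reuse it in the (possibly repeated) remap calls
+    remap(src, dst, map1, map2, INTER_LINEAR);
+
+..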
+.. index:: getAffineTransform
+
+cv::getAffineTransform
+----------------------
+
+`id=0.578246613742 Comments from the Wiki `__
+
+.. cfunction:: Mat getAffineTransform( const Point2f src[], const Point2f dst[] )
+
+    Calculates the affine transform from 3 pairs of corresponding points
+
+    :param src: Coordinates of triangle vertices in the source image
+
+    :param dst: Coordinates of the corresponding triangle vertices in the destination image
+
+The function calculates the :math:`2 \times 3` matrix of an affine transform such that:
+
+.. math::
+
+    \begin{bmatrix} x'_i \\ y'_i \end{bmatrix} = \texttt{map\_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}
+
+where
+
+.. math::
+
+    dst(i)=(x'_i,y'_i), \quad src(i)=(x_i, y_i), \quad i=0,1,2
+
+See also: :func:`warpAffine`, :func:`transform`
+
+.. index:: getPerspectiveTransform
+
+cv::getPerspectiveTransform
+---------------------------
+
+`id=0.124978390322 Comments from the Wiki `__
+
+.. cfunction:: Mat getPerspectiveTransform( const Point2f src[], const Point2f dst[] )
+
+    Calculates the perspective transform from 4 pairs of corresponding points
+
+    :param src: Coordinates of quadrangle vertices in the source image
+
+    :param dst: Coordinates of the corresponding quadrangle vertices in the destination image
+
+The function calculates the :math:`3 \times 3` matrix of a perspective transform such that:
+
+.. math::
+
+    \begin{bmatrix} t_i x'_i \\ t_i y'_i \\ t_i \end{bmatrix} = \texttt{map\_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}
+
+where
+
+.. math::
+
+    dst(i)=(x'_i,y'_i), \quad src(i)=(x_i, y_i), \quad i=0,1,2,3
+
+See also: :func:`findHomography`, :func:`warpPerspective`, :func:`perspectiveTransform`
+
+.. index:: getRectSubPix
+
+cv::getRectSubPix
+-----------------
+
+`id=0.0571919909094 Comments from the Wiki `__
+
+.. cfunction:: void getRectSubPix( const Mat\& image, Size patchSize, Point2f center, Mat\& dst, int patchType=-1 )
+
+    Retrieves the pixel rectangle from an image with sub-pixel accuracy
+
+    :param image: Source image
+
+    :param patchSize: Size of the extracted patch
+
+    :param center: Floating point coordinates of the extracted rectangle center within the source image. The center must be inside the image
+
+    :param dst: The extracted patch; will have the size ``patchSize`` and the same number of channels as the source image
+
+    :param patchType: The depth of the extracted pixels. By default they will have the same depth as the source image
+
+The function ``getRectSubPix`` extracts pixels from ``image``:
+
+.. math::
+
+    dst(x, y) = src(x + \texttt{center.x} - ( \texttt{dst.cols} -1)*0.5, y + \texttt{center.y} - ( \texttt{dst.rows} -1)*0.5)
+
+where the values of the pixels at non-integer coordinates are retrieved using bilinear interpolation. Every channel of multiple-channel images is processed independently. While the rectangle center must be inside the image, parts of the rectangle may be outside. In this case, the replication border mode (see :func:`borderInterpolate`) is used to extrapolate the pixel values outside of the image.
+
+See also: :func:`warpAffine`, :func:`warpPerspective`
+
+.. index:: getRotationMatrix2D
+
+cv::getRotationMatrix2D
+-----------------------
+
+`id=0.641646199188 Comments from the Wiki `__
+
+.. cfunction:: Mat getRotationMatrix2D( Point2f center, double angle, double scale )
+
+    Calculates the affine matrix of a 2D rotation.
+    :param center: Center of the rotation in the source image
+
+    :param angle: The rotation angle in degrees. Positive values mean counter-clockwise rotation (the coordinate origin is assumed to be the top-left corner)
+
+    :param scale: Isotropic scale factor
+
+The function calculates the following matrix:
+
+.. math::
+
+    \begin{bmatrix} \alpha & \beta & (1- \alpha ) \cdot \texttt{center.x} - \beta \cdot \texttt{center.y} \\ - \beta & \alpha & \beta \cdot \texttt{center.x} + (1- \alpha ) \cdot \texttt{center.y} \end{bmatrix}
+
+where
+
+.. math::
+
+    \begin{array}{l} \alpha = \texttt{scale} \cdot \cos \texttt{angle} , \\ \beta = \texttt{scale} \cdot \sin \texttt{angle} \end{array}
+
+The transformation maps the rotation center to itself. If this is not the purpose, the shift should be adjusted.
+
+See also: :func:`getAffineTransform`, :func:`warpAffine`, :func:`transform`
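+For example, a minimal sketch (the image name is arbitrary) that rotates an image by 30 degrees around its center using :func:`warpAffine`::
+
+    Mat src = imread("graf.png");
+    Point2f center(src.cols*0.5f, src.rows*0.5f);
+    // counter-clockwise rotation by 30 degrees, no scaling
+    Mat R = getRotationMatrix2D(center, 30, 1.0);
+    Mat rotated;
+    warpAffine(src, rotated, R, src.size());
+
+..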
+.. index:: invertAffineTransform
+
+cv::invertAffineTransform
+-------------------------
+
+`id=0.772575709646 Comments from the Wiki `__
+
+.. cfunction:: void invertAffineTransform(const Mat\& M, Mat\& iM)
+
+    Inverts an affine transformation
+
+    :param M: The original affine transformation
+
+    :param iM: The output reverse affine transformation
+
+The function computes the inverse affine transformation represented by the :math:`2 \times 3` matrix ``M``:
+
+.. math::
+
+    \begin{bmatrix} a_{11} & a_{12} & b_1 \\ a_{21} & a_{22} & b_2 \end{bmatrix}
+
+The result will also be a :math:`2 \times 3` matrix of the same type as ``M``.
+
+.. index:: remap
+
+cv::remap
+---------
+
+`id=0.948217317394 Comments from the Wiki `__
+
+.. cfunction:: void remap( const Mat\& src, Mat\& dst, const Mat\& map1, const Mat\& map2, int interpolation, int borderMode=BORDER_CONSTANT, const Scalar\& borderValue=Scalar())
+
+    Applies a generic geometrical transformation to an image.
+
+    :param src: Source image
+
+    :param dst: Destination image. It will have the same size as ``map1`` and the same type as ``src``
+
+    :param map1: The first map of either ``(x,y)`` points or just ``x`` values having type ``CV_16SC2``, ``CV_32FC1`` or ``CV_32FC2``. See :func:`convertMaps` for converting the floating point representation to fixed-point for speed.
+
+    :param map2: The second map of ``y`` values having type ``CV_16UC1``, ``CV_32FC1`` or none (empty map if ``map1`` is ``(x,y)`` points), respectively
+
+    :param interpolation: The interpolation method, see :func:`resize`. The method ``INTER_AREA`` is not supported by this function
+
+    :param borderMode: The pixel extrapolation method, see :func:`borderInterpolate`. When ``borderMode=BORDER_TRANSPARENT``, the pixels in the destination image that correspond to the "outliers" in the source image are not modified by the function
+
+    :param borderValue: A value used in the case of a constant border. By default it is 0
+
+The function ``remap`` transforms the source image using the specified map:
+
+.. math::
+
+    \texttt{dst} (x,y) = \texttt{src} (map_x(x,y),map_y(x,y))
+
+where values of pixels with non-integer coordinates are computed using one of the available interpolation methods. :math:`map_x` and :math:`map_y` can be encoded as separate floating-point maps in :math:`map_1` and :math:`map_2` respectively, or interleaved floating-point maps of :math:`(x,y)` in :math:`map_1`, or fixed-point maps made by using :func:`convertMaps`. The reason you might want to convert from floating to fixed-point representations of a map is that they can yield much faster (~2x) remapping operations. In the converted case, :math:`map_1` contains pairs ``(cvFloor(x), cvFloor(y))`` and :math:`map_2` contains indices in a table of interpolation coefficients.
+
+This function cannot operate in-place.
+
+.. index:: resize
+
+cv::resize
+----------
+
+`id=0.927768028114 Comments from the Wiki `__
+
+.. cfunction:: void resize( const Mat\& src, Mat\& dst, Size dsize, double fx=0, double fy=0, int interpolation=INTER_LINEAR )
+
+    Resizes an image
+
+    :param src: Source image
+
+    :param dst: Destination image. It will have size ``dsize`` (when it is non-zero) or the size computed from ``src.size()`` and ``fx`` and ``fy``. The type of ``dst`` will be the same as of ``src``.
+
+    :param dsize: The destination image size. If it is zero, then it is computed as:
+
+        .. math::
+
+            \texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}
+
+        Either ``dsize`` or both ``fx`` and ``fy`` must be non-zero.
+
+    :param fx: The scale factor along the horizontal axis. When 0, it is computed as
+
+        .. math::
+
+            \texttt{(double)dsize.width/src.cols}
+
+    :param fy: The scale factor along the vertical axis. When 0, it is computed as
+
+        .. math::
+
+            \texttt{(double)dsize.height/src.rows}
+
+    :param interpolation: The interpolation method:
+
+        * **INTER_NEAREST** nearest-neighbor interpolation
+
+        * **INTER_LINEAR** bilinear interpolation (used by default)
+
+        * **INTER_AREA** resampling using pixel area relation. It may be the preferred method for image decimation, as it gives moire-free results. But when the image is zoomed, it is similar to the ``INTER_NEAREST`` method
+
+        * **INTER_CUBIC** bicubic interpolation over a 4x4 pixel neighborhood
+
+        * **INTER_LANCZOS4** Lanczos interpolation over an 8x8 pixel neighborhood
+
+The function ``resize`` resizes the image ``src`` down to or up to the specified size. Note that the initial ``dst`` type or size are not taken into account. Instead the size and type are derived from ``src``, ``dsize``, ``fx`` and ``fy``. If you want to resize ``src`` so that it fits the pre-created ``dst``, you may call the function as:
+
+::
+
+    // explicitly specify dsize=dst.size(); fx and fy will be computed from that.
+    resize(src, dst, dst.size(), 0, 0, interpolation);
+
+..
+
+If you want to decimate the image by a factor of 2 in each direction, you can call the function this way:
+
+::
+
+    // specify fx and fy and let the function compute the destination image size.
+    resize(src, dst, Size(), 0.5, 0.5, interpolation);
+
+..
+
+See also: :func:`warpAffine`, :func:`warpPerspective`, :func:`remap`.
+
+.. index:: warpAffine
+
+cv::warpAffine
+--------------
+
+`id=0.796627178227 Comments from the Wiki `__
+
+.. cfunction:: void warpAffine( const Mat\& src, Mat\& dst, const Mat\& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, const Scalar\& borderValue=Scalar())
+
+    Applies an affine transformation to an image.
+    :param src: Source image
+
+    :param dst: Destination image; will have size ``dsize`` and the same type as ``src``
+
+    :param M: :math:`2\times 3` transformation matrix
+
+    :param dsize: Size of the destination image
+
+    :param flags: A combination of interpolation methods, see :func:`resize`, and the optional flag ``WARP_INVERSE_MAP`` that means that ``M`` is the inverse transformation ( :math:`\texttt{dst}\rightarrow\texttt{src}` )
+
+    :param borderMode: The pixel extrapolation method, see :func:`borderInterpolate`. When ``borderMode=BORDER_TRANSPARENT``, the pixels in the destination image that correspond to the "outliers" in the source image are not modified by the function
+
+    :param borderValue: A value used in case of a constant border. By default it is 0
+
+The function ``warpAffine`` transforms the source image using the specified matrix:
+
+.. math::
+
+    \texttt{dst} (x,y) = \texttt{src} ( \texttt{M} _{11} x + \texttt{M} _{12} y + \texttt{M} _{13}, \texttt{M} _{21} x + \texttt{M} _{22} y + \texttt{M} _{23})
+
+when the flag ``WARP_INVERSE_MAP`` is set. Otherwise, the transformation is first inverted with :func:`invertAffineTransform` and then put in the formula above instead of ``M``. The function cannot operate in-place.
+
+See also: :func:`warpPerspective`, :func:`resize`, :func:`remap`, :func:`getRectSubPix`, :func:`transform`
+
+.. index:: warpPerspective
+
+cv::warpPerspective
+-------------------
+
+`id=0.733510667556 Comments from the Wiki `__
+
+.. cfunction:: void warpPerspective( const Mat\& src, Mat\& dst, const Mat\& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, const Scalar\& borderValue=Scalar())
+
+    Applies a perspective transformation to an image.
+
+    :param src: Source image
+
+    :param dst: Destination image; will have size ``dsize`` and the same type as ``src``
+
+    :param M: :math:`3\times 3` transformation matrix
+
+    :param dsize: Size of the destination image
+
+    :param flags: A combination of interpolation methods, see :func:`resize`, and the optional flag ``WARP_INVERSE_MAP`` that means that ``M`` is the inverse transformation ( :math:`\texttt{dst}\rightarrow\texttt{src}` )
+
+    :param borderMode: The pixel extrapolation method, see :func:`borderInterpolate`. When ``borderMode=BORDER_TRANSPARENT``, the pixels in the destination image that correspond to the "outliers" in the source image are not modified by the function
+
+    :param borderValue: A value used in case of a constant border. By default it is 0
+
+The function ``warpPerspective`` transforms the source image using the specified matrix:
+
+.. math::
+
+    \texttt{dst} (x,y) = \texttt{src} \left ( \frac{M_{11} x + M_{12} y + M_{13}}{M_{31} x + M_{32} y + M_{33}} , \frac{M_{21} x + M_{22} y + M_{23}}{M_{31} x + M_{32} y + M_{33}} \right )
+
+when the flag ``WARP_INVERSE_MAP`` is set. Otherwise, the transformation is first inverted with :func:`invert` and then put in the formula above instead of ``M``. The function cannot operate in-place.
+
+See also: :func:`warpAffine`, :func:`resize`, :func:`remap`, :func:`getRectSubPix`, :func:`perspectiveTransform`
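+For example, a possible sketch (the corner coordinates are arbitrary) that rectifies a quadrilateral region using :func:`getPerspectiveTransform`::
+
+    // map four corners of a document in the input to a 300x300 square
+    Point2f srcQuad[4] = { Point2f(56, 65),  Point2f(368, 52),
+                           Point2f(28, 387), Point2f(389, 390) };
+    Point2f dstQuad[4] = { Point2f(0, 0),    Point2f(300, 0),
+                           Point2f(0, 300),  Point2f(300, 300) };
+    Mat M = getPerspectiveTransform(srcQuad, dstQuad);
+    Mat warped;
+    warpPerspective(src, warped, M, Size(300, 300));
+
+..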
diff --git a/modules/imgproc/doc/histograms.rst b/modules/imgproc/doc/histograms.rst
new file mode 100644
index 000000000..af8f09a85
--- /dev/null
+++ b/modules/imgproc/doc/histograms.rst
@@ -0,0 +1,404 @@
+Histograms
+==========
+
+.. highlight:: cpp
+
+.. index:: calcHist
+
+cv::calcHist
+------------
+
+`id=0.023612377096 Comments from the Wiki `__
+
+.. cfunction:: void calcHist( const Mat* arrays, int narrays, const int* channels, const Mat\& mask, MatND\& hist, int dims, const int* histSize, const float** ranges, bool uniform=true, bool accumulate=false )
+
+.. cfunction:: void calcHist( const Mat* arrays, int narrays, const int* channels, const Mat\& mask, SparseMat\& hist, int dims, const int* histSize, const float** ranges, bool uniform=true, bool accumulate=false )
+
+    Calculates the histogram of a set of arrays
+
+    :param arrays: Source arrays. They all should have the same depth, ``CV_8U`` or ``CV_32F``, and the same size. Each of them can have an arbitrary number of channels
+
+    :param narrays: The number of source arrays
+
+    :param channels: The list of ``dims`` channels that are used to compute the histogram. The first array channels are counted from 0 to ``arrays[0].channels()-1``, the second array channels are counted from ``arrays[0].channels()`` to ``arrays[0].channels() + arrays[1].channels()-1`` etc.
+
+    :param mask: The optional mask. If the matrix is not empty, it must be an 8-bit array of the same size as ``arrays[i]``. The non-zero mask elements mark the array elements that are counted in the histogram
+
+    :param hist: The output histogram, a dense or sparse ``dims`` -dimensional array
+
+    :param dims: The histogram dimensionality; must be positive and not greater than ``CV_MAX_DIMS`` (=32 in the current OpenCV version)
+
+    :param histSize: The array of histogram sizes in each dimension
+
+    :param ranges: The array of ``dims`` arrays of the histogram bin boundaries in each dimension. When the histogram is uniform ( ``uniform`` =true), then for each dimension ``i`` it's enough to specify the lower (inclusive) boundary :math:`L_0` of the 0-th histogram bin and the upper (exclusive) boundary :math:`U_{\texttt{histSize}[i]-1}` for the last histogram bin ``histSize[i]-1``. That is, in the case of a uniform histogram each of ``ranges[i]`` is an array of 2 elements. When the histogram is not uniform ( ``uniform=false`` ), then each of ``ranges[i]`` contains ``histSize[i]+1`` elements: :math:`L_0, U_0=L_1, U_1=L_2, ..., U_{\texttt{histSize[i]}-2}=L_{\texttt{histSize[i]}-1}, U_{\texttt{histSize[i]}-1}`. The array elements, which are not between :math:`L_0` and :math:`U_{\texttt{histSize[i]}-1}`, are not counted in the histogram
+
+    :param uniform: Indicates whether the histogram is uniform or not, see above
+
+    :param accumulate: Accumulation flag. If it is set, the histogram is not cleared in the beginning (when it is allocated). This feature allows the user to compute a single histogram from several sets of arrays, or to update the histogram in time
+
+The functions ``calcHist`` calculate the histogram of one or more arrays. The elements of a tuple that is used to increment a histogram bin are taken at the same location from the corresponding input arrays.
+The sample below shows how to compute a 2D Hue-Saturation histogram for a color image
+
+::
+
+    #include <cv.h>
+    #include <highgui.h>
+
+    using namespace cv;
+
+    int main( int argc, char** argv )
+    {
+        Mat src, hsv;
+        if( argc != 2 || !(src=imread(argv[1], 1)).data )
+            return -1;
+
+        cvtColor(src, hsv, CV_BGR2HSV);
+
+        // let's quantize the hue to 30 levels
+        // and the saturation to 32 levels
+        int hbins = 30, sbins = 32;
+        int histSize[] = {hbins, sbins};
+        // hue varies from 0 to 179, see cvtColor
+        float hranges[] = { 0, 180 };
+        // saturation varies from 0 (black-gray-white) to
+        // 255 (pure spectrum color)
+        float sranges[] = { 0, 256 };
+        const float* ranges[] = { hranges, sranges };
+        MatND hist;
+        // we compute the histogram from the 0-th and 1-st channels
+        int channels[] = {0, 1};
+
+        calcHist( &hsv, 1, channels, Mat(), // do not use mask
+                  hist, 2, histSize, ranges,
+                  true, // the histogram is uniform
+                  false );
+        double maxVal=0;
+        minMaxLoc(hist, 0, &maxVal, 0, 0);
+
+        int scale = 10;
+        Mat histImg = Mat::zeros(sbins*scale, hbins*10, CV_8UC3);
+
+        for( int h = 0; h < hbins; h++ )
+            for( int s = 0; s < sbins; s++ )
+            {
+                float binVal = hist.at<float>(h, s);
+                int intensity = cvRound(binVal*255/maxVal);
+                rectangle( histImg, Point(h*scale, s*scale),
+                           Point( (h+1)*scale - 1, (s+1)*scale - 1),
+                           Scalar::all(intensity),
+                           CV_FILLED );
+            }
+
+        namedWindow( "Source", 1 );
+        imshow( "Source", src );
+
+        namedWindow( "H-S Histogram", 1 );
+        imshow( "H-S Histogram", histImg );
+        waitKey();
+    }
+
+..
+
+.. index:: calcBackProject
+
+cv::calcBackProject
+-------------------
+
+`id=0.307675677402 Comments from the Wiki `__
+
+.. cfunction:: void calcBackProject( const Mat* arrays, int narrays, const int* channels, const MatND\& hist, Mat\& backProject, const float** ranges, double scale=1, bool uniform=true )
+
+.. cfunction:: void calcBackProject( const Mat* arrays, int narrays, const int* channels, const SparseMat\& hist, Mat\& backProject, const float** ranges, double scale=1, bool uniform=true )
+
+    Calculates the back projection of a histogram.
+
+    :param arrays: Source arrays. They all should have the same depth, ``CV_8U`` or ``CV_32F``, and the same size. Each of them can have an arbitrary number of channels
+
+    :param narrays: The number of source arrays
+
+    :param channels: The list of channels that are used to compute the back projection. The number of channels must match the histogram dimensionality. The first array channels are counted from 0 to ``arrays[0].channels()-1``, the second array channels are counted from ``arrays[0].channels()`` to ``arrays[0].channels() + arrays[1].channels()-1`` etc.
+
+    :param hist: The input histogram, a dense or sparse array
+
+    :param backProject: Destination back projection array; will be a single-channel array of the same size and the same depth as ``arrays[0]``
+
+    :param ranges: The array of arrays of the histogram bin boundaries in each dimension. See :func:`calcHist`
+
+    :param scale: The optional scale factor for the output back projection
+
+    :param uniform: Indicates whether the histogram is uniform or not, see above
+
+The functions ``calcBackProject`` calculate the back projection of the histogram. That is, similarly to ``calcHist``, at each location ``(x, y)`` the function collects the values from the selected channels in the input images and finds the corresponding histogram bin.
+But instead of incrementing it, the function reads the bin value, scales it by ``scale`` and stores it in ``backProject(x,y)``. In terms of statistics, the function computes the probability of each element value with respect to the empirical probability distribution represented by the histogram. Here is how, for example, you can find and track a bright-colored object in a scene:
+
+#.
+    Before the tracking, show the object to the camera so that it covers almost the whole frame. Calculate a hue histogram. The histogram will likely have strong maxima, corresponding to the dominant colors in the object.
+
+#.
+    During the tracking, calculate the back projection of a hue plane of each input video frame using that pre-computed histogram. Threshold the back projection to suppress weak colors. It may also make sense to suppress pixels with insufficient color saturation and too dark or too bright pixels.
+
+#.
+    Find connected components in the resulting picture and choose, for example, the largest component.
+
+That is the approximate algorithm of the :func:`CAMShift` color object tracker.
+
+See also: :func:`calcHist`
+
+.. index:: compareHist
+
+cv::compareHist
+---------------
+
+`id=0.679842058679 Comments from the Wiki `__
+
+.. cfunction:: double compareHist( const MatND\& H1, const MatND\& H2, int method )
+
+.. cfunction:: double compareHist( const SparseMat\& H1, const SparseMat\& H2, int method )
+
+    Compares two histograms
+
+    :param H1: The first compared histogram
+
+    :param H2: The second compared histogram of the same size as ``H1``
+
+    :param method: The comparison method, one of the following:
+
+        * **CV_COMP_CORREL** Correlation
+
+        * **CV_COMP_CHISQR** Chi-Square
+
+        * **CV_COMP_INTERSECT** Intersection
+
+        * **CV_COMP_BHATTACHARYYA** Bhattacharyya distance
+
+The functions ``compareHist`` compare two dense or two sparse histograms using the specified method:
+
+* Correlation (method=CV\_COMP\_CORREL)
+
+  .. math::
+
+      d(H_1,H_2) = \frac{\sum_I (H_1(I) - \bar{H_1}) (H_2(I) - \bar{H_2})}{\sqrt{\sum_I(H_1(I) - \bar{H_1})^2 \sum_I(H_2(I) - \bar{H_2})^2}}
+
+  where
+
+  .. math::
+
+      \bar{H_k} = \frac{1}{N} \sum _J H_k(J)
+
+  and :math:`N` is the total number of histogram bins.
+
+* Chi-Square (method=CV\_COMP\_CHISQR)
+
+  .. math::
+
+      d(H_1,H_2) = \sum _I \frac{\left(H_1(I)-H_2(I)\right)^2}{H_1(I)+H_2(I)}
+
+* Intersection (method=CV\_COMP\_INTERSECT)
+
+  .. math::
+
+      d(H_1,H_2) = \sum _I \min (H_1(I), H_2(I))
+
+* Bhattacharyya distance (method=CV\_COMP\_BHATTACHARYYA)
+
+  .. math::
+
+      d(H_1,H_2) = \sqrt{1 - \frac{1}{\sqrt{\bar{H_1} \bar{H_2} N^2}} \sum_I \sqrt{H_1(I) \cdot H_2(I)}}
+
+The function returns :math:`d(H_1, H_2)`.
+
+While the function works well with 1-, 2-, 3-dimensional dense histograms, it may not be suitable for high-dimensional sparse histograms, where, because of aliasing and sampling problems, the coordinates of non-zero histogram bins can slightly shift. To compare such histograms or more general sparse configurations of weighted points, consider using the :func:`calcEMD` function.
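+For example, assuming ``hist1`` and ``hist2`` are H-S histograms computed with :func:`calcHist` as in the sample above::
+
+    double corr = compareHist(hist1, hist2, CV_COMP_CORREL);
+    double dist = compareHist(hist1, hist2, CV_COMP_BHATTACHARYYA);
+    // CV_COMP_CORREL: higher values mean more similar histograms;
+    // CV_COMP_BHATTACHARYYA: lower values mean more similar histograms
+
+..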
+.. index:: equalizeHist
+
+cv::equalizeHist
+----------------
+
+`id=0.125539341699 Comments from the Wiki `__
+
+.. cfunction:: void equalizeHist( const Mat\& src, Mat\& dst )
+
+    Equalizes the histogram of a grayscale image.
+
+    :param src: The source 8-bit single channel image
+
+    :param dst: The destination image; will have the same size and the same type as ``src``
+
+The function equalizes the histogram of the input image using the following algorithm:
+
+#.
+    calculate the histogram :math:`H` for ``src``.
+
+#.
+    normalize the histogram so that the sum of histogram bins is 255.
+
+#.
+    compute the integral of the histogram:
+
+    .. math::
+
+        H'_i = \sum _{0 \le j < i} H(j)
+
+#.
+    transform the image using :math:`H'` as a look-up table: :math:`\texttt{dst}(x,y) = H'(\texttt{src}(x,y))`
+
+The algorithm normalizes the brightness and increases the contrast of the image.
+
diff --git a/modules/imgproc/doc/imgproc.rst b/modules/imgproc/doc/imgproc.rst
new file mode 100644
index 000000000..f89c6c13d
--- /dev/null
+++ b/modules/imgproc/doc/imgproc.rst
@@ -0,0 +1,16 @@
+****************
+Image Processing
+****************
+
+.. toctree::
+    :maxdepth: 2
+
+    histograms
+    filtering
+    geometric_transformations
+    miscellaneous_transformations
+    structural_analysis_and_shape_descriptors
+    planar_subdivisions
+    motion_analysis_and_object_tracking
+    feature_detection
+    object_detection
diff --git a/modules/imgproc/doc/miscellaneous_transformations.rst b/modules/imgproc/doc/miscellaneous_transformations.rst
new file mode 100644
index 000000000..56cc183d1
--- /dev/null
+++ b/modules/imgproc/doc/miscellaneous_transformations.rst
@@ -0,0 +1,1440 @@
+Miscellaneous Image Transformations
+===================================
+
+.. highlight:: cpp
+
+.. index:: adaptiveThreshold
+
+cv::adaptiveThreshold
+---------------------
+
+`id=0.756758527362 Comments from the Wiki `__
+
+.. cfunction:: void adaptiveThreshold( const Mat\& src, Mat\& dst, double maxValue, int adaptiveMethod, int thresholdType, int blockSize, double C )
+
+    Applies an adaptive threshold to an array.
+
+    :param src: Source 8-bit single-channel image
+
+    :param dst: Destination image; will have the same size and the same type as ``src``
+
+    :param maxValue: The non-zero value assigned to the pixels for which the condition is satisfied. See the discussion
+
+    :param adaptiveMethod: Adaptive thresholding algorithm to use, ``ADAPTIVE_THRESH_MEAN_C`` or ``ADAPTIVE_THRESH_GAUSSIAN_C`` (see the discussion)
+
+    :param thresholdType: Thresholding type; must be one of ``THRESH_BINARY`` or ``THRESH_BINARY_INV``
+
+    :param blockSize: The size of a pixel neighborhood that is used to calculate a threshold value for the pixel: 3, 5, 7, and so on
+
+    :param C: The constant subtracted from the mean or weighted mean (see the discussion); normally, it's positive, but may be zero or negative as well
+
+The function transforms a grayscale image to a binary image according to the formulas:
+
+    * **THRESH_BINARY**
+
+        .. math::
+
+            dst(x,y) = \fork{\texttt{maxValue}}{if $src(x,y) > T(x,y)$}{0}{otherwise}
+
+    * **THRESH_BINARY_INV**
+
+        .. math::
+
+            dst(x,y) = \fork{0}{if $src(x,y) > T(x,y)$}{\texttt{maxValue}}{otherwise}
+
+where :math:`T(x,y)` is a threshold calculated individually for each pixel.
+
+#.
+    For the method ``ADAPTIVE_THRESH_MEAN_C`` the threshold value :math:`T(x,y)` is the mean of a :math:`\texttt{blockSize} \times \texttt{blockSize}` neighborhood of :math:`(x, y)`, minus ``C``.
+#.
+    For the method ``ADAPTIVE_THRESH_GAUSSIAN_C`` the threshold value :math:`T(x, y)` is the weighted sum (i.e. cross-correlation with a Gaussian window) of a :math:`\texttt{blockSize} \times \texttt{blockSize}` neighborhood of :math:`(x, y)`, minus ``C``. The default sigma (standard deviation) is used for the specified ``blockSize``, see :func:`getGaussianKernel`.
+
+The function can process the image in-place.
+
+See also: :func:`threshold`, :func:`blur`, :func:`GaussianBlur`
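+For example, a minimal sketch (the image name is arbitrary) that binarizes a scanned page with a locally adaptive threshold::
+
+    Mat gray = imread("page.png", 0), bw;
+    // threshold = local 11x11 mean, offset by 5 gray levels
+    adaptiveThreshold(gray, bw, 255, ADAPTIVE_THRESH_MEAN_C,
+                      THRESH_BINARY, 11, 5);
+
+..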
+.. index:: cvtColor
+
+cv::cvtColor
+------------
+
+`id=0.24884397971 Comments from the Wiki `__
+
+.. cfunction:: void cvtColor( const Mat\& src, Mat\& dst, int code, int dstCn=0 )
+
+    Converts an image from one color space to another
+
+    :param src: The source image, 8-bit unsigned, 16-bit unsigned ( ``CV_16UC...`` ) or single-precision floating-point
+
+    :param dst: The destination image; will have the same size and the same depth as ``src``
+
+    :param code: The color space conversion code; see the discussion
+
+    :param dstCn: The number of channels in the destination image; if the parameter is 0, the number of the channels will be derived automatically from ``src`` and the ``code``
+
+The function converts the input image from one color space to another. In the case of transformation to-from RGB color space the ordering of the channels should be specified explicitly (RGB or BGR).
+
+The conventional ranges for R, G and B channel values are:
+
+* 0 to 255 for ``CV_8U`` images
+
+* 0 to 65535 for ``CV_16U`` images and
+
+* 0 to 1 for ``CV_32F`` images.
+
+Of course, in the case of linear transformations the range does not matter, but in the non-linear cases the input RGB image should be normalized to the proper value range in order to get the correct results, e.g. for RGB :math:`\rightarrow` L*u*v* transformation. For example, if you have a 32-bit floating-point image directly converted from an 8-bit image without any scaling, then it will have the 0..255 value range, instead of the 0..1 range assumed by the function. So, before calling ``cvtColor``, you need first to scale the image down:
+
+::
+
+    img *= 1./255;
+    cvtColor(img, img, CV_BGR2Luv);
+
+..
+
+The function can do the following transformations:
+
+*
+    Transformations within RGB space like adding/removing the alpha channel, reversing the channel order, conversion to/from 16-bit RGB color (R5:G6:B5 or R5:G5:B5), as well as conversion to/from grayscale using:
+
+    .. math::
+
+        \text{RGB[A] to Gray:} \quad Y \leftarrow 0.299 \cdot R + 0.587 \cdot G + 0.114 \cdot B
+
+    and
+
+    .. math::
+
+        \text{Gray to RGB[A]:} \quad R \leftarrow Y, G \leftarrow Y, B \leftarrow Y, A \leftarrow 0
+
+    The conversion from an RGB image to gray is done with:
+
+    ::
+
+        cvtColor(src, bwsrc, CV_RGB2GRAY);
+
+    ..
+
+    Some more advanced channel reordering can also be done with :func:`mixChannels`.
+
+*
+    RGB :math:`\leftrightarrow` CIE XYZ.Rec 709 with D65 white point ( ``CV_BGR2XYZ, CV_RGB2XYZ, CV_XYZ2BGR, CV_XYZ2RGB`` ):
+
+    .. math::
+
+        \begin{bmatrix} X \\ Y \\ Z \end{bmatrix} \leftarrow \begin{bmatrix} 0.412453 & 0.357580 & 0.180423 \\ 0.212671 & 0.715160 & 0.072169 \\ 0.019334 & 0.119193 & 0.950227 \end{bmatrix} \cdot \begin{bmatrix} R \\ G \\ B \end{bmatrix}
+
+    .. math::
+
+        \begin{bmatrix} R \\ G \\ B \end{bmatrix} \leftarrow \begin{bmatrix} 3.240479 & -1.53715 & -0.498535 \\ -0.969256 & 1.875991 & 0.041556 \\ 0.055648 & -0.204043 & 1.057311 \end{bmatrix} \cdot \begin{bmatrix} X \\ Y \\ Z \end{bmatrix}
+
+    :math:`X`, :math:`Y` and :math:`Z` cover the whole value range (in the case of floating-point images :math:`Z` may exceed 1).
+
+*
+    RGB :math:`\leftrightarrow` YCrCb JPEG (a.k.a. YCC) ( ``CV_BGR2YCrCb, CV_RGB2YCrCb, CV_YCrCb2BGR, CV_YCrCb2RGB`` )
+
+    .. math::
+
+        Y \leftarrow 0.299 \cdot R + 0.587 \cdot G + 0.114 \cdot B
+
+    .. math::
+
+        Cr \leftarrow (R-Y) \cdot 0.713 + delta
+
+    .. math::
+
+        Cb \leftarrow (B-Y) \cdot 0.564 + delta
+
+    .. math::
+
+        R \leftarrow Y + 1.403 \cdot (Cr - delta)
+
+    .. math::
+
+        G \leftarrow Y - 0.714 \cdot (Cr - delta) - 0.344 \cdot (Cb - delta)
+
+    .. math::
+
+        B \leftarrow Y + 1.773 \cdot (Cb - delta)
+
+    where
+
+    .. math::
+
+        delta = \left \{ \begin{array}{l l} 128 & \mbox{for 8-bit images} \\ 32768 & \mbox{for 16-bit images} \\ 0.5 & \mbox{for floating-point images} \end{array} \right .
+
+    Y, Cr and Cb cover the whole value range.
+
+*
+    RGB :math:`\leftrightarrow` HSV ( ``CV_BGR2HSV, CV_RGB2HSV, CV_HSV2BGR, CV_HSV2RGB`` ). In the case of 8-bit and 16-bit images R, G and B are converted to floating-point format and scaled to fit the 0 to 1 range
+
+    .. math::
+
+        V \leftarrow max(R,G,B)
+
+    .. math::
+
+        S \leftarrow \fork{\frac{V-min(R,G,B)}{V}}{if $V \neq 0$}{0}{otherwise}
+
+    .. math::
+
+        H \leftarrow \forkthree{{60(G - B)}/{S}}{if $V=R$}{{120+60(B - R)}/{S}}{if $V=G$}{{240+60(R - G)}/{S}}{if $V=B$}
+
+    if :math:`H<0` then :math:`H \leftarrow H+360`. On output :math:`0 \leq V \leq 1`, :math:`0 \leq S \leq 1`, :math:`0 \leq H \leq 360`.
+
+    The values are then converted to the destination data type:
+
+        * 8-bit images
+
+            .. math::
+
+                V \leftarrow 255 V, S \leftarrow 255 S, H \leftarrow H/2 \text{(to fit to 0 to 255)}
+
+        * 16-bit images (currently not supported)
+
+            .. math::
+
+                V <- 65535 V, S <- 65535 S, H <- H
+
+        * 32-bit images
+            H, S, V are left as is
+
+*
+    RGB :math:`\leftrightarrow` HLS ( ``CV_BGR2HLS, CV_RGB2HLS, CV_HLS2BGR, CV_HLS2RGB`` ). In the case of 8-bit and 16-bit images R, G and B are converted to floating-point format and scaled to fit the 0 to 1 range.
+
+    .. math::
+
+        V_{max} \leftarrow {max}(R,G,B)
+
+    .. math::
+
+        V_{min} \leftarrow {min}(R,G,B)
+
+    .. math::
+
+        L \leftarrow \frac{V_{max} + V_{min}}{2}
+
+    .. math::
+
+        S \leftarrow \fork { \frac{V_{max} - V_{min}}{V_{max} + V_{min}} }{if $L < 0.5$ } { \frac{V_{max} - V_{min}}{2 - (V_{max} + V_{min})} }{if $L \ge 0.5$ }
+
+    .. math::
+
+        H \leftarrow \forkthree {{60(G - B)}/{S}}{if $V_{max}=R$ } {{120+60(B - R)}/{S}}{if $V_{max}=G$ } {{240+60(R - G)}/{S}}{if $V_{max}=B$ }
+
+    if :math:`H<0` then :math:`H \leftarrow H+360`. On output :math:`0 \leq L \leq 1`, :math:`0 \leq S \leq 1`, :math:`0 \leq H \leq 360`.
+
+    The values are then converted to the destination data type:
+
+        * 8-bit images
+
+            .. math::
+
+                V \leftarrow 255 \cdot V, S \leftarrow 255 \cdot S, H \leftarrow H/2 \; \text{(to fit to 0 to 255)}
+
+        * 16-bit images (currently not supported)
+            .. math::
+
+                V <- 65535 \cdot V, S <- 65535 \cdot S, H <- H
+
+        * 32-bit images
+            H, S, V are left as is
+
+*
+    RGB :math:`\leftrightarrow` CIE L*a*b* ( ``CV_BGR2Lab, CV_RGB2Lab, CV_Lab2BGR, CV_Lab2RGB`` ). In the case of 8-bit and 16-bit images R, G and B are converted to floating-point format and scaled to fit the 0 to 1 range
+
+    .. math::
+
+        \vecthree{X}{Y}{Z} \leftarrow \vecthreethree{0.412453}{0.357580}{0.180423}{0.212671}{0.715160}{0.072169}{0.019334}{0.119193}{0.950227} \cdot \vecthree{R}{G}{B}
+
+    .. math::
+
+        X \leftarrow X/X_n, \text{where} X_n = 0.950456
+
+    .. math::
+
+        Z \leftarrow Z/Z_n, \text{where} Z_n = 1.088754
+
+    .. math::
+
+        L \leftarrow \fork{116*Y^{1/3}-16}{for $Y>0.008856$}{903.3*Y}{for $Y \le 0.008856$}
+
+    .. math::
+
+        a \leftarrow 500 (f(X)-f(Y)) + delta
+
+    .. math::
+
+        b \leftarrow 200 (f(Y)-f(Z)) + delta
+
+    where
+
+    .. math::
+
+        f(t)= \fork{t^{1/3}}{for $t>0.008856$}{7.787 t+16/116}{for $t\leq 0.008856$}
+
+    and
+
+    .. math::
+
+        delta = \fork{128}{for 8-bit images}{0}{for floating-point images}
+
+    On output :math:`0 \leq L \leq 100`, :math:`-127 \leq a \leq 127`, :math:`-127 \leq b \leq 127`. The values are then converted to the destination data type:
+
+        * 8-bit images
+
+            .. math::
+
+                L \leftarrow L*255/100, \; a \leftarrow a + 128, \; b \leftarrow b + 128
+
+        * 16-bit images
+            currently not supported
+
+        * 32-bit images
+            L, a, b are left as is
+
+*
+    RGB :math:`\leftrightarrow` CIE L*u*v* ( ``CV_BGR2Luv, CV_RGB2Luv, CV_Luv2BGR, CV_Luv2RGB`` ). In the case of 8-bit and 16-bit images R, G and B are converted to floating-point format and scaled to fit the 0 to 1 range
+
+    .. math::
+
+        \vecthree{X}{Y}{Z} \leftarrow \vecthreethree{0.412453}{0.357580}{0.180423}{0.212671}{0.715160}{0.072169}{0.019334}{0.119193}{0.950227} \cdot \vecthree{R}{G}{B}
+
+    .. math::
+
+        L \leftarrow \fork{116 Y^{1/3}}{for $Y>0.008856$}{903.3 Y}{for $Y\leq 0.008856$}
+
+    .. math::
+
+        u' \leftarrow 4*X/(X + 15*Y + 3 Z)
+
+    .. math::
+
+        v' \leftarrow 9*Y/(X + 15*Y + 3 Z)
+
+    .. math::
+
+        u \leftarrow 13*L*(u' - u_n) \quad \text{where} \quad u_n=0.19793943
+
+    .. math::
+
+        v \leftarrow 13*L*(v' - v_n) \quad \text{where} \quad v_n=0.46831096
+
+    On output :math:`0 \leq L \leq 100`, :math:`-134 \leq u \leq 220`, :math:`-140 \leq v \leq 122`.
+
+    The values are then converted to the destination data type:
+
+        * 8-bit images
+
+            .. math::
+
+                L \leftarrow 255/100 L, \; u \leftarrow 255/354 (u + 134), \; v \leftarrow 255/256 (v + 140)
+
+        * 16-bit images
+            currently not supported
+
+        * 32-bit images
+            L, u, v are left as is
+
+    The above formulas for converting RGB to/from various color spaces have been taken from multiple sources on the Web, primarily from the Charles Poynton site http://www.poynton.com/ColorFAQ.html
+
+*
+    Bayer :math:`\rightarrow` RGB ( ``CV_BayerBG2BGR, CV_BayerGB2BGR, CV_BayerRG2BGR, CV_BayerGR2BGR, CV_BayerBG2RGB, CV_BayerGB2RGB, CV_BayerRG2RGB, CV_BayerGR2RGB`` ). The Bayer pattern is widely used in CCD and CMOS cameras. It allows one to get color pictures from a single plane where R, G and B pixels (sensors of a particular component) are interleaved like this:
+    .. math::
+
+        \newcommand{\Rcell}{\color{red}R} \newcommand{\Gcell}{\color{green}G} \newcommand{\Bcell}{\color{blue}B} \definecolor{BackGray}{rgb}{0.8,0.8,0.8} \begin{array}{ c c c c c } \Rcell & \Gcell & \Rcell & \Gcell & \Rcell \\ \Gcell & \colorbox{BackGray}{\Bcell} & \colorbox{BackGray}{\Gcell} & \Bcell & \Gcell \\ \Rcell & \Gcell & \Rcell & \Gcell & \Rcell \\ \Gcell & \Bcell & \Gcell & \Bcell & \Gcell \\ \Rcell & \Gcell & \Rcell & \Gcell & \Rcell \end{array}
+
+    The output RGB components of a pixel are interpolated from 1, 2 or 4 neighbors of the pixel having the same color. There are several modifications of the above pattern that can be achieved by shifting the pattern one pixel left and/or one pixel up. The two letters :math:`C_1` and :math:`C_2` in the conversion constants ``CV_Bayer`` :math:`C_1 C_2` ``2BGR`` and ``CV_Bayer`` :math:`C_1 C_2` ``2RGB`` indicate the particular pattern type - these are components from the second row, second and third columns, respectively. For example, the above pattern has the very popular "BG" type.
+
+.. index:: distanceTransform
+
+cv::distanceTransform
+---------------------
+
+`id=0.475976287777 Comments from the Wiki `__
+
+.. cfunction:: void distanceTransform( const Mat\& src, Mat\& dst, int distanceType, int maskSize )
+
+.. cfunction:: void distanceTransform( const Mat\& src, Mat\& dst, Mat\& labels, int distanceType, int maskSize )
+
+    Calculates the distance to the closest zero pixel for each pixel of the source image.
+
+    :param src: 8-bit, single-channel (binary) source image
+
+    :param dst: Output image with calculated distances; will be a 32-bit floating-point, single-channel image of the same size as ``src``
+
+    :param distanceType: Type of distance; can be ``CV_DIST_L1, CV_DIST_L2`` or ``CV_DIST_C``
+
+    :param maskSize: Size of the distance transform mask; can be 3, 5 or ``CV_DIST_MASK_PRECISE`` (the latter option is only supported by the first of the functions). In the case of the ``CV_DIST_L1`` or ``CV_DIST_C`` distance type the parameter is forced to 3, because a :math:`3\times 3` mask gives the same result as a :math:`5\times 5` or any larger aperture.
+
+    :param labels: The optional output 2d array of labels - the discrete Voronoi diagram; will have type ``CV_32SC1`` and the same size as ``src``. See the discussion
+
+The functions ``distanceTransform`` calculate the approximate or precise distance from every binary image pixel to the nearest zero pixel (for zero image pixels the distance will obviously be zero).
+
+When ``maskSize == CV_DIST_MASK_PRECISE`` and ``distanceType == CV_DIST_L2``, the function runs the algorithm described in Felzenszwalb04.
+
+In other cases the algorithm Borgefors86 is used. That is, for each pixel the function finds the shortest path to the nearest zero pixel consisting of basic shifts: horizontal, vertical, diagonal or knight's move (the latter is available for a :math:`5\times 5` mask). The overall distance is calculated as a sum of these basic distances. Because the distance function should be symmetric, all of the horizontal and vertical shifts must have the same cost (that is denoted as ``a``), all the diagonal shifts must have the same cost (denoted ``b``), and all knight's moves must have the same cost (denoted ``c``).
+For the ``CV_DIST_C`` and ``CV_DIST_L1`` types the distance is calculated precisely, whereas for ``CV_DIST_L2`` (Euclidean distance) the distance can be calculated only with some relative error (a :math:`5\times 5` mask gives more accurate results). For ``a``, ``b`` and ``c`` OpenCV uses the values suggested in the original paper:
+
+.. table::
+
+    ============== =================== ======================
+    ``CV_DIST_C``  :math:`(3\times 3)` a = 1, b = 1 \
+    ============== =================== ======================
+    ``CV_DIST_L1`` :math:`(3\times 3)` a = 1, b = 2 \
+    ``CV_DIST_L2`` :math:`(3\times 3)` a=0.955, b=1.3693 \
+    ``CV_DIST_L2`` :math:`(5\times 5)` a=1, b=1.4, c=2.1969 \
+    ============== =================== ======================
+
+Typically, for a fast, coarse distance estimation ``CV_DIST_L2``, a :math:`3\times 3` mask is used, and for a more accurate distance estimation ``CV_DIST_L2``, a :math:`5\times 5` mask or the precise algorithm is used. Note that both the precise and the approximate algorithms are linear in the number of pixels.
+
+The second variant of the function does not only compute the minimum distance for each pixel :math:`(x, y)`, but it also identifies the nearest connected component consisting of zero pixels. The index of the component is stored in :math:`\texttt{labels}(x, y)`. The connected components of zero pixels are also found and marked by the function.
+
+In this mode the complexity is still linear. That is, the function provides a very fast way to compute the Voronoi diagram for a binary image. Currently, this second variant can only use the approximate distance transform algorithm.
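+For example, a possible sketch (``bw`` is assumed to be a binary image where zero pixels mark edges or background)::
+
+    Mat dist;
+    // precise Euclidean distance to the nearest zero pixel
+    distanceTransform(bw, dist, CV_DIST_L2, CV_DIST_MASK_PRECISE);
+    // stretch the result to [0,1] so it can be displayed
+    normalize(dist, dist, 0, 1., NORM_MINMAX);
+
+..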
**Note** : because the mask is larger than the filled image, a pixel :math:`(x, y)` in ``image`` will correspond to the pixel :math:`(x+1, y+1)` in the ``mask`` + + + :param seed: The starting point + + + :param newVal: New value of the repainted domain pixels + + + :param loDiff: Maximal lower brightness/color difference between the currently observed pixel and one of its neighbors belonging to the component, or a seed pixel being added to the component + + + :param upDiff: Maximal upper brightness/color difference between the currently observed pixel and one of its neighbors belonging to the component, or a seed pixel being added to the component + + + :param rect: The optional output parameter that the function sets to the minimum bounding rectangle of the repainted domain + + + :param flags: The operation flags. Lower bits contain connectivity value, 4 (by default) or 8, used within the function. Connectivity determines which neighbors of a pixel are considered. Upper bits can be 0 or a combination of the following flags: + + + * **FLOODFILL_FIXED_RANGE** if set, the difference between the current pixel and seed pixel is considered, otherwise the difference between neighbor pixels is considered (i.e. the range is floating) + + + * **FLOODFILL_MASK_ONLY** (for the second variant only) if set, the function does not change the image ( ``newVal`` is ignored), but fills the mask + + + + + +The functions +``floodFill`` +fill a connected component starting from the seed point with the specified color. The connectivity is determined by the color/brightness closeness of the neighbor pixels. The pixel at +:math:`(x,y)` +is considered to belong to the repainted domain if: + + + + + +* grayscale image, floating range + + + .. math:: + + \texttt{src} (x',y')- \texttt{loDiff} \leq \texttt{src} (x,y) \leq \texttt{src} (x',y')+ \texttt{upDiff} + + + + +* grayscale image, fixed range + + + .. math:: + + \texttt{src} ( \texttt{seed} .x, \texttt{seed} .y)- \texttt{loDiff} \leq \texttt{src} (x,y) \leq \texttt{src} ( \texttt{seed} .x, \texttt{seed} .y)+ \texttt{upDiff} + + + + +* color image, floating range + + + .. math:: + + \texttt{src} (x',y')_r- \texttt{loDiff} _r \leq \texttt{src} (x,y)_r \leq \texttt{src} (x',y')_r+ \texttt{upDiff} _r + + + + + .. math:: + + \texttt{src} (x',y')_g- \texttt{loDiff} _g \leq \texttt{src} (x,y)_g \leq \texttt{src} (x',y')_g+ \texttt{upDiff} _g + + + + + .. math:: + + \texttt{src} (x',y')_b- \texttt{loDiff} _b \leq \texttt{src} (x,y)_b \leq \texttt{src} (x',y')_b+ \texttt{upDiff} _b + + + + +* color image, fixed range + + + .. math:: + + \texttt{src} ( \texttt{seed} .x, \texttt{seed} .y)_r- \texttt{loDiff} _r \leq \texttt{src} (x,y)_r \leq \texttt{src} ( \texttt{seed} .x, \texttt{seed} .y)_r+ \texttt{upDiff} _r + + + + + .. math:: + + \texttt{src} ( \texttt{seed} .x, \texttt{seed} .y)_g- \texttt{loDiff} _g \leq \texttt{src} (x,y)_g \leq \texttt{src} ( \texttt{seed} .x, \texttt{seed} .y)_g+ \texttt{upDiff} _g + + + + + .. math:: + + \texttt{src} ( \texttt{seed} .x, \texttt{seed} .y)_b- \texttt{loDiff} _b \leq \texttt{src} (x,y)_b \leq \texttt{src} ( \texttt{seed} .x, \texttt{seed} .y)_b+ \texttt{upDiff} _b + + + + +where +:math:`src(x',y')` +is the value of one of pixel neighbors that is already known to belong to the component. 
That is, to be added to the connected component, a pixel's color/brightness should be close enough to the: + + + + +* + color/brightness of one of its neighbors that already belong to the connected component in the case of a floating range + + + +* + color/brightness of the seed point in the case of a fixed range. + + +By using these functions you can either mark a connected component with the specified color in-place, or build a mask and then extract the contour, or copy the region to another image etc. Various modes of the function are demonstrated in the +``floodfill.c`` +sample. + +See also: +:func:`findContours` + +.. index:: inpaint + + +cv::inpaint +----------- + +`id=0.00179717902812 Comments from the Wiki `__ + + + + +.. cfunction:: void inpaint( const Mat\& src, const Mat\& inpaintMask, Mat\& dst, double inpaintRadius, int flags ) + + Inpaints the selected region in the image. + + + + + + + :param src: The input 8-bit 1-channel or 3-channel image. + + + :param inpaintMask: The inpainting mask, 8-bit 1-channel image. Non-zero pixels indicate the area that needs to be inpainted. + + + :param dst: The output image; will have the same size and the same type as ``src`` + + + :param inpaintRadius: The radius of a circular neighborhood of each point inpainted that is considered by the algorithm. + + + :param flags: The inpainting method, one of the following: + + * **INPAINT_NS** Navier-Stokes based method. + + * **INPAINT_TELEA** The method by Alexandru Telea Telea04 + + + + + +The function reconstructs the selected image area from the pixels near the area boundary. The function may be used to remove dust and scratches from a scanned photo, or to remove undesirable objects from still images or video. See +http://en.wikipedia.org/wiki/Inpainting +for more details. + + + +.. index:: integral + + +cv::integral +------------ + +`id=0.889978298704 Comments from the Wiki `__ + + + + +.. cfunction:: void integral( const Mat\& image, Mat\& sum, int sdepth=-1 ) + + + +.. cfunction:: void integral( const Mat\& image, Mat\& sum, Mat\& sqsum, int sdepth=-1 ) + + + +.. cfunction:: void integral( const Mat\& image, Mat\& sum, Mat\& sqsum, Mat\& tilted, int sdepth=-1 ) + + Calculates the integral of an image. + + + + + + + :param image: The source image, :math:`W \times H` , 8-bit or floating-point (32f or 64f) + + + :param sum: The integral image, :math:`(W+1)\times (H+1)` , 32-bit integer or floating-point (32f or 64f) + + + :param sqsum: The integral image for squared pixel values, :math:`(W+1)\times (H+1)` , double precision floating-point (64f) + + + :param tilted: The integral for the image rotated by 45 degrees, :math:`(W+1)\times (H+1)` , the same data type as ``sum`` + + + :param sdepth: The desired depth of the integral and the tilted integral images, ``CV_32S`` , ``CV_32F`` or ``CV_64F`` + + + +The functions +``integral`` +calculate one or more integral images for the source image as follows: + + + +.. math:: + + \texttt{sum} (X,Y) = \sum _{x<X, y<Y} \texttt{image} (x,y) + + + +.. math:: + + \texttt{sqsum} (X,Y) = \sum _{x<X, y<Y} \texttt{image} (x,y)^2 + + + +.. math:: + + \texttt{tilted} (X,Y) = \sum _{y<Y, |x-X+1| \leq Y-y-1} \texttt{image} (x,y) + + +Using these integral images, one may calculate the sum, the mean and the standard deviation over a specific up-right or rotated rectangular region of the image in constant time, for example: + + + +.. math:: + + \sum _{x_1 \leq x < x_2, \, y_1 \leq y < y_2} \texttt{image} (x,y) = \texttt{sum} (x_2,y_2) - \texttt{sum} (x_1,y_2) - \texttt{sum} (x_2,y_1) + \texttt{sum} (x_1,y_1) + + +This makes it possible to do a fast blurring or fast block correlation with a variable window size, for example. In the case of multi-channel images, sums for each channel are accumulated independently. 
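For illustration, here is a minimal sketch of the constant-time rectangle sum computed from ``sum`` (the image ``img`` and the rectangle coordinates are assumed; the values are hypothetical): + + +:: + + + + // Minimal sketch: constant-time sum over a rectangle via the integral image. + // Assumes img is an existing 8-bit single-channel Mat. + Mat sum; + integral(img, sum, CV_32S); + int x1 = 10, y1 = 10, x2 = 100, y2 = 100; // hypothetical rectangle + int s = sum.at<int>(y2, x2) - sum.at<int>(y1, x2) + - sum.at<int>(y2, x1) + sum.at<int>(y1, x1); + + +.. 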
.. index:: threshold + + +cv::threshold +------------- + +`Comments from the Wiki `__ + + + + +.. cfunction:: double threshold( const Mat\& src, Mat\& dst, double thresh, double maxVal, int thresholdType ) + + Applies a fixed-level threshold to each array element. + + + + + + + :param src: Source array (single-channel, 8-bit or 32-bit floating point) + + + :param dst: Destination array; will have the same size and the same type as ``src`` + + + :param thresh: Threshold value + + + :param maxVal: Maximum value to use with ``THRESH_BINARY`` and ``THRESH_BINARY_INV`` thresholding types + + + :param thresholdType: Thresholding type (see the discussion) + + + +The function applies fixed-level thresholding +to a single-channel array. The function is typically used to get a +bi-level (binary) image out of a grayscale image ( +:func:`compare` +could +also be used for this purpose) or for removing noise, that is, filtering +out pixels with too small or too large values. The function supports several +types of thresholding, determined by +``thresholdType`` +: + + + + + * **THRESH_BINARY** + + .. math:: + + \texttt{dst} (x,y) = \fork{\texttt{maxVal}}{if $\texttt{src}(x,y) > \texttt{thresh}$}{0}{otherwise} + + + + + * **THRESH_BINARY_INV** + + .. math:: + + \texttt{dst} (x,y) = \fork{0}{if $\texttt{src}(x,y) > \texttt{thresh}$}{\texttt{maxVal}}{otherwise} + + + + + * **THRESH_TRUNC** + + .. math:: + + \texttt{dst} (x,y) = \fork{\texttt{thresh}}{if $\texttt{src}(x,y) > \texttt{thresh}$}{\texttt{src}(x,y)}{otherwise} + + + + + * **THRESH_TOZERO** + + .. math:: + + \texttt{dst} (x,y) = \fork{\texttt{src}(x,y)}{if $\texttt{src}(x,y) > \texttt{thresh}$}{0}{otherwise} + + + + + * **THRESH_TOZERO_INV** + + .. math:: + + \texttt{dst} (x,y) = \fork{0}{if $\texttt{src}(x,y) > \texttt{thresh}$}{\texttt{src}(x,y)}{otherwise} + + + + + +Also, the special value +``THRESH_OTSU`` +may be combined with +one of the above values. In this case the function determines the optimal threshold +value using Otsu's algorithm and uses it instead of the specified +``thresh`` +. +The function returns the computed threshold value. +Currently, Otsu's method is implemented only for 8-bit images. + + + +.. image:: ../../pics/threshold.png + + + +See also: +:func:`adaptiveThreshold` +, +:func:`findContours` +, +:func:`compare` +, +:func:`min` +, +:func:`max` + +.. index:: watershed + + +cv::watershed +------------- + +`id=0.488625172566 Comments from the Wiki `__ + + + + +.. cfunction:: void watershed( const Mat\& image, Mat\& markers ) + + Performs marker-based image segmentation using the watershed algorithm + + + + + + + :param image: The input 8-bit 3-channel image. + + + :param markers: The input/output 32-bit single-channel image (map) of markers. It should have the same size as ``image`` + + + +The function implements one of the variants +of the watershed, non-parametric marker-based segmentation algorithm, +described in +Meyer92 +. Before passing the image to the +function, the user has to roughly outline the desired regions in the image +``markers`` +with positive ( +:math:`>0` +) indices, i.e. every region is +represented as one or more connected components with the pixel values +1, 2, 3 etc. (such markers can be retrieved from a binary mask +using +:func:`findContours` +and +:func:`drawContours` +, see the +``watershed.cpp`` +demo). +The markers will be the "seeds" of the future image +regions. All the other pixels in +``markers`` +, whose relation to the +outlined regions is not known and should be determined by the algorithm, +should be set to 0's. In the output of the function, each pixel in +markers is set to one of the values of the "seed" components, or to -1 at +boundaries between the regions. + +Note that it is not necessary for every two neighboring connected +components to be separated by a watershed boundary (-1 pixels), for +example, when such tangent components exist in the initial +marker image. A visual demonstration and usage example of the function +can be found in the OpenCV samples directory; see the +``watershed.cpp`` +demo. + +See also: +:func:`findContours` 
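A minimal sketch of the marker preparation and call sequence described above ( ``img`` and the binary mask ``binMask`` are assumed to exist; this mirrors, but does not reproduce, the ``watershed.cpp`` demo): + + +:: + + + + // Minimal sketch: seed the markers from contours, then run watershed. + // Assumes img (8-bit 3-channel) and binMask (8-bit binary) exist. + vector<vector<Point> > contours; + findContours(binMask, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE); + Mat markers = Mat::zeros(img.size(), CV_32SC1); + for( int i = 0; i < (int)contours.size(); i++ ) + drawContours(markers, contours, i, Scalar::all(i+1), -1); + watershed(img, markers); // regions keep seed indices; boundaries become -1 + + +.. 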
.. index:: grabCut + + +cv::grabCut +----------- + +`id=0.466443243205 Comments from the Wiki `__ + + + + +.. cfunction:: void grabCut(const Mat\& image, Mat\& mask, Rect rect, Mat\& bgdModel, Mat\& fgdModel, int iterCount, int mode ) + + Runs the GrabCut algorithm + + + + + + + :param image: The input 8-bit 3-channel image. + + + :param mask: The input/output 8-bit single-channel mask. Its elements may have one of four values. The mask is initialized when ``mode==GC_INIT_WITH_RECT`` + + + + + * **GC_BGD** Certainly a background pixel + + + * **GC_FGD** Certainly a foreground (object) pixel + + + * **GC_PR_BGD** Likely a background pixel + + + * **GC_PR_FGD** Likely a foreground pixel + + + + + :param rect: The ROI containing the segmented object. The pixels outside of the ROI are marked as "certainly a background". The parameter is only used when ``mode==GC_INIT_WITH_RECT`` + + + :param bgdModel, fgdModel: Temporary arrays used for segmentation. Do not modify them while you are processing the same image + + + :param iterCount: The number of iterations the algorithm should make before returning the result. Note that the result can be refined with further calls with ``mode==GC_INIT_WITH_MASK`` or ``mode==GC_EVAL`` + + + :param mode: The operation mode + + + + + * **GC_INIT_WITH_RECT** The function initializes the state and the mask using the provided rectangle. After that it runs ``iterCount`` iterations of the algorithm + + + * **GC_INIT_WITH_MASK** The function initializes the state using the provided mask. Note that ``GC_INIT_WITH_RECT`` and ``GC_INIT_WITH_MASK`` can be combined; then all the pixels outside of the ROI are automatically initialized with ``GC_BGD`` + + + * **GC_EVAL** The value means that the algorithm should just resume. + + + + + +The function implements the +`GrabCut <http://en.wikipedia.org/wiki/GrabCut>`_ +image segmentation algorithm. +See the sample grabcut.cpp on how to use the function. 
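A minimal sketch of rectangle-initialized segmentation (the ROI values are hypothetical; the models start empty): + + +:: + + + + // Minimal sketch: GrabCut initialized with a rectangle. + // Assumes img is an existing 8-bit 3-channel image. + Mat mask, bgdModel, fgdModel; + Rect rect(50, 50, 200, 200); // hypothetical ROI around the object + grabCut(img, mask, rect, bgdModel, fgdModel, 5, GC_INIT_WITH_RECT); + // keep certainly-foreground and likely-foreground pixels + Mat fgd = (mask == GC_FGD) | (mask == GC_PR_FGD); + + +.. 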
diff --git a/modules/imgproc/doc/motion_analysis_and_object_tracking.rst b/modules/imgproc/doc/motion_analysis_and_object_tracking.rst new file mode 100644 index 000000000..d1df62baf --- /dev/null +++ b/modules/imgproc/doc/motion_analysis_and_object_tracking.rst @@ -0,0 +1,227 @@ +Motion Analysis and Object Tracking +=================================== + +.. highlight:: cpp + + + +.. index:: accumulate + + +cv::accumulate +-------------- + +`id=0.681079907994 Comments from the Wiki `__ + + + + +.. cfunction:: void accumulate( const Mat\& src, Mat\& dst, const Mat\& mask=Mat() ) + + Adds an image to the accumulator. + + + + + + + :param src: The input image, 1- or 3-channel, 8-bit or 32-bit floating point + + + :param dst: The accumulator image with the same number of channels as the input image, 32-bit or 64-bit floating-point + + + :param mask: Optional operation mask + + + +The function adds +``src`` +, or some of its elements, to +``dst`` +: + + + +.. math:: + + \texttt{dst} (x,y) \leftarrow \texttt{dst} (x,y) + \texttt{src} (x,y) \quad \text{if} \quad \texttt{mask} (x,y) \ne 0 + + +The function supports multi-channel images; each channel is processed independently. + +The functions +``accumulate*`` +can be used, for example, to collect statistics of the background of a scene, viewed by a still camera, for further foreground-background segmentation. + +See also: +:func:`accumulateSquare` +, +:func:`accumulateProduct` +, +:func:`accumulateWeighted` + +.. index:: accumulateSquare + + +cv::accumulateSquare +-------------------- + +`id=0.655955936814 Comments from the Wiki `__ + + + + +.. cfunction:: void accumulateSquare( const Mat\& src, Mat\& dst, const Mat\& mask=Mat() ) + + Adds the square of the source image to the accumulator. + + + + + + + :param src: The input image, 1- or 3-channel, 8-bit or 32-bit floating point + + + :param dst: The accumulator image with the same number of channels as the input image, 32-bit or 64-bit floating-point + + + :param mask: Optional operation mask + + + +The function adds the input image +``src`` +or its selected region, raised to the power of 2, to the accumulator +``dst`` +: + + + +.. math:: + + \texttt{dst} (x,y) \leftarrow \texttt{dst} (x,y) + \texttt{src} (x,y)^2 \quad \text{if} \quad \texttt{mask} (x,y) \ne 0 + + +The function supports multi-channel images; each channel is processed independently. + +See also: +:func:`accumulate` +, +:func:`accumulateProduct` +, +:func:`accumulateWeighted` + +.. index:: accumulateProduct + + +cv::accumulateProduct +--------------------- + +`id=0.866927763669 Comments from the Wiki `__ + + + + +.. cfunction:: void accumulateProduct( const Mat\& src1, const Mat\& src2, Mat\& dst, const Mat\& mask=Mat() ) + + Adds the per-element product of two input images to the accumulator. + + + + + + + :param src1: The first input image, 1- or 3-channel, 8-bit or 32-bit floating point + + + :param src2: The second input image of the same type and the same size as ``src1`` + + + :param dst: Accumulator with the same number of channels as the input images, 32-bit or 64-bit floating-point + + + :param mask: Optional operation mask + + + +The function adds the product of two images or their selected regions to the accumulator +``dst`` +: + + + +.. math:: + + \texttt{dst} (x,y) \leftarrow \texttt{dst} (x,y) + \texttt{src1} (x,y) \cdot \texttt{src2} (x,y) \quad \text{if} \quad \texttt{mask} (x,y) \ne 0 + + +The function supports multi-channel images; each channel is processed independently. + +See also: +:func:`accumulate` +, +:func:`accumulateSquare` +, +:func:`accumulateWeighted` + +.. index:: accumulateWeighted + + +cv::accumulateWeighted +---------------------- + +`id=0.956120320296 Comments from the Wiki `__ + + + + +.. cfunction:: void accumulateWeighted( const Mat\& src, Mat\& dst, double alpha, const Mat\& mask=Mat() ) + + Updates the running average. + + + + + + + :param src: The input image, 1- or 3-channel, 8-bit or 32-bit floating point + + + :param dst: The accumulator image with the same number of channels as the input image, 32-bit or 64-bit floating-point + + + :param alpha: Weight of the input image + + + :param mask: Optional operation mask + + + +The function calculates the weighted sum of the input image +``src`` +and the accumulator +``dst`` +so that +``dst`` +becomes a running average of the frame sequence: + + + +.. 
math:: + + \texttt{dst} (x,y) \leftarrow (1- \texttt{alpha} ) \cdot \texttt{dst} (x,y) + \texttt{alpha} \cdot \texttt{src} (x,y) \quad \text{if} \quad \texttt{mask} (x,y) \ne 0 + + +that is, +``alpha`` +regulates the update speed (how fast the accumulator "forgets" about earlier images). +The function supports multi-channel images; each channel is processed independently. + +See also: +:func:`accumulate` +, +:func:`accumulateSquare` +, +:func:`accumulateProduct` diff --git a/modules/imgproc/doc/object_detection.rst b/modules/imgproc/doc/object_detection.rst new file mode 100644 index 000000000..e5a497b21 --- /dev/null +++ b/modules/imgproc/doc/object_detection.rst @@ -0,0 +1,146 @@ +Object Detection +================ + +.. highlight:: cpp + + + +.. index:: matchTemplate + + +cv::matchTemplate +----------------- + +`id=0.821462672178 Comments from the Wiki `__ + + + + +.. cfunction:: void matchTemplate( const Mat\& image, const Mat\& templ, Mat\& result, int method ) + + Compares a template against overlapped image regions. + + + + + + + :param image: Image where the search is running; should be 8-bit or 32-bit floating-point + + + :param templ: Searched template; must be not greater than the source image and have the same data type + + + :param result: A map of comparison results; will be single-channel 32-bit floating-point. + If ``image`` is :math:`W \times H` and ``templ`` is :math:`w \times h` then ``result`` will be :math:`(W-w+1) \times (H-h+1)` + + + :param method: Specifies the comparison method (see below) + + + +The function slides through +``image`` +, compares the +overlapped patches of size +:math:`w \times h` +against +``templ`` +using the specified method and stores the comparison results to +``result`` +. Here are the formulas for the available comparison +methods ( +:math:`I` +denotes +``image`` +, +:math:`T` +``template`` +, +:math:`R` +``result`` +). The summation is done over template and/or the +image patch: +:math:`x' = 0...w-1, y' = 0...h-1` + + + + +* method=CV\_TM\_SQDIFF + + + .. math:: + + R(x,y)= \sum _{x',y'} (T(x',y')-I(x+x',y+y'))^2 + + + + +* method=CV\_TM\_SQDIFF\_NORMED + + + .. math:: + + R(x,y)= \frac{\sum_{x',y'} (T(x',y')-I(x+x',y+y'))^2}{\sqrt{\sum_{x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}} + + + + +* method=CV\_TM\_CCORR + + + .. math:: + + R(x,y)= \sum _{x',y'} (T(x',y') \cdot I(x+x',y+y')) + + + + +* method=CV\_TM\_CCORR\_NORMED + + + .. math:: + + R(x,y)= \frac{\sum_{x',y'} (T(x',y') \cdot I'(x+x',y+y'))}{\sqrt{\sum_{x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}} + + + + +* method=CV\_TM\_CCOEFF + + + .. math:: + + R(x,y)= \sum _{x',y'} (T'(x',y') \cdot I(x+x',y+y')) + + + where + + + .. math:: + + \begin{array}{l} T'(x',y')=T(x',y') - 1/(w \cdot h) \cdot \sum _{x'',y''} T(x'',y'') \\ I'(x+x',y+y')=I(x+x',y+y') - 1/(w \cdot h) \cdot \sum _{x'',y''} I(x+x'',y+y'') \end{array} + + + + +* method=CV\_TM\_CCOEFF\_NORMED + + + .. math:: + + R(x,y)= \frac{ \sum_{x',y'} (T'(x',y') \cdot I'(x+x',y+y')) }{ \sqrt{\sum_{x',y'}T'(x',y')^2 \cdot \sum_{x',y'} I'(x+x',y+y')^2} } + + + + +After the function finishes the comparison, the best matches can be found as global minimums (when +``CV_TM_SQDIFF`` +was used) or maximums (when +``CV_TM_CCORR`` +or +``CV_TM_CCOEFF`` +was used) using the +:func:`minMaxLoc` +function. In the case of a color image, template summation in the numerator and each sum in the denominator is done over all of the channels (and separate mean values are used for each channel). 
That is, the function can take a color template and a color image; the result will still be a single-channel image, which is easier to analyze. + diff --git a/modules/imgproc/doc/planar_subdivisions.rst b/modules/imgproc/doc/planar_subdivisions.rst new file mode 100644 index 000000000..0354358a1 --- /dev/null +++ b/modules/imgproc/doc/planar_subdivisions.rst @@ -0,0 +1,6 @@ +Planar Subdivisions +=================== + +.. highlight:: cpp + + diff --git a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst new file mode 100644 index 000000000..2d5a653ff --- /dev/null +++ b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst @@ -0,0 +1,1086 @@ +Structural Analysis and Shape Descriptors +========================================= + +.. highlight:: cpp + + + +.. index:: moments + + +cv::moments +----------- + +`id=0.590002473104 Comments from the Wiki `__ + + + + +.. cfunction:: Moments moments( const Mat\& array, bool binaryImage=false ) + + Calculates all of the moments up to the third order of a polygon or rasterized shape. + + + +The class +``Moments`` +is defined as: + + + +:: + + + + class Moments + { + public: + Moments(); + Moments(double m00, double m10, double m01, double m20, double m11, + double m02, double m30, double m21, double m12, double m03 ); + Moments( const CvMoments& moments ); + operator CvMoments() const; + + // spatial moments + double m00, m10, m01, m20, m11, m02, m30, m21, m12, m03; + // central moments + double mu20, mu11, mu02, mu30, mu21, mu12, mu03; + // central normalized moments + double nu20, nu11, nu02, nu30, nu21, nu12, nu03; + }; + + +.. + + + + + :param array: A raster image (single-channel, 8-bit or floating-point 2D array) or an array + ( :math:`1 \times N` or :math:`N \times 1` ) of 2D points ( ``Point`` or ``Point2f`` ) + + + :param binaryImage: (For images only) If it is true, then all the non-zero image pixels are treated as 1's + + + +The function computes moments, up to the 3rd order, of a vector shape or a rasterized shape. +In the case of a raster image, the spatial moments +:math:`\texttt{Moments::m}_{ji}` +are computed as: + + + +.. math:: + + \texttt{m} _{ji}= \sum _{x,y} \left ( \texttt{array} (x,y) \cdot x^j \cdot y^i \right ), + + +the central moments +:math:`\texttt{Moments::mu}_{ji}` +are computed as: + + +.. math:: + + \texttt{mu} _{ji}= \sum _{x,y} \left ( \texttt{array} (x,y) \cdot (x - \bar{x} )^j \cdot (y - \bar{y} )^i \right ) + + +where +:math:`(\bar{x}, \bar{y})` +is the mass center: + + + +.. math:: + + \bar{x} = \frac{\texttt{m}_{10}}{\texttt{m}_{00}} , \; \bar{y} = \frac{\texttt{m}_{01}}{\texttt{m}_{00}} + + +and the normalized central moments +:math:`\texttt{Moments::nu}_{ji}` +are computed as: + + +.. math:: + + \texttt{nu} _{ji}= \frac{\texttt{mu}_{ji}}{\texttt{m}_{00}^{(i+j)/2+1}} . + + +Note that +:math:`\texttt{mu}_{00}=\texttt{m}_{00}` +, +:math:`\texttt{nu}_{00}=1` +and +:math:`\texttt{nu}_{10}=\texttt{mu}_{10}=\texttt{nu}_{01}=\texttt{mu}_{01}=0` +, hence these values are not stored. + +The moments of a contour are defined in the same way, but computed using Green's formula +(see +http://en.wikipedia.org/wiki/Green_theorem +); therefore, because of a limited raster resolution, the moments computed for a contour will be slightly different from the moments computed for the same contour rasterized. + +See also: +:func:`contourArea` +, +:func:`arcLength` 
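For instance, the mass center of a shape can be obtained from the spatial moments (a minimal sketch; ``contour`` is an assumed ``vector<Point>`` , converted to a matrix as described above): + + +:: + + + + // Minimal sketch: mass center from spatial moments. + Moments m = moments(Mat(contour)); + Point2f center((float)(m.m10/m.m00), (float)(m.m01/m.m00)); + + +.. 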
.. index:: HuMoments + + +cv::HuMoments +------------- + +`id=0.89426793428 Comments from the Wiki `__ + + + + +.. cfunction:: void HuMoments( const Moments\& moments, double h[7] ) + + Calculates the seven Hu invariants. + + + + + + + :param moments: The input moments, computed with :func:`moments` + + + :param h: The output Hu invariants + + + +The function calculates the seven Hu invariants, see +http://en.wikipedia.org/wiki/Image_moment +, that are defined as: + + + +.. math:: + + \begin{array}{l} h[0]= \eta _{20}+ \eta _{02} \\ h[1]=( \eta _{20}- \eta _{02})^{2}+4 \eta _{11}^{2} \\ h[2]=( \eta _{30}-3 \eta _{12})^{2}+ (3 \eta _{21}- \eta _{03})^{2} \\ h[3]=( \eta _{30}+ \eta _{12})^{2}+ ( \eta _{21}+ \eta _{03})^{2} \\ h[4]=( \eta _{30}-3 \eta _{12})( \eta _{30}+ \eta _{12})[( \eta _{30}+ \eta _{12})^{2}-3( \eta _{21}+ \eta _{03})^{2}]+(3 \eta _{21}- \eta _{03})( \eta _{21}+ \eta _{03})[3( \eta _{30}+ \eta _{12})^{2}-( \eta _{21}+ \eta _{03})^{2}] \\ h[5]=( \eta _{20}- \eta _{02})[( \eta _{30}+ \eta _{12})^{2}- ( \eta _{21}+ \eta _{03})^{2}]+4 \eta _{11}( \eta _{30}+ \eta _{12})( \eta _{21}+ \eta _{03}) \\ h[6]=(3 \eta _{21}- \eta _{03})( \eta _{21}+ \eta _{03})[3( \eta _{30}+ \eta _{12})^{2}-( \eta _{21}+ \eta _{03})^{2}]-( \eta _{30}-3 \eta _{12})( \eta _{21}+ \eta _{03})[3( \eta _{30}+ \eta _{12})^{2}-( \eta _{21}+ \eta _{03})^{2}] \\ \end{array} + + +where +:math:`\eta_{ji}` +stand for +:math:`\texttt{Moments::nu}_{ji}` +. + +These values are proved to be invariant to the image scale, rotation, and reflection, except for the seventh one, whose sign is changed by reflection. Of course, this invariance was proved with the assumption of infinite image resolution. In the case of raster images the computed Hu invariants for the original and transformed images will be a bit different. + +See also: +:func:`matchShapes` + +.. index:: findContours + + +cv::findContours +---------------- + +`id=0.588223691954 Comments from the Wiki `__ + + + + +.. cfunction:: void findContours( const Mat\& image, vector<vector<Point> >\& contours, vector<Vec4i>\& hierarchy, int mode, int method, Point offset=Point()) + + + +.. cfunction:: void findContours( const Mat\& image, vector<vector<Point> >\& contours, int mode, int method, Point offset=Point()) + + Finds the contours in a binary image. + + + + + + + :param image: The source, an 8-bit single-channel image. Non-zero pixels are treated as 1's, zero pixels remain 0's - the image is treated as ``binary`` . You can use :func:`compare` , :func:`inRange` , :func:`threshold` , :func:`adaptiveThreshold` , :func:`Canny` etc. to create a binary image out of a grayscale or color one. The function modifies the ``image`` while extracting the contours + + + :param contours: The detected contours. Each contour is stored as a vector of points + + + :param hierarchy: The optional output vector that will contain information about the image topology. It will have as many elements as the number of contours. For each contour ``contours[i]`` , the elements ``hierarchy[i][0]`` , ``hierarchy[i][1]`` , ``hierarchy[i][2]`` , ``hierarchy[i][3]`` will be set to 0-based indices in ``contours`` of the next and previous contours at the same hierarchical level, the first child contour and the parent contour, respectively. 
If for some contour ``i`` there is no next, previous, parent or nested contours, the corresponding elements of ``hierarchy[i]`` will be negative + + + :param mode: The contour retrieval mode + + + * **CV_RETR_EXTERNAL** retrieves only the extreme outer contours; It will set ``hierarchy[i][2]=hierarchy[i][3]=-1`` for all the contours + + + * **CV_RETR_LIST** retrieves all of the contours without establishing any hierarchical relationships + + + * **CV_RETR_CCOMP** retrieves all of the contours and organizes them into a two-level hierarchy: on the top level are the external boundaries of the components, on the second level are the boundaries of the holes. If inside a hole of a connected component there is another contour, it will still be put on the top level + + + * **CV_RETR_TREE** retrieves all of the contours and reconstructs the full hierarchy of nested contours. This full hierarchy is built and shown in OpenCV ``contours.c`` demo + + + + + :param method: The contour approximation method. + + + * **CV_CHAIN_APPROX_NONE** stores absolutely all the contour points. That is, every 2 points of a contour stored with this method are 8-connected neighbors of each other + + + * **CV_CHAIN_APPROX_SIMPLE** compresses horizontal, vertical, and diagonal segments and leaves only their end points. E.g. an up-right rectangular contour will be encoded with 4 points + + + * **CV_CHAIN_APPROX_TC89_L1,CV_CHAIN_APPROX_TC89_KCOS** applies one of the flavors of the Teh-Chin chain approximation algorithm; see TehChin89 + + + + + :param offset: The optional offset, by which every contour point is shifted. This is useful if the contours are extracted from the image ROI and then they should be analyzed in the whole image context + + + +The function retrieves contours from the +binary image using the algorithm +Suzuki85 +. The contours are a useful tool for shape analysis and object detection and recognition. See +``squares.c`` +in the OpenCV sample directory. + +**Note:** +the source +``image`` +is modified by this function. + + +.. index:: drawContours + + +cv::drawContours +---------------- + +`id=0.331502695445 Comments from the Wiki `__ + + + + +.. cfunction:: void drawContours( Mat\& image, const vector >\& contours, int contourIdx, const Scalar\& color, int thickness=1, int lineType=8, const vector\& hierarchy=vector(), int maxLevel=INT_MAX, Point offset=Point() ) + + Draws contours' outlines or filled contours. + + + + + + + :param image: The destination image + + + :param contours: All the input contours. Each contour is stored as a point vector + + + :param contourIdx: Indicates the contour to draw. If it is negative, all the contours are drawn + + + :param color: The contours' color + + + :param thickness: Thickness of lines the contours are drawn with. + If it is negative (e.g. ``thickness=CV_FILLED`` ), the contour interiors are + drawn. + + + :param lineType: The line connectivity; see :func:`line` description + + + :param hierarchy: The optional information about hierarchy. It is only needed if you want to draw only some of the contours (see ``maxLevel`` ) + + + :param maxLevel: Maximal level for drawn contours. If 0, only + the specified contour is drawn. If 1, the function draws the contour(s) and all the nested contours. If 2, the function draws the contours, all the nested contours and all the nested into nested contours etc. This parameter is only taken into account when there is ``hierarchy`` available. + + + :param offset: The optional contour shift parameter. 
Shift all the drawn contours by the specified :math:`\texttt{offset}=(dx,dy)` + + + +The function draws contour outlines in the image if +:math:`\texttt{thickness} \ge 0` +or fills the area bounded by the contours if +:math:`\texttt{thickness}<0` +. Here is an example of how to retrieve connected components from a binary image and label them: + + + + +:: + + + + #include "cv.h" + #include "highgui.h" + + using namespace cv; + + int main( int argc, char** argv ) + { + Mat src; + // the first command line parameter must be the file name of a binary + // (black-n-white) image + if( argc != 2 || !(src=imread(argv[1], 0)).data) + return -1; + + Mat dst = Mat::zeros(src.rows, src.cols, CV_8UC3); + + src = src > 1; + namedWindow( "Source", 1 ); + imshow( "Source", src ); + + vector<vector<Point> > contours; + vector<Vec4i> hierarchy; + + findContours( src, contours, hierarchy, + CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE ); + + // iterate through all the top-level contours, + // draw each connected component with its own random color + int idx = 0; + for( ; idx >= 0; idx = hierarchy[idx][0] ) + { + Scalar color( rand()&255, rand()&255, rand()&255 ); + drawContours( dst, contours, idx, color, CV_FILLED, 8, hierarchy ); + } + + namedWindow( "Components", 1 ); + imshow( "Components", dst ); + waitKey(0); + } + + +.. + + +.. index:: approxPolyDP + + +cv::approxPolyDP +---------------- + +`id=0.742043912961 Comments from the Wiki `__ + + + + +.. cfunction:: void approxPolyDP( const Mat\& curve, vector<Point>\& approxCurve, double epsilon, bool closed ) + + + +.. cfunction:: void approxPolyDP( const Mat\& curve, vector<Point2f>\& approxCurve, double epsilon, bool closed ) + + Approximates polygonal curve(s) with the specified precision. + + + + + + + :param curve: The polygon or curve to approximate. Must be a :math:`1 \times N` or :math:`N \times 1` matrix of type ``CV_32SC2`` or ``CV_32FC2`` . You can also convert ``vector<Point>`` or ``vector<Point2f>`` to the matrix using the ``Mat(const vector<T>&)`` constructor. + + + :param approxCurve: The result of the approximation; the type should match the type of the input curve + + + :param epsilon: Specifies the approximation accuracy. This is the maximum distance between the original curve and its approximation + + + :param closed: If true, the approximated curve is closed (i.e. its first and last vertices are connected), otherwise it's not + + + +The functions +``approxPolyDP`` +approximate a curve or a polygon with another curve/polygon with fewer vertices, so that the distance between them is less than or equal to the specified precision. It uses the Douglas-Peucker algorithm +http://en.wikipedia.org/wiki/Ramer-Douglas-Peucker_algorithm + +.. index:: arcLength + + +cv::arcLength +------------- + +`id=0.67500264216 Comments from the Wiki `__ + + + + +.. cfunction:: double arcLength( const Mat\& curve, bool closed ) + + Calculates a contour perimeter or a curve length. + + + + + + + :param curve: The input vector of 2D points, represented by a ``CV_32SC2`` or ``CV_32FC2`` matrix, or by ``vector<Point>`` or ``vector<Point2f>`` converted to a matrix with the ``Mat(const vector<T>&)`` constructor + + + :param closed: Indicates whether the curve is closed or not + + + +The function computes the curve length or the closed contour perimeter. 
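A minimal sketch combining the two functions above (the 2% tolerance is illustrative only; ``contour`` is an assumed ``vector<Point>`` ): + + +:: + + + + // Minimal sketch: simplify a contour, then measure its perimeter. + vector<Point> approx; + // tolerance: 2% of the original closed-contour perimeter + double eps = 0.02*arcLength(Mat(contour), true); + approxPolyDP(Mat(contour), approx, eps, true); + double perimeter = arcLength(Mat(approx), true); + + +.. 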
.. index:: boundingRect + + +cv::boundingRect +---------------- + +`id=0.804384497809 Comments from the Wiki `__ + + + + +.. cfunction:: Rect boundingRect( const Mat\& points ) + + Calculates the up-right bounding rectangle of a point set. + + + + + + + :param points: The input 2D point set, represented by a ``CV_32SC2`` or ``CV_32FC2`` matrix, or by ``vector<Point>`` or ``vector<Point2f>`` converted to the matrix using the ``Mat(const vector<T>&)`` constructor. + + + +The function calculates and returns the minimal up-right bounding rectangle for the specified point set. + + + +.. index:: estimateRigidTransform + + +cv::estimateRigidTransform +-------------------------- + +`id=0.692485680585 Comments from the Wiki `__ + + + + +.. cfunction:: Mat estimateRigidTransform( const Mat\& srcpt, const Mat\& dstpt, bool fullAffine ) + + Computes an optimal affine transformation between two 2D point sets + + + + + + + :param srcpt: The first input 2D point set + + + :param dstpt: The second input 2D point set of the same size and the same type as ``srcpt`` + + + :param fullAffine: If true, the function finds the optimal affine transformation without any additional restrictions (i.e. there are 6 degrees of freedom); otherwise, the class of transformations to choose from is limited to combinations of translation, rotation and uniform scaling (i.e. there are 4 degrees of freedom) + + + +The function finds the optimal affine transform +:math:`[A|b]` +(a +:math:`2 \times 3` +floating-point matrix) that best approximates the transformation from +:math:`\texttt{srcpt}_i` +to +:math:`\texttt{dstpt}_i` +: + + + +.. math:: + + [A^*|b^*] = arg \min _{[A|b]} \sum _i \| \texttt{dstpt} _i - A { \texttt{srcpt} _i}^T - b \| ^2 + + +where +:math:`[A|b]` +can be either arbitrary (when +``fullAffine=true`` +) or have the form + + +.. math:: + + \begin{bmatrix} a_{11} & a_{12} & b_1 \\ -a_{12} & a_{11} & b_2 \end{bmatrix} + + +when +``fullAffine=false`` +. + +See also: +:func:`getAffineTransform` +, +:func:`getPerspectiveTransform` +, +:func:`findHomography` + +.. index:: estimateAffine3D + + +cv::estimateAffine3D +-------------------- + +`id=0.36971452976 Comments from the Wiki `__ + + + + +.. cfunction:: int estimateAffine3D(const Mat\& srcpt, const Mat\& dstpt, Mat\& out, vector<uchar>\& outliers, double ransacThreshold = 3.0, double confidence = 0.99) + + Computes an optimal affine transformation between two 3D point sets + + + + + + + :param srcpt: The first input 3D point set + + + :param dstpt: The second input 3D point set + + + :param out: The output :math:`3 \times 4` 3D affine transformation matrix + + + :param outliers: The output vector indicating which points are outliers + + + :param ransacThreshold: The maximum reprojection error in the RANSAC algorithm to consider a point an inlier + + + :param confidence: The confidence level, between 0 and 1, with which the matrix is estimated + + + +The function estimates the optimal 3D affine transformation between two 3D point sets using the RANSAC algorithm. + + + +.. index:: contourArea + + +cv::contourArea +--------------- + +`id=0.725148942941 Comments from the Wiki `__ + + + + +.. cfunction:: double contourArea( const Mat\& contour ) + + Calculates the contour area + + + + + + + :param contour: The contour vertices, represented by a ``CV_32SC2`` or ``CV_32FC2`` matrix, or by ``vector<Point>`` or ``vector<Point2f>`` converted to the matrix using the ``Mat(const vector<T>&)`` constructor. + + + +The function computes the contour area. Similarly to +:func:`moments` +, the area is computed using Green's formula; thus, the returned area and the number of non-zero pixels, if you draw the contour using +:func:`drawContours` +or +:func:`fillPoly` +, can be different. 
Here is a short example: + + + + +:: + + + + vector<Point2f> contour; + contour.push_back(Point2f(0, 0)); + contour.push_back(Point2f(10, 0)); + contour.push_back(Point2f(10, 10)); + contour.push_back(Point2f(5, 4)); + + double area0 = contourArea(contour); + vector<Point2f> approx; + approxPolyDP(contour, approx, 5, true); + double area1 = contourArea(approx); + + cout << "area0 =" << area0 << endl << + "area1 =" << area1 << endl << + "approx poly vertices" << approx.size() << endl; + + +.. + + +.. index:: convexHull + + +cv::convexHull +-------------- + +`id=0.132488090238 Comments from the Wiki `__ + + + + +.. cfunction:: void convexHull( const Mat\& points, vector<int>\& hull, bool clockwise=false ) + + + +.. cfunction:: void convexHull( const Mat\& points, vector<Point>\& hull, bool clockwise=false ) + + + +.. cfunction:: void convexHull( const Mat\& points, vector<Point2f>\& hull, bool clockwise=false ) + + Finds the convex hull of a point set. + + + + + + + :param points: The input 2D point set, represented by a ``CV_32SC2`` or ``CV_32FC2`` matrix, or by ``vector<Point>`` or ``vector<Point2f>`` converted to the matrix using the ``Mat(const vector<T>&)`` constructor. + + + :param hull: The output convex hull. It is either a vector of points that form the hull (must have the same type as the input points), or a vector of 0-based point indices of the hull points in the original array (since the set of convex hull points is a subset of the original point set). + + + :param clockwise: If true, the output convex hull will be oriented clockwise, otherwise it will be oriented counter-clockwise. Here, the usual screen coordinate system is assumed - the origin is at the top-left corner, the x axis is oriented to the right, and the y axis is oriented downwards. + + + +The functions find the convex hull of a 2D point set using Sklansky's algorithm +Sklansky82 +that has +:math:`O(N \log N)` +or +:math:`O(N)` +complexity (where +:math:`N` +is the number of input points), depending on how the initial sorting is implemented (currently it is +:math:`O(N \log N)` +). See the OpenCV sample +``convexhull.c`` +that demonstrates the use of the different function variants. + + + +.. index:: fitEllipse + + +cv::fitEllipse +-------------- + +`id=0.626729818481 Comments from the Wiki `__ + + + + +.. cfunction:: RotatedRect fitEllipse( const Mat\& points ) + + Fits an ellipse around a set of 2D points. + + + + + + + :param points: The input 2D point set, represented by a ``CV_32SC2`` or ``CV_32FC2`` matrix, or by ``vector<Point>`` or ``vector<Point2f>`` converted to the matrix using the ``Mat(const vector<T>&)`` constructor. + + + +The function calculates the ellipse that fits a set of 2D points best +(in a least-squares sense). It returns the rotated rectangle in which the ellipse is inscribed. 
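A minimal sketch ( ``points`` is an assumed ``vector<Point>`` with at least 5 elements, and ``img`` is an assumed image to draw into): + + +:: + + + + // Minimal sketch: fit an ellipse to a point set and draw it. + RotatedRect box = fitEllipse(Mat(points)); + ellipse(img, box, Scalar(0, 255, 0), 1); + + +.. 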
.. index:: fitLine + + +cv::fitLine +----------- + +`id=0.448441220498 Comments from the Wiki `__ + + + + +.. cfunction:: void fitLine( const Mat\& points, Vec4f\& line, int distType, double param, double reps, double aeps ) + + + +.. cfunction:: void fitLine( const Mat\& points, Vec6f\& line, int distType, double param, double reps, double aeps ) + + Fits a line to a 2D or 3D point set. + + + + + + + :param points: The input 2D point set, represented by a ``CV_32SC2`` or ``CV_32FC2`` matrix, or by ``vector<Point>`` , ``vector<Point2f>`` , ``vector<Point3i>`` or ``vector<Point3f>`` converted to the matrix by the ``Mat(const vector<T>&)`` constructor + + + :param line: The output line parameters. In the case of a 2D fitting, + it is a vector of 4 floats ``(vx, vy, + x0, y0)`` where ``(vx, vy)`` is a normalized vector collinear to the + line and ``(x0, y0)`` is some point on the line. In the case of a + 3D fitting it is a vector of 6 floats ``(vx, vy, vz, x0, y0, z0)`` + where ``(vx, vy, vz)`` is a normalized vector collinear to the line + and ``(x0, y0, z0)`` is some point on the line + + + :param distType: The distance used by the M-estimator (see the discussion) + + + :param param: Numerical parameter ( ``C`` ) for some types of distances, if 0 then some optimal value is chosen + + + :param reps, aeps: Sufficient accuracy for the radius (distance between the coordinate origin and the line) and angle, respectively; 0.01 would be a good default value for both. + + + +The functions +``fitLine`` +fit a line to a 2D or 3D point set by minimizing +:math:`\sum_i \rho(r_i)` +where +:math:`r_i` +is the distance between the +:math:`i^{th}` +point and the line and +:math:`\rho(r)` +is a distance function, one of: + + + + + +* distType=CV\_DIST\_L2 + + + .. math:: + + \rho (r) = r^2/2 \quad \text{(the simplest and the fastest least-squares method)} + + + + +* distType=CV\_DIST\_L1 + + + .. math:: + + \rho (r) = r + + + + +* distType=CV\_DIST\_L12 + + + .. math:: + + \rho (r) = 2 \cdot ( \sqrt{1 + \frac{r^2}{2}} - 1) + + + + +* distType=CV\_DIST\_FAIR + + + .. math:: + + \rho \left (r \right ) = C^2 \cdot \left ( \frac{r}{C} - \log{\left(1 + \frac{r}{C}\right)} \right ) \quad \text{where} \quad C=1.3998 + + + + +* distType=CV\_DIST\_WELSCH + + + .. math:: + + \rho \left (r \right ) = \frac{C^2}{2} \cdot \left ( 1 - \exp{\left(-\left(\frac{r}{C}\right)^2\right)} \right ) \quad \text{where} \quad C=2.9846 + + + + +* distType=CV\_DIST\_HUBER + + + .. math:: + + \rho (r) = \fork{r^2/2}{if $r < C$}{C \cdot (r-C/2)}{otherwise} \quad \text{where} \quad C=1.345 + + + + +The algorithm is based on the M-estimator ( +http://en.wikipedia.org/wiki/M-estimator +) technique, that iteratively fits the line using the weighted least-squares algorithm, and after each iteration the weights +:math:`w_i` +are adjusted to be inversely proportional to +:math:`\rho(r_i)` +. + + + +.. index:: isContourConvex + + +cv::isContourConvex +------------------- + +`id=0.120465278519 Comments from the Wiki `__ + + + + +.. cfunction:: bool isContourConvex( const Mat\& contour ) + + Tests contour convexity. + + + + + + + :param contour: The tested contour, a matrix of type ``CV_32SC2`` or ``CV_32FC2`` , or ``vector<Point>`` or ``vector<Point2f>`` converted to the matrix using the ``Mat(const vector<T>&)`` constructor. + + + +The function tests whether the input contour is convex or not. The contour must be simple, i.e. without self-intersections, otherwise the function output is undefined. + + + +.. index:: minAreaRect + + +cv::minAreaRect +--------------- + +`id=0.652453833352 Comments from the Wiki `__ + + + + +.. cfunction:: RotatedRect minAreaRect( const Mat\& points ) + + Finds the minimum area rotated rectangle enclosing a 2D point set. + + + + + + + :param points: The input 2D point set, represented by a ``CV_32SC2`` or ``CV_32FC2`` matrix, or by ``vector<Point>`` or ``vector<Point2f>`` converted to the matrix using the ``Mat(const vector<T>&)`` constructor. + + + +The function calculates and returns the minimum area bounding rectangle (possibly rotated) for the specified point set. See the OpenCV sample +``minarea.c`` 
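A minimal sketch ( ``points`` is an assumed 2D point set): + + +:: + + + + // Minimal sketch: minimum-area rotated rectangle of a point set. + RotatedRect rr = minAreaRect(Mat(points)); + Point2f corners[4]; + rr.points(corners); // the 4 vertices of the rotated rectangle + + +.. 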
.. index:: minEnclosingCircle + + +cv::minEnclosingCircle +---------------------- + +`id=0.373088882575 Comments from the Wiki `__ + + + + +.. cfunction:: void minEnclosingCircle( const Mat\& points, Point2f\& center, float\& radius ) + + Finds the minimum area circle enclosing a 2D point set. + + + + + + + :param points: The input 2D point set, represented by a ``CV_32SC2`` or ``CV_32FC2`` matrix, or by ``vector<Point>`` or ``vector<Point2f>`` converted to the matrix using the ``Mat(const vector<T>&)`` constructor. + + + :param center: The output center of the circle + + + :param radius: The output radius of the circle + + + +The function finds the minimal enclosing circle of a 2D point set using an iterative algorithm. See the OpenCV sample +``minarea.c`` + +.. index:: matchShapes + + +cv::matchShapes +--------------- + +`id=0.0434997600134 Comments from the Wiki `__ + + + + +.. cfunction:: double matchShapes( const Mat\& object1, const Mat\& object2, int method, double parameter=0 ) + + Compares two shapes. + + + + + + + :param object1: The first contour or grayscale image + + + :param object2: The second contour or grayscale image + + + :param method: Comparison method: + ``CV_CONTOURS_MATCH_I1`` , \ + ``CV_CONTOURS_MATCH_I2`` \ + or + ``CV_CONTOURS_MATCH_I3`` (see the discussion below) + + + :param parameter: Method-specific parameter (is not used now) + + + +The function compares two shapes. The 3 implemented methods all use Hu invariants (see +:func:`HuMoments` +) as follows ( +:math:`A` +denotes +``object1`` +, +:math:`B` +denotes +``object2`` +): + + + + + +* method=CV\_CONTOURS\_MATCH\_I1 + + + .. math:: + + I_1(A,B) = \sum _{i=1...7} \left | \frac{1}{m^A_i} - \frac{1}{m^B_i} \right | + + + + +* method=CV\_CONTOURS\_MATCH\_I2 + + + .. math:: + + I_2(A,B) = \sum _{i=1...7} \left | m^A_i - m^B_i \right | + + + + +* method=CV\_CONTOURS\_MATCH\_I3 + + + .. math:: + + I_3(A,B) = \sum _{i=1...7} \frac{ \left| m^A_i - m^B_i \right| }{ \left| m^A_i \right| } + + + + +where + + + +.. math:: + + \begin{array}{l} m^A_i = \mathrm{sign} (h^A_i) \cdot \log{h^A_i} \\ m^B_i = \mathrm{sign} (h^B_i) \cdot \log{h^B_i} \end{array} + + +and +:math:`h^A_i, h^B_i` +are the Hu moments of +:math:`A` +and +:math:`B` +respectively. + + + +.. index:: pointPolygonTest + + +cv::pointPolygonTest +-------------------- + +`id=0.709544893205 Comments from the Wiki `__ + + + + +.. cfunction:: double pointPolygonTest( const Mat\& contour, Point2f pt, bool measureDist ) + + Performs point-in-contour test. + + + + + + + :param contour: The input contour + + + :param pt: The point tested against the contour + + + :param measureDist: If true, the function estimates the signed distance from the point to the nearest contour edge; otherwise, the function only checks if the point is inside or not. + + + +The function determines whether the +point is inside a contour, outside, or lies on an edge (or coincides +with a vertex). It returns a positive (inside), negative (outside) or zero (on an edge) value, +correspondingly. When +``measureDist=false`` +, the return value +is +1, -1 or 0, respectively. Otherwise, the return value +is the signed distance between the point and the nearest contour +edge. + +Here is the sample output of the function, where each image pixel is tested against the contour. + + + +.. image:: ../../pics/pointpolygon.png 
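A minimal sketch of both modes ( ``contour`` is an assumed ``vector<Point>`` ; the test point is hypothetical): + + +:: + + + + // Minimal sketch: point-in-contour test in both modes. + Point2f pt(10, 10); // hypothetical test point + double inside = pointPolygonTest(Mat(contour), pt, false); // +1, -1 or 0 + double dist = pointPolygonTest(Mat(contour), pt, true); // signed distance + + +.. 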
diff --git a/modules/index.rst b/modules/index.rst new file mode 100644 index 000000000..7e9ae97d6 --- /dev/null +++ b/modules/index.rst @@ -0,0 +1,31 @@ +.. opencvstd documentation master file, created by + sphinx-quickstart on Mon Feb 14 00:30:43 2011. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to opencvstd's documentation! +===================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + core/doc/intro.rst + core/doc/core.rst + imgproc/doc/imgproc.rst + highgui/doc/highgui.rst + video/doc/video.rst + calib3d/doc/calib3d.rst + features2d/doc/features2d.rst + objdetect/doc/objdetect.rst + ml/doc/ml.rst + gpu/doc/gpu.rst + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/modules/ml/doc/boosting.rst b/modules/ml/doc/boosting.rst new file mode 100644 index 000000000..729d22146 --- /dev/null +++ b/modules/ml/doc/boosting.rst @@ -0,0 +1,414 @@ +Boosting +======== + +.. highlight:: cpp + + +A common machine learning task is supervised learning. In supervised learning, the goal is to learn the functional relationship +:math:`F: y = F(x)` +between the input +:math:`x` +and the output +:math:`y` +. Predicting the qualitative output is called classification, while predicting the quantitative output is called regression. + +Boosting is a powerful learning concept which provides a solution to the supervised classification learning task. It combines the performance of many "weak" classifiers to produce a powerful 'committee' +:ref:`HTF01` +. A weak classifier is only required to be better than chance, and thus can be very simple and computationally inexpensive. Many of them, smartly combined, however, result in a strong classifier, which often outperforms most 'monolithic' strong classifiers such as SVMs and Neural Networks. + +Decision trees are the most popular weak classifiers used in boosting schemes. Often the simplest decision trees with only a single split node per tree (called stumps) are sufficient. + +The boosted model is based on +:math:`N` +training examples +:math:`\{(x_i,y_i)\}_1^N` +with +:math:`x_i \in R^K` +and +:math:`y_i \in \{-1, +1\}` +. +:math:`x_i` +is a +:math:`K` +-component vector. Each component encodes a feature relevant for the learning task at hand. The desired two-class output is encoded as -1 and +1. + +Different variants of boosting are known, such as Discrete AdaBoost, Real AdaBoost, LogitBoost, and Gentle AdaBoost +:ref:`FHT98` +. All of them are very similar in their overall structure. Therefore, we will look only at the standard two-class Discrete AdaBoost algorithm as shown in the box below. Each sample is initially assigned the same weight (step 2). Next a weak classifier +:math:`f_m(x)` +is trained on the weighted training data (step 3a). Its weighted training error and scaling factor +:math:`c_m` +are computed (step 3b). The weights are increased for training samples that have been misclassified (step 3c). All weights are then normalized, and the process of finding the next weak classifier continues for another +:math:`M-1` +times. The final classifier +:math:`F(x)` +is the sign of the weighted sum over the individual weak classifiers (step 4). + + + + + +* + Given + :math:`N` + examples + :math:`\{(x_i,y_i)\}_1^N` + with + :math:`x_i \in R^K, y_i \in \{-1, +1\}` + . + + +* + Start with weights + :math:`w_i = 1/N, i = 1,...,N` + . + + +* + Repeat for + :math:`m` + = + :math:`1,2,...,M` + : + + + + + * + Fit the classifier + :math:`f_m(x) \in \{-1,1\}` + , using weights + :math:`w_i` + on the training data. + + + * + Compute + :math:`err_m = E_w [1_{(y \neq f_m(x))}], c_m = \log((1 - err_m)/err_m)` + . 
+ + + * + Set + :math:`w_i \Leftarrow w_i \exp[c_m 1_{(y_i \neq f_m(x_i))}], i = 1,2,...,N,` + and renormalize so that + :math:`\Sigma_i w_i = 1` + . + + + * + Output the classifier + :math:`\textrm{sign}[\Sigma_{m=1}^{M} c_m f_m(x)]` + . + + + + +Two-class Discrete AdaBoost Algorithm: Training (steps 1 to 3) and Evaluation (step 4) +**NOTE:** +Like the classical boosting methods, the current implementation supports 2-class classifiers only. For M +:math:`>` +2 classes there is the +**AdaBoost.MH** +algorithm, described in +:ref:`FHT98` +, that reduces the problem to the 2-class problem, yet with a much larger training set. + +In order to reduce computation time for boosted models without substantially losing accuracy, the influence trimming technique may be employed. As the training algorithm proceeds and the number of trees in the ensemble is increased, a larger number of the training samples are classified correctly and with increasing confidence, and those samples therefore receive smaller weights on the subsequent iterations. Examples with a very low relative weight have a small impact on the training of the weak classifier. Thus such examples may be excluded during the weak classifier training without having much effect on the induced classifier. This process is controlled with the ``weight_trim_rate`` parameter. Only examples whose summary weight amounts to the ``weight_trim_rate`` fraction of the total weight mass are used in the weak classifier training. Note that the weights for +**all** +training examples are recomputed at each training iteration. Examples deleted at a particular iteration may be used again for learning some of the weak classifiers further +:ref:`FHT98` +. + +**[HTF01] Hastie, T., Tibshirani, R., Friedman, J. H. The Elements of Statistical Learning: Data Mining, Inference, and Prediction. Springer Series in Statistics. 2001.** +**[FHT98] Friedman, J. H., Hastie, T. and Tibshirani, R. Additive Logistic Regression: a Statistical View of Boosting. Technical Report, Dept. of Statistics, Stanford University, 1998.** + +.. index:: CvBoostParams + +.. _CvBoostParams: + +CvBoostParams +------------- + +`id=0.227680975216 Comments from the Wiki `__ + +.. ctype:: CvBoostParams + + + +Boosting training parameters. + + + + +:: + + + + struct CvBoostParams : public CvDTreeParams + { + int boost_type; + int weak_count; + int split_criteria; + double weight_trim_rate; + + CvBoostParams(); + CvBoostParams( int boost_type, int weak_count, double weight_trim_rate, + int max_depth, bool use_surrogates, const float* priors ); + }; + + +.. + +The structure is derived from +:ref:`CvDTreeParams` +, but not all of the decision tree parameters are supported. In particular, cross-validation is not supported. + + + +.. index:: CvBoostTree + +.. _CvBoostTree: + +CvBoostTree +----------- + +`id=0.166418635075 Comments from the Wiki `__ + +.. ctype:: CvBoostTree + + + +Weak tree classifier. + + + + +:: + + + + class CvBoostTree: public CvDTree + { + public: + CvBoostTree(); + virtual ~CvBoostTree(); + + virtual bool train( CvDTreeTrainData* _train_data, + const CvMat* subsample_idx, CvBoost* ensemble ); + virtual void scale( double s ); + virtual void read( CvFileStorage* fs, CvFileNode* node, + CvBoost* ensemble, CvDTreeTrainData* _data ); + virtual void clear(); + + protected: + ... + CvBoost* ensemble; + }; + + +.. + +The weak classifier, a component of the boosted tree classifier +:ref:`CvBoost` +, is a derivative of +:ref:`CvDTree` +. 
Normally, there is no need to use the weak classifiers directly, however they can be accessed as elements of the sequence +``CvBoost::weak`` +, retrieved by +``CvBoost::get_weak_predictors`` +. + +Note, that in the case of LogitBoost and Gentle AdaBoost each weak predictor is a regression tree, rather than a classification tree. Even in the case of Discrete AdaBoost and Real AdaBoost the +``CvBoostTree::predict`` +return value ( +``CvDTreeNode::value`` +) is not the output class label; a negative value "votes" for class +# +0, a positive - for class +# +1. And the votes are weighted. The weight of each individual tree may be increased or decreased using the method +``CvBoostTree::scale`` +. + + + +.. index:: CvBoost + +.. _CvBoost: + +CvBoost +------- + +`id=0.0263891264552 Comments from the Wiki `__ + +.. ctype:: CvBoost + + + +Boosted tree classifier. + + + + +:: + + + + class CvBoost : public CvStatModel + { + public: + // Boosting type + enum { DISCRETE=0, REAL=1, LOGIT=2, GENTLE=3 }; + + // Splitting criteria + enum { DEFAULT=0, GINI=1, MISCLASS=3, SQERR=4 }; + + CvBoost(); + virtual ~CvBoost(); + + CvBoost( const CvMat* _train_data, int _tflag, + const CvMat* _responses, const CvMat* _var_idx=0, + const CvMat* _sample_idx=0, const CvMat* _var_type=0, + const CvMat* _missing_mask=0, + CvBoostParams params=CvBoostParams() ); + + virtual bool train( const CvMat* _train_data, int _tflag, + const CvMat* _responses, const CvMat* _var_idx=0, + const CvMat* _sample_idx=0, const CvMat* _var_type=0, + const CvMat* _missing_mask=0, + CvBoostParams params=CvBoostParams(), + bool update=false ); + + virtual float predict( const CvMat* _sample, const CvMat* _missing=0, + CvMat* weak_responses=0, CvSlice slice=CV_WHOLE_SEQ, + bool raw_mode=false ) const; + + virtual void prune( CvSlice slice ); + + virtual void clear(); + + virtual void write( CvFileStorage* storage, const char* name ); + virtual void read( CvFileStorage* storage, CvFileNode* node ); + + CvSeq* get_weak_predictors(); + const CvBoostParams& get_params() const; + ... + + protected: + virtual bool set_params( const CvBoostParams& _params ); + virtual void update_weights( CvBoostTree* tree ); + virtual void trim_weights(); + virtual void write_params( CvFileStorage* fs ); + virtual void read_params( CvFileStorage* fs, CvFileNode* node ); + + CvDTreeTrainData* data; + CvBoostParams params; + CvSeq* weak; + ... + }; + + +.. + + +.. index:: CvBoost::train + +.. _CvBoost::train: + +CvBoost::train +-------------- + +`id=0.756448003801 Comments from the Wiki `__ + + + + +.. cfunction:: bool CvBoost::train( const CvMat* _train_data, int _tflag, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, const CvMat* _var_type=0, const CvMat* _missing_mask=0, CvBoostParams params=CvBoostParams(), bool update=false ) + + Trains a boosted tree classifier. + + + +The train method follows the common template; the last parameter +``update`` +specifies whether the classifier needs to be updated (i.e. the new weak tree classifiers added to the existing ensemble), or the classifier needs to be rebuilt from scratch. The responses must be categorical, i.e. boosted trees can not be built for regression, and there should be 2 classes. + + + +.. index:: CvBoost::predict + +.. _CvBoost::predict: + +CvBoost::predict +---------------- + +`id=0.275883150474 Comments from the Wiki `__ + + + + +.. 
cfunction:: float CvBoost::predict( const CvMat* sample, const CvMat* missing=0, CvMat* weak_responses=0, CvSlice slice=CV_WHOLE_SEQ, bool raw_mode=false ) const + + Predicts a response for the input sample. + + + +The method +``CvBoost::predict`` +runs the sample through the trees in the ensemble and returns the output class label based on the weighted voting. + + + +.. index:: CvBoost::prune + +.. _CvBoost::prune: + +CvBoost::prune +-------------- + +`id=0.22443448309 Comments from the Wiki `__ + + + + +.. cfunction:: void CvBoost::prune( CvSlice slice ) + + Removes the specified weak classifiers. + + + +The method removes the specified weak classifiers from the sequence. Note that this method should not be confused with the pruning of individual decision trees, which is currently not supported. + + + +.. index:: CvBoost::get_weak_predictors + +.. _CvBoost::get_weak_predictors: + +CvBoost::get_weak_predictors +---------------------------- + +`id=0.670781607621 Comments from the Wiki `__ + + + + +.. cfunction:: CvSeq* CvBoost::get_weak_predictors() + + Returns the sequence of weak tree classifiers. + + + +The method returns the sequence of weak classifiers. Each element of the sequence is a pointer to a +``CvBoostTree`` +class (or, probably, to some of its derivatives). + diff --git a/modules/ml/doc/decision_trees.rst b/modules/ml/doc/decision_trees.rst new file mode 100644 index 000000000..37fea26ed --- /dev/null +++ b/modules/ml/doc/decision_trees.rst @@ -0,0 +1,606 @@ +Decision Trees +============== + +.. highlight:: cpp + + +The ML classes discussed in this section implement Classification And Regression Tree algorithms, which are described in +`[Breiman84] <#paper_Breiman84>`_ +. + +The class +:ref:`CvDTree` +represents a single decision tree that may be used alone, or as a base class in tree ensembles (see +:ref:`Boosting` +and +:ref:`Random Trees` +). + +A decision tree is a binary tree (i.e. tree where each non-leaf node has exactly 2 child nodes). It can be used either for classification, when each tree leaf is marked with some class label (multiple leafs may have the same label), or for regression, when each tree leaf is also assigned a constant (so the approximation function is piecewise constant). + + +Predicting with Decision Trees +------------------------------ + + +To reach a leaf node, and to obtain a response for the input feature +vector, the prediction procedure starts with the root node. From each +non-leaf node the procedure goes to the left (i.e. selects the left +child node as the next observed node), or to the right based on the +value of a certain variable, whose index is stored in the observed +node. The variable can be either ordered or categorical. In the first +case, the variable value is compared with the certain threshold (which +is also stored in the node); if the value is less than the threshold, +the procedure goes to the left, otherwise, to the right (for example, +if the weight is less than 1 kilogram, the procedure goes to the left, +else to the right). And in the second case the discrete variable value is +tested to see if it belongs to a certain subset of values (also stored +in the node) from a limited set of values the variable could take; if +yes, the procedure goes to the left, else - to the right (for example, +if the color is green or red, go to the left, else to the right). That +is, in each node, a pair of entities (variable +_ +index, decision +_ +rule +(threshold/subset)) is used. 
This pair is called a split (split on +the variable variable +_ +index). Once a leaf node is reached, the value +assigned to this node is used as the output of prediction procedure. + +Sometimes, certain features of the input vector are missed (for example, in the darkness it is difficult to determine the object color), and the prediction procedure may get stuck in the certain node (in the mentioned example if the node is split by color). To avoid such situations, decision trees use so-called surrogate splits. That is, in addition to the best "primary" split, every tree node may also be split on one or more other variables with nearly the same results. + + +Training Decision Trees +----------------------- + + +The tree is built recursively, starting from the root node. All of the training data (feature vectors and the responses) is used to split the root node. In each node the optimum decision rule (i.e. the best "primary" split) is found based on some criteria (in ML +``gini`` +"purity" criteria is used for classification, and sum of squared errors is used for regression). Then, if necessary, the surrogate splits are found that resemble the results of the primary split on the training data; all of the data is divided using the primary and the surrogate splits (just like it is done in the prediction procedure) between the left and the right child node. Then the procedure recursively splits both left and right nodes. At each node the recursive procedure may stop (i.e. stop splitting the node further) in one of the following cases: + + + + +* depth of the tree branch being constructed has reached the specified maximum value. + + +* number of training samples in the node is less than the specified threshold, when it is not statistically representative to split the node further. + + +* all the samples in the node belong to the same class (or, in the case of regression, the variation is too small). + + +* the best split found does not give any noticeable improvement compared to a random choice. + + +When the tree is built, it may be pruned using a cross-validation procedure, if necessary. That is, some branches of the tree that may lead to the model overfitting are cut off. Normally this procedure is only applied to standalone decision trees, while tree ensembles usually build small enough trees and use their own protection schemes against overfitting. + + +Variable importance +------------------- + + +Besides the obvious use of decision trees - prediction, the tree can be also used for various data analysis. One of the key properties of the constructed decision tree algorithms is that it is possible to compute importance (relative decisive power) of each variable. For example, in a spam filter that uses a set of words occurred in the message as a feature vector, the variable importance rating can be used to determine the most "spam-indicating" words and thus help to keep the dictionary size reasonable. + +Importance of each variable is computed over all the splits on this variable in the tree, primary and surrogate ones. Thus, to compute variable importance correctly, the surrogate splits must be enabled in the training parameters, even if there is no missing data. + +**[Breiman84] Breiman, L., Friedman, J. Olshen, R. and Stone, C. (1984), "Classification and Regression Trees", Wadsworth.** + +.. index:: CvDTreeSplit + +.. _CvDTreeSplit: + +CvDTreeSplit +------------ + +`id=0.286654154683 Comments from the Wiki `__ + +.. ctype:: CvDTreeSplit + + + +Decision tree node split. 
+ + + + +:: + + + + struct CvDTreeSplit + { + int var_idx; + int inversed; + float quality; + CvDTreeSplit* next; + union + { + int subset[2]; + struct + { + float c; + int split_point; + } + ord; + }; + }; + + +.. + + +.. index:: CvDTreeNode + +.. _CvDTreeNode: + +CvDTreeNode +----------- + +`id=0.948528874157 Comments from the Wiki `__ + +.. ctype:: CvDTreeNode + + + +Decision tree node. + + + + +:: + + + + struct CvDTreeNode + { + int class_idx; + int Tn; + double value; + + CvDTreeNode* parent; + CvDTreeNode* left; + CvDTreeNode* right; + + CvDTreeSplit* split; + + int sample_count; + int depth; + ... + }; + + +.. + +Other numerous fields of +``CvDTreeNode`` +are used internally at the training stage. + + + +.. index:: CvDTreeParams + +.. _CvDTreeParams: + +CvDTreeParams +------------- + +`id=0.924935526415 Comments from the Wiki `__ + +.. ctype:: CvDTreeParams + + + +Decision tree training parameters. + + + + +:: + + + + struct CvDTreeParams + { + int max_categories; + int max_depth; + int min_sample_count; + int cv_folds; + bool use_surrogates; + bool use_1se_rule; + bool truncate_pruned_tree; + float regression_accuracy; + const float* priors; + + CvDTreeParams() : max_categories(10), max_depth(INT_MAX), min_sample_count(10), + cv_folds(10), use_surrogates(true), use_1se_rule(true), + truncate_pruned_tree(true), regression_accuracy(0.01f), priors(0) + {} + + CvDTreeParams( int _max_depth, int _min_sample_count, + float _regression_accuracy, bool _use_surrogates, + int _max_categories, int _cv_folds, + bool _use_1se_rule, bool _truncate_pruned_tree, + const float* _priors ); + }; + + +.. + +The structure contains all the decision tree training parameters. There is a default constructor that initializes all the parameters with the default values tuned for standalone classification tree. Any of the parameters can be overridden then, or the structure may be fully initialized using the advanced variant of the constructor. + + + +.. index:: CvDTreeTrainData + +.. _CvDTreeTrainData: + +CvDTreeTrainData +---------------- + +`id=0.0482986639469 Comments from the Wiki `__ + +.. ctype:: CvDTreeTrainData + + + +Decision tree training data and shared data for tree ensembles. 
+ + + + +:: + + + + struct CvDTreeTrainData + { + CvDTreeTrainData(); + CvDTreeTrainData( const CvMat* _train_data, int _tflag, + const CvMat* _responses, const CvMat* _var_idx=0, + const CvMat* _sample_idx=0, const CvMat* _var_type=0, + const CvMat* _missing_mask=0, + const CvDTreeParams& _params=CvDTreeParams(), + bool _shared=false, bool _add_labels=false ); + virtual ~CvDTreeTrainData(); + + virtual void set_data( const CvMat* _train_data, int _tflag, + const CvMat* _responses, const CvMat* _var_idx=0, + const CvMat* _sample_idx=0, const CvMat* _var_type=0, + const CvMat* _missing_mask=0, + const CvDTreeParams& _params=CvDTreeParams(), + bool _shared=false, bool _add_labels=false, + bool _update_data=false ); + + virtual void get_vectors( const CvMat* _subsample_idx, + float* values, uchar* missing, float* responses, + bool get_class_idx=false ); + + virtual CvDTreeNode* subsample_data( const CvMat* _subsample_idx ); + + virtual void write_params( CvFileStorage* fs ); + virtual void read_params( CvFileStorage* fs, CvFileNode* node ); + + // release all the data + virtual void clear(); + + int get_num_classes() const; + int get_var_type(int vi) const; + int get_work_var_count() const; + + virtual int* get_class_labels( CvDTreeNode* n ); + virtual float* get_ord_responses( CvDTreeNode* n ); + virtual int* get_labels( CvDTreeNode* n ); + virtual int* get_cat_var_data( CvDTreeNode* n, int vi ); + virtual CvPair32s32f* get_ord_var_data( CvDTreeNode* n, int vi ); + virtual int get_child_buf_idx( CvDTreeNode* n ); + + //////////////////////////////////// + + virtual bool set_params( const CvDTreeParams& params ); + virtual CvDTreeNode* new_node( CvDTreeNode* parent, int count, + int storage_idx, int offset ); + + virtual CvDTreeSplit* new_split_ord( int vi, float cmp_val, + int split_point, int inversed, float quality ); + virtual CvDTreeSplit* new_split_cat( int vi, float quality ); + virtual void free_node_data( CvDTreeNode* node ); + virtual void free_train_data(); + virtual void free_node( CvDTreeNode* node ); + + int sample_count, var_all, var_count, max_c_count; + int ord_var_count, cat_var_count; + bool have_labels, have_priors; + bool is_classifier; + + int buf_count, buf_size; + bool shared; + + CvMat* cat_count; + CvMat* cat_ofs; + CvMat* cat_map; + + CvMat* counts; + CvMat* buf; + CvMat* direction; + CvMat* split_buf; + + CvMat* var_idx; + CvMat* var_type; // i-th element = + // k<0 - ordered + // k>=0 - categorical, see k-th element of cat_* arrays + CvMat* priors; + + CvDTreeParams params; + + CvMemStorage* tree_storage; + CvMemStorage* temp_storage; + + CvDTreeNode* data_root; + + CvSet* node_heap; + CvSet* split_heap; + CvSet* cv_heap; + CvSet* nv_heap; + + CvRNG rng; + }; + + +.. + +This structure is mostly used internally for storing both standalone trees and tree ensembles efficiently. Basically, it contains 3 types of information: + + + + +#. The training parameters, an instance of :ref:`CvDTreeParams`. + + +#. The training data, preprocessed in order to find the best splits more efficiently. For tree ensembles this preprocessed data is reused by all the trees. Additionally, the training data characteristics that are shared by all trees in the ensemble are stored here: variable types, the number of classes, class label compression map etc. + + +#. Buffers, memory storages for tree nodes, splits and other elements of the trees constructed. + + +There are 2 ways of using this structure. In simple cases (e.g. 
a standalone tree, or the ready-to-use "black box" tree ensemble from ML, like +:ref:`Random Trees` +or +:ref:`Boosting` +) there is no need to care or even to know about the structure - just construct the needed statistical model, train it and use it. The +``CvDTreeTrainData`` +structure will be constructed and used internally. However, for custom tree algorithms, or another sophisticated cases, the structure may be constructed and used explicitly. The scheme is the following: + + + + +* + The structure is initialized using the default constructor, followed by + ``set_data`` + (or it is built using the full form of constructor). The parameter + ``_shared`` + must be set to + ``true`` + . + + +* + One or more trees are trained using this data, see the special form of the method + ``CvDTree::train`` + . + + +* + Finally, the structure can be released only after all the trees using it are released. + + + +.. index:: CvDTree + +.. _CvDTree: + +CvDTree +------- + +`id=0.802824162542 Comments from the Wiki `__ + +.. ctype:: CvDTree + + + +Decision tree. + + + + +:: + + + + class CvDTree : public CvStatModel + { + public: + CvDTree(); + virtual ~CvDTree(); + + virtual bool train( const CvMat* _train_data, int _tflag, + const CvMat* _responses, const CvMat* _var_idx=0, + const CvMat* _sample_idx=0, const CvMat* _var_type=0, + const CvMat* _missing_mask=0, + CvDTreeParams params=CvDTreeParams() ); + + virtual bool train( CvDTreeTrainData* _train_data, + const CvMat* _subsample_idx ); + + virtual CvDTreeNode* predict( const CvMat* _sample, + const CvMat* _missing_data_mask=0, + bool raw_mode=false ) const; + virtual const CvMat* get_var_importance(); + virtual void clear(); + + virtual void read( CvFileStorage* fs, CvFileNode* node ); + virtual void write( CvFileStorage* fs, const char* name ); + + // special read & write methods for trees in the tree ensembles + virtual void read( CvFileStorage* fs, CvFileNode* node, + CvDTreeTrainData* data ); + virtual void write( CvFileStorage* fs ); + + const CvDTreeNode* get_root() const; + int get_pruned_tree_idx() const; + CvDTreeTrainData* get_data(); + + protected: + + virtual bool do_train( const CvMat* _subsample_idx ); + + virtual void try_split_node( CvDTreeNode* n ); + virtual void split_node_data( CvDTreeNode* n ); + virtual CvDTreeSplit* find_best_split( CvDTreeNode* n ); + virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi ); + virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi ); + virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi ); + virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi ); + virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi ); + virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi ); + virtual double calc_node_dir( CvDTreeNode* node ); + virtual void complete_node_dir( CvDTreeNode* node ); + virtual void cluster_categories( const int* vectors, int vector_count, + int var_count, int* sums, int k, int* cluster_labels ); + + virtual void calc_node_value( CvDTreeNode* node ); + + virtual void prune_cv(); + virtual double update_tree_rnc( int T, int fold ); + virtual int cut_tree( int T, int fold, double min_alpha ); + virtual void free_prune_data(bool cut_tree); + virtual void free_tree(); + + virtual void write_node( CvFileStorage* fs, CvDTreeNode* node ); + virtual void write_split( CvFileStorage* fs, CvDTreeSplit* split ); + virtual CvDTreeNode* read_node( CvFileStorage* fs, + CvFileNode* node, + CvDTreeNode* parent ); + virtual 
CvDTreeSplit* read_split( CvFileStorage* fs, CvFileNode* node );
+        virtual void write_tree_nodes( CvFileStorage* fs );
+        virtual void read_tree_nodes( CvFileStorage* fs, CvFileNode* node );
+
+        CvDTreeNode* root;
+
+        int pruned_tree_idx;
+        CvMat* var_importance;
+
+        CvDTreeTrainData* data;
+    };
+
+..
+
+.. index:: CvDTree::train
+
+.. _CvDTree::train:
+
+CvDTree::train
+--------------
+
+.. cfunction:: bool CvDTree::train( const CvMat* _train_data, int _tflag, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, const CvMat* _var_type=0, const CvMat* _missing_mask=0, CvDTreeParams params=CvDTreeParams() )
+
+.. cfunction:: bool CvDTree::train( CvDTreeTrainData* _train_data, const CvMat* _subsample_idx )
+
+    Trains a decision tree.
+
+There are 2
+``train``
+methods in
+``CvDTree``
+.
+
+The first method follows the generic
+``CvStatModel::train``
+conventions; it is the most complete form. Both data layouts (
+``_tflag=CV_ROW_SAMPLE``
+and
+``_tflag=CV_COL_SAMPLE``
+) are supported, as well as sample and variable subsets, missing measurements, arbitrary combinations of input and output variable types etc. The last parameter contains all of the necessary training parameters, see the
+:ref:`CvDTreeParams`
+description.
+
+The second method
+``train``
+is mostly used for building tree ensembles. It takes the pre-constructed
+:ref:`CvDTreeTrainData`
+instance and an optional subset of the training set. The indices in
+``_subsample_idx``
+are counted relative to
+``_sample_idx``
+, passed to the
+``CvDTreeTrainData``
+constructor. For example, if
+``_sample_idx=[1, 5, 7, 100]``
+, then
+``_subsample_idx=[0,3]``
+means that the samples
+``[1, 100]``
+of the original training set are used.
+
+.. index:: CvDTree::predict
+
+.. _CvDTree::predict:
+
+CvDTree::predict
+----------------
+
+.. cfunction:: CvDTreeNode* CvDTree::predict( const CvMat* _sample, const CvMat* _missing_data_mask=0, bool raw_mode=false ) const
+
+    Returns the leaf node of the decision tree corresponding to the input vector.
+
+The method takes the feature vector and an optional missing measurement mask on input, traverses the decision tree and returns the reached leaf node as output. The prediction result, either the class label or the estimated function value, may be retrieved as the
+``value``
+field of the
+:ref:`CvDTreeNode`
+structure, for example:
+``dtree->predict(sample,mask)->value``
+.
+
+The last parameter is normally set to
+``false``
+, implying a regular
+input. If it is
+``true``
+, the method assumes that all the values of
+the discrete input variables have been already normalized to the
+:math:`0..num\_of\_categories_i-1`
+ranges (as the decision tree uses such a
+normalized representation internally). This is useful for faster prediction
+with tree ensembles. For ordered input variables the flag is not used.
+
+Example: Building a Tree for Classifying Mushrooms. See the
+``mushroom.cpp``
+sample that demonstrates how to build and use the
+decision tree.
+
diff --git a/modules/ml/doc/expectation_maximization.rst b/modules/ml/doc/expectation_maximization.rst
new file mode 100644
index 000000000..b24bb620b
--- /dev/null
+++ b/modules/ml/doc/expectation_maximization.rst
@@ -0,0 +1,436 @@
+Expectation-Maximization
+========================
+
+.. highlight:: cpp
+
+
+The EM (Expectation-Maximization) algorithm estimates the parameters of the multivariate probability density function in the form of a Gaussian mixture distribution with a specified number of mixtures.
+
+Consider the set of the feature vectors
+:math:`x_1, x_2,...,x_{N}`
+: N vectors from a d-dimensional Euclidean space drawn from a Gaussian mixture:
+
+.. math::
+
+    p(x;a_k,S_k, \pi _k) = \sum _{k=1}^{m} \pi _k p_k(x), \quad \pi _k \geq 0, \quad \sum _{k=1}^{m} \pi _k=1,
+
+.. math::
+
+    p_k(x)= \varphi (x;a_k,S_k)= \frac{1}{(2\pi)^{d/2} |S_k|^{1/2}} \exp \left \{ - \frac{1}{2} (x-a_k)^T S_k^{-1} (x-a_k) \right \} ,
+
+where
+:math:`m`
+is the number of mixtures,
+:math:`p_k`
+is the normal distribution
+density with the mean
+:math:`a_k`
+and covariance matrix
+:math:`S_k`
+, and
+:math:`\pi_k`
+is the weight of the k-th mixture. Given the number of mixtures
+:math:`m`
+and the samples
+:math:`x_i`
+,
+:math:`i=1..N`
+, the algorithm finds the
+maximum-likelihood estimates (MLE) of all the mixture parameters,
+i.e.
+:math:`a_k`
+,
+:math:`S_k`
+and
+:math:`\pi_k`
+:
+
+.. math::
+
+    L(x, \theta )= \log p(x, \theta )= \sum _{i=1}^{N} \log \left ( \sum _{k=1}^{m} \pi _k p_k(x_i) \right ) \to \max _{ \theta \in \Theta },
+
+.. math::
+
+    \Theta = \left \{ (a_k,S_k, \pi _k): a_k \in \mathbb{R} ^d,S_k=S_k^T>0,S_k \in \mathbb{R} ^{d \times d}, \pi _k \geq 0, \sum _{k=1}^{m} \pi _k=1 \right \} .
+
+The EM algorithm is an iterative procedure. Each iteration includes
+two steps. At the first step (Expectation-step, or E-step), we find the
+probability
+:math:`p_{i,k}`
+(denoted
+:math:`\alpha_{ki}`
+in the formula below) of
+sample
+``i``
+belonging to mixture
+``k``
+using the currently
+available mixture parameter estimates:
+
+.. math::
+
+    \alpha _{ki} = \frac{\pi_k\varphi(x_i;a_k,S_k)}{\sum\limits_{j=1}^{m}\pi_j\varphi(x_i;a_j,S_j)} .
+
+At the second step (Maximization-step, or M-step) the mixture parameter estimates are refined using the computed probabilities:
+
+.. math::
+
+    \pi _k= \frac{1}{N} \sum _{i=1}^{N} \alpha _{ki}, \quad a_k= \frac{\sum\limits_{i=1}^{N}\alpha_{ki}x_i}{\sum\limits_{i=1}^{N}\alpha_{ki}} , \quad S_k= \frac{\sum\limits_{i=1}^{N}\alpha_{ki}(x_i-a_k)(x_i-a_k)^T}{\sum\limits_{i=1}^{N}\alpha_{ki}} .
+
+Alternatively, the algorithm may start with the M-step when the initial values for
+:math:`p_{i,k}`
+can be provided. Another alternative, when
+:math:`p_{i,k}`
+are unknown, is to use a simpler clustering algorithm to pre-cluster the input samples and thus obtain the initial
+:math:`p_{i,k}`
+. Often (and in ML) the
+:ref:`KMeans2`
+algorithm is used for that purpose.
+
+One of the main problems the EM algorithm has to deal with is the large number
+of parameters to estimate. The majority of the parameters reside in the
+covariance matrices, which are
+:math:`d \times d`
+elements each
+(where
+:math:`d`
+is the feature space dimensionality). However, in
+many practical problems the covariance matrices are close to diagonal,
+or even to
+:math:`\mu_k I`
+, where
+:math:`I`
+is the identity matrix and
+:math:`\mu_k`
+is a mixture-dependent "scale" parameter. So a robust computation
+scheme could be to start with harder constraints on the covariance
+matrices and then use the estimated parameters as an input for a less
+constrained optimization problem (often a diagonal covariance matrix is
+already a good enough approximation).
+
+**References:**
+
+*
+    Bilmes98 J. A. Bilmes.
A Gentle Tutorial of the EM Algorithm and its Application to Parameter Estimation for Gaussian Mixture and Hidden Markov Models. Technical Report TR-97-021, International Computer Science Institute and Computer Science Division, University of California at Berkeley, April 1998. + + + +.. index:: CvEMParams + +.. _CvEMParams: + +CvEMParams +---------- + +`id=0.432576013672 Comments from the Wiki `__ + +.. ctype:: CvEMParams + + + +Parameters of the EM algorithm. + + + + +:: + + + + struct CvEMParams + { + CvEMParams() : nclusters(10), cov_mat_type(CvEM::COV_MAT_DIAGONAL), + start_step(CvEM::START_AUTO_STEP), probs(0), weights(0), means(0), + covs(0) + { + term_crit=cvTermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, + 100, FLT_EPSILON ); + } + + CvEMParams( int _nclusters, int _cov_mat_type=1/*CvEM::COV_MAT_DIAGONAL*/, + int _start_step=0/*CvEM::START_AUTO_STEP*/, + CvTermCriteria _term_crit=cvTermCriteria( + CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, + 100, FLT_EPSILON), + CvMat* _probs=0, CvMat* _weights=0, + CvMat* _means=0, CvMat** _covs=0 ) : + nclusters(_nclusters), cov_mat_type(_cov_mat_type), + start_step(_start_step), + probs(_probs), weights(_weights), means(_means), covs(_covs), + term_crit(_term_crit) + {} + + int nclusters; + int cov_mat_type; + int start_step; + const CvMat* probs; + const CvMat* weights; + const CvMat* means; + const CvMat** covs; + CvTermCriteria term_crit; + }; + + +.. + +The structure has 2 constructors, the default one represents a rough rule-of-thumb, with another one it is possible to override a variety of parameters, from a single number of mixtures (the only essential problem-dependent parameter), to the initial values for the mixture parameters. + + + +.. index:: CvEM + +.. _CvEM: + +CvEM +---- + +`id=0.808344863567 Comments from the Wiki `__ + +.. ctype:: CvEM + + + +EM model. + + + + +:: + + + + class CV_EXPORTS CvEM : public CvStatModel + { + public: + // Type of covariance matrices + enum { COV_MAT_SPHERICAL=0, COV_MAT_DIAGONAL=1, COV_MAT_GENERIC=2 }; + + // The initial step + enum { START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0 }; + + CvEM(); + CvEM( const CvMat* samples, const CvMat* sample_idx=0, + CvEMParams params=CvEMParams(), CvMat* labels=0 ); + virtual ~CvEM(); + + virtual bool train( const CvMat* samples, const CvMat* sample_idx=0, + CvEMParams params=CvEMParams(), CvMat* labels=0 ); + + virtual float predict( const CvMat* sample, CvMat* probs ) const; + virtual void clear(); + + int get_nclusters() const { return params.nclusters; } + const CvMat* get_means() const { return means; } + const CvMat** get_covs() const { return covs; } + const CvMat* get_weights() const { return weights; } + const CvMat* get_probs() const { return probs; } + + protected: + + virtual void set_params( const CvEMParams& params, + const CvVectors& train_data ); + virtual void init_em( const CvVectors& train_data ); + virtual double run_em( const CvVectors& train_data ); + virtual void init_auto( const CvVectors& samples ); + virtual void kmeans( const CvVectors& train_data, int nclusters, + CvMat* labels, CvTermCriteria criteria, + const CvMat* means ); + CvEMParams params; + double log_likelihood; + + CvMat* means; + CvMat** covs; + CvMat* weights; + CvMat* probs; + + CvMat* log_weight_div_det; + CvMat* inv_eigen_values; + CvMat** cov_rotate_mats; + }; + + +.. + + +.. index:: CvEM::train + +.. _CvEM::train: + +CvEM::train +----------- + +`id=0.340076585117 Comments from the Wiki `__ + + + + +.. 
cfunction:: void CvEM::train( const CvMat* samples, const CvMat* sample_idx=0, CvEMParams params=CvEMParams(), CvMat* labels=0 )
+
+    Estimates the Gaussian mixture parameters from the sample set.
+
+Unlike many of the ML models, EM is an unsupervised learning algorithm and it does not take responses (class labels or function values) on input. Instead, it computes the maximum-likelihood estimates of the Gaussian mixture parameters from the input sample set and stores all the parameters inside the structure:
+:math:`p_{i,k}`
+in
+``probs``
+,
+:math:`a_k`
+in
+``means``
+,
+:math:`S_k`
+in
+``covs[k]``
+,
+:math:`\pi_k`
+in
+``weights``
+, and optionally computes the output "class label" for each sample:
+:math:`\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N`
+(i.e. the index of the most-probable mixture for each sample).
+
+The trained model can be used further for prediction, just like any other classifier. The trained model is similar to the
+:ref:`Bayes classifier`
+.
+
+Example: Clustering random samples of a multi-Gaussian distribution using EM
+
+::
+
+    #include "ml.h"
+    #include "highgui.h"
+    #include <math.h>   // for sqrt()
+
+    int main( int argc, char** argv )
+    {
+        const int N = 4;
+        const int N1 = (int)sqrt((double)N);
+        const CvScalar colors[] = { {{0,0,255}}, {{0,255,0}},
+                                    {{0,255,255}}, {{255,255,0}} };
+        int i, j;
+        int nsamples = 100;
+        CvRNG rng_state = cvRNG(-1);
+        CvMat* samples = cvCreateMat( nsamples, 2, CV_32FC1 );
+        CvMat* labels = cvCreateMat( nsamples, 1, CV_32SC1 );
+        IplImage* img = cvCreateImage( cvSize( 500, 500 ), 8, 3 );
+        float _sample[2];
+        CvMat sample = cvMat( 1, 2, CV_32FC1, _sample );
+        CvEM em_model;
+        CvEMParams params;
+        CvMat samples_part;
+
+        cvReshape( samples, samples, 2, 0 );
+        for( i = 0; i < N; i++ )
+        {
+            CvScalar mean, sigma;
+
+            // form the training samples
+            cvGetRows( samples, &samples_part, i*nsamples/N,
+                       (i+1)*nsamples/N );
+            mean = cvScalar(((i%N1)+1.)*img->width/(N1+1),
+                            ((i/N1)+1.)*img->height/(N1+1));
+            sigma = cvScalar(30,30);
+            cvRandArr( &rng_state, &samples_part, CV_RAND_NORMAL,
+                       mean, sigma );
+        }
+        cvReshape( samples, samples, 1, 0 );
+
+        // initialize model's parameters
+        params.covs = NULL;
+        params.means = NULL;
+        params.weights = NULL;
+        params.probs = NULL;
+        params.nclusters = N;
+        params.cov_mat_type = CvEM::COV_MAT_SPHERICAL;
+        params.start_step = CvEM::START_AUTO_STEP;
+        params.term_crit.max_iter = 10;
+        params.term_crit.epsilon = 0.1;
+        params.term_crit.type = CV_TERMCRIT_ITER|CV_TERMCRIT_EPS;
+
+        // cluster the data
+        em_model.train( samples, 0, params, labels );
+
+    #if 0
+        // the piece of code shows how to repeatedly optimize the model
+        // with less-constrained parameters
+        // (COV_MAT_DIAGONAL instead of COV_MAT_SPHERICAL)
+        // when the output of the first stage is used as input for the second.
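+        // note: the matrices returned by em_model.get_means(), get_covs()
+        // and get_weights() below are owned by em_model, so em_model must
+        // stay valid (not cleared) until em_model2.train() has finished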
+ CvEM em_model2; + params.cov_mat_type = CvEM::COV_MAT_DIAGONAL; + params.start_step = CvEM::START_E_STEP; + params.means = em_model.get_means(); + params.covs = (const CvMat**)em_model.get_covs(); + params.weights = em_model.get_weights(); + + em_model2.train( samples, 0, params, labels ); + // to use em_model2, replace em_model.predict() + // with em_model2.predict() below + #endif + // classify every image pixel + cvZero( img ); + for( i = 0; i < img->height; i++ ) + { + for( j = 0; j < img->width; j++ ) + { + CvPoint pt = cvPoint(j, i); + sample.data.fl[0] = (float)j; + sample.data.fl[1] = (float)i; + int response = cvRound(em_model.predict( &sample, NULL )); + CvScalar c = colors[response]; + + cvCircle( img, pt, 1, cvScalar(c.val[0]*0.75, + c.val[1]*0.75,c.val[2]*0.75), CV_FILLED ); + } + } + + //draw the clustered samples + for( i = 0; i < nsamples; i++ ) + { + CvPoint pt; + pt.x = cvRound(samples->data.fl[i*2]); + pt.y = cvRound(samples->data.fl[i*2+1]); + cvCircle( img, pt, 1, colors[labels->data.i[i]], CV_FILLED ); + } + + cvNamedWindow( "EM-clustering result", 1 ); + cvShowImage( "EM-clustering result", img ); + cvWaitKey(0); + + cvReleaseMat( &samples ); + cvReleaseMat( &labels ); + return 0; + } + + + +.. + diff --git a/modules/ml/doc/k_nearest_neighbors.rst b/modules/ml/doc/k_nearest_neighbors.rst new file mode 100644 index 000000000..eea5d7057 --- /dev/null +++ b/modules/ml/doc/k_nearest_neighbors.rst @@ -0,0 +1,254 @@ +K Nearest Neighbors +=================== + +.. highlight:: cpp + + +The algorithm caches all of the training samples, and predicts the response for a new sample by analyzing a certain number ( +**K** +) of the nearest neighbors of the sample (using voting, calculating weighted sum etc.) The method is sometimes referred to as "learning by example", because for prediction it looks for the feature vector with a known response that is closest to the given vector. + + + +.. index:: CvKNearest + +.. _CvKNearest: + +CvKNearest +---------- + +`id=0.969498355265 Comments from the Wiki `__ + +.. ctype:: CvKNearest + + + +K Nearest Neighbors model. + + + + +:: + + + + class CvKNearest : public CvStatModel + { + public: + + CvKNearest(); + virtual ~CvKNearest(); + + CvKNearest( const CvMat* _train_data, const CvMat* _responses, + const CvMat* _sample_idx=0, bool _is_regression=false, int max_k=32 ); + + virtual bool train( const CvMat* _train_data, const CvMat* _responses, + const CvMat* _sample_idx=0, bool is_regression=false, + int _max_k=32, bool _update_base=false ); + + virtual float find_nearest( const CvMat* _samples, int k, CvMat* results, + const float** neighbors=0, CvMat* neighbor_responses=0, CvMat* dist=0 ) const; + + virtual void clear(); + int get_max_k() const; + int get_var_count() const; + int get_sample_count() const; + bool is_regression() const; + + protected: + ... + }; + + +.. + + +.. index:: CvKNearest::train + +.. _CvKNearest::train: + +CvKNearest::train +----------------- + +`id=0.0998674771945 Comments from the Wiki `__ + + + + +.. cfunction:: bool CvKNearest::train( const CvMat* _train_data, const CvMat* _responses, const CvMat* _sample_idx=0, bool is_regression=false, int _max_k=32, bool _update_base=false ) + + Trains the model. + + + +The method trains the K-Nearest model. 
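+
+A minimal training sketch is shown below (``trainData`` is assumed to be a ``CV_32FC1`` matrix with one sample per row and ``trainClasses`` the corresponding column of class labels; the names are illustrative only):
+
+::
+
+    // the classifier simply caches the training samples, so training
+    // is fast; up to 10 neighbors may later be requested per query
+    CvKNearest knn;
+    knn.train( trainData, trainClasses, 0, false, 10 );
+
+..
+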
+The method follows the conventions of the generic
+``train``
+method with the following limitations: only the
+``CV_ROW_SAMPLE``
+data layout is supported, the input variables are all ordered, the output variables can be either categorical (
+``is_regression=false``
+) or ordered (
+``is_regression=true``
+), and variable subsets (
+``var_idx``
+) and missing measurements are not supported.
+
+The parameter
+``_max_k``
+specifies the maximum number of neighbors that may be passed to the method
+``find_nearest``
+.
+
+The parameter
+``_update_base``
+specifies whether the model is trained from scratch
+(
+``_update_base=false``
+), or it is updated using the new training data (
+``_update_base=true``
+). In the latter case the parameter
+``_max_k``
+must not be larger than the original value.
+
+.. index:: CvKNearest::find_nearest
+
+.. _CvKNearest::find_nearest:
+
+CvKNearest::find_nearest
+------------------------
+
+.. cfunction:: float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* results=0, const float** neighbors=0, CvMat* neighbor_responses=0, CvMat* dist=0 ) const
+
+    Finds the neighbors for the input vectors.
+
+For each input vector (a row of the matrix
+``_samples``
+) the method finds the
+:math:`\texttt{k} \le \texttt{get\_max\_k()}`
+nearest neighbors. In the case of regression,
+the predicted result is the mean value of the particular vector's
+neighbor responses. In the case of classification the class is determined
+by voting.
+
+For custom classification/regression prediction, the method can optionally return pointers to the neighbor vectors themselves (
+``neighbors``
+, an array of
+``k*_samples->rows``
+pointers), their corresponding output values (
+``neighbor_responses``
+, a vector of
+``k*_samples->rows``
+elements) and the distances from the input vectors to the neighbors (
+``dist``
+, also a vector of
+``k*_samples->rows``
+elements).
+
+For each input vector the neighbors are sorted by their distances to the vector.
+
+If only a single input vector is passed, all output matrices are optional and the predicted value is returned by the method.
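+
+For instance, the single-vector case reduces to the following (a sketch; ``knn``, ``sample`` and ``K`` are assumed to be a trained model, a ``1 x get_var_count()`` input vector of type ``CV_32FC1`` and the number of neighbors to use):
+
+::
+
+    // all the optional output arrays are omitted, so the predicted
+    // response for the single input sample is simply returned
+    float response = knn.find_nearest( &sample, K, 0, 0, 0, 0 );
+
+..
+
+A complete example of classifying 2D points with ``CvKNearest`` follows.
+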
+ + + + + + +:: + + + + #include "ml.h" + #include "highgui.h" + + int main( int argc, char** argv ) + { + const int K = 10; + int i, j, k, accuracy; + float response; + int train_sample_count = 100; + CvRNG rng_state = cvRNG(-1); + CvMat* trainData = cvCreateMat( train_sample_count, 2, CV_32FC1 ); + CvMat* trainClasses = cvCreateMat( train_sample_count, 1, CV_32FC1 ); + IplImage* img = cvCreateImage( cvSize( 500, 500 ), 8, 3 ); + float _sample[2]; + CvMat sample = cvMat( 1, 2, CV_32FC1, _sample ); + cvZero( img ); + + CvMat trainData1, trainData2, trainClasses1, trainClasses2; + + // form the training samples + cvGetRows( trainData, &trainData1, 0, train_sample_count/2 ); + cvRandArr( &rng_state, &trainData1, CV_RAND_NORMAL, cvScalar(200,200), cvScalar(50,50) ); + + cvGetRows( trainData, &trainData2, train_sample_count/2, train_sample_count ); + cvRandArr( &rng_state, &trainData2, CV_RAND_NORMAL, cvScalar(300,300), cvScalar(50,50) ); + + cvGetRows( trainClasses, &trainClasses1, 0, train_sample_count/2 ); + cvSet( &trainClasses1, cvScalar(1) ); + + cvGetRows( trainClasses, &trainClasses2, train_sample_count/2, train_sample_count ); + cvSet( &trainClasses2, cvScalar(2) ); + + // learn classifier + CvKNearest knn( trainData, trainClasses, 0, false, K ); + CvMat* nearests = cvCreateMat( 1, K, CV_32FC1); + + for( i = 0; i < img->height; i++ ) + { + for( j = 0; j < img->width; j++ ) + { + sample.data.fl[0] = (float)j; + sample.data.fl[1] = (float)i; + + // estimates the response and get the neighbors' labels + response = knn.find_nearest(&sample,K,0,0,nearests,0); + + // compute the number of neighbors representing the majority + for( k = 0, accuracy = 0; k < K; k++ ) + { + if( nearests->data.fl[k] == response) + accuracy++; + } + // highlight the pixel depending on the accuracy (or confidence) + cvSet2D( img, i, j, response == 1 ? + (accuracy > 5 ? CV_RGB(180,0,0) : CV_RGB(180,120,0)) : + (accuracy > 5 ? CV_RGB(0,180,0) : CV_RGB(120,120,0)) ); + } + } + + // display the original training samples + for( i = 0; i < train_sample_count/2; i++ ) + { + CvPoint pt; + pt.x = cvRound(trainData1.data.fl[i*2]); + pt.y = cvRound(trainData1.data.fl[i*2+1]); + cvCircle( img, pt, 2, CV_RGB(255,0,0), CV_FILLED ); + pt.x = cvRound(trainData2.data.fl[i*2]); + pt.y = cvRound(trainData2.data.fl[i*2+1]); + cvCircle( img, pt, 2, CV_RGB(0,255,0), CV_FILLED ); + } + + cvNamedWindow( "classifier result", 1 ); + cvShowImage( "classifier result", img ); + cvWaitKey(0); + + cvReleaseMat( &trainClasses ); + cvReleaseMat( &trainData ); + return 0; + } + + +.. + diff --git a/modules/ml/doc/ml.rst b/modules/ml/doc/ml.rst new file mode 100644 index 000000000..98b101c33 --- /dev/null +++ b/modules/ml/doc/ml.rst @@ -0,0 +1,22 @@ +**************** +Machine Learning +**************** + +The Machine Learning Library (MLL) is a set of classes and functions for statistical classification, regression and clustering of data. + +Most of the classification and regression algorithms are implemented as C++ classes. As the algorithms have different seta of features (like the ability to handle missing measurements, or categorical input variables etc.), there is a little common ground between the classes. This common ground is defined by the class `CvStatModel` that all the other ML classes are derived from. + + +.. 
toctree::
+    :maxdepth: 2
+
+    statistical_models
+    normal_bayes_classifier
+    k_nearest_neighbors
+    support_vector_machines
+    decision_trees
+    boosting
+    random_trees
+    expectation_maximization
+    neural_networks
+
diff --git a/modules/ml/doc/neural_networks.rst b/modules/ml/doc/neural_networks.rst
new file mode 100644
index 000000000..97e6f8863
--- /dev/null
+++ b/modules/ml/doc/neural_networks.rst
@@ -0,0 +1,399 @@
+Neural Networks
+===============
+
+.. highlight:: cpp
+
+ML implements feed-forward artificial neural networks, more particularly, multi-layer perceptrons (MLP), the most commonly used type of neural networks. An MLP consists of the input layer, the output layer and one or more hidden layers. Each layer of the MLP includes one or more neurons that are directionally linked with the neurons from the previous and the next layer. Here is an example of a 3-layer perceptron with 3 inputs, 2 outputs and a hidden layer including 5 neurons:
+
+.. image:: ../../pics/mlp_.png
+
+All the neurons in an MLP are similar. Each of them has several input links (i.e. it takes the output values from several neurons in the previous layer on input) and several output links (i.e. it passes the response to several neurons in the next layer). The values retrieved from the previous layer are summed with certain weights, individual for each neuron, plus the bias term, and the sum is transformed using the activation function
+:math:`f`
+, which may also be different for different neurons. Here is the picture:
+
+.. image:: ../../pics/neuron_model.png
+
+In other words, given the outputs
+:math:`x_j`
+of the layer
+:math:`n`
+, the outputs
+:math:`y_i`
+of the layer
+:math:`n+1`
+are computed as:
+
+.. math::
+
+    u_i = \sum _j (w^{n+1}_{i,j} x_j) + w^{n+1}_{i,bias}
+
+.. math::
+
+    y_i = f(u_i)
+
+Different activation functions may be used; ML implements 3 standard ones:
+
+*
+    Identity function (
+    ``CvANN_MLP::IDENTITY``
+    ):
+    :math:`f(x)=x`
+
+*
+    Symmetrical sigmoid (
+    ``CvANN_MLP::SIGMOID_SYM``
+    ):
+    :math:`f(x)=\beta(1-e^{-\alpha x})/(1+e^{-\alpha x})`
+    , the default choice for MLP; the standard sigmoid with
+    :math:`\beta =1, \alpha =1`
+    is shown below:
+
+    .. image:: ../../pics/sigmoid_bipolar.png
+
+*
+    Gaussian function (
+    ``CvANN_MLP::GAUSSIAN``
+    ):
+    :math:`f(x)=\beta e^{-\alpha x^2}`
+    , not completely supported at the moment.
+
+In ML all the neurons have the same activation functions, with the same free parameters (
+:math:`\alpha, \beta`
+) that are specified by the user and are not altered by the training algorithms.
+
+So the whole trained network works as follows: it takes the feature vector on input, with the vector size equal to the size of the input layer; the values are passed as input to the first hidden layer, the outputs of the hidden layer are computed using the weights and the activation functions, and the results are passed further downstream until the output layer is computed.
+
+So, in order to compute the network one needs to know all the
+weights
+:math:`w^{n+1}_{i,j}`
+. The weights are computed by the training
+algorithm. The algorithm takes a training set: multiple input vectors
+with the corresponding output vectors, and iteratively adjusts the
+weights to try to make the network give the desired response on the
+provided input vectors.
+
+The larger the network size (the number of hidden layers and their sizes),
+the greater the potential network flexibility, and the error on the
+training set could be made arbitrarily small.
But at the same time the +learned network will also "learn" the noise present in the training set, +so the error on the test set usually starts increasing after the network +size reaches some limit. Besides, the larger networks are train much +longer than the smaller ones, so it is reasonable to preprocess the data +(using +:ref:`CalcPCA` +or similar technique) and train a smaller network +on only the essential features. + +Another feature of the MLP's is their inability to handle categorical +data as is, however there is a workaround. If a certain feature in the +input or output (i.e. in the case of +``n`` +-class classifier for +:math:`n>2` +) layer is categorical and can take +:math:`M>2` +different values, it makes sense to represent it as binary tuple of +``M`` +elements, where +``i`` +-th element is 1 if and only if the +feature is equal to the +``i`` +-th value out of +``M`` +possible. It +will increase the size of the input/output layer, but will speedup the +training algorithm convergence and at the same time enable "fuzzy" values +of such variables, i.e. a tuple of probabilities instead of a fixed value. + +ML implements 2 algorithms for training MLP's. The first is the classical +random sequential back-propagation algorithm +and the second (default one) is batch RPROP algorithm. + +References: + + + + +* + http://en.wikipedia.org/wiki/Backpropagation + . Wikipedia article about the back-propagation algorithm. + + +* + Y. LeCun, L. Bottou, G.B. Orr and K.-R. Muller, "Efficient backprop", in Neural Networks---Tricks of the Trade, Springer Lecture Notes in Computer Sciences 1524, pp.5-50, 1998. + + +* + M. Riedmiller and H. Braun, "A Direct Adaptive Method for Faster Backpropagation Learning: The RPROP Algorithm", Proc. ICNN, San Francisco (1993). + + + +.. index:: CvANN_MLP_TrainParams + +.. _CvANN_MLP_TrainParams: + +CvANN_MLP_TrainParams +--------------------- + +`id=0.637270235159 Comments from the Wiki `__ + +.. ctype:: CvANN_MLP_TrainParams + + + +Parameters of the MLP training algorithm. + + + + +:: + + + + struct CvANN_MLP_TrainParams + { + CvANN_MLP_TrainParams(); + CvANN_MLP_TrainParams( CvTermCriteria term_crit, int train_method, + double param1, double param2=0 ); + ~CvANN_MLP_TrainParams(); + + enum { BACKPROP=0, RPROP=1 }; + + CvTermCriteria term_crit; + int train_method; + + // backpropagation parameters + double bp_dw_scale, bp_moment_scale; + + // rprop parameters + double rp_dw0, rp_dw_plus, rp_dw_minus, rp_dw_min, rp_dw_max; + }; + + +.. + +The structure has default constructor that initializes parameters for +``RPROP`` +algorithm. There is also more advanced constructor to customize the parameters and/or choose backpropagation algorithm. Finally, the individual parameters can be adjusted after the structure is created. + + + +.. index:: CvANN_MLP + +.. _CvANN_MLP: + +CvANN_MLP +--------- + +`id=0.404391979594 Comments from the Wiki `__ + +.. ctype:: CvANN_MLP + + + +MLP model. 
+ + + + +:: + + + + class CvANN_MLP : public CvStatModel + { + public: + CvANN_MLP(); + CvANN_MLP( const CvMat* _layer_sizes, + int _activ_func=SIGMOID_SYM, + double _f_param1=0, double _f_param2=0 ); + + virtual ~CvANN_MLP(); + + virtual void create( const CvMat* _layer_sizes, + int _activ_func=SIGMOID_SYM, + double _f_param1=0, double _f_param2=0 ); + + virtual int train( const CvMat* _inputs, const CvMat* _outputs, + const CvMat* _sample_weights, + const CvMat* _sample_idx=0, + CvANN_MLP_TrainParams _params = CvANN_MLP_TrainParams(), + int flags=0 ); + virtual float predict( const CvMat* _inputs, + CvMat* _outputs ) const; + + virtual void clear(); + + // possible activation functions + enum { IDENTITY = 0, SIGMOID_SYM = 1, GAUSSIAN = 2 }; + + // available training flags + enum { UPDATE_WEIGHTS = 1, NO_INPUT_SCALE = 2, NO_OUTPUT_SCALE = 4 }; + + virtual void read( CvFileStorage* fs, CvFileNode* node ); + virtual void write( CvFileStorage* storage, const char* name ); + + int get_layer_count() { return layer_sizes ? layer_sizes->cols : 0; } + const CvMat* get_layer_sizes() { return layer_sizes; } + + protected: + + virtual bool prepare_to_train( const CvMat* _inputs, const CvMat* _outputs, + const CvMat* _sample_weights, const CvMat* _sample_idx, + CvANN_MLP_TrainParams _params, + CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags ); + + // sequential random backpropagation + virtual int train_backprop( CvVectors _ivecs, CvVectors _ovecs, + const double* _sw ); + + // RPROP algorithm + virtual int train_rprop( CvVectors _ivecs, CvVectors _ovecs, + const double* _sw ); + + virtual void calc_activ_func( CvMat* xf, const double* bias ) const; + virtual void calc_activ_func_deriv( CvMat* xf, CvMat* deriv, + const double* bias ) const; + virtual void set_activ_func( int _activ_func=SIGMOID_SYM, + double _f_param1=0, double _f_param2=0 ); + virtual void init_weights(); + virtual void scale_input( const CvMat* _src, CvMat* _dst ) const; + virtual void scale_output( const CvMat* _src, CvMat* _dst ) const; + virtual void calc_input_scale( const CvVectors* vecs, int flags ); + virtual void calc_output_scale( const CvVectors* vecs, int flags ); + + virtual void write_params( CvFileStorage* fs ); + virtual void read_params( CvFileStorage* fs, CvFileNode* node ); + + CvMat* layer_sizes; + CvMat* wbuf; + CvMat* sample_weights; + double** weights; + double f_param1, f_param2; + double min_val, max_val, min_val1, max_val1; + int activ_func; + int max_count, max_buf_sz; + CvANN_MLP_TrainParams params; + CvRNG rng; + }; + + +.. + +Unlike many other models in ML that are constructed and trained at once, in the MLP model these steps are separated. First, a network with the specified topology is created using the non-default constructor or the method +``create`` +. All the weights are set to zeros. Then the network is trained using the set of input and output vectors. The training procedure can be repeated more than once, i.e. the weights can be adjusted based on the new training data. + + + +.. index:: CvANN_MLP::create + +.. _CvANN_MLP::create: + +CvANN_MLP::create +----------------- + +`id=0.505267168137 Comments from the Wiki `__ + + + + +.. cfunction:: void CvANN_MLP::create( const CvMat* _layer_sizes, int _activ_func=SIGMOID_SYM, double _f_param1=0, double _f_param2=0 ) + + Constructs the MLP with the specified topology + + + + + + + :param _layer_sizes: The integer vector specifies the number of neurons in each layer including the input and output layers. 
+ + + :param _activ_func: Specifies the activation function for each neuron; one of ``CvANN_MLP::IDENTITY`` , ``CvANN_MLP::SIGMOID_SYM`` and ``CvANN_MLP::GAUSSIAN`` . + + + :param _f_param1,_f_param2: Free parameters of the activation function, :math:`\alpha` and :math:`\beta` , respectively. See the formulas in the introduction section. + + + +The method creates a MLP network with the specified topology and assigns the same activation function to all the neurons. + + +.. index:: CvANN_MLP::train + +.. _CvANN_MLP::train: + +CvANN_MLP::train +---------------- + +`id=0.561890021588 Comments from the Wiki `__ + + + + +.. cfunction:: int CvANN_MLP::train( const CvMat* _inputs, const CvMat* _outputs, const CvMat* _sample_weights, const CvMat* _sample_idx=0, CvANN_MLP_TrainParams _params = CvANN_MLP_TrainParams(), int flags=0 ) + + Trains/updates MLP. + + + + + + + :param _inputs: A floating-point matrix of input vectors, one vector per row. + + + :param _outputs: A floating-point matrix of the corresponding output vectors, one vector per row. + + + :param _sample_weights: (RPROP only) The optional floating-point vector of weights for each sample. Some samples may be more important than others for training, and the user may want to raise the weight of certain classes to find the right balance between hit-rate and false-alarm rate etc. + + + :param _sample_idx: The optional integer vector indicating the samples (i.e. rows of ``_inputs`` and ``_outputs`` ) that are taken into account. + + + :param _params: The training params. See ``CvANN_MLP_TrainParams`` description. + + + :param _flags: The various parameters to control the training algorithm. May be a combination of the following: + + * **UPDATE_WEIGHTS = 1** algorithm updates the network weights, rather than computes them from scratch (in the latter case the weights are initialized using *Nguyen-Widrow* algorithm). + + * **NO_INPUT_SCALE** algorithm does not normalize the input vectors. If this flag is not set, the training algorithm normalizes each input feature independently, shifting its mean value to 0 and making the standard deviation =1. If the network is assumed to be updated frequently, the new training data could be much different from original one. In this case user should take care of proper normalization. + + * **NO_OUTPUT_SCALE** algorithm does not normalize the output vectors. If the flag is not set, the training algorithm normalizes each output features independently, by transforming it to the certain range depending on the activation function used. + + + + + +This method applies the specified training algorithm to compute/adjust the network weights. It returns the number of done iterations. + diff --git a/modules/ml/doc/normal_bayes_classifier.rst b/modules/ml/doc/normal_bayes_classifier.rst new file mode 100644 index 000000000..9dd57e387 --- /dev/null +++ b/modules/ml/doc/normal_bayes_classifier.rst @@ -0,0 +1,125 @@ +Normal Bayes Classifier +======================= + +.. highlight:: cpp + + +This is a simple classification model assuming that feature vectors from each class are normally distributed (though, not necessarily independently distributed), so the whole data distribution function is assumed to be a Gaussian mixture, one component per class. Using the training data the algorithm estimates mean vectors and covariance matrices for every class, and then it uses them for prediction. + +**[Fukunaga90] K. Fukunaga. Introduction to Statistical Pattern Recognition. second ed., New York: Academic Press, 1990.** + +.. 
index:: CvNormalBayesClassifier + +.. _CvNormalBayesClassifier: + +CvNormalBayesClassifier +----------------------- + +`id=0.110421013491 Comments from the Wiki `__ + +.. ctype:: CvNormalBayesClassifier + + + +Bayes classifier for normally distributed data. + + + + +:: + + + + class CvNormalBayesClassifier : public CvStatModel + { + public: + CvNormalBayesClassifier(); + virtual ~CvNormalBayesClassifier(); + + CvNormalBayesClassifier( const CvMat* _train_data, const CvMat* _responses, + const CvMat* _var_idx=0, const CvMat* _sample_idx=0 ); + + virtual bool train( const CvMat* _train_data, const CvMat* _responses, + const CvMat* _var_idx = 0, const CvMat* _sample_idx=0, bool update=false ); + + virtual float predict( const CvMat* _samples, CvMat* results=0 ) const; + virtual void clear(); + + virtual void save( const char* filename, const char* name=0 ); + virtual void load( const char* filename, const char* name=0 ); + + virtual void write( CvFileStorage* storage, const char* name ); + virtual void read( CvFileStorage* storage, CvFileNode* node ); + protected: + ... + }; + + +.. + + +.. index:: CvNormalBayesClassifier::train + +.. _CvNormalBayesClassifier::train: + +CvNormalBayesClassifier::train +------------------------------ + +`id=0.746566750452 Comments from the Wiki `__ + + + + +.. cfunction:: bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx =0, const CvMat* _sample_idx=0, bool update=false ) + + Trains the model. + + + +The method trains the Normal Bayes classifier. It follows the conventions of the generic +``train`` +"method" with the following limitations: only CV +_ +ROW +_ +SAMPLE data layout is supported; the input variables are all ordered; the output variable is categorical (i.e. elements of +``_responses`` +must be integer numbers, though the vector may have +``CV_32FC1`` +type), and missing measurements are not supported. + +In addition, there is an +``update`` +flag that identifies whether the model should be trained from scratch ( +``update=false`` +) or should be updated using the new training data ( +``update=true`` +). + + +.. index:: CvNormalBayesClassifier::predict + +.. _CvNormalBayesClassifier::predict: + +CvNormalBayesClassifier::predict +-------------------------------- + +`id=0.821415185096 Comments from the Wiki `__ + + + + +.. cfunction:: float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results=0 ) const + + Predicts the response for sample(s) + + + +The method +``predict`` +estimates the most probable classes for the input vectors. The input vectors (one or more) are stored as rows of the matrix +``samples`` +. In the case of multiple input vectors, there should be one output vector +``results`` +. The predicted class for a single input vector is returned by the method. + diff --git a/modules/ml/doc/random_trees.rst b/modules/ml/doc/random_trees.rst new file mode 100644 index 000000000..34dbc52ee --- /dev/null +++ b/modules/ml/doc/random_trees.rst @@ -0,0 +1,415 @@ +Random Trees +============ + +.. highlight:: cpp + + +Random trees have been introduced by Leo Breiman and Adele Cutler: +http://www.stat.berkeley.edu/users/breiman/RandomForests/ +. The algorithm can deal with both classification and regression problems. Random trees is a collection (ensemble) of tree predictors that is called +**forest** +further in this section (the term has been also introduced by L. Breiman). 
The classification works as follows: the random trees classifier takes the input feature vector, classifies it with every tree in the forest, and outputs the class label that received the majority of "votes". In the case of regression, the classifier response is the average of the responses over all the trees in the forest.
+
+All the trees are trained with the same parameters, but on different training sets, which are generated from the original training set using the bootstrap procedure: for each training set we randomly select the same number of vectors as in the original set (
+``=N``
+). The vectors are chosen with replacement. That is, some vectors will occur more than once and some will be absent. At each node of each trained tree, not all the variables are used to find the best split, but rather a random subset of them. For each node a new subset is generated; however, its size is fixed for all the nodes and all the trees. It is a training parameter, set to
+:math:`\sqrt{number\_of\_variables}`
+by default. None of the trees that are built are pruned.
+
+In random trees there is no need for any accuracy estimation procedures, such as cross-validation or bootstrap, or a separate test set to get an estimate of the training error. The error is estimated internally during the training. When the training set for the current tree is drawn by sampling with replacement, some vectors are left out (so-called
+*oob (out-of-bag) data*
+). The size of the oob data is about
+``N/3``
+. The classification error is estimated using this oob data as follows:
+
+*
+    Get a prediction for each vector that is oob relative to the i-th tree, using that very tree.
+
+*
+    After all the trees have been trained, for each vector that has ever been oob, find the class-"winner" for it (i.e. the class that got the majority of votes in the trees where the vector was oob) and compare it to the ground-truth response.
+
+*
+    Then the classification error estimate is computed as the ratio of the number of misclassified oob vectors to all the vectors in the original data. In the case of regression, the oob error is computed as the sum of squared differences between the oob predictions and the true responses, divided by the total number of vectors.
+
+**References:**
+
+*
+    Machine Learning, Wald I, July 2002.
+
+    http://stat-www.berkeley.edu/users/breiman/wald2002-1.pdf
+
+*
+    Looking Inside the Black Box, Wald II, July 2002.
+
+    http://stat-www.berkeley.edu/users/breiman/wald2002-2.pdf
+
+*
+    Software for the Masses, Wald III, July 2002.
+
+    http://stat-www.berkeley.edu/users/breiman/wald2002-3.pdf
+
+*
+    And other articles from the web site
+    http://www.stat.berkeley.edu/users/breiman/RandomForests/cc_home.htm
+    .
+
+.. index:: CvRTParams
+
+.. _CvRTParams:
+
+CvRTParams
+----------
+
+.. ctype:: CvRTParams
+
+Training Parameters of Random Trees.
+
+::
+
+    struct CvRTParams : public CvDTreeParams
+    {
+        bool calc_var_importance;
+        int nactive_vars;
+        CvTermCriteria term_crit;
+
+        CvRTParams() : CvDTreeParams( 5, 10, 0, false, 10, 0, false, false, 0 ),
+            calc_var_importance(false), nactive_vars(0)
+        {
+            term_crit = cvTermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 50, 0.1 );
+        }
+
+        CvRTParams( int _max_depth, int _min_sample_count,
+                    float _regression_accuracy, bool _use_surrogates,
+                    int _max_categories, const float* _priors,
+                    bool _calc_var_importance,
+                    int _nactive_vars, int max_tree_count,
+                    float forest_accuracy, int termcrit_type );
+    };
+
+..
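+
+For instance, a forest of at most 100 trees with variable importance computation enabled could be configured as follows (a sketch; the particular values are illustrative, not recommendations):
+
+::
+
+    // max_depth=10, min_sample_count=10, regression_accuracy=0,
+    // use_surrogates=false, max_categories=15, priors=0,
+    // calc_var_importance=true, nactive_vars=4; training stops
+    // after 100 trees or when the oob error falls below 1%
+    CvRTParams params( 10, 10, 0, false, 15, 0, true, 4,
+                       100, 0.01f, CV_TERMCRIT_ITER | CV_TERMCRIT_EPS );
+
+..
+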
+The set of training parameters for the forest is a superset of the training parameters for a single tree. However, random trees do not need all the functionality/features of decision trees; most noticeably, the trees are not pruned, so the cross-validation parameters are not used.
+
+.. index:: CvRTrees
+
+.. _CvRTrees:
+
+CvRTrees
+--------
+
+.. ctype:: CvRTrees
+
+Random trees.
+
+::
+
+    class CvRTrees : public CvStatModel
+    {
+    public:
+        CvRTrees();
+        virtual ~CvRTrees();
+        virtual bool train( const CvMat* _train_data, int _tflag,
+                            const CvMat* _responses, const CvMat* _var_idx=0,
+                            const CvMat* _sample_idx=0, const CvMat* _var_type=0,
+                            const CvMat* _missing_mask=0,
+                            CvRTParams params=CvRTParams() );
+        virtual float predict( const CvMat* sample, const CvMat* missing = 0 )
+            const;
+        virtual void clear();
+
+        virtual const CvMat* get_var_importance();
+        virtual float get_proximity( const CvMat* sample_1, const CvMat* sample_2 )
+            const;
+
+        virtual void read( CvFileStorage* fs, CvFileNode* node );
+        virtual void write( CvFileStorage* fs, const char* name );
+
+        CvMat* get_active_var_mask();
+        CvRNG* get_rng();
+
+        int get_tree_count() const;
+        CvForestTree* get_tree(int i) const;
+
+    protected:
+
+        bool grow_forest( const CvTermCriteria term_crit );
+
+        // array of the trees of the forest
+        CvForestTree** trees;
+        CvDTreeTrainData* data;
+        int ntrees;
+        int nclasses;
+        ...
+    };
+
+..
+
+.. index:: CvRTrees::train
+
+.. _CvRTrees::train:
+
+CvRTrees::train
+---------------
+
+.. cfunction:: bool CvRTrees::train( const CvMat* train_data, int tflag, const CvMat* responses, const CvMat* comp_idx=0, const CvMat* sample_idx=0, const CvMat* var_type=0, const CvMat* missing_mask=0, CvRTParams params=CvRTParams() )
+
+    Trains the Random Trees model.
+
+The method ``CvRTrees::train`` is very similar to the first form of ``CvDTree::train``() and follows the generic ``CvStatModel::train`` conventions. All the training parameters specific to the algorithm are passed as a :ref:`CvRTParams` instance. The estimate of the training error (``oob-error``) is stored in the protected class member ``oob_error``.
+
+.. index:: CvRTrees::predict
+
+.. _CvRTrees::predict:
+
+CvRTrees::predict
+-----------------
+
+.. cfunction:: double CvRTrees::predict( const CvMat* sample, const CvMat* missing=0 ) const
+
+    Predicts the output for the input sample.
+
+The input parameters of the prediction method are the same as in ``CvDTree::predict``, but the return value type is different. This method returns the cumulative result from all the trees in the forest (the class that receives the majority of votes, or the mean of the regression function estimates).
+
+.. index:: CvRTrees::get_var_importance
+
+.. _CvRTrees::get_var_importance:
+
+CvRTrees::get_var_importance
+----------------------------
+
+.. cfunction:: const CvMat* CvRTrees::get_var_importance() const
+
+    Retrieves the variable importance array.
+
+The method returns the variable importance vector, computed at the training stage when ``CvRTParams::calc_var_importance`` is set. If the training flag is not set, the ``NULL`` pointer is returned. This is unlike decision trees, where variable importance can be computed anytime after the training.
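+As a short illustration of the interface described above, here is a sketch that trains a classification forest and prints the relative importance of each variable. It assumes that ``train_data`` (``CV_32FC1``, one sample per row), ``responses`` and ``var_type`` have already been prepared as described in :ref:`CvStatModel`:
+
+::
+
+    CvRTParams params;                  // default tree parameters
+    params.calc_var_importance = true;  // otherwise get_var_importance() returns 0
+
+    CvRTrees forest;
+    forest.train( train_data, CV_ROW_SAMPLE, responses,
+                  0, 0, var_type, 0, params );
+
+    const CvMat* importance = forest.get_var_importance();
+    if( importance )
+        for( int i = 0; i < importance->cols; i++ )
+            printf( "variable %d importance: %.1f%%\n",
+                    i, 100.f*importance->data.fl[i] );
+
+..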
+.. index:: CvRTrees::get_proximity
+
+.. _CvRTrees::get_proximity:
+
+CvRTrees::get_proximity
+-----------------------
+
+.. cfunction:: float CvRTrees::get_proximity( const CvMat* sample_1, const CvMat* sample_2 ) const
+
+    Retrieves the proximity measure between two training samples.
+
+The method returns the proximity measure between any two samples, that is, the ratio of those trees in the ensemble in which the two samples fall into the same leaf node to the total number of trees.
+
+Example: Prediction of mushroom goodness using the random trees classifier
+
+::
+
+    #include <stdio.h>
+    #include <math.h>
+    #include <float.h>
+    #include "ml.h"
+
+    int main( void )
+    {
+        CvStatModel*    cls = NULL;
+        CvFileStorage*  storage = cvOpenFileStorage( "Mushroom.xml",
+                                                     NULL, CV_STORAGE_READ );
+        CvMat*          data = (CvMat*)cvReadByName(storage, NULL, "sample", 0 );
+        CvMat           train_data, test_data;
+        CvMat           response;
+        CvMat*          missed = NULL;
+        CvMat*          comp_idx = NULL;
+        CvMat*          sample_idx = NULL;
+        CvMat*          type_mask = NULL;
+        int             resp_col = 0;
+        int             i, j;
+        CvRTreesParams  params;
+        CvTreeClassifierTrainParams cart_params;
+        const int       ntrain_samples = 1000;
+        const int       ntest_samples = 1000;
+        const int       nvars = 23;
+
+        if( data == NULL || data->cols != nvars )
+        {
+            puts("Error in source data");
+            return -1;
+        }
+
+        cvGetSubRect( data, &train_data, cvRect(0, 0, nvars, ntrain_samples) );
+        cvGetSubRect( data, &test_data, cvRect(0, ntrain_samples, nvars,
+                      ntrain_samples + ntest_samples) );
+
+        resp_col = 0;
+        cvGetCol( &train_data, &response, resp_col);
+
+        /* create missed variable matrix */
+        missed = cvCreateMat(train_data.rows, train_data.cols, CV_8UC1);
+        for( i = 0; i < train_data.rows; i++ )
+            for( j = 0; j < train_data.cols; j++ )
+                CV_MAT_ELEM(*missed,uchar,i,j)
+                    = (uchar)(CV_MAT_ELEM(train_data,float,i,j) < 0);
+
+        /* create comp_idx vector: all the columns except the response one */
+        comp_idx = cvCreateMat(1, train_data.cols-1, CV_32SC1);
+        for( i = 0; i < train_data.cols; i++ )
+        {
+            if( i < resp_col ) CV_MAT_ELEM(*comp_idx,int,0,i) = i;
+            if( i > resp_col ) CV_MAT_ELEM(*comp_idx,int,0,i-1) = i;
+        }
+
+        /* create sample_idx vector */
+        sample_idx = cvCreateMat(1, train_data.rows, CV_32SC1);
+        for( j = i = 0; i < train_data.rows; i++ )
+        {
+            if(CV_MAT_ELEM(response,float,i,0) < 0) continue;
+            CV_MAT_ELEM(*sample_idx,int,0,j) = i;
+            j++;
+        }
+        sample_idx->cols = j;
+
+        /* create type mask: all the variables are categorical */
+        type_mask = cvCreateMat(1, train_data.cols+1, CV_8UC1);
+        cvSet( type_mask, cvRealScalar(CV_VAR_CATEGORICAL), 0);
+
+        // initialize training parameters
+        cvSetDefaultParamTreeClassifier((CvStatModelParams*)&cart_params);
+        cart_params.wrong_feature_as_unknown = 1;
+        params.tree_params = &cart_params;
+        params.term_crit.max_iter = 50;
+        params.term_crit.epsilon = 0.1;
+        params.term_crit.type = CV_TERMCRIT_ITER|CV_TERMCRIT_EPS;
+
+        puts("Random forest results");
+        cls = cvCreateRTreesClassifier( &train_data,
+                                        CV_ROW_SAMPLE,
+                                        &response,
+                                        (CvStatModelParams*)&params,
+                                        comp_idx,
+                                        sample_idx,
+                                        type_mask,
+                                        missed );
+        if( cls )
+        {
+            CvMat sample = cvMat( 1, nvars, CV_32FC1, test_data.data.fl );
+            CvMat test_resp;
+            int wrong = 0, total = 0;
+            cvGetCol( &test_data, &test_resp, resp_col);
+            for( i = 0; i < ntest_samples; i++, sample.data.fl += nvars )
+            {
+                if( CV_MAT_ELEM(test_resp,float,i,0) >= 0 )
+                {
+                    float resp = cls->predict( cls, &sample, NULL );
+                    wrong += (fabs(resp-response.data.fl[i]) > 1e-3 ) ? 1 : 0;
+                    total++;
+                }
+            }
+            printf( "Test set error = %.2f\n", wrong*100.f/(float)total );
+        }
+        else
+            puts("Error in forest creation");
+
+        cvReleaseMat(&missed);
+        cvReleaseMat(&sample_idx);
+        cvReleaseMat(&comp_idx);
+        cvReleaseMat(&type_mask);
+        cvReleaseMat(&data);
+        cvReleaseStatModel(&cls);
+        cvReleaseFileStorage(&storage);
+        return 0;
+    }
+
+..
diff --git a/modules/ml/doc/statistical_models.rst b/modules/ml/doc/statistical_models.rst
new file mode 100644
index 000000000..7e6941083
--- /dev/null
+++ b/modules/ml/doc/statistical_models.rst
@@ -0,0 +1,426 @@
+Statistical Models
+==================
+
+.. highlight:: cpp
+
+.. index:: CvStatModel
+
+.. _CvStatModel:
+
+CvStatModel
+-----------
+
+.. ctype:: CvStatModel
+
+Base class for the statistical models in ML.
+
+::
+
+    class CvStatModel
+    {
+    public:
+        /* CvStatModel(); */
+        /* CvStatModel( const CvMat* train_data ... ); */
+
+        virtual ~CvStatModel();
+
+        virtual void clear()=0;
+
+        /* virtual bool train( const CvMat* train_data, [int tflag,] ..., const
+            CvMat* responses, ...,
+            [const CvMat* var_idx,] ..., [const CvMat* sample_idx,] ...
+            [const CvMat* var_type,] ..., [const CvMat* missing_mask,]
+            ... )=0;
+         */
+
+        /* virtual float predict( const CvMat* sample ... ) const=0; */
+
+        virtual void save( const char* filename, const char* name=0 )=0;
+        virtual void load( const char* filename, const char* name=0 )=0;
+
+        virtual void write( CvFileStorage* storage, const char* name )=0;
+        virtual void read( CvFileStorage* storage, CvFileNode* node )=0;
+    };
+
+..
+
+In this declaration some methods are commented out. These are the methods for which there is no unified API (with the exception of the default constructor); however, there are many similarities in their syntax and semantics, which are briefly described below in this section as if they were part of the base class.
+
+.. index:: CvStatModel::CvStatModel
+
+.. _CvStatModel::CvStatModel:
+
+CvStatModel::CvStatModel
+------------------------
+
+.. cfunction:: CvStatModel::CvStatModel()
+
+    Default constructor.
+
+Each statistical model class in ML has a default constructor without parameters. This constructor is useful for 2-stage model construction, when the default constructor is followed by ``train()`` or ``load()``.
+
+.. index:: CvStatModel::CvStatModel(...)
+
+.. _CvStatModel::CvStatModel(...):
+
+CvStatModel::CvStatModel(...)
+-----------------------------
+
+.. cfunction:: CvStatModel::CvStatModel( const CvMat* train_data ... )
+
+    Training constructor.
+
+Most ML classes provide a single-step "construct and train" constructor. This constructor is equivalent to the default constructor, followed by the ``train()`` method with the parameters that are passed to the constructor.
+
+.. index:: CvStatModel::~CvStatModel
+
+.. _CvStatModel::~CvStatModel:
+
+CvStatModel::~CvStatModel
+-------------------------
+
+.. cfunction:: CvStatModel::~CvStatModel()
+
+    Virtual destructor.
+
+The destructor of the base class is declared as virtual, so it is safe to write the following code:
+
+::
+
+    CvStatModel* model;
+    if( use_svm )
+        model = new CvSVM(... /* SVM params */);
+    else
+        model = new CvDTree(... /* Decision tree params */);
+    ...
+    delete model;
+
+..
+ +Normally, the destructor of each derived class does nothing, but in this instance it calls the overridden method +``clear()`` +that deallocates all the memory. + + + +.. index:: CvStatModel::clear + +.. _CvStatModel::clear: + +CvStatModel::clear +------------------ + +`id=0.0232469661173 Comments from the Wiki `__ + + + + +.. cfunction:: void CvStatModel::clear() + + Deallocates memory and resets the model state. + + + +The method +``clear`` +does the same job as the destructor; it deallocates all the memory occupied by the class members. But the object itself is not destructed, and can be reused further. This method is called from the destructor, from the +``train`` +methods of the derived classes, from the methods +``load()`` +, +``read()`` +or even explicitly by the user. + + + +.. index:: CvStatModel::save + +.. _CvStatModel::save: + +CvStatModel::save +----------------- + +`id=0.852967404887 Comments from the Wiki `__ + + + + +.. cfunction:: void CvStatModel::save( const char* filename, const char* name=0 ) + + Saves the model to a file. + + + +The method +``save`` +stores the complete model state to the specified XML or YAML file with the specified name or default name (that depends on the particular class). +``Data persistence`` +functionality from CxCore is used. + + + +.. index:: CvStatModel::load + +.. _CvStatModel::load: + +CvStatModel::load +----------------- + +`id=0.957875843108 Comments from the Wiki `__ + + + + +.. cfunction:: void CvStatModel::load( const char* filename, const char* name=0 ) + + Loads the model from a file. + + + +The method +``load`` +loads the complete model state with the specified name (or default model-dependent name) from the specified XML or YAML file. The previous model state is cleared by +``clear()`` +. + +Note that the method is virtual, so any model can be loaded using this virtual method. However, unlike the C types of OpenCV that can be loaded using the generic +\ +cross{cvLoad}, here the model type must be known, because an empty model must be constructed beforehand. This limitation will be removed in the later ML versions. + + + +.. index:: CvStatModel::write + +.. _CvStatModel::write: + +CvStatModel::write +------------------ + +`id=0.167242991674 Comments from the Wiki `__ + + + + +.. cfunction:: void CvStatModel::write( CvFileStorage* storage, const char* name ) + + Writes the model to file storage. + + + +The method +``write`` +stores the complete model state to the file storage with the specified name or default name (that depends on the particular class). The method is called by +``save()`` +. + + + +.. index:: CvStatModel::read + +.. _CvStatModel::read: + +CvStatModel::read +----------------- + +`id=0.959831015705 Comments from the Wiki `__ + + + + +.. cfunction:: void CvStatMode::read( CvFileStorage* storage, CvFileNode* node ) + + Reads the model from file storage. + + + +The method +``read`` +restores the complete model state from the specified node of the file storage. The node must be located by the user using the function +:ref:`GetFileNodeByName` +. + +The previous model state is cleared by +``clear()`` +. + + + +.. index:: CvStatModel::train + +.. _CvStatModel::train: + +CvStatModel::train +------------------ + +`id=0.616920786727 Comments from the Wiki `__ + + + + +.. cfunction:: bool CvStatMode::train( const CvMat* train_data, [int tflag,] ..., const CvMat* responses, ..., [const CvMat* var_idx,] ..., [const CvMat* sample_idx,] ... [const CvMat* var_type,] ..., [const CvMat* missing_mask,] ... ) + + Trains the model. 
+ + + +The method trains the statistical model using a set of input feature vectors and the corresponding output values (responses). Both input and output vectors/values are passed as matrices. By default the input feature vectors are stored as +``train_data`` +rows, i.e. all the components (features) of a training vector are stored continuously. However, some algorithms can handle the transposed representation, when all values of each particular feature (component/input variable) over the whole input set are stored continuously. If both layouts are supported, the method includes +``tflag`` +parameter that specifies the orientation: + + + + +* + ``tflag=CV_ROW_SAMPLE`` + means that the feature vectors are stored as rows, + + + +* + ``tflag=CV_COL_SAMPLE`` + means that the feature vectors are stored as columns. + + +The +``train_data`` +must have a +``CV_32FC1`` +(32-bit floating-point, single-channel) format. Responses are usually stored in the 1d vector (a row or a column) of +``CV_32SC1`` +(only in the classification problem) or +``CV_32FC1`` +format, one value per input vector (although some algorithms, like various flavors of neural nets, take vector responses). + +For classification problems the responses are discrete class labels; for regression problems the responses are values of the function to be approximated. Some algorithms can deal only with classification problems, some - only with regression problems, and some can deal with both problems. In the latter case the type of output variable is either passed as separate parameter, or as a last element of +``var_type`` +vector: + + + + +* + ``CV_VAR_CATEGORICAL`` + means that the output values are discrete class labels, + + + +* + ``CV_VAR_ORDERED(=CV_VAR_NUMERICAL)`` + means that the output values are ordered, i.e. 2 different values can be compared as numbers, and this is a regression problem + + +The types of input variables can be also specified using +``var_type`` +. Most algorithms can handle only ordered input variables. + +Many models in the ML may be trained on a selected feature subset, and/or on a selected sample subset of the training set. To make it easier for the user, the method +``train`` +usually includes +``var_idx`` +and +``sample_idx`` +parameters. The former identifies variables (features) of interest, and the latter identifies samples of interest. Both vectors are either integer ( +``CV_32SC1`` +) vectors, i.e. lists of 0-based indices, or 8-bit ( +``CV_8UC1`` +) masks of active variables/samples. The user may pass +``NULL`` +pointers instead of either of the arguments, meaning that all of the variables/samples are used for training. + +Additionally some algorithms can handle missing measurements, that is when certain features of certain training samples have unknown values (for example, they forgot to measure a temperature of patient A on Monday). The parameter +``missing_mask`` +, an 8-bit matrix the same size as +``train_data`` +, is used to mark the missed values (non-zero elements of the mask). + +Usually, the previous model state is cleared by +``clear()`` +before running the training procedure. However, some algorithms may optionally update the model state with the new training data, instead of resetting it. + + + +.. index:: CvStatModel::predict + +.. _CvStatModel::predict: + +CvStatModel::predict +-------------------- + +`id=0.404351209628 Comments from the Wiki `__ + + + + +.. cfunction:: float CvStatMode::predict( const CvMat* sample[, ] ) const + + Predicts the response for the sample. 
+The method is used to predict the response for a new sample. In the case of classification the method returns the class label; in the case of regression it returns the output function value. The input sample must have as many components as the ``train_data`` passed to ``train`` contained. If the ``var_idx`` parameter was passed to ``train``, it is remembered and used to extract only the necessary components from the input sample in the method ``predict``.
+
+The suffix "const" means that prediction does not affect the internal model state, so the method can be safely called from within different threads.
diff --git a/modules/ml/doc/support_vector_machines.rst b/modules/ml/doc/support_vector_machines.rst
new file mode 100644
index 000000000..9a59498d5
--- /dev/null
+++ b/modules/ml/doc/support_vector_machines.rst
@@ -0,0 +1,417 @@
+Support Vector Machines
+=======================
+
+.. highlight:: cpp
+
+Support vector machines (SVM) were originally a technique for building an optimal (in some sense) binary (2-class) classifier. Later the technique was extended to regression and clustering problems. SVM is a particular case of kernel-based methods: it maps feature vectors into a higher-dimensional space using some kernel function, and then it builds an optimal linear discriminating function in this space (or an optimal hyper-plane that fits the training data). In the case of SVM the kernel is not defined explicitly. Instead, a distance between any 2 points in the hyper-space needs to be defined.
+
+The solution is optimal in the sense that the margin between the separating hyper-plane and the nearest feature vectors from both classes (in the case of a 2-class classifier) is maximal. The feature vectors that are the closest to the hyper-plane are called "support vectors", meaning that the position of the other vectors does not affect the hyper-plane (the decision function).
+
+There are a lot of good references on SVM. Here are only a few to start with.
+
+*
+    **[Burges98] C. Burges. "A tutorial on support vector machines for pattern recognition", Knowledge Discovery and Data Mining 2(2), 1998.**
+    (available online at http://citeseer.ist.psu.edu/burges98tutorial.html).
+
+*
+    **LIBSVM - A Library for Support Vector Machines. By Chih-Chung Chang and Chih-Jen Lin**
+    (http://www.csie.ntu.edu.tw/~cjlin/libsvm/)
+
+.. index:: CvSVM
+
+.. _CvSVM:
+
+CvSVM
+-----
+
+.. ctype:: CvSVM
+
+Support Vector Machines.
+ + + + +:: + + + + class CvSVM : public CvStatModel + { + public: + // SVM type + enum { C_SVC=100, NU_SVC=101, ONE_CLASS=102, EPS_SVR=103, NU_SVR=104 }; + + // SVM kernel type + enum { LINEAR=0, POLY=1, RBF=2, SIGMOID=3 }; + + // SVM params type + enum { C=0, GAMMA=1, P=2, NU=3, COEF=4, DEGREE=5 }; + + CvSVM(); + virtual ~CvSVM(); + + CvSVM( const CvMat* _train_data, const CvMat* _responses, + const CvMat* _var_idx=0, const CvMat* _sample_idx=0, + CvSVMParams _params=CvSVMParams() ); + + virtual bool train( const CvMat* _train_data, const CvMat* _responses, + const CvMat* _var_idx=0, const CvMat* _sample_idx=0, + CvSVMParams _params=CvSVMParams() ); + + virtual bool train_auto( const CvMat* _train_data, const CvMat* _responses, + const CvMat* _var_idx, const CvMat* _sample_idx, CvSVMParams _params, + int k_fold = 10, + CvParamGrid C_grid = get_default_grid(CvSVM::C), + CvParamGrid gamma_grid = get_default_grid(CvSVM::GAMMA), + CvParamGrid p_grid = get_default_grid(CvSVM::P), + CvParamGrid nu_grid = get_default_grid(CvSVM::NU), + CvParamGrid coef_grid = get_default_grid(CvSVM::COEF), + CvParamGrid degree_grid = get_default_grid(CvSVM::DEGREE) ); + + virtual float predict( const CvMat* _sample ) const; + virtual int get_support_vector_count() const; + virtual const float* get_support_vector(int i) const; + virtual CvSVMParams get_params() const { return params; }; + virtual void clear(); + + static CvParamGrid get_default_grid( int param_id ); + + virtual void save( const char* filename, const char* name=0 ); + virtual void load( const char* filename, const char* name=0 ); + + virtual void write( CvFileStorage* storage, const char* name ); + virtual void read( CvFileStorage* storage, CvFileNode* node ); + int get_var_count() const { return var_idx ? var_idx->cols : var_all; } + + protected: + ... + }; + + +.. + + +.. index:: CvSVMParams + +.. _CvSVMParams: + +CvSVMParams +----------- + +`id=0.577929883484 Comments from the Wiki `__ + +.. ctype:: CvSVMParams + + + +SVM training parameters. + + + + +:: + + + + struct CvSVMParams + { + CvSVMParams(); + CvSVMParams( int _svm_type, int _kernel_type, + double _degree, double _gamma, double _coef0, + double _C, double _nu, double _p, + CvMat* _class_weights, CvTermCriteria _term_crit ); + + int svm_type; + int kernel_type; + double degree; // for poly + double gamma; // for poly/rbf/sigmoid + double coef0; // for poly/sigmoid + + double C; // for CV_SVM_C_SVC, CV_SVM_EPS_SVR and CV_SVM_NU_SVR + double nu; // for CV_SVM_NU_SVC, CV_SVM_ONE_CLASS, and CV_SVM_NU_SVR + double p; // for CV_SVM_EPS_SVR + CvMat* class_weights; // for CV_SVM_C_SVC + CvTermCriteria term_crit; // termination criteria + }; + + + +.. + +The structure must be initialized and passed to the training method of +:ref:`CvSVM` +. + + + +.. index:: CvSVM::train + +.. _CvSVM::train: + +CvSVM::train +------------ + +`id=0.720656682385 Comments from the Wiki `__ + + + + +.. cfunction:: bool CvSVM::train( const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, CvSVMParams _params=CvSVMParams() ) + + Trains SVM. + + + +The method trains the SVM model. 
It follows the conventions of the generic ``train`` method with the following limitations: only the ``CV_ROW_SAMPLE`` data layout is supported; the input variables are all ordered; the output variables can be either categorical (``_params.svm_type=CvSVM::C_SVC`` or ``_params.svm_type=CvSVM::NU_SVC``), or ordered (``_params.svm_type=CvSVM::EPS_SVR`` or ``_params.svm_type=CvSVM::NU_SVR``), or not required at all (``_params.svm_type=CvSVM::ONE_CLASS``); missing measurements are not supported.
+
+All the other parameters are gathered in the :ref:`CvSVMParams` structure.
+
+.. index:: CvSVM::train_auto
+
+.. _CvSVM::train_auto:
+
+CvSVM::train_auto
+-----------------
+
+.. cfunction:: bool CvSVM::train_auto( const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx, const CvMat* _sample_idx, CvSVMParams params, int k_fold = 10, CvParamGrid C_grid = get_default_grid(CvSVM::C), CvParamGrid gamma_grid = get_default_grid(CvSVM::GAMMA), CvParamGrid p_grid = get_default_grid(CvSVM::P), CvParamGrid nu_grid = get_default_grid(CvSVM::NU), CvParamGrid coef_grid = get_default_grid(CvSVM::COEF), CvParamGrid degree_grid = get_default_grid(CvSVM::DEGREE) )
+
+    Trains SVM with optimal parameters.
+
+    :param k_fold: Cross-validation parameter. The training set is divided into ``k_fold`` subsets; one subset is used to test the model, the others form the train set. So, the SVM algorithm is executed ``k_fold`` times.
+
+The method trains the SVM model automatically by choosing the optimal parameters ``C``, ``gamma``, ``p``, ``nu``, ``coef0``, ``degree`` from :ref:`CvSVMParams`. "Optimal" means that the cross-validation estimate of the test set error is minimal. The parameters are iterated over a logarithmic grid; for example, the parameter ``gamma`` takes the values in the set (:math:`min`, :math:`min*step`, :math:`min*{step}^2`, ... :math:`min*{step}^n`), where :math:`min` is ``gamma_grid.min_val``, :math:`step` is ``gamma_grid.step``, and :math:`n` is the maximal index such that
+
+.. math::
+
+    \texttt{gamma\_grid.min\_val} * \texttt{gamma\_grid.step} ^n <  \texttt{gamma\_grid.max\_val}
+
+So ``step`` must always be greater than 1.
+
+If there is no need to optimize a certain parameter, the corresponding grid step should be set to any value less than or equal to 1. For example, to avoid optimization in ``gamma`` one should set ``gamma_grid.step = 0``; ``gamma_grid.min_val`` and ``gamma_grid.max_val`` may then be arbitrary numbers. In this case the value ``params.gamma`` will be taken for ``gamma``.
+
+Finally, if optimization of some parameter is required but there is no idea what the corresponding grid should be, one may call the function ``CvSVM::get_default_grid``. In order to generate a grid, say, for ``gamma``, call ``CvSVM::get_default_grid(CvSVM::GAMMA)``.
+
+This function works for the case of classification (``params.svm_type=CvSVM::C_SVC`` or ``params.svm_type=CvSVM::NU_SVC``) as well as for regression (``params.svm_type=CvSVM::EPS_SVR`` or ``params.svm_type=CvSVM::NU_SVR``). If ``params.svm_type=CvSVM::ONE_CLASS``, no optimization is made and the usual SVM with the parameters specified in ``params`` is executed.
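+For example, a minimal sketch of automatic parameter selection for an RBF-kernel classifier might look as follows (``train_data`` and ``responses`` are assumed to be prepared as described in :ref:`CvStatModel`; the grids for the parameters that the RBF kernel does not use are switched off):
+
+::
+
+    CvSVMParams params;
+    params.svm_type    = CvSVM::C_SVC;
+    params.kernel_type = CvSVM::RBF;
+    params.term_crit   = cvTermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,
+                                         1000, FLT_EPSILON );
+
+    CvParamGrid no_grid( 1, 1, 0 ); // step <= 1 disables optimization
+
+    CvSVM svm;
+    svm.train_auto( train_data, responses, 0, 0, params,
+                    10,                                   // 10-fold cross-validation
+                    CvSVM::get_default_grid(CvSVM::C),
+                    CvSVM::get_default_grid(CvSVM::GAMMA),
+                    no_grid, no_grid, no_grid, no_grid ); // p, nu, coef, degree
+
+    CvSVMParams best = svm.get_params(); // the C and gamma that were selected
+
+..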
+.. index:: CvSVM::get_default_grid
+
+.. _CvSVM::get_default_grid:
+
+CvSVM::get_default_grid
+-----------------------
+
+.. cfunction:: CvParamGrid CvSVM::get_default_grid( int param_id )
+
+    Generates a grid for the SVM parameters.
+
+    :param param_id: Must be one of the following:
+
+        * **CvSVM::C**
+
+        * **CvSVM::GAMMA**
+
+        * **CvSVM::P**
+
+        * **CvSVM::NU**
+
+        * **CvSVM::COEF**
+
+        * **CvSVM::DEGREE**
+
+        The grid will be generated for the parameter with this ID.
+
+The function generates a grid for the specified parameter of the SVM algorithm. The grid may be passed to the function ``CvSVM::train_auto``.
+
+.. index:: CvSVM::get_params
+
+.. _CvSVM::get_params:
+
+CvSVM::get_params
+-----------------
+
+.. cfunction:: CvSVMParams CvSVM::get_params() const
+
+    Returns the current SVM parameters.
+
+This function may be used to get the optimal parameters obtained automatically by ``CvSVM::train_auto``.
+
+.. index:: CvSVM::get_support_vector*
+
+.. _CvSVM::get_support_vector*:
+
+CvSVM::get_support_vector*
+--------------------------
+
+.. cfunction:: int CvSVM::get_support_vector_count() const
+
+.. cfunction:: const float* CvSVM::get_support_vector(int i) const
+
+    Retrieves the number of support vectors and a particular vector.
+
+The methods can be used to retrieve the set of support vectors.
diff --git a/modules/objdetect/doc/cascade_classification.rst b/modules/objdetect/doc/cascade_classification.rst
new file mode 100644
index 000000000..61228b87a
--- /dev/null
+++ b/modules/objdetect/doc/cascade_classification.rst
@@ -0,0 +1,564 @@
+Cascade Classification
+======================
+
+.. highlight:: cpp
+
+.. index:: FeatureEvaluator
+
+.. _FeatureEvaluator:
+
+FeatureEvaluator
+----------------
+
+.. ctype:: FeatureEvaluator
+
+Base class for computing feature values in cascade classifiers.
+
+::
+
+    class CV_EXPORTS FeatureEvaluator
+    {
+    public:
+        enum { HAAR = 0, LBP = 1 }; // supported feature types
+        virtual ~FeatureEvaluator(); // destructor
+        virtual bool read(const FileNode& node);
+        virtual Ptr<FeatureEvaluator> clone() const;
+        virtual int getFeatureType() const;
+
+        virtual bool setImage(const Mat& img, Size origWinSize);
+        virtual bool setWindow(Point p);
+
+        virtual double calcOrd(int featureIdx) const;
+        virtual int calcCat(int featureIdx) const;
+
+        static Ptr<FeatureEvaluator> create(int type);
+    };
+
+..
+
+.. index:: FeatureEvaluator::read
+
+cv::FeatureEvaluator::read
+--------------------------
+
+.. cfunction:: bool FeatureEvaluator::read(const FileNode\& node)
+
+    Reads parameters of the features from a FileStorage node.
+
+    :param node: File node from which the feature parameters are read.
+
+.. index:: FeatureEvaluator::clone
+
+cv::FeatureEvaluator::clone
+---------------------------
+
+.. cfunction:: Ptr<FeatureEvaluator> FeatureEvaluator::clone() const
+
+    Returns a full copy of the feature evaluator.
+
+.. index:: FeatureEvaluator::getFeatureType
+
+cv::FeatureEvaluator::getFeatureType
+------------------------------------
+
+.. cfunction:: int FeatureEvaluator::getFeatureType() const
+
+    Returns the feature type (HAAR or LBP for now).
+.. index:: FeatureEvaluator::setImage
+
+cv::FeatureEvaluator::setImage
+------------------------------
+
+.. cfunction:: bool FeatureEvaluator::setImage(const Mat\& img, Size origWinSize)
+
+    Sets the image in which to compute the features.
+
+    :param img: Matrix of type ``CV_8UC1`` containing the image in which to compute the features.
+
+    :param origWinSize: Size of training images.
+
+.. index:: FeatureEvaluator::setWindow
+
+cv::FeatureEvaluator::setWindow
+-------------------------------
+
+.. cfunction:: bool FeatureEvaluator::setWindow(Point p)
+
+    Sets the window in the current image in which the features will be computed (called by :func:`CascadeClassifier::runAt`).
+
+    :param p: The upper left point of the window in which the features will be computed. The size of the window is equal to the size of training images.
+
+.. index:: FeatureEvaluator::calcOrd
+
+cv::FeatureEvaluator::calcOrd
+-----------------------------
+
+.. cfunction:: double FeatureEvaluator::calcOrd(int featureIdx) const
+
+    Computes the value of an ordered (numerical) feature.
+
+    :param featureIdx: Index of the feature whose value will be computed.
+
+Returns the computed value of the ordered feature.
+
+.. index:: FeatureEvaluator::calcCat
+
+cv::FeatureEvaluator::calcCat
+-----------------------------
+
+.. cfunction:: int FeatureEvaluator::calcCat(int featureIdx) const
+
+    Computes the value of a categorical feature.
+
+    :param featureIdx: Index of the feature whose value will be computed.
+
+Returns the computed label of the categorical feature, i.e. the value from [0, ... (number of categories - 1)].
+
+.. index:: FeatureEvaluator::create
+
+cv::FeatureEvaluator::create
+----------------------------
+
+.. cfunction:: static Ptr<FeatureEvaluator> FeatureEvaluator::create(int type)
+
+    Constructs a feature evaluator.
+
+    :param type: Type of features evaluated by the cascade (HAAR or LBP for now).
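+The evaluator is normally driven by :ref:`CascadeClassifier` internally, but the low-level protocol sketched by the methods above is roughly the following (illustrative only; the file name and node name are made up):
+
+::
+
+    Ptr<FeatureEvaluator> feval = FeatureEvaluator::create(FeatureEvaluator::HAAR);
+
+    FileStorage fs("features.xml", FileStorage::READ); // hypothetical feature file
+    feval->read(fs["features"]);                       // hypothetical node name
+
+    feval->setImage(img, Size(24, 24)); // img is CV_8UC1; 24x24 training windows
+    feval->setWindow(Point(0, 0));      // evaluate features in the top-left window
+    double v0 = feval->calcOrd(0);      // value of the 0th ordered feature
+
+..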
+.. index:: CascadeClassifier
+
+.. _CascadeClassifier:
+
+CascadeClassifier
+-----------------
+
+.. ctype:: CascadeClassifier
+
+The cascade classifier class for object detection.
+
+::
+
+    class CascadeClassifier
+    {
+    public:
+        // structure for storing a tree node
+        struct CV_EXPORTS DTreeNode
+        {
+            int featureIdx;  // index of the feature on which the split is done
+            float threshold; // split threshold of ordered features only
+            int left;        // left child index in the tree nodes array
+            int right;       // right child index in the tree nodes array
+        };
+
+        // structure for storing a decision tree
+        struct CV_EXPORTS DTree
+        {
+            int nodeCount; // nodes count
+        };
+
+        // structure for storing a cascade stage (BOOST only for now)
+        struct CV_EXPORTS Stage
+        {
+            int first;       // first tree index in the tree array
+            int ntrees;      // number of trees
+            float threshold; // threshold of the stage sum
+        };
+
+        enum { BOOST = 0 }; // supported stage types
+
+        // mode of detection (see parameter flags in function HaarDetectObjects)
+        enum { DO_CANNY_PRUNING = CV_HAAR_DO_CANNY_PRUNING,
+               SCALE_IMAGE = CV_HAAR_SCALE_IMAGE,
+               FIND_BIGGEST_OBJECT = CV_HAAR_FIND_BIGGEST_OBJECT,
+               DO_ROUGH_SEARCH = CV_HAAR_DO_ROUGH_SEARCH };
+
+        CascadeClassifier(); // default constructor
+        CascadeClassifier(const string& filename);
+        ~CascadeClassifier(); // destructor
+
+        bool empty() const;
+        bool load(const string& filename);
+        bool read(const FileNode& node);
+
+        void detectMultiScale( const Mat& image, vector<Rect>& objects,
+                               double scaleFactor=1.1, int minNeighbors=3,
+                               int flags=0, Size minSize=Size());
+
+        bool setImage( Ptr<FeatureEvaluator>&, const Mat& );
+        int runAt( Ptr<FeatureEvaluator>&, Point );
+
+        bool is_stump_based; // true, if the trees are stumps
+
+        int stageType;    // stage type (BOOST only for now)
+        int featureType;  // feature type (HAAR or LBP for now)
+        int ncategories;  // number of categories (for categorical features only)
+        Size origWinSize; // size of training images
+
+        vector<Stage> stages;      // vector of stages (BOOST for now)
+        vector<DTree> classifiers; // vector of decision trees
+        vector<DTreeNode> nodes;   // vector of tree nodes
+        vector<float> leaves;      // vector of leaf values
+        vector<int> subsets;       // subsets of split by categorical feature
+
+        Ptr<FeatureEvaluator> feval;             // pointer to the feature evaluator
+        Ptr<CvHaarClassifierCascade> oldCascade; // pointer to an old cascade
+    };
+
+..
+
+.. index:: CascadeClassifier::CascadeClassifier
+
+cv::CascadeClassifier::CascadeClassifier
+----------------------------------------
+
+.. cfunction:: CascadeClassifier::CascadeClassifier(const string\& filename)
+
+    Loads the classifier from a file.
+
+    :param filename: Name of the file from which the classifier is loaded.
+
+.. index:: CascadeClassifier::empty
+
+cv::CascadeClassifier::empty
+----------------------------
+
+.. cfunction:: bool CascadeClassifier::empty() const
+
+    Checks if the classifier has been loaded or not.
+
+.. index:: CascadeClassifier::load
+
+cv::CascadeClassifier::load
+---------------------------
+
+.. cfunction:: bool CascadeClassifier::load(const string\& filename)
+
+    Loads the classifier from a file. The previous content is destroyed.
+
+    :param filename: Name of the file from which the classifier is loaded. The file may contain an old HAAR classifier (trained by the haartraining application) or a new cascade classifier (trained by the traincascade application).
+.. index:: CascadeClassifier::read
+
+cv::CascadeClassifier::read
+---------------------------
+
+.. cfunction:: bool CascadeClassifier::read(const FileNode\& node)
+
+    Reads the classifier from a FileStorage node. The file may contain a new cascade classifier (trained by the traincascade application) only.
+
+.. index:: CascadeClassifier::detectMultiScale
+
+cv::CascadeClassifier::detectMultiScale
+---------------------------------------
+
+.. cfunction:: void CascadeClassifier::detectMultiScale( const Mat\& image, vector<Rect>\& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size())
+
+    Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles.
+
+    :param image: Matrix of type ``CV_8U`` containing the image in which to detect objects.
+
+    :param objects: Vector of rectangles such that each rectangle contains the detected object.
+
+    :param scaleFactor: Specifies how much the image size is reduced at each image scale.
+
+    :param minNeighbors: Specifies how many neighbors each candidate rectangle should have to retain it.
+
+    :param flags: This parameter is not used for the new cascade; for the old cascade it has the same meaning as in the function cvHaarDetectObjects.
+
+    :param minSize: The minimum possible object size. Objects smaller than that are ignored.
+
+.. index:: CascadeClassifier::setImage
+
+cv::CascadeClassifier::setImage
+-------------------------------
+
+.. cfunction:: bool CascadeClassifier::setImage( Ptr<FeatureEvaluator>\& feval, const Mat\& image )
+
+    Sets the image for detection (called by detectMultiScale at each image level).
+
+    :param feval: Pointer to the feature evaluator that is used for computing features.
+
+    :param image: Matrix of type ``CV_8UC1`` containing the image in which to compute the features.
+
+.. index:: CascadeClassifier::runAt
+
+cv::CascadeClassifier::runAt
+----------------------------
+
+.. cfunction:: int CascadeClassifier::runAt( Ptr<FeatureEvaluator>\& feval, Point pt )
+
+    Runs the detector at the specified point (the image that the detector works with should be set by setImage).
+
+    :param feval: Feature evaluator that is used for computing features.
+
+    :param pt: The upper left point of the window in which the features will be computed. The size of the window is equal to the size of training images.
+
+Returns 1 if the cascade classifier detects an object at the given location; otherwise it returns -si, where si is the index of the stage that first predicted that the given window contains background.
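+A typical detection pass with the class looks as follows (a sketch; the cascade file name is illustrative and the drawing step is optional):
+
+::
+
+    CascadeClassifier cascade;
+    if( !cascade.load("haarcascade_frontalface_alt.xml") )
+        return -1; // could not read the cascade
+
+    Mat gray; // 8-bit grayscale input image, e.g. produced by cvtColor
+    vector<Rect> faces;
+    cascade.detectMultiScale( gray, faces, 1.1, 3, 0, Size(30, 30) );
+
+    for( size_t i = 0; i < faces.size(); i++ )
+        rectangle( gray, faces[i], Scalar(255) ); // mark each detection
+
+..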
+.. index:: groupRectangles
+
+cv::groupRectangles
+-------------------
+
+.. cfunction:: void groupRectangles(vector<Rect>\& rectList, int groupThreshold, double eps=0.2)
+
+    Groups the object candidate rectangles.
+
+    :param rectList: The input/output vector of rectangles. On output it contains the retained and grouped rectangles.
+
+    :param groupThreshold: The minimum possible number of rectangles, minus 1, in a group of rectangles to retain it.
+
+    :param eps: The relative difference between sides of the rectangles to merge them into a group.
+
+The function is a wrapper for the generic function :func:`partition`. It clusters all the input rectangles using the rectangle equivalence criteria that combine rectangles with similar sizes and similar locations (the similarity is defined by ``eps``). When ``eps=0``, no clustering is done at all. If :math:`\texttt{eps}\rightarrow +\infty`, all the rectangles are put in one cluster. Then, the small clusters containing less than or equal to ``groupThreshold`` rectangles are rejected. In each of the remaining clusters the average rectangle is computed and put into the output rectangle list.
diff --git a/modules/objdetect/doc/objdetect.rst b/modules/objdetect/doc/objdetect.rst
new file mode 100644
index 000000000..9ae37ce84
--- /dev/null
+++ b/modules/objdetect/doc/objdetect.rst
@@ -0,0 +1,8 @@
+****************
+Object Detection
+****************
+
+.. toctree::
+    :maxdepth: 2
+
+    cascade_classification
diff --git a/modules/python/hdr_parser.pyc b/modules/python/hdr_parser.pyc
new file mode 100755
index 000000000..b9c62a4da
Binary files /dev/null and b/modules/python/hdr_parser.pyc differ
diff --git a/modules/video/doc/motion_analysis_and_object_tracking.rst b/modules/video/doc/motion_analysis_and_object_tracking.rst
new file mode 100644
index 000000000..f5191c761
--- /dev/null
+++ b/modules/video/doc/motion_analysis_and_object_tracking.rst
@@ -0,0 +1,471 @@
+Motion Analysis and Object Tracking
+===================================
+
+.. highlight:: cpp
+
+.. index:: calcOpticalFlowPyrLK
+
+cv::calcOpticalFlowPyrLK
+------------------------
+
+.. cfunction:: void calcOpticalFlowPyrLK( const Mat\& prevImg, const Mat\& nextImg, const vector<Point2f>\& prevPts, vector<Point2f>\& nextPts, vector<uchar>\& status, vector<float>\& err, Size winSize=Size(15,15), int maxLevel=3, TermCriteria criteria=TermCriteria( TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01), double derivLambda=0.5, int flags=0 )
+
+    Calculates the optical flow for a sparse feature set using the iterative Lucas-Kanade method with pyramids.
+
+    :param prevImg: The first 8-bit single-channel or 3-channel input image.
+
+    :param nextImg: The second input image of the same size and the same type as ``prevImg``.
+
+    :param prevPts: Vector of points for which the flow needs to be found.
+
+    :param nextPts: The output vector of points containing the calculated new positions of the input features in the second image.
+
+    :param status: The output status vector. Each element of the vector is set to 1 if the flow for the corresponding features has been found, 0 otherwise.
+
+    :param err: The output vector that will contain the difference between patches around the original and moved points.
+
+    :param winSize: Size of the search window at each pyramid level.
+
+    :param maxLevel: 0-based maximal pyramid level number. If 0, pyramids are not used (single level); if 1, two levels are used, etc.
+
+    :param criteria: Specifies the termination criteria of the iterative search algorithm (after the specified maximum number of iterations ``criteria.maxCount`` or when the search window moves by less than ``criteria.epsilon``).
+
+    :param derivLambda: The relative weight of the spatial image derivatives impact to the optical flow estimation. If ``derivLambda=0``, only the image intensity is used; if ``derivLambda=1``, only derivatives are used. Any other value between 0 and 1 means that both derivatives and the image intensity are used (in the corresponding proportions).
+
+    :param flags: The operation flags:
+
+        * **OPTFLOW_USE_INITIAL_FLOW** use initial estimations stored in ``nextPts``. If the flag is not set, then initially :math:`\texttt{nextPts}\leftarrow\texttt{prevPts}`.
+The function implements the sparse iterative version of the Lucas-Kanade optical flow in pyramids; see Bouguet00.
+
+.. index:: calcOpticalFlowFarneback
+
+cv::calcOpticalFlowFarneback
+----------------------------
+
+.. cfunction:: void calcOpticalFlowFarneback( const Mat\& prevImg, const Mat\& nextImg, Mat\& flow, double pyrScale, int levels, int winsize, int iterations, int polyN, double polySigma, int flags )
+
+    Computes a dense optical flow using Gunnar Farneback's algorithm.
+
+    :param prevImg: The first 8-bit single-channel input image.
+
+    :param nextImg: The second input image of the same size and the same type as ``prevImg``.
+
+    :param flow: The computed flow image; it will have the same size as ``prevImg`` and type ``CV_32FC2``.
+
+    :param pyrScale: Specifies the image scale (<1) to build the pyramids for each image. ``pyrScale=0.5`` means the classical pyramid, where each next layer is twice smaller than the previous one.
+
+    :param levels: The number of pyramid layers, including the initial image. ``levels=1`` means that no extra layers are created and only the original images are used.
+
+    :param winsize: The averaging window size. Larger values increase the algorithm's robustness to image noise and give more chances for fast motion detection, but yield a more blurred motion field.
+
+    :param iterations: The number of iterations the algorithm does at each pyramid level.
+
+    :param polyN: Size of the pixel neighborhood used to find the polynomial expansion in each pixel. Larger values mean that the image will be approximated with smoother surfaces, yielding a more robust algorithm and a more blurred motion field. Typically, ``polyN`` =5 or 7.
+
+    :param polySigma: Standard deviation of the Gaussian that is used to smooth the derivatives used as a basis for the polynomial expansion. For ``polyN=5`` you can set ``polySigma=1.1``; for ``polyN=7`` a good value would be ``polySigma=1.5``.
+
+    :param flags: The operation flags; can be a combination of the following:
+
+        * **OPTFLOW_USE_INITIAL_FLOW** Use the input ``flow`` as the initial flow approximation.
+
+        * **OPTFLOW_FARNEBACK_GAUSSIAN** Use a Gaussian :math:`\texttt{winsize}\times\texttt{winsize}` filter instead of a box filter of the same size for optical flow estimation. Usually, this option gives a more accurate flow than with a box filter, at the cost of lower speed (and normally ``winsize`` for a Gaussian window should be set to a larger value to achieve the same level of robustness).
+
+The function finds the optical flow for each ``prevImg`` pixel using the Farneback algorithm, so that
+
+.. math::
+
+    \texttt{prevImg} (x,y)  \sim \texttt{nextImg} ( \texttt{flow} (x,y)[0],  \texttt{flow} (x,y)[1])
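+For instance, the dense flow between two consecutive grayscale frames can be computed with the commonly used parameter values below (a sketch; ``prev`` and ``next`` stand for the two input frames):
+
+::
+
+    Mat prev, next; // 8-bit single-channel frames of the same size
+    Mat flow;       // will receive CV_32FC2 displacement vectors
+
+    calcOpticalFlowFarneback( prev, next, flow,
+                              0.5, // pyrScale: classical half-scale pyramid
+                              3,   // levels
+                              15,  // winsize
+                              3,   // iterations
+                              5,   // polyN
+                              1.1, // polySigma, matching polyN=5 (see above)
+                              0 ); // flags
+
+    // flow.at<Point2f>(y, x) now contains the motion of pixel (x, y)
+
+..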
+.. index:: updateMotionHistory
+
+cv::updateMotionHistory
+-----------------------
+
+.. cfunction:: void updateMotionHistory( const Mat\& silhouette, Mat\& mhi, double timestamp, double duration )
+
+    Updates the motion history image by a moving silhouette.
+
+    :param silhouette: Silhouette mask that has non-zero pixels where the motion occurs.
+
+    :param mhi: Motion history image that is updated by the function (single-channel, 32-bit floating-point).
+
+    :param timestamp: Current time in milliseconds or other units.
+
+    :param duration: Maximal duration of the motion track in the same units as ``timestamp``.
+
+The function updates the motion history image as follows:
+
+.. math::
+
+    \texttt{mhi} (x,y)= \forkthree{\texttt{timestamp}}{if $\texttt{silhouette}(x,y) \ne 0$}{0}{if $\texttt{silhouette}(x,y) = 0$ and $\texttt{mhi} < (\texttt{timestamp} - \texttt{duration})$}{\texttt{mhi}(x,y)}{otherwise}
+
+That is, MHI pixels where motion occurs are set to the current ``timestamp``, while the pixels where motion last happened a long time ago are cleared.
+
+The function, together with :func:`calcMotionGradient` and :func:`calcGlobalOrientation`, implements the motion templates technique, described in Davis97 and Bradski00. See also the OpenCV sample ``motempl.c`` that demonstrates the use of all the motion template functions.
+
+.. index:: calcMotionGradient
+
+cv::calcMotionGradient
+----------------------
+
+.. cfunction:: void calcMotionGradient( const Mat\& mhi, Mat\& mask, Mat\& orientation, double delta1, double delta2, int apertureSize=3 )
+
+    Calculates the gradient orientation of a motion history image.
+
+    :param mhi: Motion history single-channel floating-point image.
+
+    :param mask: The output mask image; it will have the type ``CV_8UC1`` and the same size as ``mhi``. Its non-zero elements mark pixels where the motion gradient data is correct.
+
+    :param orientation: The output motion gradient orientation image; it will have the same type and the same size as ``mhi``. Each of its pixels will contain the motion orientation in degrees, from 0 to 360.
+
+    :param delta1, delta2: The minimal and maximal allowed difference between ``mhi`` values within a pixel neighborhood. That is, the function finds the minimum (:math:`m(x,y)`) and maximum (:math:`M(x,y)`) ``mhi`` values over a :math:`3 \times 3` neighborhood of each pixel and marks the motion orientation at :math:`(x, y)` as valid only if
+
+        .. math::
+
+            \min ( \texttt{delta1} ,  \texttt{delta2} )  \le  M(x,y)-m(x,y)  \le   \max ( \texttt{delta1} , \texttt{delta2} ).
+
+    :param apertureSize: The aperture size of the :func:`Sobel` operator.
+
+The function calculates the gradient orientation at each pixel :math:`(x, y)` as:
+
+.. math::
+
+    \texttt{orientation} (x,y)= \arctan{\frac{d\texttt{mhi}/dy}{d\texttt{mhi}/dx}}
+
+(in fact, :func:`fastArctan` and :func:`phase` are used, so that the computed angle is measured in degrees and covers the full range 0..360). Also, the ``mask`` is filled to indicate the pixels where the computed angle is valid.
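+The three motion-template functions are typically chained together. A sketch of one processing step follows; ``silhouette`` is assumed to be a ``CV_8UC1`` motion mask of the current frame, ``timestamp`` the current time in seconds, and the delta values are illustrative:
+
+::
+
+    const double MHI_DURATION = 1.0; // track motion over the last second
+    Mat mhi( silhouette.size(), CV_32FC1, Scalar(0) ); // created once, before the loop
+
+    // for each new frame:
+    updateMotionHistory( silhouette, mhi, timestamp, MHI_DURATION );
+
+    Mat mask, orientation;
+    calcMotionGradient( mhi, mask, orientation, 0.05, 0.5 );
+
+    // average direction of the whole motion, in degrees (see below)
+    double angle = calcGlobalOrientation( orientation, mask, mhi,
+                                          timestamp, MHI_DURATION );
+
+..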
+.. index:: calcGlobalOrientation
+
+cv::calcGlobalOrientation
+-------------------------
+
+.. cfunction:: double calcGlobalOrientation( const Mat\& orientation, const Mat\& mask, const Mat\& mhi, double timestamp, double duration )
+
+    Calculates the global motion orientation in some selected region.
+
+    :param orientation: Motion gradient orientation image, calculated by the function :func:`calcMotionGradient`.
+
+    :param mask: Mask image. It may be a conjunction of a valid gradient mask, also calculated by :func:`calcMotionGradient`, and the mask of the region whose direction needs to be calculated.
+
+    :param mhi: The motion history image, calculated by :func:`updateMotionHistory`.
+
+    :param timestamp: The timestamp passed to :func:`updateMotionHistory`.
+
+    :param duration: Maximal duration of the motion track in milliseconds, passed to :func:`updateMotionHistory`.
+
+The function calculates the average motion direction in the selected region and returns the angle between 0 degrees and 360 degrees. The average direction is computed from the weighted orientation histogram, where a recent motion has a larger weight and the motion that occurred in the past has a smaller weight, as recorded in ``mhi``.
+
+.. index:: CamShift
+
+cv::CamShift
+------------
+
+.. cfunction:: RotatedRect CamShift( const Mat\& probImage, Rect\& window, TermCriteria criteria )
+
+    Finds the object center, size, and orientation.
+
+    :param probImage: Back projection of the object histogram; see :func:`calcBackProject`.
+
+    :param window: Initial search window.
+
+    :param criteria: Stop criteria for the underlying :func:`meanShift`.
+
+The function implements the CAMSHIFT object tracking algorithm Bradski98. First, it finds an object center using :func:`meanShift` and then adjusts the window size and finds the optimal rotation. The function returns the rotated rectangle structure that includes the object position, size and orientation. The next position of the search window can be obtained with ``RotatedRect::boundingRect()``.
+
+See the OpenCV sample ``camshiftdemo.c`` that tracks colored objects.
+
+.. index:: meanShift
+
+cv::meanShift
+-------------
+
+.. cfunction:: int meanShift( const Mat\& probImage, Rect\& window, TermCriteria criteria )
+
+    Finds the object on a back projection image.
+
+    :param probImage: Back projection of the object histogram; see :func:`calcBackProject`.
+
+    :param window: Initial search window.
+
+    :param criteria: The stop criteria for the iterative search algorithm.
+
+The function implements the iterative object search algorithm. It takes the object back projection and the initial position as input. The mass center of the back projection image inside ``window`` is computed, and the search window center shifts to the mass center. The procedure is repeated until the specified number of iterations ``criteria.maxCount`` is reached or until the window center shifts by less than ``criteria.epsilon``. The algorithm is used inside :func:`CamShift` and, unlike :func:`CamShift`, the search window size and orientation do not change during the search. You can simply pass the output of :func:`calcBackProject` to this function, but better results can be obtained if you pre-filter the back projection and remove the noise (e.g. by retrieving connected components with :func:`findContours`, throwing away contours with small area (:func:`contourArea`) and rendering the remaining contours with :func:`drawContours`).
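+A sketch of one tracking iteration with these functions; ``backproj`` is assumed to come from :func:`calcBackProject` for the current frame, and ``track_window`` from the previous frame or an initial user selection:
+
+::
+
+    Rect track_window;  // current object location estimate
+    Mat backproj;       // back projection of the object histogram
+
+    RotatedRect box = CamShift( backproj, track_window,
+                                TermCriteria( TermCriteria::COUNT + TermCriteria::EPS,
+                                              10, 1 ) );
+
+    track_window = box.boundingRect(); // the seed for the next frame
+
+..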
+.. index:: KalmanFilter
+
+.. _KalmanFilter:
+
+KalmanFilter
+------------
+
+.. ctype:: KalmanFilter
+
+Kalman filter class.
+
+::
+
+    class KalmanFilter
+    {
+    public:
+        KalmanFilter();
+        KalmanFilter(int dynamParams, int measureParams, int controlParams=0);
+        void init(int dynamParams, int measureParams, int controlParams=0);
+        // predicts statePre from statePost
+        const Mat& predict(const Mat& control=Mat());
+        // corrects statePre based on the input measurement vector
+        // and stores the result to statePost.
+        const Mat& correct(const Mat& measurement);
+
+        Mat statePre;           // predicted state (x'(k)):
+                                //    x(k)=A*x(k-1)+B*u(k)
+        Mat statePost;          // corrected state (x(k)):
+                                //    x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
+        Mat transitionMatrix;   // state transition matrix (A)
+        Mat controlMatrix;      // control matrix (B)
+                                // (it is not used if there is no control)
+        Mat measurementMatrix;  // measurement matrix (H)
+        Mat processNoiseCov;    // process noise covariance matrix (Q)
+        Mat measurementNoiseCov;// measurement noise covariance matrix (R)
+        Mat errorCovPre;        // a priori error estimate covariance matrix (P'(k)):
+                                //    P'(k)=A*P(k-1)*At + Q
+        Mat gain;               // Kalman gain matrix (K(k)):
+                                //    K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
+        Mat errorCovPost;       // a posteriori error estimate covariance matrix (P(k)):
+                                //    P(k)=(I-K(k)*H)*P'(k)
+        ...
+    };
+
+..
+
+The class implements a standard Kalman filter, http://en.wikipedia.org/wiki/Kalman_filter. However, you can modify ``transitionMatrix``, ``controlMatrix`` and ``measurementMatrix`` to get the extended Kalman filter functionality. See the OpenCV sample ``kalman.c``.
diff --git a/modules/video/doc/video.rst b/modules/video/doc/video.rst
new file mode 100644
index 000000000..7241464de
--- /dev/null
+++ b/modules/video/doc/video.rst
@@ -0,0 +1,8 @@
+**************
+Video Analysis
+**************
+
+.. toctree::
+    :maxdepth: 2
+
+    motion_analysis_and_object_tracking