Merge remote-tracking branch 'upstream/master' into rho
commit bcc630a8dc

3rdparty/libpng/CMakeLists.txt
@@ -14,7 +14,7 @@ ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" ${ZLIB_INCLUDE_DIRS})
 file(GLOB lib_srcs *.c)
 file(GLOB lib_hdrs *.h)
 
-if(NEON)
+if(NEON AND CMAKE_SIZEOF_VOID_P EQUAL 4)
   list(APPEND lib_srcs arm/filter_neon.S)
   add_definitions(-DPNG_ARM_NEON)
 endif()
@@ -147,17 +147,6 @@ endif()
 
 # ========= Doxygen docs =========
 
-macro(make_reference result modules_list black_list)
-  set(_res)
-  foreach(m ${${modules_list}})
-    list(FIND ${black_list} ${m} _pos)
-    if(${_pos} EQUAL -1)
-      set(_res "${_res} @ref ${m} | ${m} \n")
-    endif()
-  endforeach()
-  set(${result} ${_res})
-endmacro()
-
 if(BUILD_DOCS AND HAVE_DOXYGEN)
   # not documented modules list
   list(APPEND blacklist "ts" "java" "python2" "python3" "world")
@@ -166,6 +155,10 @@ if(BUILD_DOCS AND HAVE_DOXYGEN)
   set(paths_include)
   set(paths_doc)
   set(paths_bib)
+  set(paths_sample)
+  set(paths_tutorial)
+  set(refs_main)
+  set(refs_extra)
   set(deps)
   foreach(m ${BASE_MODULES} ${EXTRA_MODULES})
     list(FIND blacklist ${m} _pos)
@@ -182,40 +175,86 @@ if(BUILD_DOCS AND HAVE_DOXYGEN)
       list(APPEND paths_doc "${docs_dir}")
       list(APPEND deps ${docs_dir})
     endif()
+
+    # sample folder
+    set(sample_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/samples")
+    if(EXISTS "${sample_dir}")
+      list(APPEND paths_sample "${sample_dir}")
+      list(APPEND deps ${sample_dir})
+    endif()
+
+    # tutorial folder
+    set(tutorial_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/tutorials")
+    if(EXISTS "${tutorial_dir}")
+      list(APPEND paths_tutorial "${tutorial_dir}")
+      list(APPEND deps ${tutorial_dir})
+    endif()
+
     # BiBTeX file
     set(bib_file "${docs_dir}/${m}.bib")
     if(EXISTS "${bib_file}")
       set(paths_bib "${paths_bib} ${bib_file}")
       list(APPEND deps ${bib_file})
     endif()
+
+    # Reference entry
+    # set(one_ref "@ref ${m} | ${m}\n")
+    set(one_ref "\t- ${m}. @ref ${m}\n")
+    list(FIND EXTRA_MODULES ${m} _pos)
+    if(${_pos} EQUAL -1)
+      set(refs_main "${refs_main}${one_ref}")
+    else()
+      set(refs_extra "${refs_extra}${one_ref}")
+    endif()
   endif()
 endforeach()
 
+  # fix references
+  # set(ref_header "Module name | Folder\n----------- | ------")
+  # if(refs_main)
+  #   set(refs_main "### Main modules\n${ref_header}\n${refs_main}")
+  # endif()
+  # if(refs_extra)
+  #   set(refs_extra "### Extra modules\n${ref_header}\n${refs_extra}")
+  # endif()
+  if(refs_main)
+    set(refs_main "- Main modules:\n${refs_main}")
+  endif()
+  if(refs_extra)
+    set(refs_extra "- Extra modules:\n${refs_extra}")
+  endif()
+
   # additional config
   set(doxyfile "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile")
   set(rootfile "${CMAKE_CURRENT_BINARY_DIR}/root.markdown")
   set(bibfile "${CMAKE_CURRENT_SOURCE_DIR}/opencv.bib")
-  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${paths_include} ; ${paths_doc}")
-  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${paths_doc}")
-  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXAMPLE_PATH "${CMAKE_SOURCE_DIR}/samples/cpp ; ${paths_doc}")
+  set(faqfile "${CMAKE_CURRENT_SOURCE_DIR}/faq.markdown")
+  set(tutorial_path "${CMAKE_CURRENT_SOURCE_DIR}/tutorials")
+  set(tutorial_py_path "${CMAKE_CURRENT_SOURCE_DIR}/py_tutorials")
+  set(user_guide_path "${CMAKE_CURRENT_SOURCE_DIR}/user_guide")
+  set(example_path "${CMAKE_SOURCE_DIR}/samples")
+
+  # set export variables
+  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${faqfile} ; ${paths_include} ; ${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${user_guide_path} ; ${paths_tutorial}")
+  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${user_guide_path} ; ${paths_tutorial}")
+  # TODO: remove paths_doc from EXAMPLE_PATH after face module tutorials/samples moved to separate folders
+  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXAMPLE_PATH "${example_path} ; ${paths_doc} ; ${paths_sample}")
   set(CMAKE_DOXYGEN_LAYOUT "${CMAKE_CURRENT_SOURCE_DIR}/DoxygenLayout.xml")
   set(CMAKE_DOXYGEN_OUTPUT_PATH "doxygen")
+  set(CMAKE_DOXYGEN_MAIN_REFERENCE "${refs_main}")
+  set(CMAKE_DOXYGEN_EXTRA_REFERENCE "${refs_extra}")
   set(CMAKE_EXTRA_BIB_FILES "${bibfile} ${paths_bib}")
-
-  # generate references
-  make_reference(CMAKE_DOXYGEN_MAIN_REFERENCE BASE_MODULES blacklist)
-  make_reference(CMAKE_DOXYGEN_EXTRA_REFERENCE EXTRA_MODULES blacklist)
 
   # writing file
   configure_file(Doxyfile.in ${doxyfile} @ONLY)
   configure_file(root.markdown.in ${rootfile} @ONLY)
   configure_file(mymath.sty "${CMAKE_DOXYGEN_OUTPUT_PATH}/html/mymath.sty" @ONLY)
   configure_file(mymath.sty "${CMAKE_DOXYGEN_OUTPUT_PATH}/latex/mymath.sty" @ONLY)
 
+  # TODO: do not store downloadable samples, but give github link instead
   add_custom_target(doxygen
+    COMMAND "${CMAKE_COMMAND}" -E copy_directory "${CMAKE_SOURCE_DIR}/samples" "${CMAKE_DOXYGEN_OUTPUT_PATH}/html/samples"
+    COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/pattern.png" "${CMAKE_DOXYGEN_OUTPUT_PATH}/html"
+    COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/acircles_pattern.png" "${CMAKE_DOXYGEN_OUTPUT_PATH}/html"
     COMMAND ${DOXYGEN_BUILD} ${doxyfile}
-    DEPENDS ${doxyfile} ${rootfile} ${bibfile} ${deps})
+    DEPENDS ${doxyfile} ${rootfile} ${bibfile} ${deps}
+  )
 endif()
 
 if(HAVE_DOC_GENERATOR)
doc/faq.markdown (new file)

Frequently Asked Questions {#faq}
==========================

- Q: Example question?

  A: Example answer

- Q: Example question?

  A: Example answer
@@ -9,7 +9,8 @@ MathJax.Hub.Config(
       vecthree: ["\\begin{bmatrix} #1\\\\ #2\\\\ #3 \\end{bmatrix}", 3],
       vecthreethree: ["\\begin{bmatrix} #1 & #2 & #3\\\\ #4 & #5 & #6\\\\ #7 & #8 & #9 \\end{bmatrix}", 9],
       hdotsfor: ["\\dots", 1],
-      mathbbm: ["\\mathbb{#1}", 1]
+      mathbbm: ["\\mathbb{#1}", 1],
+      bordermatrix: ["\\matrix{#1}", 1]
     }
   }
 }
@@ -824,3 +824,19 @@
   journal = {Machine learning},
   volume = {10}
 }
+@inproceedings{vacavant2013benchmark,
+  title={A benchmark dataset for outdoor foreground/background extraction},
+  author={Vacavant, Antoine and Chateau, Thierry and Wilhelm, Alexis and Lequi{\`e}vre, Laurent},
+  booktitle={Computer Vision-ACCV 2012 Workshops},
+  pages={291--300},
+  year={2013},
+  organization={Springer}
+}
+@incollection{Liao2007,
+  title={Learning multi-scale block local binary patterns for face recognition},
+  author={Liao, Shengcai and Zhu, Xiangxin and Lei, Zhen and Zhang, Lun and Li, Stan Z},
+  booktitle={Advances in Biometrics},
+  pages={828--837},
+  year={2007},
+  publisher={Springer}
+}
(new file)

How OpenCV-Python Bindings Work {#tutorial_py_bindings_basics}
===============================

Goal
----

Learn:

-   How OpenCV-Python bindings are generated.
-   How to extend new OpenCV modules to Python.

How are OpenCV-Python bindings generated?
-----------------------------------------

In OpenCV, all algorithms are implemented in C++, but these algorithms can be used from different
languages like Python, Java etc. This is made possible by the bindings generators, which create a
bridge between C++ and Python that lets users call C++ functions from Python. To get a complete
picture of what happens in the background, a good knowledge of the Python/C API is required. A
simple example of extending C++ functions to Python can be found in the official Python
documentation[1]. Extending all functions in OpenCV to Python by writing their wrapper functions
manually would be a time-consuming task, so OpenCV does it in a more intelligent way: it generates
these wrapper functions automatically from the C++ headers using some Python scripts located in
modules/python/src2. We will look into what they do.

First, modules/python/CMakeLists.txt is a CMake script which checks the modules to be extended to
Python. It automatically checks all the modules to be extended and grabs their header files. These
header files contain the list of all classes, functions, constants etc. for that particular module.

Second, these header files are passed to a Python script, modules/python/src2/gen2.py. This is the
Python bindings generator script. It calls another Python script, modules/python/src2/hdr_parser.py,
which is the header parser script. The header parser splits the complete header file into small
Python lists, so these lists contain all the details about a particular function, class etc. For
example, a function will be parsed into a list containing the function name, return type, input
arguments, argument types etc. The final list contains the details of all the functions, structs,
classes etc. in that header file.
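As a rough, unofficial illustration of what the parser produces (the exact API of hdr_parser.py can
differ between OpenCV versions, so treat this as a sketch rather than a reference), the header
parser can be driven directly from Python:
@code{.py}
# Sketch only: assumes hdr_parser.py from modules/python/src2 is on the path and
# exposes a CppHeaderParser class with a parse() method, as in OpenCV 3.x sources.
import hdr_parser

parser = hdr_parser.CppHeaderParser()
# Parse one OpenCV header and look at what the generator will see.
decls = parser.parse("modules/core/include/opencv2/core.hpp")
for decl in decls[:5]:
    # Each declaration is a plain Python list, roughly [name, return type, flags, arguments, ...]
    print decl
@endcode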
The header parser doesn't parse all the functions/classes in a header file. The developer has to
specify which functions should be exported to Python. For that, certain macros are added at the
beginning of these declarations, which enable the header parser to identify the functions to be
parsed. These macros are added by the developer who programs the particular function. In short, the
developer decides which functions should be extended to Python and which should not. Details of
those macros will be given in the next section.

The header parser returns a final big list of parsed functions. Our generator script (gen2.py)
creates wrapper functions for all the functions/classes/enums/structs parsed by the header parser
(you can find these header files during compilation in the build/modules/python/ folder as
pyopencv_generated_\*.h files). But some basic OpenCV datatypes like Mat, Vec4i and Size need to be
extended manually: for example, a Mat should be converted to a Numpy array, a Size should be
converted to a tuple of two integers, etc. Similarly, there may be some complex
structs/classes/functions which need to be extended manually. All such manual wrapper functions are
placed in modules/python/src2/pycv2.hpp.

Now the only thing left is the compilation of these wrapper files, which gives us the **cv2**
module. So when you call a function, say res = cv2.equalizeHist(img1, img2) in Python, you pass two
numpy arrays and you expect another numpy array as the output. These numpy arrays are converted to
cv::Mat, the equalizeHist() function in C++ is called, and the final result res is converted back
into a Numpy array. So, in short, almost all operations are done in C++, which gives us almost the
same speed as C++.
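To make that round trip concrete, here is a minimal sketch from the Python side (the image file name
is just a placeholder):
@code{.py}
# Minimal sketch: 'messi5.jpg' is a placeholder file name.
import cv2

img = cv2.imread('messi5.jpg', 0)      # numpy array goes in ...
res = cv2.equalizeHist(img)            # ... C++ cv::equalizeHist runs, a numpy array comes back
print type(res), res.shape, res.dtype  # same array type and shape family as the input
@endcode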
This is the basic version of how OpenCV-Python bindings are generated.

How to extend new modules to Python?
------------------------------------

The header parser parses the header files based on some wrapper macros added to the function
declarations. Enumeration constants don't need any wrapper macros; they are wrapped automatically.
But the remaining functions, classes etc. need wrapper macros.

Functions are extended using the CV_EXPORTS_W macro. An example is shown below.
@code{.cpp}
CV_EXPORTS_W void equalizeHist( InputArray src, OutputArray dst );
@endcode
The header parser can understand the input and output arguments from keywords like InputArray,
OutputArray etc. But sometimes we may need to hardcode inputs and outputs. For that, macros like
CV_OUT, CV_IN_OUT etc. are used.
@code{.cpp}
CV_EXPORTS_W void minEnclosingCircle( InputArray points,
                                      CV_OUT Point2f& center, CV_OUT float& radius );
@endcode
For large classes, CV_EXPORTS_W is also used. To extend class methods, CV_WRAP is used. Similarly,
CV_PROP is used for class fields.
@code{.cpp}
class CV_EXPORTS_W CLAHE : public Algorithm
{
public:
    CV_WRAP virtual void apply(InputArray src, OutputArray dst) = 0;

    CV_WRAP virtual void setClipLimit(double clipLimit) = 0;
    CV_WRAP virtual double getClipLimit() const = 0;
};
@endcode
Overloaded functions can be extended using CV_EXPORTS_AS, but we need to pass a new name so that
each function will be called by that name in Python. Take the case of the integral function below:
three overloads are available, so each one is named with a suffix in Python. Similarly, CV_WRAP_AS
can be used to wrap overloaded methods.
@code{.cpp}
//! computes the integral image
CV_EXPORTS_W void integral( InputArray src, OutputArray sum, int sdepth = -1 );

//! computes the integral image and integral for the squared image
CV_EXPORTS_AS(integral2) void integral( InputArray src, OutputArray sum,
                                        OutputArray sqsum, int sdepth = -1, int sqdepth = -1 );

//! computes the integral image, integral for the squared image and the tilted integral image
CV_EXPORTS_AS(integral3) void integral( InputArray src, OutputArray sum,
                                        OutputArray sqsum, OutputArray tilted,
                                        int sdepth = -1, int sqdepth = -1 );
@endcode
Small classes/structs are extended using CV_EXPORTS_W_SIMPLE. These structs are passed by value to
C++ functions. Examples are KeyPoint, DMatch etc. Their methods are extended by CV_WRAP and their
fields by CV_PROP_RW.
@code{.cpp}
class CV_EXPORTS_W_SIMPLE DMatch
{
public:
    CV_WRAP DMatch();
    CV_WRAP DMatch(int _queryIdx, int _trainIdx, float _distance);
    CV_WRAP DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance);

    CV_PROP_RW int queryIdx; // query descriptor index
    CV_PROP_RW int trainIdx; // train descriptor index
    CV_PROP_RW int imgIdx;   // train image index

    CV_PROP_RW float distance;
};
@endcode
Some other small classes/structs can be exported using CV_EXPORTS_W_MAP, in which case they are
exported to a native Python dictionary. Moments() is an example.
@code{.cpp}
class CV_EXPORTS_W_MAP Moments
{
public:
    //! spatial moments
    CV_PROP_RW double m00, m10, m01, m20, m11, m02, m30, m21, m12, m03;
    //! central moments
    CV_PROP_RW double mu20, mu11, mu02, mu30, mu21, mu12, mu03;
    //! central normalized moments
    CV_PROP_RW double nu20, nu11, nu02, nu30, nu21, nu12, nu03;
};
@endcode
These are the major extension macros available in OpenCV. Typically, a developer has to put the
proper macros in their appropriate positions; the rest is done by the generator scripts. Sometimes
there may be exceptional cases where the generator scripts cannot create the wrappers; such
functions need to be handled manually. But most of the time, code written according to the OpenCV
coding guidelines will be automatically wrapped by the generator scripts.
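Not part of the original tutorial, but a quick way to sanity-check that a macro did its job is to
look for the symbol on the generated cv2 module after building:
@code{.py}
# Sketch: after building OpenCV with the Python bindings, check that a
# CV_EXPORTS_W function actually ended up in the cv2 module.
import cv2

print hasattr(cv2, 'equalizeHist')   # True if the wrapper was generated
print cv2.equalizeHist.__doc__       # auto-generated signature string
@endcode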
(new file)

OpenCV-Python Bindings {#tutorial_py_table_of_contents_bindings}
======================

Here, you will learn how OpenCV-Python bindings are generated.

-   @subpage tutorial_py_bindings_basics

    Learn how OpenCV-Python bindings are generated.
(new file)

Camera Calibration {#tutorial_py_calibration}
==================

Goal
----

In this section,
-   We will learn about distortions in cameras, and about the intrinsic and extrinsic parameters of
    a camera.
-   We will learn to find these parameters, undistort images etc.

Basics
------

Today's cheap pinhole cameras introduce a lot of distortion into images. The two major distortions
are radial distortion and tangential distortion.

Due to radial distortion, straight lines appear curved. The effect grows as we move away from the
center of the image. For example, one image is shown below, where two edges of a chess board are
marked with red lines. You can see that the border is not a straight line and doesn't match the red
line: all the expected straight lines are bulged out. Visit [Distortion
(optics)](http://en.wikipedia.org/wiki/Distortion_%28optics%29) for more details.

![image](images/calib_radial.jpg)

This distortion is corrected as follows:

\f[x_{corrected} = x( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) \\
y_{corrected} = y( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6)\f]

Similarly, the other distortion is the tangential distortion, which occurs because the lens is not
aligned perfectly parallel to the imaging plane, so some areas in the image may look nearer than
expected. It is corrected as below:

\f[x_{corrected} = x + [ 2p_1xy + p_2(r^2+2x^2)] \\
y_{corrected} = y + [ p_1(r^2+ 2y^2)+ 2p_2xy]\f]

In short, we need to find five parameters, known as distortion coefficients, given by:

\f[Distortion \; coefficients=(k_1 \hspace{10pt} k_2 \hspace{10pt} p_1 \hspace{10pt} p_2 \hspace{10pt} k_3)\f]

In addition to this, we need some more information, namely the intrinsic and extrinsic parameters
of the camera. Intrinsic parameters are specific to a camera. They include information like the
focal length (\f$f_x,f_y\f$) and the optical centers (\f$c_x, c_y\f$), and together they form the
camera matrix. The camera matrix depends only on the camera, so once calculated it can be stored
for future use. It is expressed as a 3x3 matrix:

\f[camera \; matrix = \left [ \begin{matrix} f_x & 0 & c_x \\ 0 & f_y & c_y \\ 0 & 0 & 1 \end{matrix} \right ]\f]

Extrinsic parameters correspond to the rotation and translation vectors which translate the
coordinates of a 3D point into the camera's coordinate system.

For stereo applications, these distortions need to be corrected first. To find all these parameters,
we have to provide some sample images of a well-defined pattern (e.g. a chess board). We find some
specific points in it (square corners on the chess board). We know their coordinates in real world
space and we know their coordinates in the image. With these data, a mathematical problem is solved
in the background to get the distortion coefficients. That is the summary of the whole story. For
better results, we need at least 10 test patterns.

Code
----

As mentioned above, we need at least 10 test patterns for camera calibration. OpenCV comes with some
images of a chess board (see samples/cpp/left01.jpg -- left14.jpg), so we will use those. For the
sake of understanding, consider just one image of a chess board. The important input data needed for
camera calibration are a set of 3D real world points and their corresponding 2D image points. The 2D
image points are easy to find from the image. (These image points are locations where two black
squares touch each other on the chess board.)

What about the 3D points from real world space? Those images were taken from a static camera while
chess boards were placed at different locations and orientations, so we need to know the \f$(X,Y,Z)\f$
values. But for simplicity, we can say the chess board was kept stationary in the XY plane (so Z=0
always) and the camera was moved accordingly. This consideration lets us find only the X,Y values.
Now for the X,Y values, we can simply pass the points as (0,0), (1,0), (2,0), ..., which denotes the
location of the points. In this case, the results will be in the scale of the chess board square
size. But if we know the square size (say 30 mm), we can pass the values as (0,0), (30,0), (60,0),
..., and we get the results in mm. (In this case, we don't know the square size since we didn't take
those images, so we pass in terms of square size.)
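As a small illustration of that point (the 30 mm value is an assumption taken from the text, not
from the sample images), the object-point grid used later in this tutorial can simply be scaled by
the square size:
@code{.py}
import numpy as np

square_size = 30.0                        # assumed square size in mm
objp = np.zeros((6*7,3), np.float32)
objp[:,:2] = np.mgrid[0:7,0:6].T.reshape(-1,2)
objp *= square_size                       # calibration results now come out in mm
@endcode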
The 3D points are called **object points** and the 2D image points are called **image points.**

### Setup

To find the pattern in the chess board, we use the function **cv2.findChessboardCorners()**. We also
need to pass what kind of pattern we are looking for, like an 8x8 grid, 5x5 grid etc. In this example
we use a 7x6 grid. (Normally a chess board has 8x8 squares and 7x7 internal corners.) It returns the
corner points and retval, which will be True if the pattern is found. These corners will be placed
in order (from left-to-right, top-to-bottom).

@sa This function may not be able to find the required pattern in all the images. So one good option
is to write the code such that it starts the camera and checks each frame for the required pattern.
Once the pattern is found, find the corners and store them in a list. Also provide some interval
before reading the next frame, so that we can adjust the chess board in a different direction.
Continue this process until the required number of good patterns is obtained. Even in the example
provided here, we are not sure how many of the 14 given images are good, so we read all the images
and take the good ones.

@sa Instead of a chess board, we can use a circular grid, but then use the function
**cv2.findCirclesGrid()** to find the pattern. It is said that fewer images are enough when using a
circular grid.

Once we find the corners, we can increase their accuracy using **cv2.cornerSubPix()**. We can also
draw the pattern using **cv2.drawChessboardCorners()**. All these steps are included in the code
below:

@code{.py}
import numpy as np
import cv2
import glob

# termination criteria
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((6*7,3), np.float32)
objp[:,:2] = np.mgrid[0:7,0:6].T.reshape(-1,2)

# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane.

images = glob.glob('*.jpg')

for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

    # Find the chess board corners
    ret, corners = cv2.findChessboardCorners(gray, (7,6),None)

    # If found, add object points, image points (after refining them)
    if ret == True:
        objpoints.append(objp)

        corners2 = cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)
        imgpoints.append(corners2)

        # Draw and display the corners
        cv2.drawChessboardCorners(img, (7,6), corners2,ret)
        cv2.imshow('img',img)
        cv2.waitKey(500)

cv2.destroyAllWindows()
@endcode
One image with the pattern drawn on it is shown below:

![image](images/calib_pattern.jpg)

### Calibration

Now that we have our object points and image points, we are ready to calibrate. For that we use the
function **cv2.calibrateCamera()**. It returns the camera matrix, distortion coefficients, rotation
and translation vectors etc.
@code{.py}
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1],None,None)
@endcode

### Undistortion

We have got what we were after. Now we can take an image and undistort it. OpenCV comes with two
methods; we will see both. But before that, we can refine the camera matrix based on a free scaling
parameter using **cv2.getOptimalNewCameraMatrix()**. If the scaling parameter alpha=0, it returns an
undistorted image with the minimum number of unwanted pixels, so it may even remove some pixels at
the image corners. If alpha=1, all pixels are retained, along with some extra black regions. It also
returns an image ROI which can be used to crop the result.

So we take a new image (left12.jpg in this case; that is the first image in this chapter).
@code{.py}
img = cv2.imread('left12.jpg')
h, w = img.shape[:2]
newcameramtx, roi=cv2.getOptimalNewCameraMatrix(mtx,dist,(w,h),1,(w,h))
@endcode
#### 1. Using **cv2.undistort()**

This is the shortest path. Just call the function and use the ROI obtained above to crop the result.
@code{.py}
# undistort
dst = cv2.undistort(img, mtx, dist, None, newcameramtx)

# crop the image
x,y,w,h = roi
dst = dst[y:y+h, x:x+w]
cv2.imwrite('calibresult.png',dst)
@endcode
#### 2. Using **remapping**

This is the curved path. First find a mapping function from the distorted image to the undistorted
image, then use the remap function.
@code{.py}
# undistort
mapx,mapy = cv2.initUndistortRectifyMap(mtx,dist,None,newcameramtx,(w,h),5)
dst = cv2.remap(img,mapx,mapy,cv2.INTER_LINEAR)

# crop the image
x,y,w,h = roi
dst = dst[y:y+h, x:x+w]
cv2.imwrite('calibresult.png',dst)
@endcode
Both methods give the same result. See the result below:

![image](images/calib_result.jpg)

You can see in the result that all the edges are straight.

Now you can store the camera matrix and distortion coefficients using write functions in Numpy
(np.savez, np.savetxt etc.) for future use.
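For example, a minimal way to save and reload the results with np.savez (the file name 'B.npz'
matches the one loaded in the pose estimation tutorial; the key names are our own choice):
@code{.py}
# Save the calibration results ...
np.savez('B.npz', mtx=mtx, dist=dist, rvecs=rvecs, tvecs=tvecs)

# ... and load them back later
with np.load('B.npz') as X:
    mtx, dist = X['mtx'], X['dist']
@endcode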
Re-projection Error
-------------------

The re-projection error gives a good estimate of just how exact the found parameters are. It should
be as close to zero as possible. Given the intrinsic, distortion, rotation and translation matrices,
we first transform the object points to image points using **cv2.projectPoints()**. Then we
calculate the absolute norm between what we got with our transformation and the corner finding
algorithm. To find the average error, we calculate the arithmetic mean of the errors calculated for
all the calibration images.
@code{.py}
mean_error = 0
for i in xrange(len(objpoints)):
    imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
    error = cv2.norm(imgpoints[i],imgpoints2, cv2.NORM_L2)/len(imgpoints2)
    mean_error += error

print "total error: ", mean_error/len(objpoints)
@endcode
Additional Resources
--------------------

Exercises
---------

-#  Try camera calibration with a circular grid.
doc/py_tutorials/py_calib3d/py_depthmap/py_depthmap.markdown (new file)

Depth Map from Stereo Images {#tutorial_py_depthmap}
============================

Goal
----

In this session,
-   We will learn to create a depth map from stereo images.

Basics
------

In the last session, we saw basic concepts like epipolar constraints and other related terms. We
also saw that if we have two images of the same scene, we can get depth information from them in an
intuitive way. Below is an image and some simple mathematical formulas which prove that intuition.
(Image Courtesy :

![image](images/stereo_depth.jpg)

The above diagram contains equivalent triangles. Writing their equivalent equations yields the
following result:

\f[disparity = x - x' = \frac{Bf}{Z}\f]

\f$x\f$ and \f$x'\f$ are the distances between points in the image plane corresponding to the 3D scene
point and their camera centers. \f$B\f$ is the distance between the two cameras (which we know) and
\f$f\f$ is the focal length of the camera (already known). So, in short, the above equation says that
the depth of a point in a scene is inversely proportional to the difference in distance of the
corresponding image points and their camera centers. With this information, we can derive the depth
of all pixels in an image.
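A tiny numeric sketch of the relation above (B, f and the disparity value are made-up numbers,
purely for illustration): rearranging gives \f$Z = Bf / disparity\f$.
@code{.py}
# Illustrative numbers only: baseline B in mm, focal length f in pixels.
B = 60.0          # assumed distance between the two cameras (mm)
f = 700.0         # assumed focal length (pixels)
disparity = 14.0  # assumed disparity x - x' for some pixel (pixels)

Z = B * f / disparity   # depth of that pixel, here 3000 mm
print Z
@endcode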
In practice, this means finding corresponding matches between the two images. We have already seen
how the epipolar constraint makes this operation faster and more accurate. Once the matches are
found, the disparity is computed. Let's see how we can do it with OpenCV.

Code
----

The code snippet below shows a simple procedure to create a disparity map.
@code{.py}
import numpy as np
import cv2
from matplotlib import pyplot as plt

imgL = cv2.imread('tsukuba_l.png',0)
imgR = cv2.imread('tsukuba_r.png',0)

stereo = cv2.createStereoBM(numDisparities=16, blockSize=15)
disparity = stereo.compute(imgL,imgR)
plt.imshow(disparity,'gray')
plt.show()
@endcode
The image below contains the original image (left) and its disparity map (right). As you can see,
the result is contaminated with a high degree of noise. By adjusting the values of numDisparities
and blockSize, you can get a better result.

![image](images/disparity_map.jpg)

@note More details to be added

Additional Resources
--------------------

Exercises
---------

-#  OpenCV samples contain an example of generating a disparity map and its 3D reconstruction.
    Check stereo_match.py in OpenCV-Python samples.
(new file)

Epipolar Geometry {#tutorial_py_epipolar_geometry}
=================

Goal
----

In this section,

-   We will learn about the basics of multiview geometry.
-   We will see what the epipole, epipolar lines, epipolar constraint etc. are.

Basic Concepts
--------------

When we take an image using a pin-hole camera, we lose an important piece of information: the depth
of the image, i.e. how far each point in the image is from the camera, because it is a 3D-to-2D
conversion. So it is an important question whether we can recover depth information using these
cameras. And the answer is to use more than one camera. Our eyes work in a similar way, using two
cameras (two eyes); this is called stereo vision. So let's see what OpenCV provides in this field.

(*Learning OpenCV* by Gary Bradsky has a lot of information in this field.)

Before going to depth images, let's first understand some basic concepts in multiview geometry. In
this section we will deal with epipolar geometry. See the image below, which shows a basic setup
with two cameras taking an image of the same scene.

![image](images/epipolar.jpg)

If we are using only the left camera, we can't find the 3D point corresponding to the point \f$x\f$ in
the image, because every point on the line \f$OX\f$ projects to the same point on the image plane. But
consider the right image as well. Now different points on the line \f$OX\f$ project to different points
(\f$x'\f$) in the right plane. So with these two images we can triangulate the correct 3D point. This
is the whole idea.

The projections of the different points on \f$OX\f$ form a line on the right plane (line \f$l'\f$). We
call it the **epiline** corresponding to the point \f$x\f$. It means that to find the point \f$x\f$ on the
right image, search along this epiline: it should be somewhere on this line. (Think of it this way:
to find the matching point in the other image, you need not search the whole image, just search
along the epiline. This gives better performance and accuracy.) This is called the **Epipolar
Constraint**. Similarly, all points have their corresponding epilines in the other image. The plane
\f$XOO'\f$ is called the **Epipolar Plane**.

\f$O\f$ and \f$O'\f$ are the camera centers. From the setup given above, you can see that the projection
of the right camera \f$O'\f$ is seen on the left image at the point \f$e\f$. It is called the **epipole**.
The epipole is the point of intersection of the line through the camera centers with the image
planes. Similarly, \f$e'\f$ is the epipole of the left camera. In some cases you won't be able to
locate the epipole in the image; it may be outside the image (which means one camera doesn't see the
other).

All the epilines pass through the epipole. So to find the location of the epipole, we can find many
epilines and find their intersection point.

So in this session, we focus on finding epipolar lines and epipoles. But to find them, we need two
more ingredients, the **Fundamental Matrix (F)** and the **Essential Matrix (E)**. The Essential
Matrix contains information about translation and rotation, which describe the location of the
second camera relative to the first in global coordinates. See the image below (Image courtesy:
Learning OpenCV by Gary Bradsky):

![image](images/essential_matrix.jpg)

But we prefer measurements to be done in pixel coordinates, right? The Fundamental Matrix contains
the same information as the Essential Matrix, plus information about the intrinsics of both cameras,
so that we can relate the two cameras in pixel coordinates. (If we are using rectified images and
normalize the points by dividing by the focal lengths, \f$F=E\f$.) In simple words, the Fundamental
Matrix F maps a point in one image to a line (epiline) in the other image. It is calculated from
matching points between the two images. A minimum of 8 such points is required to find the
fundamental matrix (when using the 8-point algorithm). More points are preferred, with RANSAC used
to get a more robust result.
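A small sketch of that mapping (not part of the original tutorial): for a fundamental matrix F and a
point \f$x=(u,v,1)\f$ in the first image in homogeneous coordinates, the corresponding epiline in the
second image is \f$l' = Fx\f$, a 3-vector \f$(a,b,c)\f$ describing the line \f$ax + by + c = 0\f$.
This is essentially what **cv2.computeCorrespondEpilines()**, used below, computes (up to
normalization). The values here are placeholders only.
@code{.py}
import numpy as np

F = np.eye(3)                      # placeholder fundamental matrix, for illustration only
x = np.array([100.0, 50.0, 1.0])   # a point in the first image, homogeneous coordinates

line = F.dot(x)                    # epiline (a, b, c) in the second image: a*x + b*y + c = 0
print line
@endcode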
Code
----

So first we need to find as many matches as possible between the two images, in order to find the
fundamental matrix. For this, we use SIFT descriptors with a FLANN based matcher and the ratio test.
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img1 = cv2.imread('myleft.jpg',0)  #queryimage # left image
img2 = cv2.imread('myright.jpg',0) #trainimage # right image

sift = cv2.SIFT()

# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img1,None)
kp2, des2 = sift.detectAndCompute(img2,None)

# FLANN parameters
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50)

flann = cv2.FlannBasedMatcher(index_params,search_params)
matches = flann.knnMatch(des1,des2,k=2)

good = []
pts1 = []
pts2 = []

# ratio test as per Lowe's paper
for i,(m,n) in enumerate(matches):
    if m.distance < 0.8*n.distance:
        good.append(m)
        pts2.append(kp2[m.trainIdx].pt)
        pts1.append(kp1[m.queryIdx].pt)
@endcode
Now we have the list of best matches from both the images. Let's find the Fundamental Matrix.
@code{.py}
pts1 = np.int32(pts1)
pts2 = np.int32(pts2)
F, mask = cv2.findFundamentalMat(pts1,pts2,cv2.FM_LMEDS)

# We select only inlier points
pts1 = pts1[mask.ravel()==1]
pts2 = pts2[mask.ravel()==1]
@endcode
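As an aside (our own sketch, not part of the original tutorial): instead of intersecting many
epilines, the epipoles can also be read off F directly, since the epipole in the first image
satisfies \f$Fe = 0\f$ and the epipole in the second image satisfies \f$F^Te' = 0\f$, i.e. they are
the null vectors of F. One way to get them with numpy, continuing from the snippet above:
@code{.py}
# Sketch: epipoles as the null vectors of F, obtained via SVD.
U, S, Vt = np.linalg.svd(F)

e = Vt[-1]          # right null vector: F.dot(e) is (numerically) zero
e_prime = U[:, -1]  # left null vector:  F.T.dot(e_prime) is (numerically) zero

# convert from homogeneous to pixel coordinates
print e[:2] / e[2]
print e_prime[:2] / e_prime[2]
@endcode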
Next we find the epilines. The epilines corresponding to the points in the first image are drawn on
the second image, so passing the correct image index matters here. We get an array of lines, so we
define a new function to draw these lines on the images.
@code{.py}
def drawlines(img1,img2,lines,pts1,pts2):
    ''' img1 - image on which we draw the epilines for the points in img2
        lines - corresponding epilines '''
    r,c = img1.shape
    img1 = cv2.cvtColor(img1,cv2.COLOR_GRAY2BGR)
    img2 = cv2.cvtColor(img2,cv2.COLOR_GRAY2BGR)
    for r,pt1,pt2 in zip(lines,pts1,pts2):
        color = tuple(np.random.randint(0,255,3).tolist())
        x0,y0 = map(int, [0, -r[2]/r[1] ])
        x1,y1 = map(int, [c, -(r[2]+r[0]*c)/r[1] ])
        img1 = cv2.line(img1, (x0,y0), (x1,y1), color,1)
        img1 = cv2.circle(img1,tuple(pt1),5,color,-1)
        img2 = cv2.circle(img2,tuple(pt2),5,color,-1)
    return img1,img2
@endcode
Now we find the epilines in both the images and draw them.
@code{.py}
# Find epilines corresponding to points in right image (second image) and
# drawing its lines on left image
lines1 = cv2.computeCorrespondEpilines(pts2.reshape(-1,1,2), 2,F)
lines1 = lines1.reshape(-1,3)
img5,img6 = drawlines(img1,img2,lines1,pts1,pts2)

# Find epilines corresponding to points in left image (first image) and
# drawing its lines on right image
lines2 = cv2.computeCorrespondEpilines(pts1.reshape(-1,1,2), 1,F)
lines2 = lines2.reshape(-1,3)
img3,img4 = drawlines(img2,img1,lines2,pts2,pts1)

plt.subplot(121),plt.imshow(img5)
plt.subplot(122),plt.imshow(img3)
plt.show()
@endcode
Below is the result we get:

![image](images/epiresult.jpg)

You can see in the left image that all the epilines converge at a point outside the image, on the
right side. That meeting point is the epipole.

For better results, images with good resolution and many non-planar points should be used.

Additional Resources
--------------------

Exercises
---------

-#  One important topic is the forward movement of the camera. Then epipoles will be seen at the
    same locations in both images, with epilines emerging from a fixed point. [See this
    discussion](http://answers.opencv.org/question/17912/location-of-epipole/).
-#  Fundamental Matrix estimation is sensitive to the quality of matches, outliers etc. It becomes
    worse when all the selected matches lie on the same plane. [Check this
    discussion](http://answers.opencv.org/question/18125/epilines-not-correct/).
doc/py_tutorials/py_calib3d/py_pose/py_pose.markdown (new file)

Pose Estimation {#tutorial_py_pose}
===============

Goal
----

In this section,
-   We will learn to exploit the calib3d module to create some 3D effects in images.

Basics
------

This is going to be a small section. During the last session on camera calibration, you found the
camera matrix, distortion coefficients etc. Given a pattern image, we can utilize that information
to calculate its pose, i.e. how the object is situated in space: how it is rotated, how it is
displaced etc. For a planar object, we can assume Z=0, so that the problem becomes how the camera is
placed in space to see our pattern image. So, if we know how the object lies in space, we can draw
some 2D diagrams on it to simulate the 3D effect. Let's see how to do it.

Our problem is this: we want to draw our 3D coordinate axes (X, Y, Z) on our chessboard's first
corner, with the X axis in blue, the Y axis in green and the Z axis in red. So, in effect, the Z
axis should feel like it is perpendicular to our chessboard plane.

First, let's load the camera matrix and distortion coefficients from the previous calibration
result.
@code{.py}
import cv2
import numpy as np
import glob

# Load previously saved data
with np.load('B.npz') as X:
    mtx, dist, _, _ = [X[i] for i in ('mtx','dist','rvecs','tvecs')]
@endcode
Now let's create a function, draw, which takes the corners in the chessboard (obtained using
**cv2.findChessboardCorners()**) and **axis points** to draw a 3D axis.
@code{.py}
def draw(img, corners, imgpts):
    corner = tuple(corners[0].ravel())
    img = cv2.line(img, corner, tuple(imgpts[0].ravel()), (255,0,0), 5)
    img = cv2.line(img, corner, tuple(imgpts[1].ravel()), (0,255,0), 5)
    img = cv2.line(img, corner, tuple(imgpts[2].ravel()), (0,0,255), 5)
    return img
@endcode
Then, as in the previous case, we create the termination criteria, object points (3D points of the
corners in the chessboard) and axis points. Axis points are points in 3D space for drawing the axes.
We draw axes of length 3 (the units will be in terms of chess square size, since we calibrated based
on that size). So our X axis is drawn from (0,0,0) to (3,0,0), and similarly for the Y axis. The Z
axis is drawn from (0,0,0) to (0,0,-3); the negative sign denotes that it is drawn towards the
camera.
@code{.py}
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
objp = np.zeros((6*7,3), np.float32)
objp[:,:2] = np.mgrid[0:7,0:6].T.reshape(-1,2)

axis = np.float32([[3,0,0], [0,3,0], [0,0,-3]]).reshape(-1,3)
@endcode
Now, as usual, we load each image, search for the 7x6 grid and, if found, refine it with sub-pixel
corners. Then, to calculate the rotation and translation, we use the function
**cv2.solvePnPRansac()**. Once we have those transformation matrices, we use them to project our
**axis points** onto the image plane. In simple words, we find the points on the image plane
corresponding to each of (3,0,0), (0,3,0), (0,0,-3) in 3D space. Once we get them, we draw lines
from the first corner to each of these points using our draw() function. Done!
@code{.py}
for fname in glob.glob('left*.jpg'):
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(gray, (7,6),None)

    if ret == True:
        corners2 = cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)

        # Find the rotation and translation vectors.
        rvecs, tvecs, inliers = cv2.solvePnPRansac(objp, corners2, mtx, dist)

        # project 3D points to image plane
        imgpts, jac = cv2.projectPoints(axis, rvecs, tvecs, mtx, dist)

        img = draw(img,corners2,imgpts)
        cv2.imshow('img',img)
        k = cv2.waitKey(0) & 0xff
        if k == ord('s'):
            cv2.imwrite(fname[:6]+'.png', img)

cv2.destroyAllWindows()
@endcode
See some results below. Notice that each axis is 3 squares long:

![image](images/pose_1.jpg)

### Render a Cube

If you want to draw a cube, modify the draw() function and the axis points as follows.

Modified draw() function:
@code{.py}
def draw(img, corners, imgpts):
    imgpts = np.int32(imgpts).reshape(-1,2)

    # draw ground floor in green
    img = cv2.drawContours(img, [imgpts[:4]],-1,(0,255,0),-3)

    # draw pillars in blue color
    for i,j in zip(range(4),range(4,8)):
        img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]),(255),3)

    # draw top layer in red color
    img = cv2.drawContours(img, [imgpts[4:]],-1,(0,0,255),3)

    return img
@endcode
Modified axis points. They are the 8 corners of a cube in 3D space:
@code{.py}
axis = np.float32([[0,0,0], [0,3,0], [3,3,0], [3,0,0],
                   [0,0,-3],[0,3,-3],[3,3,-3],[3,0,-3] ])
@endcode
And look at the result below:

![image](images/pose_2.jpg)

If you are interested in graphics, augmented reality etc., you can use OpenGL to render more
complicated figures.

Additional Resources
--------------------

Exercises
---------
(new file)

Camera Calibration and 3D Reconstruction {#tutorial_py_table_of_contents_calib3d}
========================================

-   @subpage tutorial_py_calibration

    Let's find out how good our camera is. Is there any distortion in images taken with it? If so,
    how do we correct it?

-   @subpage tutorial_py_pose

    This is a small section which will help you create some cool 3D effects with the calib module.

-   @subpage tutorial_py_epipolar_geometry

    Let's understand epipolar geometry and the epipolar constraint.

-   @subpage tutorial_py_depthmap

    Extract depth information from 2D images.
doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown (new file)

Basic Operations on Images {#tutorial_py_basic_ops}
==========================

Goal
----

Learn to:

-   Access pixel values and modify them
-   Access image properties
-   Set a Region of Interest (ROI)
-   Split and merge images

Almost all the operations in this section are related to Numpy rather than OpenCV. A good knowledge
of Numpy is required to write better optimized code with OpenCV.

*( Examples will be shown in a Python terminal, since most of them are just single lines of code )*

Accessing and Modifying pixel values
------------------------------------

Let's load a color image first:
@code{.py}
>>> import cv2
>>> import numpy as np

>>> img = cv2.imread('messi5.jpg')
@endcode
You can access a pixel value by its row and column coordinates. For a BGR image, it returns an array
of Blue, Green, Red values. For a grayscale image, just the corresponding intensity is returned.
@code{.py}
>>> px = img[100,100]
>>> print px
[157 166 200]

# accessing only blue pixel
>>> blue = img[100,100,0]
>>> print blue
157
@endcode
You can modify the pixel values the same way.
@code{.py}
>>> img[100,100] = [255,255,255]
>>> print img[100,100]
[255 255 255]
@endcode

**warning**

Numpy is an optimized library for fast array calculations. Simply accessing each and every pixel
value and modifying it will be very slow, and it is discouraged.

@note The method above is normally used for selecting a region of an array, say the first 5 rows
and last 3 columns. For individual pixel access, the Numpy array methods array.item() and
array.itemset() are considered better, but they always return a scalar. So if you want to access
all the B,G,R values, you need to call array.item() separately for each.
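For instance, the region selection mentioned in the note looks like this (a minimal sketch; the
printed shape assumes the color image loaded above):
@code{.py}
# first 5 rows and last 3 columns of the image
>>> patch = img[:5, -3:]
>>> print patch.shape
(5, 3, 3)
@endcode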
|
|
||||||
|
Better pixel accessing and editing method :
|
||||||
|
@code{.py}
|
||||||
|
# accessing RED value
|
||||||
|
>>> img.item(10,10,2)
|
||||||
|
59
|
||||||
|
|
||||||
|
# modifying RED value
|
||||||
|
>>> img.itemset((10,10,2),100)
|
||||||
|
>>> img.item(10,10,2)
|
||||||
|
100
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
Accessing Image Properties
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Image properties include number of rows, columns and channels, type of image data, number of pixels
|
||||||
|
etc.
|
||||||
|
|
||||||
|
Shape of image is accessed by img.shape. It returns a tuple of number of rows, columns and channels
|
||||||
|
(if image is color):
|
||||||
|
@code{.py}
|
||||||
|
>>> print img.shape
|
||||||
|
(342, 548, 3)
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
@note If image is grayscale, tuple returned contains only number of rows and columns. So it is a
|
||||||
|
good method to check if loaded image is grayscale or color image.
|
||||||
|
|
||||||
|
Total number of pixels is accessed by `img.size`:
|
||||||
|
@code{.py}
|
||||||
|
>>> print img.size
|
||||||
|
562248
|
||||||
|
@endcode
|
||||||
|
Image datatype is obtained by \`img.dtype\`:
|
||||||
|
@code{.py}
|
||||||
|
>>> print img.dtype
|
||||||
|
uint8
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
@note img.dtype is very important while debugging because a large number of errors in OpenCV-Python
|
||||||
|
code is caused by invalid datatype.
|
||||||
|
|
||||||
|
Image ROI
|
||||||
|
---------
|
||||||
|
|
||||||
|
Sometimes, you will have to play with certain region of images. For eye detection in images, first
|
||||||
|
face detection is done all over the image and when face is obtained, we select the face region alone
|
||||||
|
and search for eyes inside it instead of searching whole image. It improves accuracy (because eyes
|
||||||
|
are always on faces :D ) and performance (because we search for a small area)
|
||||||
|
|
||||||
|
ROI is again obtained using Numpy indexing. Here I am selecting the ball and copying it to another
|
||||||
|
region in the image:
|
||||||
|
@code{.py}
|
||||||
|
>>> ball = img[280:340, 330:390]
|
||||||
|
>>> img[273:333, 100:160] = ball
|
||||||
|
@endcode
|
||||||
|
Check the results below:
|
||||||
|
|
||||||
|
![image](images/roi.jpg)
|
||||||
|
|
||||||
|
Splitting and Merging Image Channels
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
Sometimes you will need to work separately on B,G,R channels of image. Then you need to split the
|
||||||
|
BGR images to single planes. Or another time, you may need to join these individual channels to BGR
|
||||||
|
image. You can do it simply by:
|
||||||
|
@code{.py}
|
||||||
|
>>> b,g,r = cv2.split(img)
|
||||||
|
>>> img = cv2.merge((b,g,r))
|
||||||
|
@endcode
|
||||||
|
Or
|
||||||
|
@code
|
||||||
|
>>> b = img[:,:,0]
|
||||||
|
@endcode
|
||||||
|
Suppose you want to set all the red pixels to zero. You do not need to split the channels first
and set each one; you can simply use Numpy indexing, which is faster:
|
||||||
|
@code{.py}
|
||||||
|
>>> img[:,:,2] = 0
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
**warning**
|
||||||
|
|
||||||
|
cv2.split() is a costly operation (in terms of time). So do it only if you need it. Otherwise go
|
||||||
|
for Numpy indexing.
|
||||||
|
|
||||||
|
Making Borders for Images (Padding)
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
|
If you want to create a border around the image, something like a photo frame, you can use the
**cv2.copyMakeBorder()** function. But it has more applications, such as convolution operations,
zero padding etc. This function takes the following arguments:
|
||||||
|
|
||||||
|
- **src** - input image
|
||||||
|
- **top**, **bottom**, **left**, **right** - border width in number of pixels in corresponding
|
||||||
|
directions
|
||||||
|
|
||||||
|
- **borderType** - Flag defining what kind of border to be added. It can be following types:
|
||||||
|
- **cv2.BORDER_CONSTANT** - Adds a constant colored border. The value should be given
|
||||||
|
as next argument.
|
||||||
|
- **cv2.BORDER_REFLECT** - Border will be mirror reflection of the border elements,
|
||||||
|
like this : *fedcba|abcdefgh|hgfedcb*
|
||||||
|
- **cv2.BORDER_REFLECT_101** or **cv2.BORDER_DEFAULT** - Same as above, but with a
|
||||||
|
slight change, like this : *gfedcb|abcdefgh|gfedcba*
|
||||||
|
- **cv2.BORDER_REPLICATE** - Last element is replicated throughout, like this:
|
||||||
|
*aaaaaa|abcdefgh|hhhhhhh*
|
||||||
|
- **cv2.BORDER_WRAP** - Can't explain, it will look like this :
|
||||||
|
*cdefgh|abcdefgh|abcdefg*
|
||||||
|
|
||||||
|
- **value** - Color of border if border type is cv2.BORDER_CONSTANT
|
||||||
|
|
||||||
|
Below is a sample code demonstrating all these border types for better understanding:
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
BLUE = [255,0,0]
|
||||||
|
|
||||||
|
img1 = cv2.imread('opencv_logo.png')
|
||||||
|
|
||||||
|
replicate = cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_REPLICATE)
|
||||||
|
reflect = cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_REFLECT)
|
||||||
|
reflect101 = cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_REFLECT_101)
|
||||||
|
wrap = cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_WRAP)
|
||||||
|
constant= cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_CONSTANT,value=BLUE)
|
||||||
|
|
||||||
|
plt.subplot(231),plt.imshow(img1,'gray'),plt.title('ORIGINAL')
|
||||||
|
plt.subplot(232),plt.imshow(replicate,'gray'),plt.title('REPLICATE')
|
||||||
|
plt.subplot(233),plt.imshow(reflect,'gray'),plt.title('REFLECT')
|
||||||
|
plt.subplot(234),plt.imshow(reflect101,'gray'),plt.title('REFLECT_101')
|
||||||
|
plt.subplot(235),plt.imshow(wrap,'gray'),plt.title('WRAP')
|
||||||
|
plt.subplot(236),plt.imshow(constant,'gray'),plt.title('CONSTANT')
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
See the result below. (Image is displayed with matplotlib. So RED and BLUE planes will be
|
||||||
|
interchanged):
|
||||||
|
|
||||||
|
![image](images/border.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
Arithmetic Operations on Images {#tutorial_py_image_arithmetics}
|
||||||
|
===============================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
- Learn several arithmetic operations on images like addition, subtraction, bitwise operations
|
||||||
|
etc.
|
||||||
|
- You will learn these functions : **cv2.add()**, **cv2.addWeighted()** etc.
|
||||||
|
|
||||||
|
Image Addition
|
||||||
|
--------------
|
||||||
|
|
||||||
|
You can add two images with the OpenCV function cv2.add(), or simply by the numpy operation
res = img1 + img2. Both images should be of the same depth and type, or the second image can just
be a scalar value.
|
||||||
|
|
||||||
|
@note There is a difference between OpenCV addition and Numpy addition. OpenCV addition is a
|
||||||
|
saturated operation while Numpy addition is a modulo operation.
|
||||||
|
|
||||||
|
For example, consider below sample:
|
||||||
|
@code{.py}
|
||||||
|
>>> x = np.uint8([250])
|
||||||
|
>>> y = np.uint8([10])
|
||||||
|
|
||||||
|
>>> print cv2.add(x,y) # 250+10 = 260 => 255
|
||||||
|
[[255]]
|
||||||
|
|
||||||
|
>>> print x+y # 250+10 = 260 % 256 = 4
|
||||||
|
[4]
|
||||||
|
@endcode
|
||||||
|
The difference will be more visible when you add two images. The OpenCV function will provide a
better result, so it is always better to stick to OpenCV functions.
|
||||||
|
|
||||||
|
Image Blending
|
||||||
|
--------------
|
||||||
|
|
||||||
|
This is also image addition, but different weights are given to images so that it gives a feeling of
|
||||||
|
blending or transparency. Images are added as per the equation below:
|
||||||
|
|
||||||
|
\f[g(x) = (1 - \alpha)f_{0}(x) + \alpha f_{1}(x)\f]
|
||||||
|
|
||||||
|
By varying \f$\alpha\f$ from \f$0 \rightarrow 1\f$, you can perform a cool transition from one
image to another.
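For instance, a minimal sketch of such a transition, stepping \f$\alpha\f$ from 0 to 1 over the two
images used below (which are assumed to have the same size):
@code{.py}
import cv2
import numpy as np

img1 = cv2.imread('ml.png')
img2 = cv2.imread('opencv_logo.jpg')

for alpha in np.linspace(0, 1, 20):
    # weights always sum to 1, so overall brightness stays constant during the transition
    frame = cv2.addWeighted(img1, 1 - alpha, img2, alpha, 0)
    cv2.imshow('transition', frame)
    cv2.waitKey(50)
cv2.destroyAllWindows()
@endcode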
|
||||||
|
|
||||||
|
Here I took two images to blend together. The first image is given a weight of 0.7 and the second
image is given 0.3. cv2.addWeighted() applies the following equation to the images:
|
||||||
|
|
||||||
|
\f[dst = \alpha \cdot img1 + \beta \cdot img2 + \gamma\f]
|
||||||
|
|
||||||
|
Here \f$\gamma\f$ is taken as zero.
|
||||||
|
@code{.py}
|
||||||
|
img1 = cv2.imread('ml.png')
|
||||||
|
img2 = cv2.imread('opencv_logo.jpg')
|
||||||
|
|
||||||
|
dst = cv2.addWeighted(img1,0.7,img2,0.3,0)
|
||||||
|
|
||||||
|
cv2.imshow('dst',dst)
|
||||||
|
cv2.waitKey(0)
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
@endcode
|
||||||
|
Check the result below:
|
||||||
|
|
||||||
|
![image](images/blending.jpg)
|
||||||
|
|
||||||
|
Bitwise Operations
|
||||||
|
------------------
|
||||||
|
|
||||||
|
This includes bitwise AND, OR, NOT and XOR operations. They will be highly useful while extracting
|
||||||
|
any part of the image (as we will see in coming chapters), defining and working with non-rectangular
|
||||||
|
ROI etc. Below we will see an example on how to change a particular region of an image.
|
||||||
|
|
||||||
|
I want to put the OpenCV logo above an image. If I add two images, it will change the colors. If I
blend them, I get a transparent effect. But I want it to be opaque. If it were a rectangular
region, I could use ROI as we did in the last chapter. But the OpenCV logo is not a rectangular
shape. So you can do it with bitwise operations as below:
|
||||||
|
@code{.py}
|
||||||
|
# Load two images
|
||||||
|
img1 = cv2.imread('messi5.jpg')
|
||||||
|
img2 = cv2.imread('opencv_logo.png')
|
||||||
|
|
||||||
|
# I want to put logo on top-left corner, So I create a ROI
|
||||||
|
rows,cols,channels = img2.shape
|
||||||
|
roi = img1[0:rows, 0:cols ]
|
||||||
|
|
||||||
|
# Now create a mask of logo and create its inverse mask also
|
||||||
|
img2gray = cv2.cvtColor(img2,cv2.COLOR_BGR2GRAY)
|
||||||
|
ret, mask = cv2.threshold(img2gray, 10, 255, cv2.THRESH_BINARY)
|
||||||
|
mask_inv = cv2.bitwise_not(mask)
|
||||||
|
|
||||||
|
# Now black-out the area of logo in ROI
|
||||||
|
img1_bg = cv2.bitwise_and(roi,roi,mask = mask_inv)
|
||||||
|
|
||||||
|
# Take only region of logo from logo image.
|
||||||
|
img2_fg = cv2.bitwise_and(img2,img2,mask = mask)
|
||||||
|
|
||||||
|
# Put logo in ROI and modify the main image
|
||||||
|
dst = cv2.add(img1_bg,img2_fg)
|
||||||
|
img1[0:rows, 0:cols ] = dst
|
||||||
|
|
||||||
|
cv2.imshow('res',img1)
|
||||||
|
cv2.waitKey(0)
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
@endcode
|
||||||
|
See the result below. Left image shows the mask we created. Right image shows the final result. For
|
||||||
|
more understanding, display all the intermediate images in the above code, especially img1_bg and
|
||||||
|
img2_fg.
|
||||||
|
|
||||||
|
![image](images/overlay.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
||||||
|
|
||||||
|
-# Create a slide show of images in a folder with smooth transition between images using
|
||||||
|
cv2.addWeighted function
|
Performance Measurement and Improvement Techniques {#tutorial_py_optimization}
|
||||||
|
==================================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In image processing, since you are dealing with a large number of operations per second, it is
mandatory that your code not only provides the correct solution, but also provides it in the
fastest manner. So in this chapter, you will learn
|
||||||
|
|
||||||
|
- To measure the performance of your code.
|
||||||
|
- Some tips to improve the performance of your code.
|
||||||
|
- You will see these functions : **cv2.getTickCount**, **cv2.getTickFrequency** etc.
|
||||||
|
|
||||||
|
Apart from OpenCV, Python also provides a module **time** which is helpful in measuring the time of
execution. Another module, **profile**, helps to get a detailed report on the code, like how much
time each function in the code took, how many times the function was called etc. But if you are
using IPython, all these features are integrated in a user-friendly manner. We will see some
important ones, and for more details, check the links in the **Additional Resources** section.
|
||||||
|
|
||||||
|
Measuring Performance with OpenCV
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
The **cv2.getTickCount** function returns the number of clock-cycles from a reference event (like
the moment the machine was switched ON) to the moment this function is called. So if you call it
before and after the function execution, you get the number of clock-cycles used to execute a
function.
|
||||||
|
|
||||||
|
**cv2.getTickFrequency** function returns the frequency of clock-cycles, or the number of
|
||||||
|
clock-cycles per second. So to find the time of execution in seconds, you can do following:
|
||||||
|
@code{.py}
|
||||||
|
e1 = cv2.getTickCount()
|
||||||
|
# your code execution
|
||||||
|
e2 = cv2.getTickCount()
|
||||||
|
time = (e2 - e1)/ cv2.getTickFrequency()
|
||||||
|
@endcode
|
||||||
|
We will demonstrate this with the following example, which applies median filtering with kernels
of odd sizes ranging from 5 to 49. (Don't worry about what the result will look like; that is not
our goal):
|
||||||
|
@code{.py}
|
||||||
|
img1 = cv2.imread('messi5.jpg')
|
||||||
|
|
||||||
|
e1 = cv2.getTickCount()
|
||||||
|
for i in xrange(5,49,2):
|
||||||
|
img1 = cv2.medianBlur(img1,i)
|
||||||
|
e2 = cv2.getTickCount()
|
||||||
|
t = (e2 - e1)/cv2.getTickFrequency()
|
||||||
|
print t
|
||||||
|
|
||||||
|
# Result I got is 0.521107655 seconds
|
||||||
|
@endcode
|
||||||
|
@note You can do the same with the time module. Instead of cv2.getTickCount, use the time.time()
function, then take the difference of the two times.
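For example, the same measurement with the time module might look like this (a minimal sketch of
the loop shown above):
@code{.py}
import time
import cv2

img1 = cv2.imread('messi5.jpg')

t1 = time.time()
for i in xrange(5,49,2):
    img1 = cv2.medianBlur(img1,i)
t2 = time.time()

print t2 - t1   # elapsed time in seconds
@endcode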
|
||||||
|
|
||||||
|
Default Optimization in OpenCV
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
Many of the OpenCV functions are optimized using SSE2, AVX etc. OpenCV also contains unoptimized
fallback code. So if our system supports these features, we should exploit them (almost all modern
processors support them). Optimization is enabled by default while compiling, so OpenCV runs the
optimized code if it is enabled, else it runs the unoptimized code. You can use
**cv2.useOptimized()** to check if it is enabled/disabled and **cv2.setUseOptimized()** to
enable/disable it. Let's see a simple example.
|
||||||
|
@code{.py}
|
||||||
|
# check if optimization is enabled
|
||||||
|
In [5]: cv2.useOptimized()
|
||||||
|
Out[5]: True
|
||||||
|
|
||||||
|
In [6]: %timeit res = cv2.medianBlur(img,49)
|
||||||
|
10 loops, best of 3: 34.9 ms per loop
|
||||||
|
|
||||||
|
# Disable it
|
||||||
|
In [7]: cv2.setUseOptimized(False)
|
||||||
|
|
||||||
|
In [8]: cv2.useOptimized()
|
||||||
|
Out[8]: False
|
||||||
|
|
||||||
|
In [9]: %timeit res = cv2.medianBlur(img,49)
|
||||||
|
10 loops, best of 3: 64.1 ms per loop
|
||||||
|
@endcode
|
||||||
|
See, optimized median filtering is \~2x faster than the unoptimized version. If you check its
source, you can see that median filtering is SIMD optimized. So you can use this to enable
optimization at the top of your code (remember it is enabled by default).
|
||||||
|
|
||||||
|
Measuring Performance in IPython
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
Sometimes you may need to compare the performance of two similar operations. IPython gives you a
magic command, %timeit, to perform this. It runs the code several times to get more accurate
results. Once again, it is suitable for measuring single lines of code.
|
||||||
|
|
||||||
|
For example, do you know which of the following addition operations is faster: x = 5; y = x\*\*2,
x = 5; y = x\*x, x = np.uint8([5]); y = x\*x or y = np.square(x)? We will find out with %timeit in
the IPython shell.
|
||||||
|
@code{.py}
|
||||||
|
In [10]: x = 5
|
||||||
|
|
||||||
|
In [11]: %timeit y=x**2
|
||||||
|
10000000 loops, best of 3: 73 ns per loop
|
||||||
|
|
||||||
|
In [12]: %timeit y=x*x
|
||||||
|
10000000 loops, best of 3: 58.3 ns per loop
|
||||||
|
|
||||||
|
In [15]: z = np.uint8([5])
|
||||||
|
|
||||||
|
In [17]: %timeit y=z*z
|
||||||
|
1000000 loops, best of 3: 1.25 us per loop
|
||||||
|
|
||||||
|
In [19]: %timeit y=np.square(z)
|
||||||
|
1000000 loops, best of 3: 1.16 us per loop
|
||||||
|
@endcode
|
||||||
|
You can see that x = 5 ; y = x\*x is the fastest, and it is around 20x faster compared to Numpy.
If you consider the array creation also, it may reach up to 100x faster. Cool, right? *(Numpy devs
are working on this issue)*
|
||||||
|
|
||||||
|
@note Python scalar operations are faster than Numpy scalar operations. So for operations
involving one or two elements, a Python scalar is better than a Numpy array. Numpy has the
advantage when the size of the array is a little bit bigger.
|
||||||
|
|
||||||
|
We will try one more example. This time, we will compare the performance of **cv2.countNonZero()**
|
||||||
|
and **np.count_nonzero()** for same image.
|
||||||
|
|
||||||
|
@code{.py}
|
||||||
|
In [35]: %timeit z = cv2.countNonZero(img)
|
||||||
|
100000 loops, best of 3: 15.8 us per loop
|
||||||
|
|
||||||
|
In [36]: %timeit z = np.count_nonzero(img)
|
||||||
|
1000 loops, best of 3: 370 us per loop
|
||||||
|
@endcode
|
||||||
|
See, the OpenCV function is nearly 25x faster than the Numpy function.
|
||||||
|
|
||||||
|
@note Normally, OpenCV functions are faster than Numpy functions. So for same operation, OpenCV
|
||||||
|
functions are preferred. But, there can be exceptions, especially when Numpy works with views
|
||||||
|
instead of copies.
|
||||||
|
|
||||||
|
More IPython magic commands
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
There are several other magic commands to measure the performance, profiling, line profiling, memory
|
||||||
|
measurement etc. They all are well documented. So only links to those docs are provided here.
|
||||||
|
Interested readers are recommended to try them out.
|
||||||
|
|
||||||
|
Performance Optimization Techniques
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
|
There are several techniques and coding methods to exploit maximum performance of Python and Numpy.
|
||||||
|
Only relevant ones are noted here and links are given to important sources. The main thing to be
|
||||||
|
noted here is that, first try to implement the algorithm in a simple manner. Once it is working,
|
||||||
|
profile it, find the bottlenecks and optimize them.
|
||||||
|
|
||||||
|
-# Avoid using loops in Python as far as possible, especially double/triple loops etc. They are
|
||||||
|
inherently slow.
|
||||||
|
2. Vectorize the algorithm/code to the maximum possible extent, because Numpy and OpenCV are
    optimized for vector operations (see the short sketch after this list).
|
||||||
|
3. Exploit the cache coherence.
|
||||||
|
4. Never make copies of array unless it is needed. Try to use views instead. Array copying is a
|
||||||
|
costly operation.
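To illustrate the first two points, here is a small sketch (with a random stand-in image)
comparing an explicit double loop with the equivalent vectorized Numpy expression; both compute a
simple threshold, but the vectorized form is typically orders of magnitude faster:
@code{.py}
import numpy as np

img = np.random.randint(0, 256, (480, 640)).astype(np.uint8)  # stand-in for a loaded image

# Explicit double loop: slow
out_loop = np.zeros_like(img)
for i in xrange(img.shape[0]):
    for j in xrange(img.shape[1]):
        out_loop[i, j] = 255 if img[i, j] > 127 else 0

# Vectorized version: one Numpy expression, no Python-level loop
out_vec = np.where(img > 127, 255, 0).astype(np.uint8)

print np.array_equal(out_loop, out_vec)   # True
@endcode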
|
||||||
|
|
||||||
|
Even after doing all these operations, if your code is still slow, or if the use of large loops is
unavoidable, use additional libraries like Cython to make it faster.
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# [Python Optimization Techniques](http://wiki.python.org/moin/PythonSpeed/PerformanceTips)
|
||||||
|
2. Scipy Lecture Notes - [Advanced
|
||||||
|
Numpy](http://scipy-lectures.github.io/advanced/advanced_numpy/index.html#advanced-numpy)
|
||||||
|
3. [Timing and Profiling in IPython](http://pynash.org/2013/03/06/timing-and-profiling.html)
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
Core Operations {#tutorial_py_table_of_contents_core}
|
||||||
|
===============
|
||||||
|
|
||||||
|
- @subpage tutorial_py_basic_ops
|
||||||
|
|
||||||
|
Learn to read and
|
||||||
|
edit pixel values, working with image ROI and other basic operations.
|
||||||
|
|
||||||
|
- @subpage tutorial_py_image_arithmetics
|
||||||
|
|
||||||
|
Perform arithmetic
|
||||||
|
operations on images
|
||||||
|
|
||||||
|
- @subpage tutorial_py_optimization
|
||||||
|
|
||||||
|
Getting a solution is
|
||||||
|
important. But getting it in the fastest way is more important. Learn to check the speed of your
|
||||||
|
code, optimize the code etc.
|
doc/py_tutorials/py_feature2d/py_brief/py_brief.markdown
BRIEF (Binary Robust Independent Elementary Features) {#tutorial_py_brief}
|
||||||
|
=====================================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter
|
||||||
|
- We will see the basics of BRIEF algorithm
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
We know SIFT uses a 128-dimensional vector for descriptors. Since it uses floating point numbers,
it takes basically 512 bytes. Similarly, SURF also takes a minimum of 256 bytes (for 64
dimensions). Creating such a vector for thousands of features takes a lot of memory, which is not
feasible for resource-constrained applications, especially embedded systems. And the larger the
memory, the longer the time it takes for matching.
|
||||||
|
|
||||||
|
But all these dimensions may not be needed for actual matching. We can compress them using several
methods like PCA, LDA etc. Even other methods, like hashing using LSH (Locality Sensitive Hashing),
are used to convert these SIFT descriptors from floating point numbers to binary strings. These
binary strings are used to match features using Hamming distance. This provides a better speed-up,
because finding the Hamming distance is just applying XOR and a bit count, which are very fast in
modern CPUs with SSE instructions. But here, we need to find the descriptors first; only then can
we apply hashing, which doesn't solve our initial problem of memory.
|
||||||
|
|
||||||
|
BRIEF comes into the picture at this moment. It provides a shortcut to find the binary strings
directly, without computing a full floating-point descriptor first. It takes a smoothed image
patch and selects a set of \f$n_d\f$ (x,y) location pairs in a unique way (explained in the
paper). Then some pixel intensity comparisons are done on these location pairs. For example, let
the first location pair be \f$p\f$ and \f$q\f$. If \f$I(p) < I(q)\f$, then the result is 1, else
it is 0. This is applied to all the \f$n_d\f$ location pairs to get an \f$n_d\f$-dimensional
bitstring.
|
||||||
|
|
||||||
|
This \f$n_d\f$ can be 128, 256 or 512. OpenCV supports all of these, but by default, it would be 256
|
||||||
|
(OpenCV represents it in bytes. So the values will be 16, 32 and 64). So once you get this, you can
|
||||||
|
use Hamming Distance to match these descriptors.
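To make the intensity-test and Hamming-distance ideas concrete, here is a tiny Numpy sketch. It
uses random location pairs over random patches, not the actual BRIEF sampling pattern from the
paper, so it only illustrates the mechanism:
@code{.py}
import numpy as np

np.random.seed(0)
patch1 = np.random.randint(0, 256, (31, 31)).astype(np.float32)  # stand-ins for two smoothed patches
patch2 = np.random.randint(0, 256, (31, 31)).astype(np.float32)

n_d = 256  # descriptor length in bits
p = np.random.randint(0, 31, (n_d, 2))  # random location pairs
q = np.random.randint(0, 31, (n_d, 2))

def brief_bits(patch):
    # each bit is 1 where I(p) < I(q), else 0
    return (patch[p[:, 0], p[:, 1]] < patch[q[:, 0], q[:, 1]]).astype(np.uint8)

d1, d2 = brief_bits(patch1), brief_bits(patch2)
hamming = np.count_nonzero(d1 != d2)  # XOR plus bit count
print hamming
@endcode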
|
||||||
|
|
||||||
|
One important point is that BRIEF is only a feature descriptor; it doesn't provide any method to
find the features. So you will have to use some other feature detector like SIFT, SURF etc. The
paper recommends using CenSurE, which is a fast detector, and BRIEF even works slightly better for
CenSurE points than for SURF points.
|
||||||
|
|
||||||
|
In short, BRIEF is a faster method for feature descriptor calculation and matching. It also
provides a high recognition rate, unless there is large in-plane rotation.
|
||||||
|
|
||||||
|
BRIEF in OpenCV
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Below code shows the computation of BRIEF descriptors with the help of CenSurE detector. (CenSurE
|
||||||
|
detector is called STAR detector in OpenCV)
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('simple.jpg',0)
|
||||||
|
|
||||||
|
# Initiate STAR detector
|
||||||
|
star = cv2.FeatureDetector_create("STAR")
|
||||||
|
|
||||||
|
# Initiate BRIEF extractor
|
||||||
|
brief = cv2.DescriptorExtractor_create("BRIEF")
|
||||||
|
|
||||||
|
# find the keypoints with STAR
|
||||||
|
kp = star.detect(img,None)
|
||||||
|
|
||||||
|
# compute the descriptors with BRIEF
|
||||||
|
kp, des = brief.compute(img, kp)
|
||||||
|
|
||||||
|
print brief.getInt('bytes')
|
||||||
|
print des.shape
|
||||||
|
@endcode
|
||||||
|
The function brief.getInt('bytes') gives the \f$n_d\f$ size used in bytes. By default it is 32. Next one
|
||||||
|
is matching, which will be done in another chapter.
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# Michael Calonder, Vincent Lepetit, Christoph Strecha, and Pascal Fua, "BRIEF: Binary Robust
|
||||||
|
Independent Elementary Features", 11th European Conference on Computer Vision (ECCV), Heraklion,
|
||||||
|
Crete. LNCS Springer, September 2010.
|
||||||
|
2. LSH (Locality Sensitive Hasing) at wikipedia.
|
doc/py_tutorials/py_feature2d/py_fast/py_fast.markdown
FAST Algorithm for Corner Detection {#tutorial_py_fast}
|
||||||
|
===================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
- We will understand the basics of FAST algorithm
|
||||||
|
- We will find corners using OpenCV functionalities for FAST algorithm.
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
We saw several feature detectors, and many of them are really good. But when looking from a
real-time application point of view, they are not fast enough. One good example would be a SLAM
(Simultaneous Localization and Mapping) mobile robot, which has limited computational resources.
|
||||||
|
|
||||||
|
As a solution to this, the FAST (Features from Accelerated Segment Test) algorithm was proposed by
Edward Rosten and Tom Drummond in their paper "Machine learning for high-speed corner detection"
in 2006 (later revised in 2010). A basic summary of the algorithm is presented below. Refer to the
original paper for more details (all the images are taken from the original paper).
|
||||||
|
|
||||||
|
### Feature Detection using FAST
|
||||||
|
|
||||||
|
-# Select a pixel \f$p\f$ in the image which is to be identified as an interest point or not. Let its
|
||||||
|
intensity be \f$I_p\f$.
|
||||||
|
2. Select appropriate threshold value \f$t\f$.
|
||||||
|
3. Consider a circle of 16 pixels around the pixel under test. (See the image below)
|
||||||
|
|
||||||
|
![image](images/fast_speedtest.jpg)
|
||||||
|
|
||||||
|
-# Now the pixel \f$p\f$ is a corner if there exists a set of \f$n\f$ contiguous pixels in the circle (of
|
||||||
|
16 pixels) which are all brighter than \f$I_p + t\f$, or all darker than \f$I_p − t\f$. (Shown as white
|
||||||
|
dash lines in the above image). \f$n\f$ was chosen to be 12.
|
||||||
|
5. A **high-speed test** was proposed to exclude a large number of non-corners. This test examines
    only the four pixels at 1, 9, 5 and 13 (first, pixels 1 and 9 are tested to see whether they
    are too bright or too dark; if so, pixels 5 and 13 are checked). If \f$p\f$ is a corner, then at
    least three of these must all be brighter than \f$I_p + t\f$ or darker than \f$I_p − t\f$. If
    neither of these is the case, then \f$p\f$ cannot be a corner. The full segment test criterion
    can then be applied to the passed candidates by examining all pixels in the circle. This
    detector in itself exhibits high performance, but there are several weaknesses:
|
||||||
|
|
||||||
|
- It does not reject as many candidates for n \< 12.
|
||||||
|
- The choice of pixels is not optimal because its efficiency depends on ordering of the
|
||||||
|
questions and distribution of corner appearances.
|
||||||
|
- Results of high-speed tests are thrown away.
|
||||||
|
- Multiple features are detected adjacent to one another.
|
||||||
|
|
||||||
|
First 3 points are addressed with a machine learning approach. Last one is addressed using
|
||||||
|
non-maximal suppression.
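As a rough illustration of the segment test itself, here is a pure-Python sketch (not the
optimized OpenCV implementation); the circle offsets are the usual 16-point Bresenham circle of
radius 3, and the image is a random stand-in:
@code{.py}
import numpy as np

# 16 pixels on a circle of radius 3 around the candidate pixel (positions 1..16 in the figure)
CIRCLE = [(0,3),(1,3),(2,2),(3,1),(3,0),(3,-1),(2,-2),(1,-3),
          (0,-3),(-1,-3),(-2,-2),(-3,-1),(-3,0),(-3,1),(-2,2),(-1,3)]

def is_fast_corner(img, y, x, t=10, n=12):
    Ip = int(img[y, x])
    ring = [int(img[y+dy, x+dx]) for dx, dy in CIRCLE]
    brighter = [v > Ip + t for v in ring]
    darker   = [v < Ip - t for v in ring]
    # segment test: n contiguous pixels all brighter than Ip+t or all darker than Ip-t
    for states in (brighter, darker):
        run = 0
        for s in states + states:          # doubled to handle wrap-around
            run = run + 1 if s else 0
            if run >= n:
                return True
    return False

img = np.random.randint(0, 256, (20, 20)).astype(np.uint8)  # stand-in image
print is_fast_corner(img, 10, 10)
@endcode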
|
||||||
|
|
||||||
|
### Machine Learning a Corner Detector
|
||||||
|
|
||||||
|
-# Select a set of images for training (preferably from the target application domain)
|
||||||
|
2. Run the FAST algorithm on every image to find feature points.
|
||||||
|
3. For every feature point, store the 16 pixels around it as a vector. Do it for all the images to
|
||||||
|
get feature vector \f$P\f$.
|
||||||
|
4. Each pixel (say \f$x\f$) in these 16 pixels can have one of the following three states:
|
||||||
|
|
||||||
|
![image](images/fast_eqns.jpg)
|
||||||
|
|
||||||
|
-# Depending on these states, the feature vector \f$P\f$ is subdivided into 3 subsets, \f$P_d\f$, \f$P_s\f$,
|
||||||
|
\f$P_b\f$.
|
||||||
|
6. Define a new boolean variable, \f$K_p\f$, which is true if \f$p\f$ is a corner and false otherwise.
|
||||||
|
7. Use the ID3 algorithm (decision tree classifier) to query each subset using the variable \f$K_p\f$
|
||||||
|
for the knowledge about the true class. It selects the \f$x\f$ which yields the most information
|
||||||
|
about whether the candidate pixel is a corner, measured by the entropy of \f$K_p\f$.
|
||||||
|
8. This is recursively applied to all the subsets until its entropy is zero.
|
||||||
|
9. The decision tree so created is used for fast detection in other images.
|
||||||
|
|
||||||
|
### Non-maximal Suppression
|
||||||
|
|
||||||
|
Detecting multiple interest points in adjacent locations is another problem. It is solved by using
|
||||||
|
Non-maximum Suppression.
|
||||||
|
|
||||||
|
-# Compute a score function, \f$V\f$, for all the detected feature points. \f$V\f$ is the sum of the
    absolute differences between \f$p\f$ and the 16 surrounding pixel values.
|
||||||
|
2. Consider two adjacent keypoints and compute their \f$V\f$ values.
|
||||||
|
3. Discard the one with lower \f$V\f$ value.
|
||||||
|
|
||||||
|
### Summary
|
||||||
|
|
||||||
|
It is several times faster than other existing corner detectors.
|
||||||
|
|
||||||
|
But it is not robust to high levels of noise, and it is dependent on a threshold.
|
||||||
|
|
||||||
|
FAST Feature Detector in OpenCV
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
It can be called like any other feature detector in OpenCV. If you want, you can specify the
threshold, whether non-maximum suppression is to be applied or not, the neighborhood to be used etc.
|
||||||
|
|
||||||
|
For the neighborhood, three flags are defined, cv2.FAST_FEATURE_DETECTOR_TYPE_5_8,
|
||||||
|
cv2.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv2.FAST_FEATURE_DETECTOR_TYPE_9_16. Below is a
|
||||||
|
simple code on how to detect and draw the FAST feature points.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('simple.jpg',0)
|
||||||
|
|
||||||
|
# Initiate FAST object with default values
|
||||||
|
fast = cv2.FastFeatureDetector()
|
||||||
|
|
||||||
|
# find and draw the keypoints
|
||||||
|
kp = fast.detect(img,None)
|
||||||
|
img2 = cv2.drawKeypoints(img, kp, color=(255,0,0))
|
||||||
|
|
||||||
|
# Print all default params
|
||||||
|
print "Threshold: ", fast.getInt('threshold')
|
||||||
|
print "nonmaxSuppression: ", fast.getBool('nonmaxSuppression')
|
||||||
|
print "neighborhood: ", fast.getInt('type')
|
||||||
|
print "Total Keypoints with nonmaxSuppression: ", len(kp)
|
||||||
|
|
||||||
|
cv2.imwrite('fast_true.png',img2)
|
||||||
|
|
||||||
|
# Disable nonmaxSuppression
|
||||||
|
fast.setBool('nonmaxSuppression',0)
|
||||||
|
kp = fast.detect(img,None)
|
||||||
|
|
||||||
|
print "Total Keypoints without nonmaxSuppression: ", len(kp)
|
||||||
|
|
||||||
|
img3 = cv2.drawKeypoints(img, kp, color=(255,0,0))
|
||||||
|
|
||||||
|
cv2.imwrite('fast_false.png',img3)
|
||||||
|
@endcode
|
||||||
|
See the results. First image shows FAST with nonmaxSuppression and second one without
|
||||||
|
nonmaxSuppression:
|
||||||
|
|
||||||
|
![image](images/fast_kp.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# Edward Rosten and Tom Drummond, “Machine learning for high speed corner detection” in 9th
|
||||||
|
European Conference on Computer Vision, vol. 1, 2006, pp. 430–443.
|
||||||
|
2. Edward Rosten, Reid Porter, and Tom Drummond, "Faster and better: a machine learning approach to
|
||||||
|
corner detection" in IEEE Trans. Pattern Analysis and Machine Intelligence, 2010, vol 32, pp.
|
||||||
|
105-119.
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
Feature Matching + Homography to find Objects {#tutorial_py_feature_homography}
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
- We will mix up the feature matching and findHomography from calib3d module to find known
|
||||||
|
objects in a complex image.
|
||||||
|
|
||||||
|
Basics
|
||||||
|
------
|
||||||
|
|
||||||
|
So what did we do in the last session? We used a queryImage, found some feature points in it, took
another trainImage, found the features in that image too, and found the best matches among them.
In short, we found the locations of some parts of an object in another cluttered image. This
information is sufficient to find the object exactly in the trainImage.
|
||||||
|
|
||||||
|
For that, we can use a function from the calib3d module, i.e. **cv2.findHomography()**. If we pass
the set of points from both the images, it will find the perspective transformation of that
object. Then we can use **cv2.perspectiveTransform()** to find the object. It needs at least four
correct points to find the transformation.
|
||||||
|
|
||||||
|
We have seen that there can be some possible errors while matching which may affect the result. To
solve this problem, the algorithm uses RANSAC or LEAST_MEDIAN (which can be decided by the flags).
Good matches which provide a correct estimation are called inliers, and the remaining ones are
called outliers. **cv2.findHomography()** returns a mask which specifies the inlier and outlier
points.
|
||||||
|
|
||||||
|
So let's do it !!!
|
||||||
|
|
||||||
|
Code
|
||||||
|
----
|
||||||
|
|
||||||
|
First, as usual, let's find SIFT features in images and apply the ratio test to find the best
|
||||||
|
matches.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
MIN_MATCH_COUNT = 10
|
||||||
|
|
||||||
|
img1 = cv2.imread('box.png',0) # queryImage
|
||||||
|
img2 = cv2.imread('box_in_scene.png',0) # trainImage
|
||||||
|
|
||||||
|
# Initiate SIFT detector
|
||||||
|
sift = cv2.SIFT()
|
||||||
|
|
||||||
|
# find the keypoints and descriptors with SIFT
|
||||||
|
kp1, des1 = sift.detectAndCompute(img1,None)
|
||||||
|
kp2, des2 = sift.detectAndCompute(img2,None)
|
||||||
|
|
||||||
|
FLANN_INDEX_KDTREE = 0
|
||||||
|
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
|
||||||
|
search_params = dict(checks = 50)
|
||||||
|
|
||||||
|
flann = cv2.FlannBasedMatcher(index_params, search_params)
|
||||||
|
|
||||||
|
matches = flann.knnMatch(des1,des2,k=2)
|
||||||
|
|
||||||
|
# store all the good matches as per Lowe's ratio test.
|
||||||
|
good = []
|
||||||
|
for m,n in matches:
|
||||||
|
if m.distance < 0.7*n.distance:
|
||||||
|
good.append(m)
|
||||||
|
@endcode
|
||||||
|
Now we set a condition that at least 10 matches (defined by MIN_MATCH_COUNT) are to be there to
find the object. Otherwise we simply show a message saying that not enough matches are present.

If enough matches are found, we extract the locations of the matched keypoints in both images.
They are passed to find the perspective transformation. Once we get this 3x3 transformation
matrix, we use it to transform the corners of the queryImage to the corresponding points in the
trainImage. Then we draw it.
|
||||||
|
@code{.py}
|
||||||
|
if len(good)>MIN_MATCH_COUNT:
|
||||||
|
src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
|
||||||
|
dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
|
||||||
|
|
||||||
|
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
|
||||||
|
matchesMask = mask.ravel().tolist()
|
||||||
|
|
||||||
|
h,w = img1.shape
|
||||||
|
pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
|
||||||
|
dst = cv2.perspectiveTransform(pts,M)
|
||||||
|
|
||||||
|
img2 = cv2.polylines(img2,[np.int32(dst)],True,255,3, cv2.LINE_AA)
|
||||||
|
|
||||||
|
else:
|
||||||
|
print "Not enough matches are found - %d/%d" % (len(good),MIN_MATCH_COUNT)
|
||||||
|
matchesMask = None
|
||||||
|
@endcode
|
||||||
|
Finally we draw our inliers (if successfully found the object) or matching keypoints (if failed).
|
||||||
|
@code{.py}
|
||||||
|
draw_params = dict(matchColor = (0,255,0), # draw matches in green color
|
||||||
|
singlePointColor = None,
|
||||||
|
matchesMask = matchesMask, # draw only inliers
|
||||||
|
flags = 2)
|
||||||
|
|
||||||
|
img3 = cv2.drawMatches(img1,kp1,img2,kp2,good,None,**draw_params)
|
||||||
|
|
||||||
|
plt.imshow(img3, 'gray'),plt.show()
|
||||||
|
@endcode
|
||||||
|
See the result below. The object is marked in white in the cluttered image:
|
||||||
|
|
||||||
|
![image](images/homography_findobj.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
Harris Corner Detection {#tutorial_py_features_harris}
|
||||||
|
=======================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
|
||||||
|
- We will understand the concepts behind Harris Corner Detection.
|
||||||
|
- We will see the functions: **cv2.cornerHarris()**, **cv2.cornerSubPix()**
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
In the last chapter, we saw that corners are regions in the image with large variation in
intensity in all directions. One early attempt to find these corners was made by **Chris Harris &
Mike Stephens** in their paper **A Combined Corner and Edge Detector** in 1988, so it is now
called the Harris Corner Detector. They took this simple idea to a mathematical form. It basically
finds the difference in intensity for a displacement of \f$(u,v)\f$ in all directions. This is
expressed as below:
|
||||||
|
|
||||||
|
\f[E(u,v) = \sum_{x,y} \underbrace{w(x,y)}_\text{window function} \, [\underbrace{I(x+u,y+v)}_\text{shifted intensity}-\underbrace{I(x,y)}_\text{intensity}]^2\f]
|
||||||
|
|
||||||
|
The window function is either a rectangular window or a Gaussian window which gives weights to the
pixels underneath.
|
||||||
|
|
||||||
|
We have to maximize this function \f$E(u,v)\f$ for corner detection. That means we have to
maximize the second term. Applying a Taylor expansion to the above equation and using some
mathematical steps (please refer to any standard textbook you like for the full derivation), we
get the final equation as:
|
||||||
|
|
||||||
|
\f[E(u,v) \approx \begin{bmatrix} u & v \end{bmatrix} M \begin{bmatrix} u \\ v \end{bmatrix}\f]
|
||||||
|
|
||||||
|
where
|
||||||
|
|
||||||
|
\f[M = \sum_{x,y} w(x,y) \begin{bmatrix}I_x I_x & I_x I_y \\
|
||||||
|
I_x I_y & I_y I_y \end{bmatrix}\f]
|
||||||
|
|
||||||
|
Here, \f$I_x\f$ and \f$I_y\f$ are image derivatives in x and y directions respectively. (Can be easily found
|
||||||
|
out using **cv2.Sobel()**).
|
||||||
|
|
||||||
|
Then comes the main part. After this, they created a score, basically an equation, which will
|
||||||
|
determine if a window can contain a corner or not.
|
||||||
|
|
||||||
|
\f[R = det(M) - k(trace(M))^2\f]
|
||||||
|
|
||||||
|
where
|
||||||
|
- \f$det(M) = \lambda_1 \lambda_2\f$
|
||||||
|
- \f$trace(M) = \lambda_1 + \lambda_2\f$
|
||||||
|
- \f$\lambda_1\f$ and \f$\lambda_2\f$ are the eigen values of M
|
||||||
|
|
||||||
|
So the values of these eigen values decide whether a region is corner, edge or flat.
|
||||||
|
|
||||||
|
- When \f$|R|\f$ is small, which happens when \f$\lambda_1\f$ and \f$\lambda_2\f$ are small, the region is
|
||||||
|
flat.
|
||||||
|
- When \f$R<0\f$, which happens when \f$\lambda_1 >> \lambda_2\f$ or vice versa, the region is edge.
|
||||||
|
- When \f$R\f$ is large, which happens when \f$\lambda_1\f$ and \f$\lambda_2\f$ are large and
|
||||||
|
\f$\lambda_1 \sim \lambda_2\f$, the region is a corner.
|
||||||
|
|
||||||
|
It can be represented in a nice picture as follows:
|
||||||
|
|
||||||
|
![image](images/harris_region.jpg)
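As a small numerical illustration of this score, here is a sketch that builds \f$M\f$ from Sobel
derivatives and a simple box window, then evaluates \f$R\f$. It reuses the chessboard image from
the example below, and it is only meant to show the formula at work; the built-in
cv2.cornerHarris() does the same job more efficiently:
@code{.py}
import cv2
import numpy as np

gray = np.float32(cv2.cvtColor(cv2.imread('chessboard.jpg'), cv2.COLOR_BGR2GRAY))

# image derivatives I_x and I_y
Ix = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
Iy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)

# sum the products over a small window (a box filter stands in for w(x,y))
Ixx = cv2.boxFilter(Ix*Ix, -1, (2,2))
Ixy = cv2.boxFilter(Ix*Iy, -1, (2,2))
Iyy = cv2.boxFilter(Iy*Iy, -1, (2,2))

k = 0.04
R = (Ixx*Iyy - Ixy*Ixy) - k*(Ixx + Iyy)**2   # det(M) - k*trace(M)^2

# large positive R values are corner candidates
corners = R > 0.01*R.max()
print corners.sum()
@endcode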
|
||||||
|
|
||||||
|
So the result of Harris Corner Detection is a grayscale image with these scores. Thresholding it
with a suitable value gives you the corners in the image. We will do it with a simple image.
|
||||||
|
|
||||||
|
Harris Corner Detector in OpenCV
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
OpenCV has the function **cv2.cornerHarris()** for this purpose. Its arguments are :
|
||||||
|
|
||||||
|
- **img** - Input image, it should be grayscale and float32 type.
|
||||||
|
- **blockSize** - It is the size of neighbourhood considered for corner detection
|
||||||
|
- **ksize** - Aperture parameter of Sobel derivative used.
|
||||||
|
- **k** - Harris detector free parameter in the equation.
|
||||||
|
|
||||||
|
See the example below:
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
filename = 'chessboard.jpg'
|
||||||
|
img = cv2.imread(filename)
|
||||||
|
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
|
gray = np.float32(gray)
|
||||||
|
dst = cv2.cornerHarris(gray,2,3,0.04)
|
||||||
|
|
||||||
|
#result is dilated for marking the corners, not important
|
||||||
|
dst = cv2.dilate(dst,None)
|
||||||
|
|
||||||
|
# Threshold for an optimal value, it may vary depending on the image.
|
||||||
|
img[dst>0.01*dst.max()]=[0,0,255]
|
||||||
|
|
||||||
|
cv2.imshow('dst',img)
|
||||||
|
if cv2.waitKey(0) & 0xff == 27:
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
@endcode
|
||||||
|
Below are the three results:
|
||||||
|
|
||||||
|
![image](images/harris_result.jpg)
|
||||||
|
|
||||||
|
Corner with SubPixel Accuracy
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
Sometimes you may need to find the corners with maximum accuracy. OpenCV comes with a function
**cv2.cornerSubPix()** which further refines the detected corners with sub-pixel accuracy. Below
is an example. As usual, we need to find the Harris corners first. Then we pass the centroids of
these corners (there may be a bunch of pixels at a corner; we take their centroid) to refine them.
Harris corners are marked in red pixels and refined corners are marked in green pixels. For this
function, we have to define the criteria for when to stop the iteration. We stop it after a
specified number of iterations or after a certain accuracy is achieved, whichever occurs first. We
also need to define the size of the neighbourhood it searches for corners.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
filename = 'chessboard2.jpg'
|
||||||
|
img = cv2.imread(filename)
|
||||||
|
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
|
# find Harris corners
|
||||||
|
gray = np.float32(gray)
|
||||||
|
dst = cv2.cornerHarris(gray,2,3,0.04)
|
||||||
|
dst = cv2.dilate(dst,None)
|
||||||
|
ret, dst = cv2.threshold(dst,0.01*dst.max(),255,0)
|
||||||
|
dst = np.uint8(dst)
|
||||||
|
|
||||||
|
# find centroids
|
||||||
|
ret, labels, stats, centroids = cv2.connectedComponentsWithStats(dst)
|
||||||
|
|
||||||
|
# define the criteria to stop and refine the corners
|
||||||
|
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.001)
|
||||||
|
corners = cv2.cornerSubPix(gray,np.float32(centroids),(5,5),(-1,-1),criteria)
|
||||||
|
|
||||||
|
# Now draw them
|
||||||
|
res = np.hstack((centroids,corners))
|
||||||
|
res = np.int0(res)
|
||||||
|
img[res[:,1],res[:,0]]=[0,0,255]
|
||||||
|
img[res[:,3],res[:,2]] = [0,255,0]
|
||||||
|
|
||||||
|
cv2.imwrite('subpixel5.png',img)
|
||||||
|
@endcode
|
||||||
|
Below is the result, where some important locations are shown in zoomed window to visualize:
|
||||||
|
|
||||||
|
![image](images/subpixel3.png)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
Understanding Features {#tutorial_py_features_meaning}
|
||||||
|
======================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter, we will just try to understand what are features, why are they important, why
|
||||||
|
corners are important etc.
|
||||||
|
|
||||||
|
Explanation
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Most of you will have played jigsaw puzzle games. You get a lot of small pieces of an image, and
you need to assemble them correctly to form a big real image. **The question is, how do you do
it?** What about projecting the same theory into a computer program so that the computer can play
jigsaw puzzles? If the computer can play jigsaw puzzles, why can't we give a lot of real-life
images of natural scenery to a computer and tell it to stitch all those images into a single big
image? If the computer can stitch several natural images into one, what about giving it a lot of
pictures of a building or any structure and telling it to create a 3D model out of it?
|
||||||
|
|
||||||
|
Well, the questions and imaginations continue. But it all depends on the most basic question: How do
|
||||||
|
you play jigsaw puzzles? How do you arrange lots of scrambled image pieces into a big single image?
|
||||||
|
How can you stitch a lot of natural images to a single image?
|
||||||
|
|
||||||
|
The answer is that we are looking for specific patterns or specific features which are unique,
which can be easily tracked, and which can be easily compared. If we go for a definition of such a
feature, we may find it difficult to express it in words, but we know what they are. If someone
asks you to point out one good feature which can be compared across several images, you can point
one out. That is why even small children can simply play these games. We search for these features
in an image, we find them, we find the same features in other images, and we align them. That's
it. (In a jigsaw puzzle, we look more at the continuity of different images). All these abilities
are present in us inherently.
|
||||||
|
|
||||||
|
So our one basic question expands to more in number, but becomes more specific. **What are these
|
||||||
|
features?**. *(The answer should be understandable to a computer also.)*
|
||||||
|
|
||||||
|
Well, it is difficult to say how humans find these features. It is already programmed in our brain.
|
||||||
|
But if we look deep into some pictures and search for different patterns, we will find something
|
||||||
|
interesting. For example, take below image:
|
||||||
|
|
||||||
|
![image](images/feature_building.jpg)
|
||||||
|
|
||||||
|
The image is very simple. At the top of the image, six small image patches are given. The question
for you is to find the exact location of these patches in the original image. How many correct
results can you find?
|
||||||
|
|
||||||
|
A and B are flat surfaces, and they are spread over a large area. It is difficult to find the
exact location of these patches.
|
||||||
|
|
||||||
|
C and D are much simpler. They are edges of the building. You can find an approximate location,
but the exact location is still difficult, because the pattern is the same everywhere along the
edge; it only changes normal to the edge. So an edge is a much better feature compared to a flat
area, but not good enough (it is good in jigsaw puzzles for comparing the continuity of edges).
|
||||||
|
|
||||||
|
Finally, E and F are some corners of the building, and they can be found easily, because at a
corner, wherever you move the patch, it will look different. So corners can be considered good
features. Now let us move to a simpler (and widely used) image for better understanding.
|
||||||
|
|
||||||
|
![image](images/feature_simple.png)
|
||||||
|
|
||||||
|
Just like above, the blue patch is a flat area and difficult to find and track. Wherever you move
the blue patch, it looks the same. The black patch lies on an edge. If you move it in the vertical
direction (i.e. along the gradient) it changes, but moved along the edge (parallel to the edge),
it looks the same. And the red patch is a corner. Wherever you move the patch, it looks different,
which means it is unique. So basically, corners are considered to be good features in an image.
(Not just corners; in some cases blobs are considered good features).
|
||||||
|
|
||||||
|
So now we have answered our question, "what are these features?". But the next question arises:
how do we find them? Or how do we find the corners? We have also answered that in an intuitive
way, i.e., look for the regions in images which have maximum variation when moved (by a small
amount) in all directions around them. This will be projected into computer language in the coming
chapters. Finding these image features is called **Feature Detection**.
|
||||||
|
|
||||||
|
So we have found the features in an image (assume you did it). Once you have found them, you
should be able to find the same features in other images. How do we do that? We take a region
around the feature and describe it in our own words, like "the upper part is blue sky, the lower
part is a building, and on that building there is some glass etc.", and then search for the same
area in the other images. Basically, you are describing the feature. In a similar way, the
computer should also describe the region around the feature so that it can find it in other
images. This so-called description is called **Feature Description**. Once you have the features
and their description, you can find the same features in all the images, and align them, stitch
them, or do whatever you want.
|
||||||
|
|
||||||
|
So in this module, we are looking at different algorithms in OpenCV to find features, describe
them, match them etc.
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown
Feature Matching {#tutorial_py_matcher}
|
||||||
|
================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter
|
||||||
|
- We will see how to match features in one image with others.
|
||||||
|
- We will use the Brute-Force matcher and FLANN Matcher in OpenCV
|
||||||
|
|
||||||
|
Basics of Brute-Force Matcher
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
The Brute-Force matcher is simple. It takes the descriptor of one feature in the first set and
matches it with all the other features in the second set using some distance calculation, and the
closest one is returned.
|
||||||
|
|
||||||
|
For the BF matcher, first we have to create the BFMatcher object using **cv2.BFMatcher()**. It
takes two optional params. The first one is normType. It specifies the distance measurement to be
used. By default, it is cv2.NORM_L2, which is good for SIFT, SURF etc. (cv2.NORM_L1 is also
there). For binary string based descriptors like ORB, BRIEF, BRISK etc., cv2.NORM_HAMMING should
be used, which uses Hamming distance as the measurement. If ORB is using WTA_K == 3 or 4,
cv2.NORM_HAMMING2 should be used.
|
||||||
|
|
||||||
|
The second param is a boolean variable, crossCheck, which is false by default. If it is true, the
Matcher returns only those matches with value (i,j) such that the i-th descriptor in set A has the
j-th descriptor in set B as its best match and vice-versa. That is, the two features in both sets
should match each other. This provides consistent results, and is a good alternative to the ratio
test proposed by D. Lowe in the SIFT paper.
|
||||||
|
|
||||||
|
Once it is created, two important methods are *BFMatcher.match()* and *BFMatcher.knnMatch()*. First
|
||||||
|
one returns the best match. Second method returns k best matches where k is specified by the user.
|
||||||
|
It may be useful when we need to do additional work on that.
|
||||||
|
|
||||||
|
Just as we used cv2.drawKeypoints() to draw keypoints, **cv2.drawMatches()** helps us to draw the
matches. It stacks the two images horizontally and draws lines from the first image to the second
image showing the best matches. There is also **cv2.drawMatchesKnn**, which draws all the k best
matches. If k=2, it will draw two match-lines for each keypoint. So we have to pass a mask if we
want to draw them selectively.
|
||||||
|
|
||||||
|
Let's see one example for each of ORB and SIFT (they use different distance measurements).
|
||||||
|
|
||||||
|
### Brute-Force Matching with ORB Descriptors
|
||||||
|
|
||||||
|
Here, we will see a simple example on how to match features between two images. In this case, I have
|
||||||
|
a queryImage and a trainImage. We will try to find the queryImage in trainImage using feature
|
||||||
|
matching. ( The images are /samples/c/box.png and /samples/c/box_in_scene.png)
|
||||||
|
|
||||||
|
We are using ORB descriptors to match features. So let's start with loading the images, finding
the descriptors etc.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img1 = cv2.imread('box.png',0) # queryImage
|
||||||
|
img2 = cv2.imread('box_in_scene.png',0) # trainImage
|
||||||
|
|
||||||
|
# Initiate ORB detector
|
||||||
|
orb = cv2.ORB()
|
||||||
|
|
||||||
|
# find the keypoints and descriptors with ORB
|
||||||
|
kp1, des1 = orb.detectAndCompute(img1,None)
|
||||||
|
kp2, des2 = orb.detectAndCompute(img2,None)
|
||||||
|
@endcode
|
||||||
|
Next we create a BFMatcher object with the distance measurement cv2.NORM_HAMMING (since we are
using ORB), and crossCheck is switched on for better results. Then we use the Matcher.match()
method to get the best matches between the two images. We sort them in ascending order of their
distances so that the best matches (with low distance) come to the front. Then we draw only the
first 10 matches (just for the sake of visibility; you can increase it as you like).
|
||||||
|
@code{.py}
|
||||||
|
# create BFMatcher object
|
||||||
|
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
|
||||||
|
|
||||||
|
# Match descriptors.
|
||||||
|
matches = bf.match(des1,des2)
|
||||||
|
|
||||||
|
# Sort them in the order of their distance.
|
||||||
|
matches = sorted(matches, key = lambda x:x.distance)
|
||||||
|
|
||||||
|
# Draw first 10 matches.
|
||||||
|
img3 = cv2.drawMatches(img1,kp1,img2,kp2,matches[:10],None, flags=2)
|
||||||
|
|
||||||
|
plt.imshow(img3),plt.show()
|
||||||
|
@endcode
|
||||||
|
Below is the result I got:
|
||||||
|
|
||||||
|
![image](images/matcher_result1.jpg)
|
||||||
|
|
||||||
|
### What is this Matcher Object?
|
||||||
|
|
||||||
|
The result of the matches = bf.match(des1,des2) line is a list of DMatch objects. A DMatch object
has the following attributes (a small usage sketch follows the list):
|
||||||
|
|
||||||
|
- DMatch.distance - Distance between descriptors. The lower, the better it is.
|
||||||
|
- DMatch.trainIdx - Index of the descriptor in train descriptors
|
||||||
|
- DMatch.queryIdx - Index of the descriptor in query descriptors
|
||||||
|
- DMatch.imgIdx - Index of the train image.
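For example, continuing from the ORB matching code above, you can inspect the best match like this:
@code{.py}
best = matches[0]                  # matches were sorted by distance, so this is the best one
print best.distance, best.queryIdx, best.trainIdx, best.imgIdx
@endcode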
|
||||||
|
|
||||||
|
### Brute-Force Matching with SIFT Descriptors and Ratio Test
|
||||||
|
|
||||||
|
This time, we will use BFMatcher.knnMatch() to get k best matches. In this example, we will take k=2
|
||||||
|
so that we can apply ratio test explained by D.Lowe in his paper.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img1 = cv2.imread('box.png',0) # queryImage
|
||||||
|
img2 = cv2.imread('box_in_scene.png',0) # trainImage
|
||||||
|
|
||||||
|
# Initiate SIFT detector
|
||||||
|
sift = cv2.SIFT()
|
||||||
|
|
||||||
|
# find the keypoints and descriptors with SIFT
|
||||||
|
kp1, des1 = sift.detectAndCompute(img1,None)
|
||||||
|
kp2, des2 = sift.detectAndCompute(img2,None)
|
||||||
|
|
||||||
|
# BFMatcher with default params
|
||||||
|
bf = cv2.BFMatcher()
|
||||||
|
matches = bf.knnMatch(des1,des2, k=2)
|
||||||
|
|
||||||
|
# Apply ratio test
|
||||||
|
good = []
|
||||||
|
for m,n in matches:
|
||||||
|
if m.distance < 0.75*n.distance:
|
||||||
|
good.append([m])
|
||||||
|
|
||||||
|
# cv2.drawMatchesKnn expects list of lists as matches.
|
||||||
|
img3 = cv2.drawMatchesKnn(img1,kp1,img2,kp2,good,flags=2)
|
||||||
|
|
||||||
|
plt.imshow(img3),plt.show()
|
||||||
|
@endcode
|
||||||
|
See the result below:
|
||||||
|
|
||||||
|
![image](images/matcher_result2.jpg)
|
||||||
|
|
||||||
|
FLANN based Matcher
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
FLANN stands for Fast Library for Approximate Nearest Neighbors. It contains a collection of
algorithms optimized for fast nearest neighbor search in large datasets and for high dimensional
features. It works faster than BFMatcher for large datasets. We will see the second example with
the FLANN based matcher.
|
||||||
|
|
||||||
|
For the FLANN based matcher, we need to pass two dictionaries which specify the algorithm to be
used, its related parameters etc. The first one is IndexParams. For various algorithms, the
information to be passed is explained in the FLANN docs. As a summary, for algorithms like SIFT,
SURF etc. you can pass the following:
|
||||||
|
@code{.py}
|
||||||
|
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
|
||||||
|
@endcode
|
||||||
|
While using ORB, you can pass the following. The commented values are recommended as per the docs,
but they didn't provide the required results in some cases; other values worked fine:
|
||||||
|
@code{.py}
|
||||||
|
index_params= dict(algorithm = FLANN_INDEX_LSH,
|
||||||
|
table_number = 6, # 12
|
||||||
|
key_size = 12, # 20
|
||||||
|
multi_probe_level = 1) #2
|
||||||
|
@endcode
|
||||||
|
Second dictionary is the SearchParams. It specifies the number of times the trees in the index
|
||||||
|
should be recursively traversed. Higher values gives better precision, but also takes more time. If
|
||||||
|
you want to change the value, pass search_params = dict(checks=100).
|
||||||
|
|
||||||
|
With these informations, we are good to go.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img1 = cv2.imread('box.png',0) # queryImage
|
||||||
|
img2 = cv2.imread('box_in_scene.png',0) # trainImage
|
||||||
|
|
||||||
|
# Initiate SIFT detector
|
||||||
|
sift = cv2.SIFT()
|
||||||
|
|
||||||
|
# find the keypoints and descriptors with SIFT
|
||||||
|
kp1, des1 = sift.detectAndCompute(img1,None)
|
||||||
|
kp2, des2 = sift.detectAndCompute(img2,None)
|
||||||
|
|
||||||
|
# FLANN parameters
|
||||||
|
FLANN_INDEX_KDTREE = 0
|
||||||
|
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
|
||||||
|
search_params = dict(checks=50) # or pass empty dictionary
|
||||||
|
|
||||||
|
flann = cv2.FlannBasedMatcher(index_params,search_params)
|
||||||
|
|
||||||
|
matches = flann.knnMatch(des1,des2,k=2)
|
||||||
|
|
||||||
|
# Need to draw only good matches, so create a mask
|
||||||
|
matchesMask = [[0,0] for i in xrange(len(matches))]
|
||||||
|
|
||||||
|
# ratio test as per Lowe's paper
|
||||||
|
for i,(m,n) in enumerate(matches):
|
||||||
|
if m.distance < 0.7*n.distance:
|
||||||
|
matchesMask[i]=[1,0]
|
||||||
|
|
||||||
|
draw_params = dict(matchColor = (0,255,0),
|
||||||
|
singlePointColor = (255,0,0),
|
||||||
|
matchesMask = matchesMask,
|
||||||
|
flags = 0)
|
||||||
|
|
||||||
|
img3 = cv2.drawMatchesKnn(img1,kp1,img2,kp2,matches,None,**draw_params)
|
||||||
|
|
||||||
|
plt.imshow(img3,),plt.show()
|
||||||
|
@endcode
|
||||||
|
See the result below:
|
||||||
|
|
||||||
|
![image](images/matcher_flann.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
98	doc/py_tutorials/py_feature2d/py_orb/py_orb.markdown	Normal file
@ -0,0 +1,98 @@

ORB (Oriented FAST and Rotated BRIEF) {#tutorial_py_orb}
=====================================

Goal
----

In this chapter,
- We will see the basics of ORB

Theory
------

As an OpenCV enthusiast, the most important thing about ORB is that it came from "OpenCV Labs".
The algorithm was proposed by Ethan Rublee, Vincent Rabaud, Kurt Konolige and Gary R. Bradski in
their paper **ORB: An efficient alternative to SIFT or SURF** in 2011. As the title says, it is a
good alternative to SIFT and SURF in computation cost, matching performance and, mainly, the patents.
Yes, SIFT and SURF are patented and you are supposed to pay for their use. But ORB is not!

ORB is basically a fusion of the FAST keypoint detector and the BRIEF descriptor with many modifications to
enhance the performance. First it uses FAST to find keypoints, then applies the Harris corner measure to
find the top N points among them. It also uses a pyramid to produce multiscale features. But one problem is
that FAST doesn't compute the orientation. So what about rotation invariance? The authors came up with the
following modification.

It computes the intensity weighted centroid of the patch with the located corner at the center. The
direction of the vector from this corner point to the centroid gives the orientation. To improve the
rotation invariance, moments are computed with x and y restricted to a circular region of
radius \f$r\f$, where \f$r\f$ is the size of the patch.
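
As a brief reconstruction of the paper's definition (the tutorial itself does not spell it out), the patch
moments, centroid and orientation are:

\f[m_{pq} = \sum_{x,y} x^p y^q I(x,y), \qquad C = \left( \frac{m_{10}}{m_{00}}, \frac{m_{01}}{m_{00}} \right), \qquad \theta = \mathrm{atan2}(m_{01}, m_{10})\f]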

Now for descriptors, ORB uses BRIEF descriptors. But we have already seen that BRIEF performs poorly
with rotation. So what ORB does is to "steer" BRIEF according to the orientation of the keypoints. For
any feature set of \f$n\f$ binary tests at location \f$(x_i, y_i)\f$, define a \f$2 \times n\f$ matrix \f$S\f$
which contains the coordinates of these pixels. Then, using the orientation of the patch, \f$\theta\f$, its
rotation matrix is found and used to rotate \f$S\f$ to get the steered (rotated) version \f$S_\theta\f$.

ORB discretizes the angle in increments of \f$2 \pi /30\f$ (12 degrees), and constructs a lookup table of
precomputed BRIEF patterns. As long as the keypoint orientation \f$\theta\f$ is consistent across views,
the correct set of points \f$S_\theta\f$ will be used to compute its descriptor.

BRIEF has an important property that each bit feature has a large variance and a mean near 0.5. But
once it is oriented along the keypoint direction, it loses this property and becomes more distributed.
High variance makes a feature more discriminative, since it responds differentially to inputs.
Another desirable property is to have the tests uncorrelated, since then each test will contribute
to the result. To resolve all these, ORB runs a greedy search among all possible binary tests to
find the ones that have both high variance and means close to 0.5, as well as being uncorrelated.
The result is called **rBRIEF**.

For descriptor matching, multi-probe LSH, which improves on the traditional LSH, is used. The paper
says ORB is much faster than SURF and SIFT, and the ORB descriptor works better than SURF. ORB is a good
choice in low-power devices for panorama stitching etc.

ORB in OpenCV
-------------

As usual, we have to create an ORB object with the function **cv2.ORB()** or using the feature2d common
interface. It has a number of optional parameters. The most useful ones are nfeatures, which denotes the
maximum number of features to be retained (by default 500), and scoreType, which denotes whether the Harris
score or the FAST score is used to rank the features (by default, the Harris score). Another parameter, WTA_K,
decides the number of points that produce each element of the oriented BRIEF descriptor. By default it
is two, i.e. it selects two points at a time. In that case, for matching, the NORM_HAMMING distance is used.
If WTA_K is 3 or 4, which takes 3 or 4 points to produce the BRIEF descriptor, then the matching distance
is defined by NORM_HAMMING2.
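
For example, a hedged sketch of how these parameters fit together (the parameter values here are arbitrary
choices for illustration, not recommendations):
@code{.py}
# WTA_K=3 produces descriptor elements from 3-point comparisons,
# so the matcher should use NORM_HAMMING2 instead of NORM_HAMMING.
orb = cv2.ORB(nfeatures = 1000, WTA_K = 3)
bf = cv2.BFMatcher(cv2.NORM_HAMMING2, crossCheck = True)
@endcode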

Below is a simple code which shows the use of ORB.
@code{.py}
import numpy as np
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('simple.jpg',0)

# Initiate ORB detector
orb = cv2.ORB()

# find the keypoints with ORB
kp = orb.detect(img,None)

# compute the descriptors with ORB
kp, des = orb.compute(img, kp)

# draw only keypoints location, not size and orientation
img2 = cv2.drawKeypoints(img,kp,color=(0,255,0), flags=0)
plt.imshow(img2),plt.show()
@endcode
See the result below:

![image](images/orb_kp.jpg)

ORB feature matching we will do in another chapter.

Additional Resources
--------------------

-# Ethan Rublee, Vincent Rabaud, Kurt Konolige, Gary R. Bradski: ORB: An efficient alternative to
   SIFT or SURF. ICCV 2011: 2564-2571.

Exercises
---------
@ -0,0 +1,75 @@

Shi-Tomasi Corner Detector & Good Features to Track {#tutorial_py_shi_tomasi}
===================================================

Goal
----

In this chapter,

- We will learn about another corner detector: the Shi-Tomasi Corner Detector
- We will see the function: **cv2.goodFeaturesToTrack()**

Theory
------

In the last chapter, we saw the Harris Corner Detector. Later, in 1994, J. Shi and C. Tomasi made a small
modification to it in their paper **Good Features to Track**, which shows better results compared to the
Harris Corner Detector. The scoring function in the Harris Corner Detector was given by:

\f[R = \lambda_1 \lambda_2 - k(\lambda_1+\lambda_2)^2\f]

Instead of this, Shi-Tomasi proposed:

\f[R = \min(\lambda_1, \lambda_2)\f]

If it is greater than a threshold value, it is considered as a corner. If we plot it in
\f$\lambda_1 - \lambda_2\f$ space as we did for the Harris Corner Detector, we get an image as below:

![image](images/shitomasi_space.png)

From the figure, you can see that only when \f$\lambda_1\f$ and \f$\lambda_2\f$ are above a minimum value,
\f$\lambda_{min}\f$, is it considered as a corner (green region).

Code
----

OpenCV has a function, **cv2.goodFeaturesToTrack()**. It finds the N strongest corners in the image by
the Shi-Tomasi method (or Harris Corner Detection, if you specify it). As usual, the image should be a
grayscale image. Then you specify the number of corners you want to find. Then you specify the quality
level, which is a value between 0 and 1 denoting the minimum quality of corner below which
every corner is rejected. Finally, we provide the minimum Euclidean distance between detected corners.

With all this information, the function finds corners in the image. All corners below the quality
level are rejected. Then it sorts the remaining corners by quality in descending order.
The function takes the strongest corner, throws away all nearby corners within the minimum
distance, and returns the N strongest corners.

In the example below, we will try to find the 25 best corners:
@code{.py}
import numpy as np
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('simple.jpg')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

corners = cv2.goodFeaturesToTrack(gray,25,0.01,10)
corners = np.int0(corners)

for i in corners:
    x,y = i.ravel()
    cv2.circle(img,(x,y),3,255,-1)

plt.imshow(img),plt.show()
@endcode
See the result below:

![image](images/shitomasi_block1.jpg)

This function is more appropriate for tracking. We will see that when its time comes.

Additional Resources
--------------------

Exercises
---------
@ -0,0 +1,167 @@

Introduction to SIFT (Scale-Invariant Feature Transform) {#tutorial_py_sift_intro}
========================================================

Goal
----

In this chapter,
- We will learn about the concepts of the SIFT algorithm
- We will learn to find SIFT Keypoints and Descriptors.

Theory
------

In the last couple of chapters, we saw some corner detectors like Harris etc. They are
rotation-invariant, which means that even if the image is rotated, we can find the same corners. That is
obvious because corners remain corners in the rotated image also. But what about scaling? A corner may
not be a corner if the image is scaled. For example, check the simple image below. A corner in a small
image within a small window is flat when it is zoomed in the same window. So the Harris corner is not
scale invariant.

![image](images/sift_scale_invariant.jpg)

So, in 2004, **D.Lowe**, University of British Columbia, came up with a new algorithm, Scale
Invariant Feature Transform (SIFT), in his paper **Distinctive Image Features from Scale-Invariant
Keypoints**, which extracts keypoints and computes their descriptors. *(This paper is easy to understand
and considered to be the best material available on SIFT, so this explanation is just a short summary of
the paper)*.

There are mainly four steps involved in the SIFT algorithm. We will see them one-by-one.

### 1. Scale-space Extrema Detection

From the image above, it is obvious that we can't use the same window to detect keypoints at
different scales. It is OK for small corners, but to detect larger corners we need larger windows.
For this, scale-space filtering is used. In it, the Laplacian of Gaussian is found for the image with
various \f$\sigma\f$ values. LoG acts as a blob detector which detects blobs of various sizes due to the
change in \f$\sigma\f$. In short, \f$\sigma\f$ acts as a scaling parameter. For example, in the above image, a
Gaussian kernel with low \f$\sigma\f$ gives a high value for the small corner, while a Gaussian kernel with high
\f$\sigma\f$ fits well for the larger corner. So, we can find the local maxima across scale and space,
which gives us a list of \f$(x,y,\sigma)\f$ values, meaning there is a potential keypoint at (x,y) at
scale \f$\sigma\f$.

But this LoG is a little costly, so the SIFT algorithm uses the Difference of Gaussians, which is an
approximation of LoG. The Difference of Gaussian is obtained as the difference of the Gaussian blurring of
an image with two different \f$\sigma\f$, let them be \f$\sigma\f$ and \f$k\sigma\f$. This process is done for
different octaves of the image in the Gaussian Pyramid. It is represented in the image below:

![image](images/sift_dog.jpg)

Once these DoG images are found, they are searched for local extrema over scale and space. For example, one
pixel in an image is compared with its 8 neighbours as well as 9 pixels in the next scale and 9 pixels
in the previous scale. If it is a local extremum, it is a potential keypoint. It basically means that the
keypoint is best represented at that scale. It is shown in the image below:

![image](images/sift_local_extrema.jpg)

Regarding the parameters, the paper gives some empirical data which can be summarized as:
number of octaves = 4, number of scale levels = 5, initial \f$\sigma=1.6\f$, \f$k=\sqrt{2}\f$ etc. as optimal
values.

### 2. Keypoint Localization

Once potential keypoint locations are found, they have to be refined to get more accurate results.
The authors used a Taylor series expansion of the scale space to get a more accurate location of the extrema,
and if the intensity at an extremum is less than a threshold value (0.03 as per the paper), it is
rejected. This threshold is called **contrastThreshold** in OpenCV.

DoG has a higher response for edges, so edges also need to be removed. For this, a concept similar to the
Harris corner detector is used. They used a 2x2 Hessian matrix (H) to compute the principal
curvature. We know from the Harris corner detector that for edges, one eigenvalue is larger than the
other, so here they used a simple function of the trace and determinant of \f$H\f$.
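
As a brief reconstruction from Lowe's paper (the tutorial text only alludes to it), the quantity checked is

\f[\frac{\mathrm{Tr}(H)^2}{\mathrm{Det}(H)} = \frac{(r+1)^2}{r},\f]

where \f$r\f$ is the ratio of the larger eigenvalue to the smaller one; the expression grows as the two
eigenvalues become more unequal.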

If this ratio is greater than a threshold, called **edgeThreshold** in OpenCV, that keypoint is
discarded. It is given as 10 in the paper.

So it eliminates any low-contrast keypoints and edge keypoints, and what remains are strong interest
points.

### 3. Orientation Assignment

Now an orientation is assigned to each keypoint to achieve invariance to image rotation. A
neighbourhood is taken around the keypoint location depending on the scale, and the gradient
magnitude and direction are calculated in that region. An orientation histogram with 36 bins covering
360 degrees is created. (It is weighted by the gradient magnitude and a Gaussian-weighted circular window
with \f$\sigma\f$ equal to 1.5 times the scale of the keypoint.) The highest peak in the histogram is taken,
and any peak above 80% of it is also considered to calculate the orientation. This creates keypoints
with the same location and scale, but different directions. It contributes to the stability of matching.

### 4. Keypoint Descriptor

Now the keypoint descriptor is created. A 16x16 neighbourhood around the keypoint is taken. It is
divided into 16 sub-blocks of 4x4 size. For each sub-block, an 8 bin orientation histogram is created.
So a total of 128 bin values are available. They are represented as a vector to form the keypoint
descriptor. In addition to this, several measures are taken to achieve robustness against
illumination changes, rotation etc.

### 5. Keypoint Matching

Keypoints between two images are matched by identifying their nearest neighbours. But in some cases,
the second closest match may be very near to the first. This may happen due to noise or some other
reasons. In that case, the ratio of the closest distance to the second-closest distance is taken. If it is
greater than 0.8, the match is rejected. This eliminates around 90% of false matches while discarding only
5% of correct matches, as per the paper.

So this is a summary of the SIFT algorithm. For more details and understanding, reading the original
paper is highly recommended. Remember one thing: this algorithm is patented, so it is
included in the Non-free module in OpenCV.

SIFT in OpenCV
--------------

So now let's see the SIFT functionalities available in OpenCV. Let's start with keypoint detection and
draw them. First we have to construct a SIFT object. We can pass different parameters to it which
are optional and they are well explained in the docs.
@code{.py}
import cv2
import numpy as np

img = cv2.imread('home.jpg')
gray= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

sift = cv2.SIFT()
kp = sift.detect(gray,None)

img=cv2.drawKeypoints(gray,kp)

cv2.imwrite('sift_keypoints.jpg',img)
@endcode
The **sift.detect()** function finds keypoints in the image. You can pass a mask if you want to
search only a part of the image. Each keypoint is a special structure which has many attributes like its
(x,y) coordinates, the size of the meaningful neighbourhood, the angle which specifies its orientation, the
response that specifies the strength of the keypoint, etc.

OpenCV also provides the **cv2.drawKeyPoints()** function which draws small circles at the locations
of the keypoints. If you pass the flag **cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS** to it, it will
draw a circle with the size of the keypoint and it will even show its orientation. See the example below.
@code{.py}
img=cv2.drawKeypoints(gray,kp,flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv2.imwrite('sift_keypoints.jpg',img)
@endcode
See the two results below:

![image](images/sift_keypoints.jpg)

Now to calculate the descriptor, OpenCV provides two methods.

-# Since you already found keypoints, you can call **sift.compute()**, which computes the
   descriptors from the keypoints we have found. Eg: kp,des = sift.compute(gray,kp)
-# If you didn't find keypoints, directly find keypoints and descriptors in a single step with the
   function **sift.detectAndCompute()**.

We will see the second method:
@code{.py}
sift = cv2.SIFT()
kp, des = sift.detectAndCompute(gray,None)
@endcode
Here kp will be a list of keypoints and des is a numpy array of shape
\f$Number\_of\_Keypoints \times 128\f$.

So we got keypoints, descriptors etc. Now we want to see how to match keypoints in different images.
That we will learn in the coming chapters.

Additional Resources
--------------------

Exercises
---------
@ -0,0 +1,163 @@

Introduction to SURF (Speeded-Up Robust Features) {#tutorial_py_surf_intro}
=================================================

Goal
----

In this chapter,
- We will see the basics of SURF
- We will see SURF functionalities in OpenCV

Theory
------

In the last chapter, we saw SIFT for keypoint detection and description. But it was comparatively slow,
and people needed a more speeded-up version. In 2006, three people, Bay, H., Tuytelaars, T. and Van
Gool, L., published another paper, "SURF: Speeded Up Robust Features", which introduced a new
algorithm called SURF. As the name suggests, it is a speeded-up version of SIFT.

In SIFT, Lowe approximated the Laplacian of Gaussian with the Difference of Gaussian for finding the
scale-space. SURF goes a little further and approximates LoG with a Box Filter. The image below shows a
demonstration of such an approximation. One big advantage of this approximation is that convolution
with a box filter can be easily calculated with the help of integral images, and it can be done in
parallel for different scales. SURF also relies on the determinant of the Hessian matrix for both scale
and location.

![image](images/surf_boxfilter.jpg)

For orientation assignment, SURF uses wavelet responses in horizontal and vertical directions for a
neighbourhood of size 6s. Adequate Gaussian weights are also applied to them. Then they are plotted in
a space as given in the image below. The dominant orientation is estimated by calculating the sum of all
responses within a sliding orientation window of angle 60 degrees. The interesting thing is that the
wavelet response can be found very easily using integral images at any scale. For many
applications, rotation invariance is not required, so there is no need to find this orientation, which
speeds up the process. SURF provides such a functionality called Upright-SURF or U-SURF. It improves
speed and is robust up to \f$\pm 15^{\circ}\f$. OpenCV supports both, depending upon the flag
**upright**. If it is 0, the orientation is calculated. If it is 1, the orientation is not calculated and it
is faster.

![image](images/surf_orientation.jpg)

For feature description, SURF uses wavelet responses in horizontal and vertical directions (again,
the use of integral images makes things easier). A neighbourhood of size 20s x 20s is taken around the
keypoint, where s is the size. It is divided into 4x4 subregions. For each subregion, horizontal and
vertical wavelet responses are taken and a vector is formed like this:
\f$v=( \sum{d_x}, \sum{d_y}, \sum{|d_x|}, \sum{|d_y|})\f$. This, represented as a vector, gives the SURF
feature descriptor with a total of 64 dimensions. A lower dimension gives a higher speed of computation
and matching, while a higher dimension gives better distinctiveness of features.

For more distinctiveness, the SURF feature descriptor has an extended 128 dimension version. The sums of
\f$d_x\f$ and \f$|d_x|\f$ are computed separately for \f$d_y < 0\f$ and \f$d_y \geq 0\f$. Similarly, the sums of
\f$d_y\f$ and \f$|d_y|\f$ are split up according to the sign of \f$d_x\f$, thereby doubling the number of
features. It doesn't add much computation complexity. OpenCV supports both by setting the value of the
flag **extended** to 0 or 1 for 64-dim and 128-dim respectively (64-dim by default, as the demo below shows).

Another important improvement is the use of the sign of the Laplacian (trace of the Hessian matrix) for the
underlying interest point. It adds no computation cost since it is already computed during
detection. The sign of the Laplacian distinguishes bright blobs on dark backgrounds from the reverse
situation. In the matching stage, we only compare features if they have the same type of contrast
(as shown in the image below). This minimal information allows for faster matching, without reducing the
descriptor's performance.

![image](images/surf_matching.jpg)

In short, SURF adds a lot of features to improve the speed in every step. Analysis shows it is 3
times faster than SIFT while performance is comparable to SIFT. SURF is good at handling images with
blurring and rotation, but not good at handling viewpoint change and illumination change.

SURF in OpenCV
--------------

OpenCV provides SURF functionalities just like SIFT. You initiate a SURF object with some optional
conditions like 64/128-dim descriptors, Upright/Normal SURF etc. All the details are well explained
in the docs. Then, as we did with SIFT, we can use SURF.detect(), SURF.compute() etc. for finding keypoints
and descriptors.

First we will see a simple demo of how to find SURF keypoints and descriptors and draw them. All
examples are shown in a Python terminal since it is just the same as SIFT.
@code{.py}
>>> img = cv2.imread('fly.png',0)

# Create SURF object. You can specify params here or later.
# Here I set Hessian Threshold to 400
>>> surf = cv2.SURF(400)

# Find keypoints and descriptors directly
>>> kp, des = surf.detectAndCompute(img,None)

>>> len(kp)
699
@endcode
699 keypoints are too many to show in a picture. We reduce that to around 50 to draw on the image.
While matching, we may need all those features, but not now. So we increase the Hessian Threshold.
@code{.py}
# Check present Hessian threshold
>>> print surf.hessianThreshold
400.0

# We set it to some 50000. Remember, it is just for representing in picture.
# In actual cases, it is better to have a value 300-500
>>> surf.hessianThreshold = 50000

# Again compute keypoints and check its number.
>>> kp, des = surf.detectAndCompute(img,None)

>>> print len(kp)
47
@endcode
It is less than 50. Let's draw it on the image.
@code{.py}
>>> img2 = cv2.drawKeypoints(img,kp,None,(255,0,0),4)

>>> plt.imshow(img2),plt.show()
@endcode
See the result below. You can see that SURF is more like a blob detector. It detects the white blobs
on the wings of the butterfly. You can test it with other images.

![image](images/surf_kp1.jpg)

Now I want to apply U-SURF, so that it won't find the orientation.
@code{.py}
# Check upright flag, if it is False, set it to True
>>> print surf.upright
False

>>> surf.upright = True

# Recompute the feature points and draw it
>>> kp = surf.detect(img,None)
>>> img2 = cv2.drawKeypoints(img,kp,None,(255,0,0),4)

>>> plt.imshow(img2),plt.show()
@endcode
See the results below. All the orientations are shown in the same direction. It is faster than the
previous case. If you are working on cases where orientation is not a problem (like panorama stitching),
this is better.

![image](images/surf_kp2.jpg)

Finally, we check the descriptor size and change it to 128 if it is only 64-dim.
@code{.py}
# Find size of descriptor
>>> print surf.descriptorSize()
64

# That means flag, "extended" is False.
>>> surf.extended
False

# So we make it to True to get 128-dim descriptors.
>>> surf.extended = True
>>> kp, des = surf.detectAndCompute(img,None)
>>> print surf.descriptorSize()
128
>>> print des.shape
(47, 128)
@endcode
The remaining part is matching, which we will do in another chapter.

Additional Resources
--------------------

Exercises
---------
@ -0,0 +1,54 @@

Feature Detection and Description {#tutorial_py_table_of_contents_feature2d}
=================================

- @subpage tutorial_py_features_meaning

  What are the main features in an image? How can finding those features be useful to us?

- @subpage tutorial_py_features_harris

  Okay, corners are good features. But how do we find them?

- @subpage tutorial_py_shi_tomasi

  We will look into Shi-Tomasi corner detection.

- @subpage tutorial_py_sift_intro

  The Harris corner detector is not good enough when the scale of the image changes. Lowe developed a
  breakthrough method to find scale-invariant features, and it is called SIFT.

- @subpage tutorial_py_surf_intro

  SIFT is really good, but not fast enough, so people came up with a speeded-up version called SURF.

- @subpage tutorial_py_fast

  All the above feature detection methods are good in some way. But they are not fast enough to work in
  real-time applications like SLAM. There comes the FAST algorithm, which is really "FAST".

- @subpage tutorial_py_brief

  SIFT uses a feature descriptor with 128 floating point numbers. Consider thousands of such features. It
  takes lots of memory and more time for matching. We can compress it to make it faster. But still we have
  to calculate it first. There comes BRIEF, which gives a shortcut to find binary descriptors with
  less memory, faster matching, and still a higher recognition rate.

- @subpage tutorial_py_orb

  SIFT and SURF are good in what they do, but what if you have to pay a few dollars every year to use them
  in your applications? Yeah, they are patented! To solve that problem, OpenCV devs came up with a new
  "FREE" alternative to SIFT & SURF, and that is ORB.

- @subpage tutorial_py_matcher

  We know a great deal about feature detectors and descriptors. It is time to learn how to match different
  descriptors. OpenCV provides two techniques, the Brute-Force matcher and the FLANN based matcher.

- @subpage tutorial_py_feature_homography

  Now we know about feature matching. Let's mix it up with the calib3d module to find objects in a
  complex image.
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
@ -0,0 +1,113 @@

Drawing Functions in OpenCV {#tutorial_py_drawing_functions}
===========================

Goal
----

- Learn to draw different geometric shapes with OpenCV
- You will learn these functions : **cv2.line()**, **cv2.circle()** , **cv2.rectangle()**,
  **cv2.ellipse()**, **cv2.putText()** etc.

Code
----

In all the above functions, you will see some common arguments as given below:

- img : The image where you want to draw the shapes
- color : Color of the shape. For BGR, pass it as a tuple, eg: (255,0,0) for blue. For
  grayscale, just pass the scalar value.
- thickness : Thickness of the line or circle etc. If **-1** is passed for closed figures like
  circles, it will fill the shape. *default thickness = 1*
- lineType : Type of line, whether 8-connected, anti-aliased line etc. *By default, it is
  8-connected.* cv2.LINE_AA gives an anti-aliased line which looks great for curves.

### Drawing Line

To draw a line, you need to pass the starting and ending coordinates of the line. We will create a black
image and draw a blue line on it from the top-left to the bottom-right corner.
@code{.py}
import numpy as np
import cv2

# Create a black image
img = np.zeros((512,512,3), np.uint8)

# Draw a diagonal blue line with thickness of 5 px
cv2.line(img,(0,0),(511,511),(255,0,0),5)
@endcode
### Drawing Rectangle

To draw a rectangle, you need the top-left corner and bottom-right corner of the rectangle. This time we
will draw a green rectangle at the top-right corner of the image.
@code{.py}
cv2.rectangle(img,(384,0),(510,128),(0,255,0),3)
@endcode
### Drawing Circle

To draw a circle, you need its center coordinates and radius. We will draw a circle inside the
rectangle drawn above.
@code{.py}
cv2.circle(img,(447,63), 63, (0,0,255), -1)
@endcode
### Drawing Ellipse

To draw the ellipse, we need to pass several arguments. One argument is the center location (x,y).
The next argument is the axes lengths (major axis length, minor axis length). angle is the angle of rotation
of the ellipse in the anti-clockwise direction. startAngle and endAngle denote the start and end of the
ellipse arc measured in the clockwise direction from the major axis, i.e. giving values 0 and 360 gives the
full ellipse. For more details, check the documentation of **cv2.ellipse()**. The example below draws a
half ellipse at the center of the image.
@code{.py}
cv2.ellipse(img,(256,256),(100,50),0,0,180,255,-1)
@endcode
### Drawing Polygon

To draw a polygon, first you need the coordinates of the vertices. Make those points into an array of shape
ROWSx1x2 where ROWS is the number of vertices, and it should be of type int32. Here we draw a small
polygon with four vertices in yellow color.
@code{.py}
pts = np.array([[10,5],[20,30],[70,20],[50,10]], np.int32)
pts = pts.reshape((-1,1,2))
cv2.polylines(img,[pts],True,(0,255,255))
@endcode

@note If the third argument is False, you will get a polyline joining all the points, not a closed
shape.

@note cv2.polylines() can be used to draw multiple lines. Just create a list of all the lines you
want to draw and pass it to the function. All lines will be drawn individually. It is a much better
and faster way to draw a group of lines than calling cv2.line() for each line; see the sketch below.

### Adding Text to Images:

To put text in images, you need to specify the following things.
- Text data that you want to write
- Position coordinates of where you want to put it (i.e. the bottom-left corner where the data starts).
- Font type (Check **cv2.putText()** docs for supported fonts)
- Font Scale (specifies the size of the font)
- regular things like color, thickness, lineType etc. For a better look, lineType = cv2.LINE_AA
  is recommended.

We will write **OpenCV** on our image in white color.
@code{.py}
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(img,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA)
@endcode

### Result

So it is time to see the final result of our drawing. As you studied in previous articles, display
the image to see it.

![image](images/drawing_result.jpg)

Additional Resources
--------------------

-# The angles used in the ellipse function are not our usual circular angles. For more details, visit [this
   discussion](http://answers.opencv.org/question/14541/angles-in-ellipse-function/).

Exercises
---------

-# Try to create the logo of OpenCV using the drawing functions available in OpenCV.
@ -90,7 +90,7 @@ Result
----------
So it is time to see the final result of our drawing. As you studied in previous articles, display the image to see it.

.. image:: images/drawing.jpg
.. image:: images/drawing_result.jpg
    :alt: Drawing Functions in OpenCV
    :align: center

@ -0,0 +1,153 @@

Getting Started with Images {#tutorial_py_image_display}
===========================

Goals
-----

- Here, you will learn how to read an image, how to display it and how to save it back
- You will learn these functions : **cv2.imread()**, **cv2.imshow()** , **cv2.imwrite()**
- Optionally, you will learn how to display images with Matplotlib

Using OpenCV
------------

### Read an image

Use the function **cv2.imread()** to read an image. The image should be in the working directory or
a full path to the image should be given.

The second argument is a flag which specifies the way the image should be read.

- cv2.IMREAD_COLOR : Loads a color image. Any transparency of the image will be neglected. It is the
  default flag.
- cv2.IMREAD_GRAYSCALE : Loads image in grayscale mode
- cv2.IMREAD_UNCHANGED : Loads image as such including alpha channel

@note Instead of these three flags, you can simply pass integers 1, 0 or -1 respectively.

See the code below:
@code{.py}
import numpy as np
import cv2

# Load a color image in grayscale
img = cv2.imread('messi5.jpg',0)
@endcode

**warning**

Even if the image path is wrong, it won't throw any error, but print img will give you None

### Display an image

Use the function **cv2.imshow()** to display an image in a window. The window automatically fits to
the image size.

The first argument is a window name, which is a string. The second argument is our image. You can create as
many windows as you wish, but with different window names.
@code{.py}
cv2.imshow('image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode
A screenshot of the window will look like this (on a Fedora-Gnome machine):

![image](images/opencv_screenshot.jpg)

**cv2.waitKey()** is a keyboard binding function. Its argument is the time in milliseconds. The
function waits for the specified milliseconds for any keyboard event. If you press any key in that time,
the program continues. If **0** is passed, it waits indefinitely for a key stroke. It can also be
set to detect specific key strokes, like whether key 'a' is pressed, which we will discuss below.

@note Besides binding keyboard events, this function also processes many other GUI events, so you
MUST use it to actually display the image.

**cv2.destroyAllWindows()** simply destroys all the windows we created. If you want to destroy a
specific window, use the function **cv2.destroyWindow()**, where you pass the exact window name as
the argument.

@note There is a special case where you can create a window first and load the image into it later. In
that case, you can specify whether the window is resizable or not. It is done with the function
**cv2.namedWindow()**. By default, the flag is cv2.WINDOW_AUTOSIZE. But if you specify the flag as
cv2.WINDOW_NORMAL, you can resize the window. It is helpful when the image is too large in dimension
or when you are adding a track bar to the window.

See the code below:
@code{.py}
cv2.namedWindow('image', cv2.WINDOW_NORMAL)
cv2.imshow('image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode
### Write an image

Use the function **cv2.imwrite()** to save an image.

The first argument is the file name, the second argument is the image you want to save.
@code{.py}
cv2.imwrite('messigray.png',img)
@endcode
This will save the image in PNG format in the working directory.

### Sum it up

The program below loads an image in grayscale, displays it, saves the image if you press 's' and exits, or
simply exits without saving if you press the ESC key.
@code{.py}
import numpy as np
import cv2

img = cv2.imread('messi5.jpg',0)
cv2.imshow('image',img)
k = cv2.waitKey(0)
if k == 27:         # wait for ESC key to exit
    cv2.destroyAllWindows()
elif k == ord('s'): # wait for 's' key to save and exit
    cv2.imwrite('messigray.png',img)
    cv2.destroyAllWindows()
@endcode

**warning**

If you are using a 64-bit machine, you will have to modify the k = cv2.waitKey(0) line as follows:
k = cv2.waitKey(0) & 0xFF

Using Matplotlib
----------------

Matplotlib is a plotting library for Python which gives you a wide variety of plotting methods. You
will see them in coming articles. Here, you will learn how to display an image with Matplotlib. You can
zoom images, save them etc. using Matplotlib.
@code{.py}
import numpy as np
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('messi5.jpg',0)
plt.imshow(img, cmap = 'gray', interpolation = 'bicubic')
plt.xticks([]), plt.yticks([])  # to hide tick values on X and Y axis
plt.show()
@endcode
A screenshot of the window will look like this:

![image](images/matplotlib_screenshot.jpg)

@sa Plenty of plotting options are available in Matplotlib. Please refer to the Matplotlib docs for more
details. Some, we will see on the way.

**warning**

A color image loaded by OpenCV is in BGR mode, but Matplotlib displays in RGB mode. So color images
will not be displayed correctly in Matplotlib if the image is read with OpenCV. Please see the exercises
for more details.
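
As a hint (a minimal sketch; the exercise below asks you to explore this yourself), converting the channel
order before plotting fixes the colors:
@code{.py}
img = cv2.imread('messi5.jpg')                 # loaded in BGR order
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # reorder channels for Matplotlib
plt.imshow(img_rgb), plt.show()
@endcode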

Additional Resources
--------------------

-# [Matplotlib Plotting Styles and Features](http://matplotlib.org/api/pyplot_api.html)

Exercises
---------

-# There is some problem when you try to load a color image in OpenCV and display it in Matplotlib.
   Read [this discussion](http://stackoverflow.com/a/15074748/1134940) and understand it.
@ -0,0 +1,111 @@

Mouse as a Paint-Brush {#tutorial_py_mouse_handling}
======================

Goal
----

- Learn to handle mouse events in OpenCV
- You will learn these functions : **cv2.setMouseCallback()**

Simple Demo
-----------

Here, we create a simple application which draws a circle on an image wherever we double-click on
it.

First we create a mouse callback function which is executed when a mouse event takes place. A mouse
event can be anything related to the mouse, like left-button down, left-button up, left-button
double-click etc. It gives us the coordinates (x,y) for every mouse event. With this event and
location, we can do whatever we like. To list all available events, run the following code
in a Python terminal:
@code{.py}
import cv2
events = [i for i in dir(cv2) if 'EVENT' in i]
print events
@endcode
Creating a mouse callback function has a specific format which is the same everywhere. It differs only in
what the function does. So our mouse callback function does one thing: it draws a circle where we
double-click. See the code below. The code is self-explanatory from the comments:
@code{.py}
import cv2
import numpy as np

# mouse callback function
def draw_circle(event,x,y,flags,param):
    if event == cv2.EVENT_LBUTTONDBLCLK:
        cv2.circle(img,(x,y),100,(255,0,0),-1)

# Create a black image, a window and bind the function to window
img = np.zeros((512,512,3), np.uint8)
cv2.namedWindow('image')
cv2.setMouseCallback('image',draw_circle)

while(1):
    cv2.imshow('image',img)
    if cv2.waitKey(20) & 0xFF == 27:
        break
cv2.destroyAllWindows()
@endcode
More Advanced Demo
------------------

Now we go for a much better application. In this one, we draw either rectangles or circles (depending on
the mode we select) by dragging the mouse, like we do in a Paint application. So our mouse callback
function has two parts, one to draw rectangles and the other to draw circles. This specific example
will be really helpful in creating and understanding some interactive applications like object
tracking, image segmentation etc.
@code{.py}
import cv2
import numpy as np

drawing = False # true if mouse is pressed
mode = True     # if True, draw rectangle. Press 'm' to toggle to curve
ix,iy = -1,-1

# mouse callback function
def draw_circle(event,x,y,flags,param):
    global ix,iy,drawing,mode

    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix,iy = x,y

    elif event == cv2.EVENT_MOUSEMOVE:
        if drawing == True:
            if mode == True:
                cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),-1)
            else:
                cv2.circle(img,(x,y),5,(0,0,255),-1)

    elif event == cv2.EVENT_LBUTTONUP:
        drawing = False
        if mode == True:
            cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),-1)
        else:
            cv2.circle(img,(x,y),5,(0,0,255),-1)
@endcode
Next we have to bind this mouse callback function to the OpenCV window. In the main loop, we also set
a keyboard binding for the key 'm' to toggle between rectangle and circle.
@code{.py}
img = np.zeros((512,512,3), np.uint8)
cv2.namedWindow('image')
cv2.setMouseCallback('image',draw_circle)

while(1):
    cv2.imshow('image',img)
    k = cv2.waitKey(1) & 0xFF
    if k == ord('m'):
        mode = not mode
    elif k == 27:
        break

cv2.destroyAllWindows()
@endcode
Additional Resources
--------------------

Exercises
---------

-# In our last example, we drew a filled rectangle. Modify the code to draw an unfilled
   rectangle.
@ -0,0 +1,27 @@

Gui Features in OpenCV {#tutorial_py_table_of_contents_gui}
======================

- @subpage tutorial_py_image_display

  Learn to load an image, display it and save it back

- @subpage tutorial_py_video_display

  Learn to play videos, capture video from a camera and write it as a video

- @subpage tutorial_py_drawing_functions

  Learn to draw lines, rectangles, ellipses, circles etc. with OpenCV

- @subpage tutorial_py_mouse_handling

  Draw stuff with your mouse

- @subpage tutorial_py_trackbar

  Create a trackbar to control certain parameters
74	doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown	Normal file
@ -0,0 +1,74 @@

Trackbar as the Color Palette {#tutorial_py_trackbar}
=============================

Goal
----

- Learn to bind a trackbar to OpenCV windows
- You will learn these functions : **cv2.getTrackbarPos()**, **cv2.createTrackbar()** etc.

Code Demo
---------

Here we will create a simple application which shows the color you specify. You have a window which
shows the color, and three trackbars to specify each of the B, G, R colors. You slide the trackbars and
the window color changes correspondingly. By default, the initial color is set to black.

For the cv2.createTrackbar() function, the first argument is the trackbar name, the second one is the window
name to which it is attached, the third argument is the default value, the fourth one is the maximum value
and the fifth one is the callback function which is executed every time the trackbar value changes. The
callback function always has a default argument which is the trackbar position. In our case, the
function does nothing, so we simply pass.

Another important application of a trackbar is to use it as a button or switch. OpenCV, by default,
doesn't have button functionality, so you can use a trackbar to get such functionality. In our
application, we have created one switch in which the application works only if the switch is ON, otherwise
the screen is always black.
@code{.py}
import cv2
import numpy as np

def nothing(x):
    pass

# Create a black image, a window
img = np.zeros((300,512,3), np.uint8)
cv2.namedWindow('image')

# create trackbars for color change
cv2.createTrackbar('R','image',0,255,nothing)
cv2.createTrackbar('G','image',0,255,nothing)
cv2.createTrackbar('B','image',0,255,nothing)

# create switch for ON/OFF functionality
switch = '0 : OFF \n1 : ON'
cv2.createTrackbar(switch, 'image',0,1,nothing)

while(1):
    cv2.imshow('image',img)
    k = cv2.waitKey(1) & 0xFF
    if k == 27:
        break

    # get current positions of four trackbars
    r = cv2.getTrackbarPos('R','image')
    g = cv2.getTrackbarPos('G','image')
    b = cv2.getTrackbarPos('B','image')
    s = cv2.getTrackbarPos(switch,'image')

    if s == 0:
        img[:] = 0
    else:
        img[:] = [b,g,r]

cv2.destroyAllWindows()
@endcode
The screenshot of the application looks like this:

![image](images/trackbar_screenshot.jpg)

Exercises
---------

-# Create a Paint application with adjustable colors and brush radius using trackbars. For drawing,
   refer to the previous tutorial on mouse handling.
@ -0,0 +1,153 @@
Getting Started with Videos {#tutorial_py_video_display}
===========================

Goal
----

- Learn to read video, display video and save video.
- Learn to capture video from a camera and display it.
- You will learn these functions : **cv2.VideoCapture()**, **cv2.VideoWriter()**

Capture Video from Camera
-------------------------

Often, we have to capture a live stream with a camera. OpenCV provides a very simple interface for this.
Let's capture a video from the camera (I am using the built-in webcam of my laptop), convert it into
a grayscale video and display it. Just a simple task to get started.

To capture a video, you need to create a **VideoCapture** object. Its argument can be either the
device index or the name of a video file. The device index is just a number specifying which camera to use.
Normally one camera will be connected (as in my case), so I simply pass 0 (or -1). You can select
the second camera by passing 1 and so on. After that, you can capture frame-by-frame. But at the
end, don't forget to release the capture.
@code{.py}
import numpy as np
import cv2

cap = cv2.VideoCapture(0)

while(True):
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Our operations on the frame come here
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Display the resulting frame
    cv2.imshow('frame',gray)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()
@endcode
cap.read() returns a bool (True/False). If the frame is read correctly, it will be True. So you can
check for the end of the video by checking this return value.

Sometimes, cap may not have initialized the capture. In that case, this code shows an error. You can
check whether it is initialized or not with the method **cap.isOpened()**. If it is True, OK.
Otherwise, open it using **cap.open()**.

You can also access some of the features of this video using the **cap.get(propId)** method, where propId
is a number from 0 to 18. Each number denotes a property of the video (if it is applicable to that
video) and full details can be seen here: [Property
Identifier](http://docs.opencv.org/modules/highgui/doc/reading_and_writing_video.html#videocapture-get).
Some of these values can be modified using **cap.set(propId, value)**. Value is the new value you
want.

For example, I can check the frame width and height with cap.get(3) and cap.get(4). It gives me
640x480 by default. But I want to modify it to 320x240. Just use ret = cap.set(3,320) and
ret = cap.set(4,240).
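Putting the checks above together, a short sketch (assuming a camera at index 0 that actually supports 320x240; the reported values depend on your camera and driver) might look like this:
@code{.py}
import cv2

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.open(0)                    # try again if the capture did not initialize

cap.set(3, 320)                    # propId 3 = frame width
cap.set(4, 240)                    # propId 4 = frame height
print cap.get(3), cap.get(4)       # should now report 320.0 240.0 if the camera supports it

cap.release()
@endcode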
@note If you are getting an error, make sure the camera is working fine with any other camera application
(like Cheese on Linux).

Playing Video from file
-----------------------

It is the same as capturing from a camera, just replace the camera index with a video file name. Also, while
displaying the frame, use an appropriate delay for cv2.waitKey(). If it is too small, the video will be very
fast, and if it is too high, the video will be slow (well, that is how you can display videos in slow
motion). 25 milliseconds will be OK in normal cases.
@code{.py}
import numpy as np
import cv2

cap = cv2.VideoCapture('vtest.avi')

while(cap.isOpened()):
    ret, frame = cap.read()
    if not ret:        # stop when the video ends or a frame could not be read
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    cv2.imshow('frame',gray)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
@endcode

@note Make sure the proper versions of ffmpeg or gstreamer are installed. Sometimes it is a headache to
work with video capture, mostly due to a wrong installation of ffmpeg/gstreamer.

Saving a Video
--------------

So we capture a video, process it frame-by-frame and we want to save that video. For images, it is
very simple: just use cv2.imwrite(). Here a little more work is required.

This time we create a **VideoWriter** object. We should specify the output file name (eg:
output.avi). Then we should specify the **FourCC** code (details in the next paragraph). Then the number of
frames per second (fps) and the frame size should be passed. And the last one is the **isColor** flag. If it is
True, the encoder expects color frames, otherwise it works with grayscale frames.

[FourCC](http://en.wikipedia.org/wiki/FourCC) is a 4-byte code used to specify the video codec. The
list of available codes can be found at [fourcc.org](http://www.fourcc.org/codecs.php). It is
platform dependent. The following codecs work fine for me.

- In Fedora: DIVX, XVID, MJPG, X264, WMV1, WMV2. (XVID is preferable. MJPG results in large
  video files. X264 gives very small video files)
- In Windows: DIVX (more to be tested and added)
- In OSX : *(I don't have access to OSX. Can someone fill this in?)*

The FourCC code is passed as cv2.VideoWriter_fourcc('M','J','P','G') or
cv2.VideoWriter_fourcc(\*'MJPG') for MJPG.

The code below captures from a camera, flips every frame in the vertical direction and saves it.
@code{.py}
import numpy as np
import cv2

cap = cv2.VideoCapture(0)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi',fourcc, 20.0, (640,480))

while(cap.isOpened()):
    ret, frame = cap.read()
    if ret==True:
        frame = cv2.flip(frame,0)

        # write the flipped frame
        out.write(frame)

        cv2.imshow('frame',frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break

# Release everything if the job is finished
cap.release()
out.release()
cv2.destroyAllWindows()
@endcode

Additional Resources
--------------------

Exercises
---------
111
doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown
Normal file
@ -0,0 +1,111 @@
Canny Edge Detection {#tutorial_py_canny}
====================

Goal
----

In this chapter, we will learn about

- The concept of Canny edge detection
- The OpenCV function for it : **cv2.Canny()**

Theory
------

Canny Edge Detection is a popular edge detection algorithm. It was developed by John F. Canny in
1986. It is a multi-stage algorithm and we will go through each stage.

-# **Noise Reduction**

Since edge detection is susceptible to noise in the image, the first step is to remove the noise in the
image with a 5x5 Gaussian filter. We have already seen this in previous chapters.

-# **Finding Intensity Gradient of the Image**

The smoothened image is then filtered with a Sobel kernel in both the horizontal and vertical direction to
get the first derivative in the horizontal direction (\f$G_x\f$) and the vertical direction (\f$G_y\f$). From these two
images, we can find the edge gradient and direction for each pixel as follows (a short sketch of this
computation is given after this list):

\f[
Edge\_Gradient \; (G) = \sqrt{G_x^2 + G_y^2} \\
Angle \; (\theta) = \tan^{-1} \bigg(\frac{G_y}{G_x}\bigg)
\f]

The gradient direction is always perpendicular to edges. It is rounded to one of four angles
representing the vertical, horizontal and two diagonal directions.

-# **Non-maximum Suppression**

After getting the gradient magnitude and direction, a full scan of the image is done to remove any unwanted
pixels which may not constitute an edge. For this, every pixel is checked to see if it is a
local maximum in its neighborhood in the direction of the gradient. Check the image below:

![image](images/nms.jpg)

Point A is on the edge (in the vertical direction). The gradient direction is normal to the edge. Points B
and C are in the gradient direction. So point A is checked against points B and C to see if it forms a
local maximum. If so, it is considered for the next stage; otherwise, it is suppressed (put to zero).

In short, the result you get is a binary image with "thin edges".

-# **Hysteresis Thresholding**

This stage decides which of the remaining edges are really edges and which are not. For this, we need two
threshold values, minVal and maxVal. Any edges with an intensity gradient above maxVal are sure to
be edges, and those below minVal are sure to be non-edges, so they are discarded. Those which lie between these
two thresholds are classified as edges or non-edges based on their connectivity. If they are connected
to "sure-edge" pixels, they are considered to be part of an edge. Otherwise, they are also discarded.
See the image below:

![image](images/hysteresis.jpg)

Edge A is above maxVal, so it is considered a "sure-edge". Although edge C is below maxVal, it is
connected to edge A, so it is also considered a valid edge and we get the full curve. But edge B,
although it is above minVal and is in the same region as edge C, is not connected to any
"sure-edge", so it is discarded. So it is very important that we select minVal and maxVal
accordingly to get the correct result.

This stage also removes small pixel noise on the assumption that edges are long lines.

So what we finally get are the strong edges in the image.
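As an illustration of the gradient step alone (not the full Canny pipeline), a minimal sketch using cv2.Sobel and NumPy might look like this; 'messi5.jpg' is simply the example image used later in this chapter:
@code{.py}
import cv2
import numpy as np

img = cv2.imread('messi5.jpg',0)
img = cv2.GaussianBlur(img, (5,5), 0)            # step 1: noise reduction

gx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)   # first derivative in x (G_x)
gy = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)   # first derivative in y (G_y)

magnitude = np.sqrt(gx**2 + gy**2)               # Edge_Gradient (G)
angle = np.arctan2(gy, gx)                       # Angle (theta), in radians
@endcode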
Canny Edge Detection in OpenCV
------------------------------

OpenCV puts all of the above into a single function, **cv2.Canny()**. We will see how to use it. The first
argument is our input image. The second and third arguments are our minVal and maxVal respectively.
The fourth argument is aperture_size, the size of the Sobel kernel used to find image gradients. By
default it is 3. The last argument is L2gradient, which specifies the equation for finding the gradient
magnitude. If it is True, it uses the equation mentioned above, which is more accurate; otherwise it
uses this function: \f$Edge\_Gradient \; (G) = |G_x| + |G_y|\f$. By default, it is False.
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('messi5.jpg',0)
edges = cv2.Canny(img,100,200)

plt.subplot(121),plt.imshow(img,cmap = 'gray')
plt.title('Original Image'), plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(edges,cmap = 'gray')
plt.title('Edge Image'), plt.xticks([]), plt.yticks([])

plt.show()
@endcode
See the result below:

![image](images/canny1.jpg)

Additional Resources
--------------------

-# Canny edge detector at [Wikipedia](http://en.wikipedia.org/wiki/Canny_edge_detector)
-# [Canny Edge Detection Tutorial](http://dasl.mem.drexel.edu/alumni/bGreen/www.pages.drexel.edu/_weg22/can_tut.html) by
   Bill Green, 2002.

Exercises
---------

-# Write a small application to find the Canny edges whose threshold values can be varied
   using two trackbars. This way, you can understand the effect of the threshold values.
@ -0,0 +1,113 @@
Changing Colorspaces {#tutorial_py_colorspaces}
====================

Goal
----

- In this tutorial, you will learn how to convert images from one color-space to another, like
  BGR \f$\leftrightarrow\f$ Gray, BGR \f$\leftrightarrow\f$ HSV etc.
- In addition to that, we will create an application which extracts a colored object from a video
- You will learn the following functions : **cv2.cvtColor()**, **cv2.inRange()** etc.

Changing Color-space
--------------------

There are more than 150 color-space conversion methods available in OpenCV. But we will look into
only the two most widely used ones, BGR \f$\leftrightarrow\f$ Gray and BGR \f$\leftrightarrow\f$ HSV.

For color conversion, we use the function cv2.cvtColor(input_image, flag) where flag determines the
type of conversion.

For BGR \f$\rightarrow\f$ Gray conversion we use the flag cv2.COLOR_BGR2GRAY. Similarly for BGR
\f$\rightarrow\f$ HSV, we use the flag cv2.COLOR_BGR2HSV. To get other flags, just run the following
commands in your Python terminal :
@code{.py}
>>> import cv2
>>> flags = [i for i in dir(cv2) if i.startswith('COLOR_')]
>>> print flags
@endcode
@note For HSV, the Hue range is [0,179], the Saturation range is [0,255] and the Value range is [0,255].
Different software packages use different scales. So if you are comparing OpenCV values with them, you need
to normalize these ranges.

Object Tracking
---------------

Now that we know how to convert a BGR image to HSV, we can use this to extract a colored object. In HSV, it
is easier to represent a color than in the RGB color-space. In our application, we will try to extract
a blue colored object. So here is the method:

- Take each frame of the video
- Convert from BGR to HSV color-space
- Threshold the HSV image for a range of blue color
- Extract the blue object alone; then we can do whatever we want with that image.

Below is the code, which is commented in detail :
@code{.py}
import cv2
import numpy as np

cap = cv2.VideoCapture(0)

while(1):

    # Take each frame
    _, frame = cap.read()

    # Convert BGR to HSV
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # define range of blue color in HSV
    lower_blue = np.array([110,50,50])
    upper_blue = np.array([130,255,255])

    # Threshold the HSV image to get only blue colors
    mask = cv2.inRange(hsv, lower_blue, upper_blue)

    # Bitwise-AND mask and original image
    res = cv2.bitwise_and(frame,frame, mask= mask)

    cv2.imshow('frame',frame)
    cv2.imshow('mask',mask)
    cv2.imshow('res',res)
    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break

cv2.destroyAllWindows()
@endcode
The image below shows tracking of the blue object:

![image](images/frame.jpg)

@note There is some noise in the image. We will see how to remove it in later chapters.

@note This is the simplest method of object tracking. Once you learn the functions for contours, you can
do plenty of things, like finding the centroid of this object and using it to track the object, drawing diagrams
just by moving your hand in front of the camera, and many other fun things.

How to find HSV values to track?
--------------------------------

This is a common question found on [stackoverflow.com](www.stackoverflow.com). It is very simple and
you can use the same function, cv2.cvtColor(). Instead of passing an image, you just pass the BGR
values you want. For example, to find the HSV value of Green, try the following commands in a Python
terminal:
@code{.py}
>>> green = np.uint8([[[0,255,0 ]]])
>>> hsv_green = cv2.cvtColor(green,cv2.COLOR_BGR2HSV)
>>> print hsv_green
[[[ 60 255 255]]]
@endcode
Now you take [H-10, 100,100] and [H+10, 255, 255] as the lower bound and upper bound respectively. Apart
from this method, you can use any image editing tool like GIMP or any online converter to find
these values, but don't forget to adjust the HSV ranges.
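For example, turning the hsv_green value above into bounds for cv2.inRange() could look like the sketch below; the 100/100 saturation and value floors are just the loose defaults used earlier in this chapter, not fixed rules:
@code{.py}
import cv2
import numpy as np

green = np.uint8([[[0,255,0]]])
h = int(cv2.cvtColor(green, cv2.COLOR_BGR2HSV)[0][0][0])   # hue of pure green, 60 here

lower_green = np.array([h - 10, 100, 100])
upper_green = np.array([h + 10, 255, 255])
# mask = cv2.inRange(hsv, lower_green, upper_green)        # use as in the tracking code above
@endcode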
Additional Resources
--------------------

Exercises
---------

-# Try to find a way to extract more than one colored object, for example extract red, blue and green
   objects simultaneously (one possible sketch is given below).
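For this exercise, a minimal, untested sketch of one approach is to build a per-color mask with cv2.inRange() and combine them with cv2.bitwise_or(). The HSV bounds below are rough guesses (red usually needs two ranges because hue wraps around 180), and 'frame.jpg' is only a placeholder for a frame you grabbed yourself:
@code{.py}
import cv2
import numpy as np

frame = cv2.imread('frame.jpg')              # or a frame from cv2.VideoCapture
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

blue  = cv2.inRange(hsv, np.array([110,  50,  50]), np.array([130, 255, 255]))
green = cv2.inRange(hsv, np.array([ 50,  50,  50]), np.array([ 70, 255, 255]))
# red wraps around hue 0/180, so combine two ranges
red   = cv2.bitwise_or(cv2.inRange(hsv, np.array([  0, 50, 50]), np.array([ 10, 255, 255])),
                       cv2.inRange(hsv, np.array([170, 50, 50]), np.array([180, 255, 255])))

mask = cv2.bitwise_or(cv2.bitwise_or(blue, green), red)
res = cv2.bitwise_and(frame, frame, mask=mask)
@endcode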
@ -0,0 +1,203 @@
Contour Features {#tutorial_py_contour_features}
================

Goal
----

In this article, we will learn

- To find the different features of contours, like area, perimeter, centroid, bounding box etc.
- You will see plenty of functions related to contours.

1. Moments
----------

Image moments help you to calculate some features like the center of mass of the object, the area of the
object etc. Check out the wikipedia page on [Image
Moments](http://en.wikipedia.org/wiki/Image_moment)

The function **cv2.moments()** gives a dictionary of all moment values calculated. See below:
@code{.py}
import cv2
import numpy as np

img = cv2.imread('star.jpg',0)
ret,thresh = cv2.threshold(img,127,255,0)
contours,hierarchy = cv2.findContours(thresh, 1, 2)

cnt = contours[0]
M = cv2.moments(cnt)
print M
@endcode
From these moments, you can extract useful data like area, centroid etc. The centroid is given by the
relations \f$C_x = \frac{M_{10}}{M_{00}}\f$ and \f$C_y = \frac{M_{01}}{M_{00}}\f$. This can be done as
follows:
@code{.py}
cx = int(M['m10']/M['m00'])
cy = int(M['m01']/M['m00'])
@endcode

2. Contour Area
---------------

Contour area is given by the function **cv2.contourArea()** or from the moments, **M['m00']**.
@code{.py}
area = cv2.contourArea(cnt)
@endcode

3. Contour Perimeter
--------------------

It is also called arc length. It can be found using the **cv2.arcLength()** function. The second
argument specifies whether the shape is a closed contour (if passed True), or just a curve.
@code{.py}
perimeter = cv2.arcLength(cnt,True)
@endcode

4. Contour Approximation
------------------------

It approximates a contour shape to another shape with a smaller number of vertices, depending upon the
precision we specify. It is an implementation of the [Douglas-Peucker
algorithm](http://en.wikipedia.org/wiki/Ramer-Douglas-Peucker_algorithm). Check the wikipedia page
for the algorithm and a demonstration.

To understand this, suppose you are trying to find a square in an image, but due to some problems in
the image, you didn't get a perfect square, but a "bad shape" (as shown in the first image below). Now
you can use this function to approximate the shape. Here, the second argument is called epsilon,
which is the maximum distance from the contour to the approximated contour. It is an accuracy parameter. A wise
selection of epsilon is needed to get the correct output.
@code{.py}
epsilon = 0.1*cv2.arcLength(cnt,True)
approx = cv2.approxPolyDP(cnt,epsilon,True)
@endcode
Below, in the second image, the green line shows the approximated curve for epsilon = 10% of the arc length.
The third image shows the same for epsilon = 1% of the arc length. The third argument specifies whether the
curve is closed or not.

![image](images/approx.jpg)

5. Convex Hull
--------------

The convex hull will look similar to contour approximation, but it is not (both may provide the same results
in some cases). Here, the **cv2.convexHull()** function checks a curve for convexity defects and
corrects it. Generally speaking, convex curves are curves which are always bulged out, or
at least flat. If a curve is bulged inwards, that is called a convexity defect. For example, check the
image of a hand below. The red line shows the convex hull of the hand. The double-sided arrow marks show the
convexity defects, which are the local maximum deviations of the hull from the contour.

![image](images/convexitydefects.jpg)

There are a few things to discuss about its syntax:
@code{.py}
hull = cv2.convexHull(points[, hull[, clockwise[, returnPoints]]])
@endcode
Argument details:

- **points** are the contours we pass in.
- **hull** is the output; normally we avoid it.
- **clockwise** : Orientation flag. If it is True, the output convex hull is oriented clockwise.
  Otherwise, it is oriented counter-clockwise.
- **returnPoints** : By default True. Then it returns the coordinates of the hull points. If
  False, it returns the indices of the contour points corresponding to the hull points.

So to get a convex hull as in the image above, the following is sufficient:
@code{.py}
hull = cv2.convexHull(cnt)
@endcode
But if you want to find convexity defects, you need to pass returnPoints = False. To understand this,
we will take the rectangle image above. First I found its contour as cnt. Then I found its convex
hull with returnPoints = True, and got the following values:
[[[234 202]], [[ 51 202]], [[ 51 79]], [[234 79]]], which are the four corner points of the rectangle.
Now, if I do the same with returnPoints = False, I get the following result: [[129],[ 67],[ 0],[142]].
These are the indices of the corresponding points in the contour. For example, check the first value:
cnt[129] = [[234, 202]], which is the same as the first result (and so on for the others).

You will see this again when we discuss convexity defects; a short snippet comparing the two modes follows below.
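A minimal sketch of that comparison, reusing the cnt found above:
@code{.py}
hull_points  = cv2.convexHull(cnt)                         # returnPoints = True (default): corner coordinates
hull_indices = cv2.convexHull(cnt, returnPoints = False)   # indices into cnt

for idx in hull_indices:
    print cnt[idx[0]][0]   # prints the same coordinates as hull_points, row by row
@endcode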
6. Checking Convexity
---------------------

There is a function to check if a curve is convex or not, **cv2.isContourConvex()**. It just returns
True or False. Not a big deal.
@code{.py}
k = cv2.isContourConvex(cnt)
@endcode

7. Bounding Rectangle
---------------------

There are two types of bounding rectangles.

### 7.a. Straight Bounding Rectangle

It is a straight rectangle; it doesn't consider the rotation of the object. So the area of the bounding
rectangle won't be minimal. It is found by the function **cv2.boundingRect()**.

Let (x,y) be the top-left coordinate of the rectangle and (w,h) be its width and height.
@code{.py}
x,y,w,h = cv2.boundingRect(cnt)
cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),2)
@endcode

### 7.b. Rotated Rectangle

Here, the bounding rectangle is drawn with minimum area, so it considers the rotation as well. The function
used is **cv2.minAreaRect()**. It returns a Box2D structure which contains the following details: (
center (x,y), (width, height), angle of rotation ). But to draw this rectangle, we need the 4 corners of
the rectangle. They are obtained by the function **cv2.boxPoints()**
@code{.py}
rect = cv2.minAreaRect(cnt)
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(img,[box],0,(0,0,255),2)
@endcode
Both rectangles are shown in a single image. The green rectangle shows the normal bounding rect. The red
rectangle is the rotated rect.

![image](images/boundingrect.png)

8. Minimum Enclosing Circle
---------------------------

Next we find the circumcircle of an object using the function **cv2.minEnclosingCircle()**. It is the
circle which completely covers the object with minimum area.
@code{.py}
(x,y),radius = cv2.minEnclosingCircle(cnt)
center = (int(x),int(y))
radius = int(radius)
cv2.circle(img,center,radius,(0,255,0),2)
@endcode
![image](images/circumcircle.png)

9. Fitting an Ellipse
---------------------

The next one is to fit an ellipse to an object. It returns the rotated rectangle in which the ellipse is
inscribed.
@code{.py}
ellipse = cv2.fitEllipse(cnt)
cv2.ellipse(img,ellipse,(0,255,0),2)
@endcode
![image](images/fitellipse.png)

10. Fitting a Line
------------------

Similarly, we can fit a line to a set of points. The image below contains a set of white points. We can
approximate a straight line to it.
@code{.py}
rows,cols = img.shape[:2]
[vx,vy,x,y] = cv2.fitLine(cnt, cv2.DIST_L2,0,0.01,0.01)
lefty = int((-x*vy/vx) + y)
righty = int(((cols-x)*vy/vx)+y)
cv2.line(img,(cols-1,righty),(0,lefty),(0,255,0),2)
@endcode
![image](images/fitline.jpg)

Additional Resources
--------------------

Exercises
---------
@ -0,0 +1,120 @@
Contour Properties {#tutorial_py_contour_properties}
==================

Here we will learn to extract some frequently used properties of objects like Solidity, Equivalent
Diameter, Mask image, Mean Intensity etc. More features can be found in the [Matlab regionprops
documentation](http://www.mathworks.in/help/images/ref/regionprops.html).

*(NB : Centroid, Area, Perimeter etc. also belong to this category, but we have seen them in the last
chapter)*

1. Aspect Ratio
---------------

It is the ratio of the width to the height of the bounding rect of the object.

\f[Aspect \; Ratio = \frac{Width}{Height}\f]
@code{.py}
x,y,w,h = cv2.boundingRect(cnt)
aspect_ratio = float(w)/h
@endcode

2. Extent
---------

Extent is the ratio of the contour area to the bounding rectangle area.

\f[Extent = \frac{Object \; Area}{Bounding \; Rectangle \; Area}\f]
@code{.py}
area = cv2.contourArea(cnt)
x,y,w,h = cv2.boundingRect(cnt)
rect_area = w*h
extent = float(area)/rect_area
@endcode

3. Solidity
-----------

Solidity is the ratio of the contour area to its convex hull area.

\f[Solidity = \frac{Contour \; Area}{Convex \; Hull \; Area}\f]
@code{.py}
area = cv2.contourArea(cnt)
hull = cv2.convexHull(cnt)
hull_area = cv2.contourArea(hull)
solidity = float(area)/hull_area
@endcode

4. Equivalent Diameter
----------------------

Equivalent Diameter is the diameter of the circle whose area is the same as the contour area.

\f[Equivalent \; Diameter = \sqrt{\frac{4 \times Contour \; Area}{\pi}}\f]
@code{.py}
area = cv2.contourArea(cnt)
equi_diameter = np.sqrt(4*area/np.pi)
@endcode

5. Orientation
--------------

Orientation is the angle at which the object is directed. The following method also gives the Major Axis and
Minor Axis lengths.
@code{.py}
(x,y),(MA,ma),angle = cv2.fitEllipse(cnt)
@endcode

6. Mask and Pixel Points
------------------------

In some cases, we may need all the points which comprise the object. It can be done as follows:
@code{.py}
mask = np.zeros(imgray.shape,np.uint8)
cv2.drawContours(mask,[cnt],0,255,-1)
pixelpoints = np.transpose(np.nonzero(mask))
#pixelpoints = cv2.findNonZero(mask)
@endcode
Here, two methods are given to do the same thing: one using Numpy functions, the other using an OpenCV function
(the last commented line). The results are the same, but with a slight difference. Numpy gives
coordinates in **(row, column)** format, while OpenCV gives coordinates in **(x,y)** format. So
basically the answers will be interchanged. Note that **row = y** and **column = x**.

7. Maximum Value, Minimum Value and their locations
---------------------------------------------------

We can find these parameters using a mask image.
@code{.py}
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(imgray,mask = mask)
@endcode

8. Mean Color or Mean Intensity
-------------------------------

Here, we can find the average color of an object, or the average intensity of the object in
grayscale mode. We again use the same mask to do it.
@code{.py}
mean_val = cv2.mean(im,mask = mask)
@endcode

9. Extreme Points
-----------------

Extreme Points means the topmost, bottommost, rightmost and leftmost points of the object.
@code{.py}
leftmost = tuple(cnt[cnt[:,:,0].argmin()][0])
rightmost = tuple(cnt[cnt[:,:,0].argmax()][0])
topmost = tuple(cnt[cnt[:,:,1].argmin()][0])
bottommost = tuple(cnt[cnt[:,:,1].argmax()][0])
@endcode
For example, if I apply it to a map of India, I get the following result :

![image](images/extremepoints.jpg)

Additional Resources
--------------------

Exercises
---------

-# There are still some features left in the Matlab regionprops doc. Try to implement them.
@ -0,0 +1,94 @@
Contours : Getting Started {#tutorial_py_contours_begin}
==========================

Goal
----

- Understand what contours are.
- Learn to find contours, draw contours etc.
- You will see these functions : **cv2.findContours()**, **cv2.drawContours()**

What are contours?
------------------

Contours can be explained simply as a curve joining all the continuous points (along the boundary)
having the same color or intensity. Contours are a useful tool for shape analysis and object
detection and recognition.

- For better accuracy, use binary images. So before finding contours, apply thresholding or canny
  edge detection.
- The findContours function modifies the source image. So if you want the source image even after
  finding contours, store it in some other variable beforehand.
- In OpenCV, finding contours is like finding a white object on a black background. So remember, the
  object to be found should be white and the background should be black.

Let's see how to find the contours of a binary image:
@code{.py}
import numpy as np
import cv2

im = cv2.imread('test.jpg')
imgray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(imgray,127,255,0)
contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
@endcode
See, there are three arguments in the **cv2.findContours()** function: the first one is the source image, the second
is the contour retrieval mode, and the third is the contour approximation method. It outputs the contours and
the hierarchy. contours is a Python list of all the contours in the image. Each individual contour is a
Numpy array of (x,y) coordinates of boundary points of the object.

@note We will discuss the second and third arguments and the hierarchy in detail later. Until then,
the values given to them in the code sample will work fine for all images.

How to draw the contours?
-------------------------

To draw the contours, the cv2.drawContours function is used. It can also be used to draw any shape
provided you have its boundary points. Its first argument is the source image, the second argument is the
contours, which should be passed as a Python list, the third argument is the index of the contour (useful when
drawing an individual contour; to draw all contours, pass -1) and the remaining arguments are color,
thickness etc.

To draw all the contours in an image:
@code{.py}
cv2.drawContours(img, contours, -1, (0,255,0), 3)
@endcode
To draw an individual contour, say the 4th contour:
@code{.py}
cv2.drawContours(img, contours, 3, (0,255,0), 3)
@endcode
But most of the time, the method below will be useful:
@code{.py}
cnt = contours[3]
cv2.drawContours(img, [cnt], 0, (0,255,0), 3)
@endcode

@note The last two methods are the same, but as you go forward, you will see that the last one is more useful.

Contour Approximation Method
============================

This is the third argument in the cv2.findContours function. What does it actually denote?

Above, we said that contours are the boundaries of a shape with the same intensity. OpenCV stores the (x,y)
coordinates of the boundary of a shape. But does it store all the coordinates? That is specified by
this contour approximation method.

If you pass cv2.CHAIN_APPROX_NONE, all the boundary points are stored. But do we actually need all
the points? For example, say you found the contour of a straight line. Do you need all the points on the line
to represent that line? No, we need just the two end points of that line. This is what
cv2.CHAIN_APPROX_SIMPLE does. It removes all redundant points and compresses the contour, thereby
saving memory.

The image of a rectangle below demonstrates this technique. Just draw a circle at every coordinate in
the contour array (drawn in blue). The first image shows the points I got with cv2.CHAIN_APPROX_NONE
(734 points) and the second image shows the ones obtained with cv2.CHAIN_APPROX_SIMPLE (only 4 points). See how
much memory it saves!!! A small sketch of this comparison follows.
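A minimal, untested sketch of that comparison; 'rectangle.jpg' is only a placeholder for any picture containing a single plain rectangle:
@code{.py}
import cv2
import numpy as np

img = cv2.imread('rectangle.jpg')
imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)

for method in (cv2.CHAIN_APPROX_NONE, cv2.CHAIN_APPROX_SIMPLE):
    # findContours modifies its input, so work on a copy each time
    contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, method)
    cnt = contours[0]                 # assumes the rectangle is the only shape
    print len(cnt)                    # many points vs. only the 4 corners
    vis = img.copy()
    for p in cnt:
        cv2.circle(vis, tuple(p[0]), 2, (255,0,0), -1)   # blue dot at each stored point
    cv2.imshow('method %d' % method, vis)

cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode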
![image](images/none.jpg)

Additional Resources
--------------------

Exercises
---------
@ -0,0 +1,218 @@
Contours Hierarchy {#tutorial_py_contours_hierarchy}
==================

Goal
----

This time, we learn about the hierarchy of contours, i.e. the parent-child relationship between contours.

Theory
------

In the last few articles on contours, we have worked with several functions related to contours
provided by OpenCV. But when we found the contours in an image using the **cv2.findContours()** function,
we passed an argument, the **Contour Retrieval Mode**. We usually passed **cv2.RETR_LIST** or
**cv2.RETR_TREE** and it worked nicely. But what does it actually mean ?

Also, in the output, we got three arrays: the first is the image, the second is our contours, and one more
output which we named **hierarchy** (please check out the code in the previous articles). But we
never used this hierarchy anywhere. So what is this hierarchy and what is it for ? What is its
relationship with the previously mentioned function argument ?

That is what we are going to deal with in this article.

### What is Hierarchy?

Normally we use the **cv2.findContours()** function to detect objects in an image, right ? Sometimes
objects are in different locations. But in some cases, some shapes are inside other shapes, just
like nested figures. In this case, we call the outer one the **parent** and the inner one the **child**. This
way, contours in an image have some relationship to each other. And we can specify how one contour is
connected to the others: for example, is it a child of some other contour, or is it a parent, etc.
The representation of this relationship is called the **Hierarchy**.

Consider the example image below :

![image](images/hierarchy.png)

In this image, there are a few shapes which I have numbered from **0-5**. *2 and 2a* denote the
external and internal contours of the outermost box.

Here, contours 0,1,2 are **external or outermost**. We can say they are in **hierarchy-0**, or
simply that they are at the **same hierarchy level**.

Next comes **contour-2a**. It can be considered a **child of contour-2** (or, the other way around,
contour-2 is the parent of contour-2a). So let it be in **hierarchy-1**. Similarly contour-3 is a child of
contour-2 and it comes in the next hierarchy level. Finally contours 4,5 are the children of contour-3a, and
they come in the last hierarchy level. From the way I numbered the boxes, I would say contour-4 is
the first child of contour-3a (it could be contour-5 as well).

I mentioned these things to explain terms like **same hierarchy level**, **external contour**,
**child contour**, **parent contour**, **first child** etc. Now let's get into OpenCV.

### Hierarchy Representation in OpenCV

So each contour has its own information about which hierarchy level it is in, who its child is, who its
parent is etc. OpenCV represents it as an array of four values : **[Next, Previous, First_Child, Parent]**

<center>*"Next denotes the next contour at the same hierarchical level."*</center>

For example, take contour-0 in our picture. Which is the next contour at its same level ? It is contour-1. So
simply put Next = 1. Similarly for contour-1, the next is contour-2, so Next = 2.

What about contour-2? There is no next contour at the same level. So simply put Next = -1. What
about contour-4? It is at the same level as contour-5. So its next contour is contour-5, so Next = 5.

<center>*"Previous denotes the previous contour at the same hierarchical level."*</center>

It is the same as above. The previous contour of contour-1 is contour-0 at the same level. Similarly for
contour-2, it is contour-1. And for contour-0, there is no previous, so put it as -1.

<center>*"First_Child denotes its first child contour."*</center>

There is no need for any explanation. For contour-2, the child is contour-2a, so it gets the
corresponding index value of contour-2a. What about contour-3a? It has two children. But we take
only the first child, and that is contour-4. So First_Child = 4 for contour-3a.

<center>*"Parent denotes the index of its parent contour."*</center>

It is just the opposite of **First_Child**. For both contour-4 and contour-5, the parent contour is
contour-3a. For contour-3a, it is contour-3, and so on.

@note If there is no child or parent, that field is taken as -1

Now that we know about the hierarchy style used in OpenCV, we can look into the Contour Retrieval Modes
in OpenCV with the help of the same image given above, i.e. what do flags like cv2.RETR_LIST,
cv2.RETR_TREE, cv2.RETR_CCOMP, cv2.RETR_EXTERNAL etc. mean?

Contour Retrieval Mode
----------------------

### 1. RETR_LIST

This is the simplest of the four flags (from the explanation point of view). It simply retrieves all the
contours, but doesn't create any parent-child relationships. **Parents and kids are equal under this
rule, and they are just contours**, i.e. they all belong to the same hierarchy level.

So here, the 3rd and 4th terms in the hierarchy array are always -1. But obviously, the Next and Previous terms
will have their corresponding values. Just check it yourself and verify it (a short snippet for doing so is given below).
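One rough way to verify it yourself; 'hierarchy_test.png' is just a placeholder for the test image shown above:
@code{.py}
import cv2

im = cv2.imread('hierarchy_test.png')
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)

# find contours with RETR_LIST and inspect the hierarchy array
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
print hierarchy        # the third and fourth columns should all be -1
@endcode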
Below is the result I got; each row gives the hierarchy details of the corresponding contour. For example, the first
row corresponds to contour 0. The next contour is contour 1, so Next = 1. There is no previous contour,
so Previous = -1. And the remaining two are, as explained before, -1.
@code{.py}
>>> hierarchy
array([[[ 1, -1, -1, -1],
        [ 2,  0, -1, -1],
        [ 3,  1, -1, -1],
        [ 4,  2, -1, -1],
        [ 5,  3, -1, -1],
        [ 6,  4, -1, -1],
        [ 7,  5, -1, -1],
        [-1,  6, -1, -1]]])
@endcode
This is a good choice to use in your code if you are not using any hierarchy features.

### 2. RETR_EXTERNAL

If you use this flag, it returns only the extreme outer contours. All child contours are left behind. **We
can say, under this law, only the eldest in every family is taken care of. It doesn't care about the
other members of the family :)**.

So, in our image, how many extreme outer contours are there, i.e. at the hierarchy-0 level? Only 3, i.e.
contours 0,1,2, right? Now try to find the contours using this flag. Here also, the values given to each
element are the same as above. Compare it with the result above. Below is what I got :
@code{.py}
>>> hierarchy
array([[[ 1, -1, -1, -1],
        [ 2,  0, -1, -1],
        [-1,  1, -1, -1]]])
@endcode
You can use this flag if you want to extract only the outer contours. It might be useful in some
cases.

### 3. RETR_CCOMP

This flag retrieves all the contours and arranges them into a 2-level hierarchy, i.e. the external contours
of an object (its boundary) are placed in hierarchy-1, and the contours of holes inside the object
(if any) are placed in hierarchy-2. If there is any object inside a hole, its contour is placed again in
hierarchy-1 only, and its hole in hierarchy-2, and so on.

Just consider the image of a "big white zero" on a black background. The outer circle of the zero belongs to
the first hierarchy, and the inner circle of the zero belongs to the second hierarchy.

We can explain it with a simple image. Here I have labelled the order of the contours in red and
the hierarchy they belong to in green (either 1 or 2). The order is the same as the order in which OpenCV
detects the contours.

![image](images/ccomp_hierarchy.png)

So consider the first contour, i.e. contour-0. It is in hierarchy-1. It has two holes, contours 1 and 2, and they
belong to hierarchy-2. So for contour-0, the next contour at the same hierarchy level is contour-3, and
there is no previous one. Its first child is contour-1 in hierarchy-2. It has no parent,
because it is in hierarchy-1. So its hierarchy array is [3,-1,1,-1]

Now take contour-1. It is in hierarchy-2. The next one in the same hierarchy (under the parenthood of
contour-0) is contour-2. There is no previous one. It has no child, but its parent is contour-0. So the array is
[2,-1,-1,0].

Similarly contour-2 : It is in hierarchy-2. There is no next contour in the same hierarchy under
contour-0. So no Next. Previous is contour-1. No child, parent is contour-0. So the array is
[-1,1,-1,0].

Contour - 3 : Next in hierarchy-1 is contour-5. Previous is contour-0. Child is contour-4 and there is no
parent. So the array is [5,0,4,-1].

Contour - 4 : It is in hierarchy-2 under contour-3 and it has no sibling. So no next, no previous,
no child, and its parent is contour-3. So the array is [-1,-1,-1,3].

The remaining ones you can fill in yourself. This is the final answer I got:
@code{.py}
>>> hierarchy
array([[[ 3, -1,  1, -1],
        [ 2, -1, -1,  0],
        [-1,  1, -1,  0],
        [ 5,  0,  4, -1],
        [-1, -1, -1,  3],
        [ 7,  3,  6, -1],
        [-1, -1, -1,  5],
        [ 8,  5, -1, -1],
        [-1,  7, -1, -1]]])
@endcode

### 4. RETR_TREE

And this is the final guy, Mr. Perfect. It retrieves all the contours and creates a full family
hierarchy list. **It even tells who is the grandpa, father, son, grandson and even beyond... :)**.

For example, I took the image above, rewrote the code for cv2.RETR_TREE, reordered the contours as per the
result given by OpenCV and analyzed it. Again, the red letters give the contour number and the green letters
give the hierarchy level.

![image](images/tree_hierarchy.png)

Take contour-0 : It is in hierarchy-0. The next contour in the same hierarchy is contour-7. There are no previous
contours. Its child is contour-1. And it has no parent. So the array is [7,-1,1,-1].

Take contour-1 : It is in hierarchy-1. There is no contour at the same level and no previous one. Its child is
contour-2 and its parent is contour-0. So the array is [-1,-1,2,0].

The rest you can try yourself. Below is the full answer:
@code{.py}
>>> hierarchy
array([[[ 7, -1,  1, -1],
        [-1, -1,  2,  0],
        [-1, -1,  3,  1],
        [-1, -1,  4,  2],
        [-1, -1,  5,  3],
        [ 6, -1, -1,  4],
        [-1,  5, -1,  4],
        [ 8,  0, -1, -1],
        [-1,  7, -1, -1]]])
@endcode

Additional Resources
--------------------

Exercises
---------
@ -0,0 +1,132 @@
Contours : More Functions {#tutorial_py_contours_more_functions}
=========================

Goal
----

In this chapter, we will learn about
- Convexity defects and how to find them.
- Finding the shortest distance from a point to a polygon
- Matching different shapes

Theory and Code
---------------

### 1. Convexity Defects

We saw what a convex hull is in the second chapter about contours. Any deviation of the object from this
hull can be considered a convexity defect.

OpenCV comes with a ready-made function to find them, **cv2.convexityDefects()**. A basic function
call would look like this:
@code{.py}
hull = cv2.convexHull(cnt,returnPoints = False)
defects = cv2.convexityDefects(cnt,hull)
@endcode

@note Remember that we have to pass returnPoints = False while finding the convex hull in order to find
convexity defects.

It returns an array where each row contains these values: **[ start point, end point, farthest
point, approximate distance to farthest point ]**. We can visualize it using an image. We draw a
line joining the start point and the end point, then draw a circle at the farthest point. Remember, the first
three values returned are indices into cnt, so we have to look those values up in cnt.

@code{.py}
import cv2
import numpy as np

img = cv2.imread('star.jpg')
img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(img_gray, 127, 255,0)
contours,hierarchy = cv2.findContours(thresh,2,1)
cnt = contours[0]

hull = cv2.convexHull(cnt,returnPoints = False)
defects = cv2.convexityDefects(cnt,hull)

for i in range(defects.shape[0]):
    s,e,f,d = defects[i,0]
    start = tuple(cnt[s][0])
    end = tuple(cnt[e][0])
    far = tuple(cnt[f][0])
    cv2.line(img,start,end,[0,255,0],2)
    cv2.circle(img,far,5,[0,0,255],-1)

cv2.imshow('img',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode
And see the result:

![image](images/defects.jpg)

### 2. Point Polygon Test

This function finds the shortest distance between a point in the image and a contour. It returns a
distance which is negative when the point is outside the contour, positive when the point is inside, and zero
if the point is on the contour.

For example, we can check the point (50,50) as follows:
@code{.py}
dist = cv2.pointPolygonTest(cnt,(50,50),True)
@endcode
In the function, the third argument is measureDist. If it is True, it finds the signed distance. If
False, it finds whether the point is inside, outside, or on the contour (it returns +1, -1, 0
respectively).

@note If you don't want to find the distance, make sure the third argument is False, because it is a
time-consuming process. Making it False gives about a 2-3X speedup.

### 3. Match Shapes

OpenCV comes with a function **cv2.matchShapes()** which enables us to compare two shapes, or two
contours, and returns a metric showing the similarity. The lower the result, the better the match.
It is calculated based on the Hu moment values. The different measurement methods are explained in the
docs.
@code{.py}
import cv2
import numpy as np

img1 = cv2.imread('star.jpg',0)
img2 = cv2.imread('star2.jpg',0)

ret, thresh = cv2.threshold(img1, 127, 255,0)
ret, thresh2 = cv2.threshold(img2, 127, 255,0)
contours,hierarchy = cv2.findContours(thresh,2,1)
cnt1 = contours[0]
contours,hierarchy = cv2.findContours(thresh2,2,1)
cnt2 = contours[0]

ret = cv2.matchShapes(cnt1,cnt2,1,0.0)
print ret
@endcode
I tried matching shapes with the different shapes given below:

![image](images/matchshapes.jpg)

I got the following results:

- Matching Image A with itself = 0.0
- Matching Image A with Image B = 0.001946
- Matching Image A with Image C = 0.326911

See, even image rotation doesn't affect this comparison very much.

@sa [Hu-Moments](http://en.wikipedia.org/wiki/Image_moment#Rotation_invariant_moments) are seven
moments invariant to translation, rotation and scale. The seventh one is skew-invariant. Those values
can be found using the **cv2.HuMoments()** function.
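For instance, a quick look at those values for a contour (a minimal sketch, reusing cnt1 from the code above):
@code{.py}
M = cv2.moments(cnt1)
hu = cv2.HuMoments(M)
print hu.flatten()     # the seven Hu moments of the contour
@endcode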
Additional Resources
--------------------

Exercises
---------

-# Check the documentation for **cv2.pointPolygonTest()**; you can find a nice image in red and
   blue. It represents the distance from all pixels to the white curve on it. All pixels
   inside the curve are blue, depending on the distance. Similarly, outside points are red. The contour edges
   are marked in white. So the problem is simple: write code to create such a representation of
   distance.
-# Compare images of digits or letters using **cv2.matchShapes()**. ( That would be a simple step
   towards OCR )
@ -0,0 +1,26 @@
Contours in OpenCV {#tutorial_py_table_of_contents_contours}
==================

- @subpage tutorial_py_contours_begin

    Learn to find and draw Contours

- @subpage tutorial_py_contour_features

    Learn to find different features of contours like area, perimeter, bounding rectangle etc.

- @subpage tutorial_py_contour_properties

    Learn to find different properties of contours like Solidity, Mean Intensity etc.

- @subpage tutorial_py_contours_more_functions

    Learn to find convexity defects, pointPolygonTest, match different shapes etc.

- @subpage tutorial_py_contours_hierarchy

    Learn about Contour Hierarchy
153
doc/py_tutorials/py_imgproc/py_filtering/py_filtering.markdown
Normal file
@ -0,0 +1,153 @@
Smoothing Images {#tutorial_py_filtering}
================

Goals
-----

Learn to:
- Blur images with various low pass filters
- Apply custom-made filters to images (2D convolution)

2D Convolution ( Image Filtering )
----------------------------------

As with one-dimensional signals, images can also be filtered with various low-pass filters (LPF),
high-pass filters (HPF) etc. An LPF helps in removing noise, blurring images etc. An HPF helps
in finding edges in images.

OpenCV provides a function **cv2.filter2D()** to convolve a kernel with an image. As an example, we
will try an averaging filter on an image. A 5x5 averaging filter kernel looks like this:

\f[K = \frac{1}{25} \begin{bmatrix} 1 & 1 & 1 & 1 & 1 \\ 1 & 1 & 1 & 1 & 1 \\ 1 & 1 & 1 & 1 & 1 \\ 1 & 1 & 1 & 1 & 1 \\ 1 & 1 & 1 & 1 & 1 \end{bmatrix}\f]

The operation works like this: keep this kernel above a pixel, add up all 25 pixels below the kernel,
take the average, and replace the central pixel with the new average value. This
operation is continued for all the pixels in the image. Try this code and check the result:
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('opencv_logo.png')

kernel = np.ones((5,5),np.float32)/25
dst = cv2.filter2D(img,-1,kernel)

plt.subplot(121),plt.imshow(img),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(dst),plt.title('Averaging')
plt.xticks([]), plt.yticks([])
plt.show()
@endcode
Result:

![image](images/filter.jpg)

Image Blurring (Image Smoothing)
--------------------------------

Image blurring is achieved by convolving the image with a low-pass filter kernel. It is useful for
removing noise. It actually removes high-frequency content (eg: noise, edges) from the image, so
edges are blurred a little in this operation. (Well, there are blurring techniques which don't
blur the edges.) OpenCV provides mainly four types of blurring techniques.

### 1. Averaging

This is done by convolving the image with a normalized box filter. It simply takes the average of all
the pixels under the kernel area and replaces the central element. This is done by the function
**cv2.blur()** or **cv2.boxFilter()**. Check the docs for more details about the kernel. We should
specify the width and height of the kernel. A 3x3 normalized box filter looks like this:

\f[K = \frac{1}{9} \begin{bmatrix} 1 & 1 & 1 \\ 1 & 1 & 1 \\ 1 & 1 & 1 \end{bmatrix}\f]

@note If you don't want to use a normalized box filter, use **cv2.boxFilter()** and pass the argument
normalize=False to the function.

Check a sample demo below with a kernel of 5x5 size:
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('opencv_logo.png')

blur = cv2.blur(img,(5,5))

plt.subplot(121),plt.imshow(img),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(blur),plt.title('Blurred')
plt.xticks([]), plt.yticks([])
plt.show()
@endcode
Result:

![image](images/blur.jpg)

### 2. Gaussian Blurring

Here, instead of a box filter, a Gaussian kernel is used. It is done with the function
**cv2.GaussianBlur()**. We should specify the width and height of the kernel, which should be positive
and odd. We also should specify the standard deviation in the X and Y directions, sigmaX and sigmaY
respectively. If only sigmaX is specified, sigmaY is taken to be the same as sigmaX. If both are given as
zeros, they are calculated from the kernel size. Gaussian blurring is highly effective in removing
Gaussian noise from an image.

If you want, you can create a Gaussian kernel yourself with the function **cv2.getGaussianKernel()**.

The above code can be modified for Gaussian blurring:
@code{.py}
blur = cv2.GaussianBlur(img,(5,5),0)
@endcode
Result:

![image](images/gaussian.jpg)
|
||||||
|
|
||||||
|
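The tutorial does not show **cv2.getGaussianKernel()** in action. Below is a minimal sketch (added
here, not part of the original code) that builds the 5x5 kernel explicitly from the 1D coefficients
and compares the result against **cv2.GaussianBlur()**; the two outputs should be essentially
identical:
@code{.py}
import cv2
import numpy as np

img = cv2.imread('opencv_logo.png')

# 1D Gaussian coefficients; a non-positive sigma means it is computed from the kernel size
k = cv2.getGaussianKernel(5, 0)

# outer product of the 1D kernel with itself gives the separable 5x5 2D Gaussian kernel
kernel_2d = np.outer(k, k)

manual  = cv2.filter2D(img, -1, kernel_2d)
builtin = cv2.GaussianBlur(img, (5,5), 0)

# maximum per-pixel difference between the two results (expected to be 0 or 1 from rounding)
print(np.max(np.abs(manual.astype(int) - builtin.astype(int))))
@endcode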
### 3. Median Blurring

Here, the function **cv2.medianBlur()** takes the median of all the pixels under the kernel area,
and the central element is replaced with this median value. This is highly effective against
salt-and-pepper noise in images. The interesting thing is that, in the above filters, the central
element is a newly calculated value, which may be a pixel value in the image or a new value. But in
median blurring, the central element is always replaced by some pixel value in the image, so it
reduces noise effectively. Its kernel size should be a positive odd integer.

In this demo, I added 50% noise to our original image and applied median blur. Check the result:
@code{.py}
median = cv2.medianBlur(img,5)
@endcode
Result:

![image](images/median.jpg)
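The noise-adding step used for this demo is not shown in the tutorial. One way to generate 50%
salt-and-pepper noise before filtering could look like the sketch below (the exact noise used for
the figure above is not specified, so treat this as an assumption):
@code{.py}
import cv2
import numpy as np

img = cv2.imread('opencv_logo.png')

# corrupt roughly half of the pixels: 25% pepper (0) and 25% salt (255)
noisy = img.copy()
mask = np.random.rand(*img.shape[:2])
noisy[mask < 0.25] = 0
noisy[mask > 0.75] = 255

median = cv2.medianBlur(noisy,5)
@endcode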
### 4. Bilateral Filtering

**cv2.bilateralFilter()** is highly effective at noise removal while keeping edges sharp, but the
operation is slower compared to the other filters. We already saw that a Gaussian filter takes a
neighbourhood around the pixel and finds its Gaussian-weighted average. This Gaussian filter is a
function of space alone, that is, only nearby pixels are considered while filtering. It doesn't
consider whether pixels have almost the same intensity or whether a pixel lies on an edge, so it
blurs the edges too, which we don't want.

The bilateral filter also uses a Gaussian filter in space, but adds one more Gaussian filter which
is a function of pixel intensity difference. The Gaussian function of space makes sure only nearby
pixels are considered for blurring, while the Gaussian function of intensity difference makes sure
only those pixels with intensities similar to the central pixel are considered. So it preserves the
edges, since pixels at edges will have large intensity variations.

The sample below shows how to use the bilateral filter (for details on the arguments, visit the
docs).
@code{.py}
blur = cv2.bilateralFilter(img,9,75,75)
@endcode
Result:

![image](images/bilateral.jpg)

See, the texture on the surface is gone, but the edges are still preserved.

Additional Resources
--------------------

-# Details about [bilateral filtering](http://people.csail.mit.edu/sparis/bf_course/)

Exercises
---------
@ -0,0 +1,162 @@

Geometric Transformations of Images {#tutorial_py_geometric_transformations}
===================================

Goals
-----

-   Learn to apply different geometric transformations to images, like translation, rotation,
    affine transformation etc.
-   You will see these functions: **cv2.getPerspectiveTransform**

Transformations
---------------

OpenCV provides two transformation functions, **cv2.warpAffine** and **cv2.warpPerspective**, with
which you can perform all kinds of transformations. **cv2.warpAffine** takes a 2x3 transformation
matrix while **cv2.warpPerspective** takes a 3x3 transformation matrix as input.

### Scaling

Scaling is just resizing of the image. OpenCV comes with a function **cv2.resize()** for this
purpose. The size of the image can be specified manually, or you can specify the scaling factor.
Different interpolation methods are available. Preferable interpolation methods are
**cv2.INTER_AREA** for shrinking and **cv2.INTER_CUBIC** (slow) or **cv2.INTER_LINEAR** for
zooming. By default, the interpolation method used is **cv2.INTER_LINEAR** for all resizing
purposes. You can resize an input image with either of the following methods:
@code{.py}
import cv2
import numpy as np

img = cv2.imread('messi5.jpg')

res = cv2.resize(img,None,fx=2, fy=2, interpolation = cv2.INTER_CUBIC)

#OR

height, width = img.shape[:2]
res = cv2.resize(img,(2*width, 2*height), interpolation = cv2.INTER_CUBIC)
@endcode
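For shrinking, the text above recommends **cv2.INTER_AREA**. A one-line sketch of halving the image
size this way (added here for completeness, reusing the img loaded above):
@code{.py}
# shrink to half the size; INTER_AREA generally gives good results when downscaling
small = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation = cv2.INTER_AREA)
@endcode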
### Translation

Translation is the shifting of an object's location. If you know the shift in the (x,y) direction,
let it be \f$(t_x,t_y)\f$, you can create the transformation matrix \f$\textbf{M}\f$ as follows:

\f[M = \begin{bmatrix} 1 & 0 & t_x \\ 0 & 1 & t_y \end{bmatrix}\f]

You can put it into a Numpy array of type np.float32 and pass it to the **cv2.warpAffine()**
function. See the example below for a shift of (100,50):
@code{.py}
import cv2
import numpy as np

img = cv2.imread('messi5.jpg',0)
rows,cols = img.shape

M = np.float32([[1,0,100],[0,1,50]])
dst = cv2.warpAffine(img,M,(cols,rows))

cv2.imshow('img',dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode
**warning**

The third argument of the **cv2.warpAffine()** function is the size of the output image, which
should be in the form **(width, height)**. Remember width = number of columns, and height = number
of rows.

See the result below:

![image](images/translation.jpg)

### Rotation

Rotation of an image by an angle \f$\theta\f$ is achieved by a transformation matrix of the form

\f[M = \begin{bmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{bmatrix}\f]

But OpenCV provides scaled rotation with an adjustable center of rotation, so that you can rotate
about any location you prefer. The modified transformation matrix is given by

\f[\begin{bmatrix} \alpha & \beta & (1- \alpha ) \cdot center.x - \beta \cdot center.y \\ - \beta & \alpha & \beta \cdot center.x + (1- \alpha ) \cdot center.y \end{bmatrix}\f]

where:

\f[\begin{array}{l} \alpha = scale \cdot \cos \theta , \\ \beta = scale \cdot \sin \theta \end{array}\f]

To find this transformation matrix, OpenCV provides a function, **cv2.getRotationMatrix2D**. Check
the example below, which rotates the image by 90 degrees about its center without any scaling:
@code{.py}
img = cv2.imread('messi5.jpg',0)
rows,cols = img.shape

M = cv2.getRotationMatrix2D((cols/2,rows/2),90,1)
dst = cv2.warpAffine(img,M,(cols,rows))
@endcode
See the result:

![image](images/rotation.jpg)

### Affine Transformation

In an affine transformation, all parallel lines in the original image will still be parallel in the
output image. To find the transformation matrix, we need three points from the input image and
their corresponding locations in the output image. Then **cv2.getAffineTransform** will create a
2x3 matrix which is to be passed to **cv2.warpAffine**.

Check the example below, and also look at the points I selected (which are marked in green):
@code{.py}
from matplotlib import pyplot as plt

img = cv2.imread('drawing.png')
rows,cols,ch = img.shape

pts1 = np.float32([[50,50],[200,50],[50,200]])
pts2 = np.float32([[10,100],[200,50],[100,250]])

M = cv2.getAffineTransform(pts1,pts2)

dst = cv2.warpAffine(img,M,(cols,rows))

plt.subplot(121),plt.imshow(img),plt.title('Input')
plt.subplot(122),plt.imshow(dst),plt.title('Output')
plt.show()
@endcode
See the result:

![image](images/affine.jpg)

### Perspective Transformation

For a perspective transformation, you need a 3x3 transformation matrix. Straight lines will remain
straight even after the transformation. To find this transformation matrix, you need 4 points on
the input image and the corresponding points on the output image. Among these 4 points, 3 of them
should not be collinear. Then the transformation matrix can be found by the function
**cv2.getPerspectiveTransform**. Then apply **cv2.warpPerspective** with this 3x3 transformation
matrix.

See the code below:
@code{.py}
img = cv2.imread('sudokusmall.png')
rows,cols,ch = img.shape

pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])
pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])

M = cv2.getPerspectiveTransform(pts1,pts2)

dst = cv2.warpPerspective(img,M,(300,300))

plt.subplot(121),plt.imshow(img),plt.title('Input')
plt.subplot(122),plt.imshow(dst),plt.title('Output')
plt.show()
@endcode
Result:

![image](images/perspective.jpg)

Additional Resources
--------------------

-# "Computer Vision: Algorithms and Applications", Richard Szeliski

Exercises
---------
156
doc/py_tutorials/py_imgproc/py_grabcut/py_grabcut.markdown
Normal file
@ -0,0 +1,156 @@
Interactive Foreground Extraction using GrabCut Algorithm {#tutorial_py_grabcut}
=========================================================

Goal
----

In this chapter
-   We will see the GrabCut algorithm to extract the foreground in images
-   We will create an interactive application for this.

Theory
------

The GrabCut algorithm was designed by Carsten Rother, Vladimir Kolmogorov & Andrew Blake from
Microsoft Research Cambridge, UK, in their paper ["GrabCut": interactive foreground extraction
using iterated graph cuts](http://dl.acm.org/citation.cfm?id=1015720). An algorithm was needed for
foreground extraction with minimal user interaction, and the result was GrabCut.

How does it work from the user's point of view? Initially the user draws a rectangle around the
foreground region (the foreground region should be completely inside the rectangle). Then the
algorithm segments it iteratively to get the best result. Done. But in some cases the segmentation
won't be fine; it may have marked some foreground region as background, and vice versa. In that
case, the user needs to do fine touch-ups: just give some strokes on the image where the faulty
results are. A stroke basically says *"Hey, this region should be foreground, you marked it
background, correct it in the next iteration"*, or its opposite for background. Then in the next
iteration, you get better results.

See the image below. First, the player and football are enclosed in a blue rectangle. Then some
final touch-ups with white strokes (denoting foreground) and black strokes (denoting background)
are made. And we get a nice result.

![image](images/grabcut_output1.jpg)

So what happens in the background?

-   The user inputs the rectangle. Everything outside this rectangle will be taken as sure
    background (that is why it was mentioned before that your rectangle should include all the
    objects). Everything inside the rectangle is unknown. Similarly, any user input specifying
    foreground and background is considered hard-labelling, which means it won't change during the
    process.
-   The computer does an initial labelling depending on the data we gave. It labels the foreground
    and background pixels (or it hard-labels them).
-   Now a Gaussian Mixture Model (GMM) is used to model the foreground and background.
-   Depending on the data we gave, the GMM learns and creates a new pixel distribution. That is,
    the unknown pixels are labelled either probable foreground or probable background depending on
    their relation with the other hard-labelled pixels in terms of color statistics (it is just
    like clustering).
-   A graph is built from this pixel distribution. Nodes in the graph are pixels. Two additional
    nodes are added, the **Source node** and the **Sink node**. Every foreground pixel is connected
    to the Source node and every background pixel is connected to the Sink node.
-   The weights of the edges connecting pixels to the source node/sink node are defined by the
    probability of a pixel being foreground/background. The weights between the pixels are defined
    by the edge information or pixel similarity. If there is a large difference in pixel color, the
    edge between them will get a low weight.
-   Then a mincut algorithm is used to segment the graph. It cuts the graph into two, separating
    the source node and the sink node, with a minimum cost function. The cost function is the sum
    of all weights of the edges that are cut. After the cut, all the pixels connected to the Source
    node become foreground and those connected to the Sink node become background.
-   The process is continued until the classification converges.

It is illustrated in the image below (Image Courtesy: <http://www.cs.ru.ac.za/research/g02m1682/>)

![image](images/grabcut_scheme.jpg)

Demo
----

Now we go for the GrabCut algorithm with OpenCV. OpenCV has the function **cv2.grabCut()** for
this. We will see its arguments first:

-   *img* - Input image
-   *mask* - A mask image where we specify which areas are background, foreground or probable
    background/foreground etc. It is done by the following flags, **cv2.GC_BGD, cv2.GC_FGD,
    cv2.GC_PR_BGD, cv2.GC_PR_FGD**, or simply pass 0,1,2,3 to the image.
-   *rect* - The coordinates of a rectangle which includes the foreground object, in the format
    (x,y,w,h)
-   *bgdModel*, *fgdModel* - Arrays used by the algorithm internally. You just create two
    np.float64 type zero arrays of size (1,65).
-   *iterCount* - Number of iterations the algorithm should run.
-   *mode* - It should be **cv2.GC_INIT_WITH_RECT** or **cv2.GC_INIT_WITH_MASK**, or the two
    combined, which decides whether we are drawing the rectangle or the final touch-up strokes.

First let's see the rectangular mode. We load the image and create a mask image of the same shape.
We create *fgdModel* and *bgdModel*. We give the rectangle parameters. It's all straightforward.
Let the algorithm run for 5 iterations. The mode should be *cv2.GC_INIT_WITH_RECT* since we are
using a rectangle. Then run the grabcut. It modifies the mask image. In the new mask image, pixels
will be marked with the four flags denoting background/foreground as specified above. So we modify
the mask such that all 0-pixels and 2-pixels are set to 0 (ie background) and all 1-pixels and
3-pixels are set to 1 (ie foreground pixels). Now our final mask is ready. Just multiply it with
the input image to get the segmented image.
@code{.py}
import numpy as np
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('messi5.jpg')
mask = np.zeros(img.shape[:2],np.uint8)

bgdModel = np.zeros((1,65),np.float64)
fgdModel = np.zeros((1,65),np.float64)

rect = (50,50,450,290)
cv2.grabCut(img,mask,rect,bgdModel,fgdModel,5,cv2.GC_INIT_WITH_RECT)

mask2 = np.where((mask==2)|(mask==0),0,1).astype('uint8')
img = img*mask2[:,:,np.newaxis]

plt.imshow(img),plt.colorbar(),plt.show()
@endcode
See the results below:

![image](images/grabcut_rect.jpg)

Oops, Messi's hair is gone. *Who likes Messi without his hair?* We need to bring it back. So we
will give it a fine touch-up there with 1-pixels (sure foreground). At the same time, some part of
the ground has come into the picture which we don't want, and also part of the logo. We need to
remove them, so there we give some 0-pixel touch-ups (sure background). We then modify our
resulting mask from the previous case as just described.

*What I actually did is, I opened the input image in a paint application and added another layer to
the image. Using the brush tool, I marked the missed foreground (hair, shoes, ball etc.) with white
and the unwanted background (like the logo, ground etc.) with black on this new layer. Then I
filled the remaining background with gray. Then I loaded that mask image in OpenCV and edited the
original mask we got, using the corresponding values in the newly added mask image. Check the code
below:*
@code{.py}
# newmask is the mask image I manually labelled
newmask = cv2.imread('newmask.png',0)

# wherever it is marked white (sure foreground), change mask=1
# wherever it is marked black (sure background), change mask=0
mask[newmask == 0] = 0
mask[newmask == 255] = 1

mask, bgdModel, fgdModel = cv2.grabCut(img,mask,None,bgdModel,fgdModel,5,cv2.GC_INIT_WITH_MASK)

mask = np.where((mask==2)|(mask==0),0,1).astype('uint8')
img = img*mask[:,:,np.newaxis]
plt.imshow(img),plt.colorbar(),plt.show()
@endcode
See the result below:

![image](images/grabcut_mask.jpg)

So that's it. Instead of initializing in rect mode, you can also go directly into mask mode: just
mark the rectangle area in the mask image with 2-pixels or 3-pixels (probable background/probable
foreground), then mark the sure foreground with 1-pixels as we did in the second example, and then
directly apply the grabCut function in mask mode.
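A minimal sketch of that mask-mode-only initialization (not part of the original tutorial; the
rectangle coordinates are reused from the example above as an assumption):
@code{.py}
import numpy as np
import cv2

img = cv2.imread('messi5.jpg')

# everything starts as sure background (cv2.GC_BGD == 0), then the rectangle region
# (x,y,w,h) = (50,50,450,290) is marked as probable foreground
mask = np.zeros(img.shape[:2], np.uint8)
mask[50:340, 50:500] = cv2.GC_PR_FGD

# optionally mark known strokes from a hand-drawn mask, as in the second example:
# newmask = cv2.imread('newmask.png',0)
# mask[newmask == 255] = cv2.GC_FGD

bgdModel = np.zeros((1,65), np.float64)
fgdModel = np.zeros((1,65), np.float64)

cv2.grabCut(img, mask, None, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_MASK)

mask2 = np.where((mask==2)|(mask==0), 0, 1).astype('uint8')
result = img*mask2[:,:,np.newaxis]
@endcode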
Additional Resources
--------------------

Exercises
---------

-#  OpenCV samples contain a sample grabcut.py, which is an interactive tool using GrabCut. Check
    it out, and also watch this [youtube video](http://www.youtube.com/watch?v=kAwxLTDDAwU) on how
    to use it.
-#  Here, you can turn this into an interactive sample: draw the rectangle and the strokes with the
    mouse, create a trackbar to adjust the stroke width, etc.
@ -36,7 +36,7 @@ So what happens in background ?

It is illustrated in below image (Image Courtesy: http://www.cs.ru.ac.za/research/g02m1682/)

.. image:: images/grabcut.jpg
.. image:: images/grabcut_scheme.jpg
    :alt: Simplified Diagram of GrabCut Algorithm
    :align: center
109
doc/py_tutorials/py_imgproc/py_gradients/py_gradients.markdown
Normal file
@ -0,0 +1,109 @@
Image Gradients {#tutorial_py_gradients}
===============

Goal
----

In this chapter, we will learn to:

-   Find image gradients, edges etc.
-   We will see the following functions : **cv2.Sobel()**, **cv2.Scharr()**, **cv2.Laplacian()**
    etc.

Theory
------

OpenCV provides three types of gradient filters, or high-pass filters: Sobel, Scharr and Laplacian.
We will see each one of them.

### 1. Sobel and Scharr Derivatives

The Sobel operator is a joint Gaussian smoothing plus differentiation operation, so it is more
resistant to noise. You can specify the direction of the derivatives to be taken, vertical or
horizontal (by the arguments yorder and xorder respectively). You can also specify the size of the
kernel with the argument ksize. If ksize = -1, a 3x3 Scharr filter is used, which gives better
results than a 3x3 Sobel filter. Please see the docs for the kernels used.
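As a short illustration (added here, not part of the original text), the Scharr x-derivative can be
requested in either of these two ways, which should produce the same result:
@code{.py}
import cv2

img = cv2.imread('dave.jpg',0)

# 3x3 Scharr x-derivative, requested through Sobel with ksize = -1 ...
scharrx = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=-1)

# ... or through the dedicated function
scharrx2 = cv2.Scharr(img,cv2.CV_64F,1,0)
@endcode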
### 2. Laplacian Derivatives

It calculates the Laplacian of the image given by the relation
\f$\Delta src = \frac{\partial ^2{src}}{\partial x^2} + \frac{\partial ^2{src}}{\partial y^2}\f$, where
each derivative is found using Sobel derivatives. If ksize = 1, then the following kernel is used
for filtering:

\f[kernel = \begin{bmatrix} 0 & 1 & 0 \\ 1 & -4 & 1 \\ 0 & 1 & 0 \end{bmatrix}\f]

Code
----

The code below shows all the operators in a single diagram. All kernels are of 5x5 size. The depth
of the output image is set to cv2.CV_64F (see the next section for why).
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('dave.jpg',0)

laplacian = cv2.Laplacian(img,cv2.CV_64F)
sobelx = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5)
sobely = cv2.Sobel(img,cv2.CV_64F,0,1,ksize=5)

plt.subplot(2,2,1),plt.imshow(img,cmap = 'gray')
plt.title('Original'), plt.xticks([]), plt.yticks([])
plt.subplot(2,2,2),plt.imshow(laplacian,cmap = 'gray')
plt.title('Laplacian'), plt.xticks([]), plt.yticks([])
plt.subplot(2,2,3),plt.imshow(sobelx,cmap = 'gray')
plt.title('Sobel X'), plt.xticks([]), plt.yticks([])
plt.subplot(2,2,4),plt.imshow(sobely,cmap = 'gray')
plt.title('Sobel Y'), plt.xticks([]), plt.yticks([])

plt.show()
@endcode
Result:

![image](images/gradients.jpg)

One Important Matter!
---------------------

In our last example, the output datatype was cv2.CV_64F. Suppose instead we keep the output
datatype as cv2.CV_8U or np.uint8. There is a slight problem with that. A black-to-white transition
is taken as a positive slope (it has a positive value) while a white-to-black transition is taken
as a negative slope (it has a negative value). So when you convert the data to np.uint8, all
negative slopes are made zero. In simple words, you miss that edge.

If you want to detect both edges, the better option is to keep the output datatype in some higher
form, like cv2.CV_16S, cv2.CV_64F etc., take its absolute value and then convert back to cv2.CV_8U.
The code below demonstrates this procedure for a horizontal Sobel filter and the difference in
results.
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('box.png',0)

# Output dtype = cv2.CV_8U
sobelx8u = cv2.Sobel(img,cv2.CV_8U,1,0,ksize=5)

# Output dtype = cv2.CV_64F. Then take its absolute and convert to cv2.CV_8U
sobelx64f = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5)
abs_sobel64f = np.absolute(sobelx64f)
sobel_8u = np.uint8(abs_sobel64f)

plt.subplot(1,3,1),plt.imshow(img,cmap = 'gray')
plt.title('Original'), plt.xticks([]), plt.yticks([])
plt.subplot(1,3,2),plt.imshow(sobelx8u,cmap = 'gray')
plt.title('Sobel CV_8U'), plt.xticks([]), plt.yticks([])
plt.subplot(1,3,3),plt.imshow(sobel_8u,cmap = 'gray')
plt.title('Sobel abs(CV_64F)'), plt.xticks([]), plt.yticks([])

plt.show()
@endcode
Check the result below:

![image](images/double_edge.jpg)

Additional Resources
--------------------

Exercises
---------
@ -0,0 +1,130 @@

Histograms - 3 : 2D Histograms {#tutorial_py_2d_histogram}
==============================

Goal
----

In this chapter, we will learn to find and plot 2D histograms. It will be helpful in the coming
chapters.

Introduction
------------

In the first article, we calculated and plotted a one-dimensional histogram. It is called
one-dimensional because we take only one feature into consideration, ie the grayscale intensity
value of each pixel. But in two-dimensional histograms, you consider two features. Normally it is
used for finding color histograms, where the two features are the Hue & Saturation values of every
pixel.

There is already a [python sample in the official
samples](https://github.com/Itseez/opencv/blob/master/samples/python2/color_histogram.py) for
finding color histograms. We will try to understand how to create such a color histogram, and it
will be useful in understanding further topics like Histogram Back-Projection.

2D Histogram in OpenCV
----------------------

It is quite simple and calculated using the same function, **cv2.calcHist()**. For color
histograms, we need to convert the image from BGR to HSV. (Remember, for a 1D histogram, we
converted from BGR to grayscale.) For 2D histograms, its parameters are modified as follows:

-   **channels = [0,1]** *because we need to process both the H and S planes.*
-   **bins = [180,256]** *180 for the H plane and 256 for the S plane.*
-   **range = [0,180,0,256]** *Hue values lie between 0 and 180 & Saturation lies between 0 and
    256.*

Now check the code below:
@code{.py}
import cv2
import numpy as np

img = cv2.imread('home.jpg')
hsv = cv2.cvtColor(img,cv2.COLOR_BGR2HSV)

hist = cv2.calcHist([hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
@endcode
That's it.

2D Histogram in Numpy
---------------------

Numpy also provides a specific function for this : **np.histogram2d()**. (Remember, for the 1D
histogram we used **np.histogram()**.)
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('home.jpg')
hsv = cv2.cvtColor(img,cv2.COLOR_BGR2HSV)

# split out the H and S planes (needed to make this snippet runnable)
h, s, v = cv2.split(hsv)

hist, xbins, ybins = np.histogram2d(h.ravel(),s.ravel(),[180,256],[[0,180],[0,256]])
@endcode
The first argument is the H plane, the second one is the S plane, the third is the number of bins
for each, and the fourth is their range.

Now we can check how to plot this color histogram.

Plotting 2D Histograms
----------------------

### Method - 1 : Using cv2.imshow()

The result we get is a two-dimensional array of size 180x256. So we can show it as we normally do,
using the cv2.imshow() function. It will be a grayscale image, and it won't give much idea of what
colors are there, unless you know the Hue values of the different colors.
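A minimal sketch of that display route, continuing from the calcHist snippet above (scaling the
float counts into an 8-bit image is an extra step added here, not in the original text):
@code{.py}
# scale the bin counts to 0-255 so the histogram can be shown as an 8-bit image
cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX)
hist_img = np.uint8(hist)

cv2.imshow('2D Histogram', hist_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode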
### Method - 2 : Using Matplotlib

We can use the **matplotlib.pyplot.imshow()** function to plot the 2D histogram with different
color maps. It gives us a much better idea about the different pixel densities. But this also
doesn't give us an idea of what color is there at first glance, unless you know the Hue values of
the different colors. Still, I prefer this method. It is simple and better.

@note While using this function, remember that the interpolation flag should be nearest for better
results.

Consider the code:
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('home.jpg')
hsv = cv2.cvtColor(img,cv2.COLOR_BGR2HSV)
hist = cv2.calcHist( [hsv], [0, 1], None, [180, 256], [0, 180, 0, 256] )

plt.imshow(hist,interpolation = 'nearest')
plt.show()
@endcode
Below is the input image and its color histogram plot. The X axis shows S values and the Y axis
shows Hue.

![image](images/2dhist_matplotlib.jpg)

In the histogram, you can see some high values near H = 100 and S = 200. It corresponds to the blue
of the sky. Similarly, another peak can be seen near H = 25 and S = 100. It corresponds to the
yellow of the palace. You can verify it with any image editing tool like GIMP.

### Method 3 : OpenCV sample style !!

There is a [sample code for color-histogram in the OpenCV-Python2
samples](https://github.com/Itseez/opencv/blob/master/samples/python2/color_histogram.py). If you
run the code, you can see the histogram shows the corresponding colors as well; in other words, it
outputs a color-coded histogram. Its result is very good (although you need to add an extra bunch
of lines).

In that code, the author created a color map in HSV and then converted it into BGR. The resulting
histogram image is multiplied with this color map. He also uses some preprocessing steps to remove
small isolated pixels, resulting in a good histogram.

I leave it to the readers to run the code, analyze it and have your own hacks around it. Below is
the output of that code for the same image as above:

![image](images/2dhist_opencv.jpg)

You can clearly see in the histogram what colors are present: blue is there, yellow is there, and
some white due to the chessboard is there. Nice !!!

Additional Resources
--------------------

Exercises
---------
@ -0,0 +1,125 @@

Histogram - 4 : Histogram Backprojection {#tutorial_py_histogram_backprojection}
========================================

Goal
----

In this chapter, we will learn about histogram backprojection.

Theory
------

It was proposed by **Michael J. Swain and Dana H. Ballard** in their paper **Indexing via color
histograms**.

**What is it actually, in simple words?** It is used for image segmentation or finding objects of
interest in an image. In simple words, it creates an image of the same size (but single channel) as
that of our input image, where each pixel corresponds to the probability of that pixel belonging to
our object. In simpler words, the output image will have our object of interest in more white
compared to the remaining part. Well, that is an intuitive explanation. (I can't make it any
simpler.) Histogram backprojection is used with the camshift algorithm etc.

**How do we do it?** We create a histogram of an image containing our object of interest (in our
case the ground, leaving out the player and other things). The object should fill the image as far
as possible for better results. And a color histogram is preferred over a grayscale histogram,
because the color of the object is a better way to define the object than its grayscale intensity.
We then "back-project" this histogram over our test image where we need to find the object, ie in
other words, we calculate the probability of every pixel belonging to the ground and show it.
Thresholding the resulting output at a proper value gives us the ground alone.

Algorithm in Numpy
------------------

-#  First we need to calculate the color histogram of both the object we need to find (let it be
    'M') and the image where we are going to search (let it be 'I').
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

#roi is the object or region of object we need to find
roi = cv2.imread('rose_red.png')
hsv = cv2.cvtColor(roi,cv2.COLOR_BGR2HSV)

#target is the image we search in
target = cv2.imread('rose.png')
hsvt = cv2.cvtColor(target,cv2.COLOR_BGR2HSV)

# Find the histograms using calcHist. Can be done with np.histogram2d also
M = cv2.calcHist([hsv],[0, 1], None, [180, 256], [0, 180, 0, 256] )
I = cv2.calcHist([hsvt],[0, 1], None, [180, 256], [0, 180, 0, 256] )
@endcode
2.  Find the ratio \f$R = \frac{M}{I}\f$. Then backproject R, ie use R as a palette and create a
    new image with every pixel as its corresponding probability of being the target, ie
    B(x,y) = R[h(x,y),s(x,y)] where h is hue and s is saturation of the pixel at (x,y). After that
    apply the condition \f$B(x,y) = min[B(x,y), 1]\f$.
@code{.py}
# the ratio R = M/I from the formula above (add a small epsilon to I if it has empty bins)
R = M/I

h,s,v = cv2.split(hsvt)
B = R[h.ravel(),s.ravel()]
B = np.minimum(B,1)
B = B.reshape(hsvt.shape[:2])
@endcode
3.  Now apply a convolution with a circular disc, \f$B = D \ast B\f$, where D is the disc kernel.
@code{.py}
disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))
cv2.filter2D(B,-1,disc,B)
B = np.uint8(B)
cv2.normalize(B,B,0,255,cv2.NORM_MINMAX)
@endcode
4.  Now the location of maximum intensity gives us the location of the object. If we are expecting
    a region in the image, thresholding at a suitable value gives a nice result.
@code{.py}
ret,thresh = cv2.threshold(B,50,255,0)
@endcode
That's it !!

Backprojection in OpenCV
------------------------

OpenCV provides an inbuilt function **cv2.calcBackProject()**. Its parameters are almost the same
as those of the **cv2.calcHist()** function. One of its parameters is the histogram of the object,
which we have to find. Also, the object histogram should be normalized before being passed to the
backproject function. It returns the probability image. Then we convolve the image with a disc
kernel and apply a threshold. Below is my code and output :
@code{.py}
import cv2
import numpy as np

roi = cv2.imread('rose_red.png')
hsv = cv2.cvtColor(roi,cv2.COLOR_BGR2HSV)

target = cv2.imread('rose.png')
hsvt = cv2.cvtColor(target,cv2.COLOR_BGR2HSV)

# calculating object histogram
roihist = cv2.calcHist([hsv],[0, 1], None, [180, 256], [0, 180, 0, 256] )

# normalize histogram and apply backprojection
cv2.normalize(roihist,roihist,0,255,cv2.NORM_MINMAX)
dst = cv2.calcBackProject([hsvt],[0,1],roihist,[0,180,0,256],1)

# Now convolute with circular disc
disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))
cv2.filter2D(dst,-1,disc,dst)

# threshold and binary AND
ret,thresh = cv2.threshold(dst,50,255,0)
thresh = cv2.merge((thresh,thresh,thresh))
res = cv2.bitwise_and(target,thresh)

res = np.vstack((target,thresh,res))
cv2.imwrite('res.jpg',res)
@endcode
Below is one example I worked with. I used the region inside the blue rectangle as the sample
object, and I wanted to extract the full ground.

![image](images/backproject_opencv.jpg)

Additional Resources
--------------------

-#  "Indexing via color histograms", Swain, Michael J., Third international conference on computer
    vision, 1990.

Exercises
---------
@ -0,0 +1,199 @@

Histograms - 1 : Find, Plot, Analyze !!! {#tutorial_py_histogram_begins}
========================================

Goal
----

Learn to
-   Find histograms, using both OpenCV and Numpy functions
-   Plot histograms, using OpenCV and Matplotlib functions
-   You will see these functions : **cv2.calcHist()**, **np.histogram()** etc.

Theory
------

So what is a histogram? You can consider a histogram as a graph or plot which gives you an overall
idea about the intensity distribution of an image. It is a plot with pixel values (ranging from 0
to 255, though not always) on the X-axis and the corresponding number of pixels in the image on the
Y-axis.

It is just another way of understanding the image. By looking at the histogram of an image, you get
intuition about the contrast, brightness, intensity distribution etc. of that image. Almost all
image processing tools today provide features based on histograms. Below is an image from the
[Cambridge in Color website](http://www.cambridgeincolour.com/tutorials/histograms1.htm), and I
recommend you visit the site for more details.

![image](images/histogram_sample.jpg)

You can see the image and its histogram. (Remember, this histogram is drawn for a grayscale image,
not a color image.) The left region of the histogram shows the amount of darker pixels in the image
and the right region shows the amount of brighter pixels. From the histogram, you can see the dark
region is larger than the brighter region, and the amount of midtones (pixel values in the
mid-range, say around 127) is very small.

Find Histogram
--------------

Now that we have an idea of what a histogram is, we can look into how to find it. Both OpenCV and
Numpy come with in-built functions for this. Before using those functions, we need to understand
some terminology related to histograms.

**BINS** : The above histogram shows the number of pixels for every pixel value, ie from 0 to 255,
so you need 256 values to show the above histogram. But consider: what if you need not find the
number of pixels for all pixel values separately, but only the number of pixels in each interval of
pixel values? Say, for example, you need to find the number of pixels lying between 0 and 15, then
16 and 31, ..., then 240 and 255. You will need only 16 values to represent the histogram. And that
is what is shown in the example given in the [OpenCV Tutorials on
histograms](http://docs.opencv.org/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.html#histogram-calculation).

So what you do is simply split the whole histogram into 16 sub-parts, and the value of each
sub-part is the sum of all the pixel counts in it. Each such sub-part is called a "BIN". In the
first case, the number of bins was 256 (one for each pixel value) while in the second case, it is
only 16. BINS is represented by the term **histSize** in the OpenCV docs.
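For instance, the 16-bin version of the histogram described above can be requested simply by
changing histSize (a small illustration added here; 'home.jpg' reuses the image from the snippets
below):
@code{.py}
import cv2

img = cv2.imread('home.jpg',0)

# 256 bins: one per pixel value
hist_256 = cv2.calcHist([img],[0],None,[256],[0,256])

# 16 bins: each bin covers an interval of 16 consecutive pixel values
hist_16 = cv2.calcHist([img],[0],None,[16],[0,256])
@endcode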
**DIMS** : It is the number of parameters for which we collect the data. In this case, we collect
data regarding only one thing, the intensity value. So here it is 1.

**RANGE** : It is the range of intensity values you want to measure. Normally, it is [0,256], ie
all intensity values.

### 1. Histogram Calculation in OpenCV

So now we use the **cv2.calcHist()** function to find the histogram. Let's familiarize ourselves
with the function and its parameters :

<center><em>cv2.calcHist(images, channels, mask, histSize, ranges[, hist[, accumulate]])</em></center>

-#  images : the source image, of type uint8 or float32. It should be given in square brackets,
    ie "[img]".
-#  channels : also given in square brackets. It is the index of the channel for which we
    calculate the histogram. For example, if the input is a grayscale image, its value is [0]. For
    a color image, you can pass [0], [1] or [2] to calculate the histogram of the blue, green or
    red channel respectively.
-#  mask : mask image. To find the histogram of the full image, it is given as "None". But if you
    want to find the histogram of a particular region of the image, you have to create a mask
    image for that and give it as the mask. (I will show an example later.)
-#  histSize : this represents our BIN count. It needs to be given in square brackets. For full
    scale, we pass [256].
-#  ranges : this is our RANGE. Normally, it is [0,256].

So let's start with a sample image. Simply load an image in grayscale mode and find its full
histogram.
@code{.py}
img = cv2.imread('home.jpg',0)
hist = cv2.calcHist([img],[0],None,[256],[0,256])
@endcode
hist is a 256x1 array; each value corresponds to the number of pixels in that image with the
corresponding pixel value.

### 2. Histogram Calculation in Numpy

Numpy also provides you a function, **np.histogram()**. So instead of the calcHist() function, you
can try the line below :
@code{.py}
hist,bins = np.histogram(img.ravel(),256,[0,256])
@endcode
hist is the same as we calculated before. But bins will have 257 elements, because Numpy calculates
bins as 0-0.99, 1-1.99, 2-2.99 etc. So the final range would be 255-255.99. To represent that, they
also add 256 at the end of bins. But we don't need that 256; up to 255 is sufficient.

@sa Numpy has another function, **np.bincount()**, which is much faster (around 10X) than
np.histogram(). So for one-dimensional histograms, you can better try that. Don't forget to set
minlength = 256 in np.bincount. For example, hist = np.bincount(img.ravel(),minlength=256)

@note The OpenCV function is faster (around 40X) than np.histogram(). So stick with the OpenCV
function.

Now we should plot the histograms, but how?

Plotting Histograms
-------------------

There are two ways for this,
-#  Short Way : use Matplotlib plotting functions
-#  Long Way : use OpenCV drawing functions

### 1. Using Matplotlib

Matplotlib comes with a histogram plotting function : matplotlib.pyplot.hist()

It directly finds the histogram and plots it. You need not use the calcHist() or np.histogram()
function to find the histogram. See the code below:
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('home.jpg',0)
plt.hist(img.ravel(),256,[0,256]); plt.show()
@endcode
You will get a plot as below :

![image](images/histogram_matplotlib.jpg)

Or you can use the normal plot of matplotlib, which would be good for a BGR plot. For that, you
need to find the histogram data first. Try the code below:
@code{.py}
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('home.jpg')
color = ('b','g','r')
for i,col in enumerate(color):
    histr = cv2.calcHist([img],[i],None,[256],[0,256])
    plt.plot(histr,color = col)
    plt.xlim([0,256])
plt.show()
@endcode
Result:

![image](images/histogram_rgb_plot.jpg)

You can deduce from the above graph that blue has some high-value areas in the image (obviously it
should be due to the sky).

### 2. Using OpenCV

Well, here you adjust the values of the histogram along with its bin values to look like x,y
coordinates, so that you can draw them using the cv2.line() or cv2.polylines() function to generate
the same image as above. This is already available in the OpenCV-Python2 official samples.
[Check the Code](https://github.com/Itseez/opencv/raw/master/samples/python2/hist.py). A minimal
sketch of the idea is given below.
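This sketch only illustrates the approach and is not the official sample; the canvas size and
scaling are chosen here for simplicity:
@code{.py}
import cv2
import numpy as np

img = cv2.imread('home.jpg',0)
hist = cv2.calcHist([img],[0],None,[256],[0,256])

h, w = 300, 256
canvas = np.zeros((h, w), np.uint8)

# scale the bin counts so the tallest bin spans the canvas height
cv2.normalize(hist, hist, 0, h, cv2.NORM_MINMAX)

# one point per bin: x is the bin index, y is the (flipped) scaled count
pts = np.int32(np.column_stack((np.arange(256), h - hist[:,0])))
cv2.polylines(canvas, [pts.reshape((-1,1,2))], False, 255)

cv2.imshow('histogram', canvas)
cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode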
Application of Mask
-------------------

We used cv2.calcHist() to find the histogram of the full image. What if you want to find the
histogram of only some regions of an image? Just create a mask image with white color on the region
whose histogram you want to find, and black otherwise. Then pass this as the mask.
@code{.py}
img = cv2.imread('home.jpg',0)

# create a mask
mask = np.zeros(img.shape[:2], np.uint8)
mask[100:300, 100:400] = 255
masked_img = cv2.bitwise_and(img,img,mask = mask)

# Calculate histogram with mask and without mask
# Check third argument for mask
hist_full = cv2.calcHist([img],[0],None,[256],[0,256])
hist_mask = cv2.calcHist([img],[0],mask,[256],[0,256])

plt.subplot(221), plt.imshow(img, 'gray')
plt.subplot(222), plt.imshow(mask,'gray')
plt.subplot(223), plt.imshow(masked_img, 'gray')
plt.subplot(224), plt.plot(hist_full), plt.plot(hist_mask)
plt.xlim([0,256])

plt.show()
@endcode
See the result. In the histogram plot, the blue line shows the histogram of the full image while
the green line shows the histogram of the masked region.

![image](images/histogram_masking.jpg)

Additional Resources
--------------------

-# [Cambridge in Color website](http://www.cambridgeincolour.com/tutorials/histograms1.htm)

Exercises
---------
@ -0,0 +1,153 @@
|
|||||||
|
Histograms - 2: Histogram Equalization {#tutorial_py_histogram_equalization}
|
||||||
|
======================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this section,
|
||||||
|
|
||||||
|
- We will learn the concepts of histogram equalization and use it to improve the contrast of our
|
||||||
|
images.
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
Consider an image whose pixel values are confined to some specific range of values only. For eg,
|
||||||
|
brighter image will have all pixels confined to high values. But a good image will have pixels from
|
||||||
|
all regions of the image. So you need to stretch this histogram to either ends (as given in below
|
||||||
|
image, from wikipedia) and that is what Histogram Equalization does (in simple words). This normally
|
||||||
|
improves the contrast of the image.
|
||||||
|
|
||||||
|
![image](images/histogram_equalization.png)
|
||||||
|
|
||||||
|
I would recommend you to read the wikipedia page on [Histogram
|
||||||
|
Equalization](http://en.wikipedia.org/wiki/Histogram_equalization) for more details about it. It has
|
||||||
|
a very good explanation with worked out examples, so that you would understand almost everything
|
||||||
|
after reading that. Instead, here we will see its Numpy implementation. After that, we will see
|
||||||
|
OpenCV function.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('wiki.jpg',0)
|
||||||
|
|
||||||
|
hist,bins = np.histogram(img.flatten(),256,[0,256])
|
||||||
|
|
||||||
|
cdf = hist.cumsum()
|
||||||
|
cdf_normalized = cdf * hist.max()/ cdf.max()
|
||||||
|
|
||||||
|
plt.plot(cdf_normalized, color = 'b')
|
||||||
|
plt.hist(img.flatten(),256,[0,256], color = 'r')
|
||||||
|
plt.xlim([0,256])
|
||||||
|
plt.legend(('cdf','histogram'), loc = 'upper left')
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
![image](images/histeq_numpy1.jpg)
|
||||||
|
|
||||||
|
You can see histogram lies in brighter region. We need the full spectrum. For that, we need a
|
||||||
|
transformation function which maps the input pixels in brighter region to output pixels in full
|
||||||
|
region. That is what histogram equalization does.
|
||||||
|
|
||||||
|
Now we find the minimum histogram value (excluding 0) and apply the histogram equalization equation
|
||||||
|
as given in wiki page. But I have used here, the masked array concept array from Numpy. For masked
|
||||||
|
array, all operations are performed on non-masked elements. You can read more about it from Numpy
|
||||||
|
docs on masked arrays.
|
||||||
|
@code{.py}
|
||||||
|
cdf_m = np.ma.masked_equal(cdf,0)
|
||||||
|
cdf_m = (cdf_m - cdf_m.min())*255/(cdf_m.max()-cdf_m.min())
|
||||||
|
cdf = np.ma.filled(cdf_m,0).astype('uint8')
|
||||||
|
@endcode
|
||||||
|
Now we have the look-up table that gives us the information on what is the output pixel value for
|
||||||
|
every input pixel value. So we just apply the transform.
|
||||||
|
@code{.py}
|
||||||
|
img2 = cdf[img]
|
||||||
|
@endcode
|
||||||
|
Now we calculate its histogram and cdf as before ( you do it) and result looks like below :
|
||||||
|
|
||||||
|
![image](images/histeq_numpy2.jpg)
|
||||||
|
|
||||||
|
Another important feature is that, even if the image was a darker image (instead of a brighter one
|
||||||
|
we used), after equalization we will get almost the same image as we got. As a result, this is used
|
||||||
|
as a "reference tool" to make all images with same lighting conditions. This is useful in many
|
||||||
|
cases. For example, in face recognition, before training the face data, the images of faces are
|
||||||
|
histogram equalized to make them all with same lighting conditions.
|
||||||
|
|
||||||
|
Histograms Equalization in OpenCV
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
OpenCV has a function to do this, **cv2.equalizeHist()**. Its input is just grayscale image and
|
||||||
|
output is our histogram equalized image.
|
||||||
|
|
||||||
|
Below is a simple code snippet showing its usage for same image we used :
|
||||||
|
@code{.py}
|
||||||
|
img = cv2.imread('wiki.jpg',0)
|
||||||
|
equ = cv2.equalizeHist(img)
|
||||||
|
res = np.hstack((img,equ)) #stacking images side-by-side
|
||||||
|
cv2.imwrite('res.png',res)
|
||||||
|
@endcode
|
||||||
|
![image](images/equalization_opencv.jpg)
|
||||||
|
|
||||||
|
So now you can take different images with different light conditions, equalize it and check the
|
||||||
|
results.
|
||||||
|
|
||||||
|
Histogram equalization is good when histogram of the image is confined to a particular region. It
|
||||||
|
won't work good in places where there is large intensity variations where histogram covers a large
|
||||||
|
region, ie both bright and dark pixels are present. Please check the SOF links in Additional
|
||||||
|
Resources.
|
||||||
|
|
||||||
|
CLAHE (Contrast Limited Adaptive Histogram Equalization)
|
||||||
|
--------------------------------------------------------
|
||||||
|
|
||||||
|
The first histogram equalization we just saw, considers the global contrast of the image. In many
|
||||||
|
cases, it is not a good idea. For example, below image shows an input image and its result after
|
||||||
|
global histogram equalization.
|
||||||
|
|
||||||
|
![image](images/clahe_1.jpg)
|
||||||
|
|
||||||
|
It is true that the background contrast has improved after histogram equalization. But compare the
|
||||||
|
face of statue in both images. We lost most of the information there due to over-brightness. It is
|
||||||
|
because its histogram is not confined to a particular region as we saw in previous cases (Try to
|
||||||
|
plot histogram of input image, you will get more intuition).
|
||||||
|
|
||||||
|
So to solve this problem, **adaptive histogram equalization** is used. In this, image is divided
|
||||||
|
into small blocks called "tiles" (tileSize is 8x8 by default in OpenCV). Then each of these blocks
|
||||||
|
are histogram equalized as usual. So in a small area, histogram would confine to a small region
|
||||||
|
(unless there is noise). If noise is there, it will be amplified. To avoid this, **contrast
|
||||||
|
limiting** is applied. If any histogram bin is above the specified contrast limit (by default 40 in
|
||||||
|
OpenCV), those pixels are clipped and distributed uniformly to other bins before applying histogram
|
||||||
|
equalization. After equalization, to remove artifacts in tile borders, bilinear interpolation is
|
||||||
|
applied.
|
||||||
|
|
||||||
|
Below code snippet shows how to apply CLAHE in OpenCV:
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
img = cv2.imread('tsukuba_l.png',0)
|
||||||
|
|
||||||
|
# create a CLAHE object (Arguments are optional).
|
||||||
|
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
|
||||||
|
cl1 = clahe.apply(img)
|
||||||
|
|
||||||
|
cv2.imwrite('clahe_2.jpg',cl1)
|
||||||
|
@endcode
|
||||||
|
See the result below and compare it with results above, especially the statue region:
|
||||||
|
|
||||||
|
![image](images/clahe_2.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# Wikipedia page on [Histogram Equalization](http://en.wikipedia.org/wiki/Histogram_equalization)
-# [Masked Arrays in Numpy](http://docs.scipy.org/doc/numpy/reference/maskedarray.html)
|
||||||
|
|
||||||
|
Also check these SOF questions regarding contrast adjustment:
|
||||||
|
|
||||||
|
-# [How can I adjust contrast in OpenCV in
|
||||||
|
C?](http://stackoverflow.com/questions/10549245/how-can-i-adjust-contrast-in-opencv-in-c)
|
||||||
|
-# [How do I equalize contrast & brightness of images using
|
||||||
|
opencv?](http://stackoverflow.com/questions/10561222/how-do-i-equalize-contrast-brightness-of-images-using-opencv)
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
@ -0,0 +1,18 @@
|
|||||||
|
Histograms in OpenCV {#tutorial_py_table_of_contents_histograms}
|
||||||
|
====================
|
||||||
|
|
||||||
|
- @subpage tutorial_py_histogram_begins

    Learn to find, plot and analyze histograms
|
||||||
|
|
||||||
|
- @subpage tutorial_py_histogram_equalization
|
||||||
|
|
||||||
|
Learn to Equalize Histograms to get better contrast for images
|
||||||
|
|
||||||
|
- @subpage tutorial_py_2d_histogram
|
||||||
|
|
||||||
|
Learn to find and plot 2D Histograms
|
||||||
|
|
||||||
|
- @subpage tutorial_py_histogram_backprojection
|
||||||
|
|
||||||
|
Learn histogram backprojection to segment colored objects
|
@ -0,0 +1,52 @@
|
|||||||
|
Hough Circle Transform {#tutorial_py_houghcircles}
|
||||||
|
======================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
- We will learn to use Hough Transform to find circles in an image.
|
||||||
|
- We will see these functions: **cv2.HoughCircles()**
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
A circle is represented mathematically as \f$(x-x_{center})^2 + (y - y_{center})^2 = r^2\f$ where
|
||||||
|
\f$(x_{center},y_{center})\f$ is the center of the circle, and \f$r\f$ is its radius. From the
equation, we can see we have 3 parameters, so we would need a 3D accumulator for the Hough
transform, which would be highly inefficient. So OpenCV uses a trickier method, the **Hough
Gradient Method**, which uses the gradient information of the edges.
|
||||||
|
|
||||||
|
The function we use here is **cv2.HoughCircles()**. It has plenty of arguments which are well
|
||||||
|
explained in the documentation. So we directly go to the code.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
img = cv2.imread('opencv_logo.png',0)
|
||||||
|
img = cv2.medianBlur(img,5)
|
||||||
|
cimg = cv2.cvtColor(img,cv2.COLOR_GRAY2BGR)
|
||||||
|
|
||||||
|
circles = cv2.HoughCircles(img,cv2.HOUGH_GRADIENT,1,20,
|
||||||
|
param1=50,param2=30,minRadius=0,maxRadius=0)
|
||||||
|
|
||||||
|
circles = np.uint16(np.around(circles))
|
||||||
|
for i in circles[0,:]:
|
||||||
|
# draw the outer circle
|
||||||
|
cv2.circle(cimg,(i[0],i[1]),i[2],(0,255,0),2)
|
||||||
|
# draw the center of the circle
|
||||||
|
cv2.circle(cimg,(i[0],i[1]),2,(0,0,255),3)
|
||||||
|
|
||||||
|
cv2.imshow('detected circles',cimg)
|
||||||
|
cv2.waitKey(0)
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
@endcode
|
||||||
|
Result is shown below:
|
||||||
|
|
||||||
|
![image](images/houghcircles2.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
144
doc/py_tutorials/py_imgproc/py_houghlines/py_houghlines.markdown
Normal file
144
doc/py_tutorials/py_imgproc/py_houghlines/py_houghlines.markdown
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
Hough Line Transform {#tutorial_py_houghlines}
|
||||||
|
====================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
- We will understand the concept of the Hough Transform.
- We will see how to use it to detect lines in an image.
- We will see the following functions: **cv2.HoughLines()**, **cv2.HoughLinesP()**
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
Hough Transform is a popular technique to detect any shape, if you can represent that shape in
|
||||||
|
mathematical form. It can detect the shape even if it is broken or distorted a little bit. We will
|
||||||
|
see how it works for a line.
|
||||||
|
|
||||||
|
A line can be represented as \f$y = mx+c\f$ or in parametric form, as
|
||||||
|
\f$\rho = x \cos \theta + y \sin \theta\f$ where \f$\rho\f$ is the perpendicular distance from the origin to
the line, and \f$\theta\f$ is the angle formed by this perpendicular line and the horizontal axis,
measured counter-clockwise (that direction varies with how you represent the coordinate system; this
representation is used in OpenCV). Check the image below:
|
||||||
|
|
||||||
|
![image](images/houghlines1.svg)
|
||||||
|
|
||||||
|
So if the line is passing below the origin, it will have a positive rho and an angle less than 180.
If it is going above the origin, instead of taking an angle greater than 180, the angle is taken
less than 180 and rho is taken negative. Any vertical line will have 0 degrees and horizontal lines
will have 90 degrees.
|
||||||
|
|
||||||
|
Now let's see how the Hough Transform works for lines. Any line can be represented in these two
terms, \f$(\rho, \theta)\f$. So first it creates a 2D array or accumulator (to hold the values of the
two parameters), initially set to 0. Let the rows denote the \f$\rho\f$ and the columns denote the
\f$\theta\f$. The size of the array depends on the accuracy you need. Suppose you want the accuracy of
angles to be 1 degree, then you need 180 columns. For \f$\rho\f$, the maximum distance possible is the
diagonal length of the image, so taking one-pixel accuracy, the number of rows can be the diagonal
length of the image.
|
||||||
|
|
||||||
|
Consider a 100x100 image with a horizontal line at the middle. Take the first point of the line.
You know its (x,y) values. Now in the line equation, put the values \f$\theta = 0,1,2,....,180\f$ and
check the \f$\rho\f$ you get. For every \f$(\rho, \theta)\f$ pair, you increment the value by one in the
accumulator in its corresponding \f$(\rho, \theta)\f$ cell. So now in the accumulator, the cell (50,90)
= 1, along with some other cells.
|
||||||
|
|
||||||
|
Now take the second point on the line. Do the same as above. Increment the values in the cells
corresponding to the `(rho, theta)` you got. This time, the cell (50,90) = 2. What you actually do
is vote up the \f$(\rho, \theta)\f$ values. You continue this process for every point on the line. At
each point, the cell (50,90) will be incremented or voted up, while other cells may or may not be
voted up. This way, at the end, the cell (50,90) will have the maximum votes. So if you search the
accumulator for the maximum votes, you get the value (50,90), which says there is a line in this
image at a distance of 50 from the origin and at an angle of 90 degrees. It is shown well in the
animation below (Image Courtesy: [Amos Storkey](http://homepages.inf.ed.ac.uk/amos/hough.html)).
|
||||||
|
|
||||||
|
![](images/houghlinesdemo.gif)
|
||||||
|
|
||||||
|
This is how the Hough Transform for lines works. It is simple, and maybe you can implement it using
Numpy on your own (a minimal sketch is given after the image below). Below is an image which shows
the accumulator. Bright spots at some locations denote the parameters of possible lines in the
image. (Image courtesy: [Wikipedia](http://en.wikipedia.org/wiki/Hough_transform))
|
||||||
|
|
||||||
|
![](images/houghlines2.jpg)
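
As a rough illustration of the voting scheme described above, here is a minimal NumPy sketch (not
OpenCV's implementation). It assumes a binary edge image `edges` whose non-zero pixels are edge
points, and uses illustrative bin sizes of 1 pixel for \f$\rho\f$ and 1 degree for \f$\theta\f$; the
function name is made up for this example.
@code{.py}
import numpy as np

def hough_line_accumulator(edges):
    # edges: 2D binary array, non-zero pixels are edge points
    h, w = edges.shape
    diag = int(np.ceil(np.hypot(h, w)))               # largest possible |rho|
    thetas = np.deg2rad(np.arange(180))               # 180 columns, 1 degree apart
    acc = np.zeros((2*diag, len(thetas)), np.int32)   # rows cover rho in [-diag, diag)
    ys, xs = np.nonzero(edges)
    for x, y in zip(xs, ys):
        # one vote per (rho, theta) cell for this edge point
        rhos = np.round(x*np.cos(thetas) + y*np.sin(thetas)).astype(int)
        acc[rhos + diag, np.arange(len(thetas))] += 1
    return acc, thetas

# Cells with the most votes correspond to the most likely lines, e.g.:
# rho_bin, theta_bin = np.unravel_index(np.argmax(acc), acc.shape)
@endcode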
|
||||||
|
|
||||||
|
Hough Transform in OpenCV
-------------------------
|
||||||
|
|
||||||
|
Everything explained above is encapsulated in the OpenCV function **cv2.HoughLines()**. It simply
returns an array of \f$(\rho, \theta)\f$ values, where \f$\rho\f$ is measured in pixels and \f$\theta\f$ is
measured in radians. The first parameter, the input image, should be a binary image, so apply a
threshold or use Canny edge detection before applying the Hough transform. The second and third
parameters are the \f$\rho\f$ and \f$\theta\f$ accuracies respectively. The fourth argument is the
threshold, which is the minimum vote a cell should get for it to be considered as a line. Remember,
the number of votes depends on the number of points on the line, so it represents the minimum
length of line that should be detected.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
img = cv2.imread('dave.jpg')
|
||||||
|
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
|
||||||
|
edges = cv2.Canny(gray,50,150,apertureSize = 3)
|
||||||
|
|
||||||
|
lines = cv2.HoughLines(edges,1,np.pi/180,200)
for line in lines:
    rho,theta = line[0]
|
||||||
|
a = np.cos(theta)
|
||||||
|
b = np.sin(theta)
|
||||||
|
x0 = a*rho
|
||||||
|
y0 = b*rho
|
||||||
|
x1 = int(x0 + 1000*(-b))
|
||||||
|
y1 = int(y0 + 1000*(a))
|
||||||
|
x2 = int(x0 - 1000*(-b))
|
||||||
|
y2 = int(y0 - 1000*(a))
|
||||||
|
|
||||||
|
cv2.line(img,(x1,y1),(x2,y2),(0,0,255),2)
|
||||||
|
|
||||||
|
cv2.imwrite('houghlines3.jpg',img)
|
||||||
|
@endcode
|
||||||
|
Check the results below:
|
||||||
|
|
||||||
|
![image](images/houghlines3.jpg)
|
||||||
|
|
||||||
|
Probabilistic Hough Transform
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
In the Hough Transform, you can see that even for a line with just two parameters, it takes a lot
of computation. The Probabilistic Hough Transform is an optimization of the Hough Transform we saw.
It doesn't take all the points into consideration; instead, it takes only a random subset of points,
which is sufficient for line detection. We just have to decrease the threshold accordingly. See the
image below, which compares the Hough Transform and the Probabilistic Hough Transform in Hough
space. (Image Courtesy: [Franck Bettinger's home page](http://phdfb1.free.fr/robot/mscthesis/node14.html))
|
||||||
|
|
||||||
|
![image](images/houghlines4.png)
|
||||||
|
|
||||||
|
The OpenCV implementation is based on Robust Detection of Lines Using the Progressive Probabilistic
Hough Transform by Matas, J., Galambos, C. and Kittler, J.V. The function used is
**cv2.HoughLinesP()**. It has two new arguments.
|
||||||
|
- **minLineLength** - Minimum length of line. Line segments shorter than this are rejected.
|
||||||
|
- **maxLineGap** - Maximum allowed gap between line segments to treat them as single line.
|
||||||
|
|
||||||
|
The best thing is that it directly returns the two endpoints of the lines. In the previous case,
you got only the parameters of the lines, and you had to find all the points yourself. Here,
everything is direct and simple.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
img = cv2.imread('dave.jpg')
|
||||||
|
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
|
||||||
|
edges = cv2.Canny(gray,50,150,apertureSize = 3)
|
||||||
|
minLineLength = 100
|
||||||
|
maxLineGap = 10
|
||||||
|
lines = cv2.HoughLinesP(edges,1,np.pi/180,100,minLineLength=minLineLength,maxLineGap=maxLineGap)
for line in lines:
    x1,y1,x2,y2 = line[0]
    cv2.line(img,(x1,y1),(x2,y2),(0,255,0),2)
|
||||||
|
|
||||||
|
cv2.imwrite('houghlines5.jpg',img)
|
||||||
|
@endcode
|
||||||
|
See the results below:
|
||||||
|
|
||||||
|
![image](images/houghlines5.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# [Hough Transform on Wikipedia](http://en.wikipedia.org/wiki/Hough_transform)
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
@ -0,0 +1,156 @@
|
|||||||
|
Morphological Transformations {#tutorial_py_morphological_ops}
|
||||||
|
=============================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
- We will learn different morphological operations like Erosion, Dilation, Opening, Closing
|
||||||
|
etc.
|
||||||
|
- We will see different functions like : **cv2.erode()**, **cv2.dilate()**,
|
||||||
|
**cv2.morphologyEx()** etc.
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
Morphological transformations are some simple operations based on the image shape. They are
normally performed on binary images. They need two inputs: one is our original image, the second is
called a **structuring element** or **kernel**, which decides the nature of the operation. Two basic
morphological operators are Erosion and Dilation. Then their variant forms like Opening, Closing,
Gradient etc. also come into play. We will see them one by one with the help of the following image:
|
||||||
|
|
||||||
|
![image](images/j.png)
|
||||||
|
|
||||||
|
### 1. Erosion
|
||||||
|
|
||||||
|
The basic idea of erosion is just like soil erosion: it erodes away the boundaries of the
foreground object (always try to keep the foreground in white). So what does it do? The kernel
slides through the image (as in 2D convolution). A pixel in the original image (either 1 or 0) will
be considered 1 only if all the pixels under the kernel are 1; otherwise it is eroded (made zero).
|
||||||
|
|
||||||
|
So what happens is that all the pixels near the boundary will be discarded, depending on the size
of the kernel. So the thickness or size of the foreground object decreases, or simply the white
region decreases in the image. It is useful for removing small white noise (as we have seen in the
colorspace chapter), detaching two connected objects, etc.
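
Just to make the rule above concrete, here is a tiny NumPy sketch of binary erosion with a 3x3
all-ones kernel (illustration only; the function name is made up, and in practice you would use
**cv2.erode()** as shown below):
@code{.py}
import numpy as np

def erode_binary(img):
    # img: 2D array of 0s and 1s; an output pixel is 1 only if the whole
    # 3x3 neighbourhood under the kernel is 1
    out = np.zeros_like(img)
    padded = np.pad(img, 1, mode='constant')   # zero border, so object boundaries erode away
    for y in range(img.shape[0]):
        for x in range(img.shape[1]):
            out[y, x] = padded[y:y+3, x:x+3].all()
    return out
@endcode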
|
||||||
|
|
||||||
|
Here, as an example, I will use a 5x5 kernel full of ones. Let's see how it works:
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
img = cv2.imread('j.png',0)
|
||||||
|
kernel = np.ones((5,5),np.uint8)
|
||||||
|
erosion = cv2.erode(img,kernel,iterations = 1)
|
||||||
|
@endcode
|
||||||
|
Result:
|
||||||
|
|
||||||
|
![image](images/erosion.png)
|
||||||
|
|
||||||
|
### 2. Dilation
|
||||||
|
|
||||||
|
It is just the opposite of erosion. Here, a pixel element is '1' if at least one pixel under the
kernel is '1'. So it increases the white region in the image, or the size of the foreground object
increases. Normally, in cases like noise removal, erosion is followed by dilation. This is because
erosion removes the white noise, but it also shrinks our object. So we dilate it. Since the noise is
gone, it won't come back, but our object area increases. Dilation is also useful in joining broken
parts of an object.
|
||||||
|
@code{.py}
|
||||||
|
dilation = cv2.dilate(img,kernel,iterations = 1)
|
||||||
|
@endcode
|
||||||
|
Result:
|
||||||
|
|
||||||
|
![image](images/dilation.png)
|
||||||
|
|
||||||
|
### 3. Opening
|
||||||
|
|
||||||
|
Opening is just another name for **erosion followed by dilation**. It is useful in removing noise,
as we explained above. Here we use the function **cv2.morphologyEx()**.
|
||||||
|
@code{.py}
|
||||||
|
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
|
||||||
|
@endcode
|
||||||
|
Result:
|
||||||
|
|
||||||
|
![image](images/opening.png)
|
||||||
|
|
||||||
|
### 4. Closing
|
||||||
|
|
||||||
|
Closing is the reverse of Opening: **dilation followed by erosion**. It is useful in closing small
holes inside the foreground objects, or small black points on the object.
|
||||||
|
@code{.py}
|
||||||
|
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
|
||||||
|
@endcode
|
||||||
|
Result:
|
||||||
|
|
||||||
|
![image](images/closing.png)
|
||||||
|
|
||||||
|
### 5. Morphological Gradient
|
||||||
|
|
||||||
|
It is the difference between dilation and erosion of an image.
|
||||||
|
|
||||||
|
The result will look like the outline of the object.
|
||||||
|
@code{.py}
|
||||||
|
gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
|
||||||
|
@endcode
|
||||||
|
Result:
|
||||||
|
|
||||||
|
![image](images/gradient.png)
|
||||||
|
|
||||||
|
### 6. Top Hat
|
||||||
|
|
||||||
|
It is the difference between the input image and the Opening of the image. The example below is
done with a 9x9 kernel.
|
||||||
|
@code{.py}
|
||||||
|
tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)
|
||||||
|
@endcode
|
||||||
|
Result:
|
||||||
|
|
||||||
|
![image](images/tophat.png)
|
||||||
|
|
||||||
|
### 7. Black Hat
|
||||||
|
|
||||||
|
It is the difference between the Closing of the input image and the input image.
|
||||||
|
@code{.py}
|
||||||
|
blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel)
|
||||||
|
@endcode
|
||||||
|
Result:
|
||||||
|
|
||||||
|
![image](images/blackhat.png)
|
||||||
|
|
||||||
|
Structuring Element
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
We manually created structuring elements in the previous examples with the help of Numpy, and they
are rectangular in shape. But in some cases, you may need elliptical/circular shaped kernels. For
this purpose, OpenCV has a function, **cv2.getStructuringElement()**. You just pass the shape and
size of the kernel, and you get the desired kernel.
|
||||||
|
@code{.py}
|
||||||
|
# Rectangular Kernel
|
||||||
|
>>> cv2.getStructuringElement(cv2.MORPH_RECT,(5,5))
|
||||||
|
array([[1, 1, 1, 1, 1],
|
||||||
|
[1, 1, 1, 1, 1],
|
||||||
|
[1, 1, 1, 1, 1],
|
||||||
|
[1, 1, 1, 1, 1],
|
||||||
|
[1, 1, 1, 1, 1]], dtype=uint8)
|
||||||
|
|
||||||
|
# Elliptical Kernel
|
||||||
|
>>> cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))
|
||||||
|
array([[0, 0, 1, 0, 0],
|
||||||
|
[1, 1, 1, 1, 1],
|
||||||
|
[1, 1, 1, 1, 1],
|
||||||
|
[1, 1, 1, 1, 1],
|
||||||
|
[0, 0, 1, 0, 0]], dtype=uint8)
|
||||||
|
|
||||||
|
# Cross-shaped Kernel
|
||||||
|
>>> cv2.getStructuringElement(cv2.MORPH_CROSS,(5,5))
|
||||||
|
array([[0, 0, 1, 0, 0],
|
||||||
|
[0, 0, 1, 0, 0],
|
||||||
|
[1, 1, 1, 1, 1],
|
||||||
|
[0, 0, 1, 0, 0],
|
||||||
|
[0, 0, 1, 0, 0]], dtype=uint8)
|
||||||
|
@endcode
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# [Morphological Operations](http://homepages.inf.ed.ac.uk/rbf/HIPR2/morops.htm) at HIPR2
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
141
doc/py_tutorials/py_imgproc/py_pyramids/py_pyramids.markdown
Normal file
141
doc/py_tutorials/py_imgproc/py_pyramids/py_pyramids.markdown
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
Image Pyramids {#tutorial_py_pyramids}
|
||||||
|
==============
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
- We will learn about Image Pyramids
|
||||||
|
- We will use Image pyramids to create a new fruit, "Orapple"
|
||||||
|
- We will see these functions: **cv2.pyrUp()**, **cv2.pyrDown()**
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
Normally, we work with an image of constant size. But on some occasions, we need to work with
different resolutions of the same image. For example, while searching for something in an image,
like a face, we are not sure at what size the object will be present in the image. In that case, we
will need to create a set of the same image at different resolutions and search for the object in
all of them. These sets of images with different resolutions are called Image Pyramids (because when
they are kept in a stack with the biggest image at the bottom and the smallest image at the top,
they look like a pyramid).
|
||||||
|
|
||||||
|
There are two kinds of Image Pyramids: 1) Gaussian Pyramids and 2) Laplacian Pyramids.
|
||||||
|
|
||||||
|
A higher level (lower resolution) in a Gaussian Pyramid is formed by removing consecutive rows and
columns from the lower level (higher resolution) image. Each pixel in the higher level is formed by
the contribution of 5 pixels in the underlying level with Gaussian weights. By doing so, an
\f$M \times N\f$ image becomes an \f$M/2 \times N/2\f$ image, so the area reduces to one-fourth of the
original area. This is called an Octave. The same pattern continues as we go further up in the
pyramid (i.e. the resolution decreases). Similarly, while expanding, the area becomes 4 times larger
at each level. We can find Gaussian pyramids using the **cv2.pyrDown()** and **cv2.pyrUp()**
functions.
|
||||||
|
@code{.py}
|
||||||
|
higher_reso = cv2.imread('messi5.jpg')
lower_reso = cv2.pyrDown(higher_reso)
|
||||||
|
@endcode
|
||||||
|
Below is the 4 levels in an image pyramid.
|
||||||
|
|
||||||
|
![image](images/messipyr.jpg)
|
||||||
|
|
||||||
|
Now you can go down the image pyramid with **cv2.pyrUp()** function.
|
||||||
|
@code{.py}
|
||||||
|
higher_reso2 = cv2.pyrUp(lower_reso)
|
||||||
|
@endcode
|
||||||
|
Remember, higher_reso2 is not equal to higher_reso, because once you decrease the resolution, you
lose the information. The image below is 3 levels down the pyramid, created from the smallest image
in the previous case. Compare it with the original image:
|
||||||
|
|
||||||
|
![image](images/messiup.jpg)
|
||||||
|
|
||||||
|
Laplacian Pyramids are formed from the Gaussian Pyramids. There is no exclusive function for that.
Laplacian pyramid images are like edge images only; most of their elements are zeros. They are used
in image compression. A level in a Laplacian Pyramid is formed by the difference between that level
in the Gaussian Pyramid and the expanded version of its upper level in the Gaussian Pyramid. The
three levels of a Laplacian Pyramid look like below (the contrast is adjusted to enhance the
contents):
|
||||||
|
|
||||||
|
![image](images/lap.jpg)
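
For example, a single Laplacian level can be formed as sketched below (a minimal illustration of the
definition above; the file name is just a placeholder, and `dstsize` is passed so the expanded image
matches the original size when the dimensions are odd):
@code{.py}
import cv2

G0 = cv2.imread('messi5.jpg')                           # Gaussian level 0 (the original image)
G1 = cv2.pyrDown(G0)                                    # Gaussian level 1
up = cv2.pyrUp(G1, dstsize=(G0.shape[1], G0.shape[0]))  # expand level 1 back to level 0 size
L0 = cv2.subtract(G0, up)                               # Laplacian level 0: mostly edges
@endcode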
|
||||||
|
|
||||||
|
Image Blending using Pyramids
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
One application of Pyramids is Image Blending. For example, in image stitching, you will need to
stack two images together, but it may not look good due to discontinuities between the images. In
that case, image blending with Pyramids gives you seamless blending without losing much data from
the images. A classical example of this is the blending of two fruits, Orange and Apple. See the
result below first to understand what I mean:
|
||||||
|
|
||||||
|
![image](images/orapple.jpg)
|
||||||
|
|
||||||
|
Please check the first reference in Additional Resources; it has full diagrammatic details on image
blending, Laplacian Pyramids etc. Briefly, it is done as follows:
|
||||||
|
|
||||||
|
-# Load the two images of apple and orange
-# Find the Gaussian Pyramids for apple and orange (in this particular example, the number of levels
    is 6)
-# From the Gaussian Pyramids, find their Laplacian Pyramids
-# Now join the left half of apple and the right half of orange in each level of the Laplacian
    Pyramids
-# Finally, from these joint image pyramids, reconstruct the original image.
|
||||||
|
|
||||||
|
Below is the full code. (For the sake of simplicity, each step is done separately, which may take
more memory. You can optimize it if you want.)
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np,sys
|
||||||
|
|
||||||
|
A = cv2.imread('apple.jpg')
|
||||||
|
B = cv2.imread('orange.jpg')
|
||||||
|
|
||||||
|
# generate Gaussian pyramid for A
|
||||||
|
G = A.copy()
|
||||||
|
gpA = [G]
|
||||||
|
for i in xrange(6):
|
||||||
|
G = cv2.pyrDown(G)
|
||||||
|
gpA.append(G)
|
||||||
|
|
||||||
|
# generate Gaussian pyramid for B
|
||||||
|
G = B.copy()
|
||||||
|
gpB = [G]
|
||||||
|
for i in xrange(6):
|
||||||
|
G = cv2.pyrDown(G)
|
||||||
|
gpB.append(G)
|
||||||
|
|
||||||
|
# generate Laplacian Pyramid for A
|
||||||
|
lpA = [gpA[5]]
|
||||||
|
for i in xrange(5,0,-1):
|
||||||
|
GE = cv2.pyrUp(gpA[i])
|
||||||
|
L = cv2.subtract(gpA[i-1],GE)
|
||||||
|
lpA.append(L)
|
||||||
|
|
||||||
|
# generate Laplacian Pyramid for B
|
||||||
|
lpB = [gpB[5]]
|
||||||
|
for i in xrange(5,0,-1):
|
||||||
|
GE = cv2.pyrUp(gpB[i])
|
||||||
|
L = cv2.subtract(gpB[i-1],GE)
|
||||||
|
lpB.append(L)
|
||||||
|
|
||||||
|
# Now add left and right halves of images in each level
|
||||||
|
LS = []
|
||||||
|
for la,lb in zip(lpA,lpB):
|
||||||
|
rows,cols,dpt = la.shape
|
||||||
|
ls = np.hstack((la[:,0:cols/2], lb[:,cols/2:]))
|
||||||
|
LS.append(ls)
|
||||||
|
|
||||||
|
# now reconstruct
|
||||||
|
ls_ = LS[0]
|
||||||
|
for i in xrange(1,6):
|
||||||
|
ls_ = cv2.pyrUp(ls_)
|
||||||
|
ls_ = cv2.add(ls_, LS[i])
|
||||||
|
|
||||||
|
# image with direct connecting each half
|
||||||
|
real = np.hstack((A[:,:cols/2],B[:,cols/2:]))
|
||||||
|
|
||||||
|
cv2.imwrite('Pyramid_blending2.jpg',ls_)
|
||||||
|
cv2.imwrite('Direct_blending.jpg',real)
|
||||||
|
@endcode
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# [Image Blending](http://pages.cs.wisc.edu/~csverma/CS766_09/ImageMosaic/imagemosaic.html)
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
@ -0,0 +1,76 @@
|
|||||||
|
Image Processing in OpenCV {#tutorial_py_table_of_contents_imgproc}
|
||||||
|
==========================
|
||||||
|
|
||||||
|
- @subpage tutorial_py_colorspaces
|
||||||
|
|
||||||
|
Learn to change images between different color spaces.
|
||||||
|
Plus learn to track a colored object in a video.
|
||||||
|
|
||||||
|
- @subpage tutorial_py_geometric_transformations
|
||||||
|
|
||||||
|
Learn to apply different geometric transformations to images like rotation, translation etc.
|
||||||
|
|
||||||
|
- @subpage tutorial_py_thresholding
|
||||||
|
|
||||||
|
Learn
|
||||||
|
to convert images to binary images using global thresholding, Adaptive thresholding, Otsu's
|
||||||
|
binarization etc
|
||||||
|
|
||||||
|
- @subpage tutorial_py_filtering
|
||||||
|
|
||||||
|
Learn
|
||||||
|
to blur the images, filter the images with custom kernels etc.
|
||||||
|
|
||||||
|
- @subpage tutorial_py_morphological_ops
|
||||||
|
|
||||||
|
Learn about morphological transformations like Erosion, Dilation, Opening, Closing etc
|
||||||
|
|
||||||
|
- @subpage tutorial_py_gradients
|
||||||
|
|
||||||
|
Learn
|
||||||
|
to find image gradients, edges etc.
|
||||||
|
|
||||||
|
- @subpage tutorial_py_canny
|
||||||
|
|
||||||
|
Learn
|
||||||
|
to find edges with Canny Edge Detection
|
||||||
|
|
||||||
|
- @subpage tutorial_py_pyramids
|
||||||
|
|
||||||
|
Learn about image pyramids and how to use them for image blending
|
||||||
|
|
||||||
|
- @subpage tutorial_py_table_of_contents_contours
|
||||||
|
|
||||||
|
All
|
||||||
|
about Contours in OpenCV
|
||||||
|
|
||||||
|
- @subpage tutorial_py_table_of_contents_histograms
|
||||||
|
|
||||||
|
All
|
||||||
|
about histograms in OpenCV
|
||||||
|
|
||||||
|
- @subpage tutorial_py_table_of_contents_transforms
|
||||||
|
|
||||||
|
Meet
|
||||||
|
different Image Transforms in OpenCV like Fourier Transform, Cosine Transform etc.
|
||||||
|
|
||||||
|
- @subpage tutorial_py_template_matching
|
||||||
|
|
||||||
|
Learn
|
||||||
|
to search for an object in an image using Template Matching
|
||||||
|
|
||||||
|
- @subpage tutorial_py_houghlines
|
||||||
|
|
||||||
|
Learn to detect lines in an image
|
||||||
|
|
||||||
|
- @subpage tutorial_py_houghcircles
|
||||||
|
|
||||||
|
Learn to detect circles in an image
|
||||||
|
|
||||||
|
- @subpage tutorial_py_watershed
|
||||||
|
|
||||||
|
Learn to segment images with watershed segmentation
|
||||||
|
|
||||||
|
- @subpage tutorial_py_grabcut
|
||||||
|
|
||||||
|
Learn to extract foreground with GrabCut algorithm
|
@ -0,0 +1,136 @@
|
|||||||
|
Template Matching {#tutorial_py_template_matching}
|
||||||
|
=================
|
||||||
|
|
||||||
|
Goals
|
||||||
|
-----
|
||||||
|
|
||||||
|
In this chapter, you will learn
|
||||||
|
- To find objects in an image using Template Matching
|
||||||
|
- You will see these functions : **cv2.matchTemplate()**, **cv2.minMaxLoc()**
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
Template Matching is a method for searching and finding the location of a template image in a
larger image. OpenCV comes with a function **cv2.matchTemplate()** for this purpose. It simply
slides the template image over the input image (as in 2D convolution) and compares the template and
the patch of input image under the template image. Several comparison methods are implemented in
OpenCV (you can check the docs for more details). It returns a grayscale image, where each pixel
denotes how well the neighbourhood of that pixel matches the template.
|
||||||
|
|
||||||
|
If the input image is of size (WxH) and the template image is of size (wxh), the output image will
have a size of (W-w+1, H-h+1). Once you have the result, you can use the **cv2.minMaxLoc()**
function to find where the maximum/minimum value is. Take it as the top-left corner of a rectangle
and take (w,h) as the width and height of the rectangle. That rectangle is your region of the
template.
|
||||||
|
|
||||||
|
@note If you are using cv2.TM_SQDIFF as a comparison method, the minimum value gives the best match.
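
A quick way to convince yourself of the size relation above (a toy sketch with made-up sizes, not
part of the tutorial's example):
@code{.py}
import cv2
import numpy as np

img = np.random.rand(480, 640).astype(np.float32)   # H = 480, W = 640 (hypothetical)
tpl = np.random.rand(60, 80).astype(np.float32)     # h = 60,  w = 80  (hypothetical)
res = cv2.matchTemplate(img, tpl, cv2.TM_CCOEFF_NORMED)
print(res.shape)                                    # (421, 561) = (H-h+1, W-w+1)
@endcode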
|
||||||
|
|
||||||
|
Template Matching in OpenCV
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
Here, as an example, we will search for Messi's face in his photo. So I created a template as below:
|
||||||
|
|
||||||
|
![image](images/messi_face.jpg)
|
||||||
|
|
||||||
|
We will try all the comparison methods so that we can see how their results look like:
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('messi5.jpg',0)
|
||||||
|
img2 = img.copy()
|
||||||
|
template = cv2.imread('template.jpg',0)
|
||||||
|
w, h = template.shape[::-1]
|
||||||
|
|
||||||
|
# All the 6 methods for comparison in a list
|
||||||
|
methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
|
||||||
|
'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
|
||||||
|
|
||||||
|
for meth in methods:
|
||||||
|
img = img2.copy()
|
||||||
|
method = eval(meth)
|
||||||
|
|
||||||
|
# Apply template Matching
|
||||||
|
res = cv2.matchTemplate(img,template,method)
|
||||||
|
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
|
||||||
|
|
||||||
|
# If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
|
||||||
|
if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
|
||||||
|
top_left = min_loc
|
||||||
|
else:
|
||||||
|
top_left = max_loc
|
||||||
|
bottom_right = (top_left[0] + w, top_left[1] + h)
|
||||||
|
|
||||||
|
cv2.rectangle(img,top_left, bottom_right, 255, 2)
|
||||||
|
|
||||||
|
plt.subplot(121),plt.imshow(res,cmap = 'gray')
|
||||||
|
plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.subplot(122),plt.imshow(img,cmap = 'gray')
|
||||||
|
plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.suptitle(meth)
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
See the results below:
|
||||||
|
|
||||||
|
- cv2.TM_CCOEFF
|
||||||
|
|
||||||
|
![image](images/template_ccoeff_1.jpg)
|
||||||
|
|
||||||
|
- cv2.TM_CCOEFF_NORMED
|
||||||
|
|
||||||
|
![image](images/template_ccoeffn_2.jpg)
|
||||||
|
|
||||||
|
- cv2.TM_CCORR
|
||||||
|
|
||||||
|
![image](images/template_ccorr_3.jpg)
|
||||||
|
|
||||||
|
- cv2.TM_CCORR_NORMED
|
||||||
|
|
||||||
|
![image](images/template_ccorrn_4.jpg)
|
||||||
|
|
||||||
|
- cv2.TM_SQDIFF
|
||||||
|
|
||||||
|
![image](images/template_sqdiff_5.jpg)
|
||||||
|
|
||||||
|
- cv2.TM_SQDIFF_NORMED
|
||||||
|
|
||||||
|
![image](images/template_sqdiffn_6.jpg)
|
||||||
|
|
||||||
|
You can see that the result using **cv2.TM_CCORR** is not as good as we expected.
|
||||||
|
|
||||||
|
Template Matching with Multiple Objects
|
||||||
|
---------------------------------------
|
||||||
|
|
||||||
|
In the previous section, we searched the image for Messi's face, which occurs only once in the
image. Suppose you are searching for an object which has multiple occurrences; **cv2.minMaxLoc()**
won't give you all the locations. In that case, we will use thresholding. So in this example, we
will use a screenshot of the famous game **Mario** and we will find the coins in it.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img_rgb = cv2.imread('mario.png')
|
||||||
|
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
|
||||||
|
template = cv2.imread('mario_coin.png',0)
|
||||||
|
w, h = template.shape[::-1]
|
||||||
|
|
||||||
|
res = cv2.matchTemplate(img_gray,template,cv2.TM_CCOEFF_NORMED)
|
||||||
|
threshold = 0.8
|
||||||
|
loc = np.where( res >= threshold)
|
||||||
|
for pt in zip(*loc[::-1]):
|
||||||
|
cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0,0,255), 2)
|
||||||
|
|
||||||
|
cv2.imwrite('res.png',img_rgb)
|
||||||
|
@endcode
|
||||||
|
Result:
|
||||||
|
|
||||||
|
![image](images/res_mario.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
@ -0,0 +1,233 @@
|
|||||||
|
Image Thresholding {#tutorial_py_thresholding}
|
||||||
|
==================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
- In this tutorial, you will learn Simple thresholding, Adaptive thresholding, Otsu's thresholding
|
||||||
|
etc.
|
||||||
|
- You will learn these functions : **cv2.threshold**, **cv2.adaptiveThreshold** etc.
|
||||||
|
|
||||||
|
Simple Thresholding
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
Here, the matter is straightforward. If a pixel value is greater than a threshold value, it is
assigned one value (maybe white), else it is assigned another value (maybe black). The function
used is **cv2.threshold**. The first argument is the source image, which **should be a grayscale
image**. The second argument is the threshold value which is used to classify the pixel values. The
third argument is the maxVal, which represents the value to be given if the pixel value is more
than (sometimes less than) the threshold value. OpenCV provides different styles of thresholding,
decided by the fourth parameter of the function. The different types are:
|
||||||
|
|
||||||
|
- cv2.THRESH_BINARY
|
||||||
|
- cv2.THRESH_BINARY_INV
|
||||||
|
- cv2.THRESH_TRUNC
|
||||||
|
- cv2.THRESH_TOZERO
|
||||||
|
- cv2.THRESH_TOZERO_INV
|
||||||
|
|
||||||
|
The documentation clearly explains what each type is meant for. Please check it out.
|
||||||
|
|
||||||
|
Two outputs are obtained. The first one is **retval**, which will be explained later. The second
output is our **thresholded image**.
|
||||||
|
|
||||||
|
Code :
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('gradient.png',0)
|
||||||
|
ret,thresh1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
|
||||||
|
ret,thresh2 = cv2.threshold(img,127,255,cv2.THRESH_BINARY_INV)
|
||||||
|
ret,thresh3 = cv2.threshold(img,127,255,cv2.THRESH_TRUNC)
|
||||||
|
ret,thresh4 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO)
|
||||||
|
ret,thresh5 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO_INV)
|
||||||
|
|
||||||
|
titles = ['Original Image','BINARY','BINARY_INV','TRUNC','TOZERO','TOZERO_INV']
|
||||||
|
images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]
|
||||||
|
|
||||||
|
for i in xrange(6):
|
||||||
|
plt.subplot(2,3,i+1),plt.imshow(images[i],'gray')
|
||||||
|
plt.title(titles[i])
|
||||||
|
plt.xticks([]),plt.yticks([])
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
@note To plot multiple images, we have used plt.subplot() function. Please checkout Matplotlib docs
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
Result is given below :
|
||||||
|
|
||||||
|
![image](images/threshold.jpg)
|
||||||
|
|
||||||
|
Adaptive Thresholding
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
In the previous section, we used a global value as the threshold. But this may not be good in all
conditions, e.g. where the image has different lighting conditions in different areas. In that
case, we go for adaptive thresholding. Here, the algorithm calculates the threshold for small
regions of the image. So we get different thresholds for different regions of the same image, which
gives us better results for images with varying illumination.
|
||||||
|
|
||||||
|
It has three ‘special’ input params and only one output argument.
|
||||||
|
|
||||||
|
**Adaptive Method** - It decides how the threshold value is calculated.
    - cv2.ADAPTIVE_THRESH_MEAN_C : the threshold value is the mean of the neighbourhood area.
    - cv2.ADAPTIVE_THRESH_GAUSSIAN_C : the threshold value is the weighted sum of the neighbourhood
      values, where the weights are a Gaussian window.
|
||||||
|
|
||||||
|
**Block Size** - It decides the size of neighbourhood area.
|
||||||
|
|
||||||
|
**C** - It is just a constant which is subtracted from the mean or weighted mean calculated.
|
||||||
|
|
||||||
|
Below piece of code compares global thresholding and adaptive thresholding for an image with varying
|
||||||
|
illumination:
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('dave.jpg',0)
|
||||||
|
img = cv2.medianBlur(img,5)
|
||||||
|
|
||||||
|
ret,th1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
|
||||||
|
th2 = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C,\
|
||||||
|
cv2.THRESH_BINARY,11,2)
|
||||||
|
th3 = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
|
||||||
|
cv2.THRESH_BINARY,11,2)
|
||||||
|
|
||||||
|
titles = ['Original Image', 'Global Thresholding (v = 127)',
|
||||||
|
'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding']
|
||||||
|
images = [img, th1, th2, th3]
|
||||||
|
|
||||||
|
for i in xrange(4):
|
||||||
|
plt.subplot(2,2,i+1),plt.imshow(images[i],'gray')
|
||||||
|
plt.title(titles[i])
|
||||||
|
plt.xticks([]),plt.yticks([])
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
Result :
|
||||||
|
|
||||||
|
![image](images/ada_threshold.jpg)
|
||||||
|
|
||||||
|
Otsu’s Binarization
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
In the first section, I told you there is a second parameter **retVal**. Its use comes when we go
|
||||||
|
for Otsu’s Binarization. So what is it?
|
||||||
|
|
||||||
|
In global thresholding, we used an arbitrary value as the threshold, right? So, how can we know
whether the value we selected is good or not? The answer is: trial and error. But consider a
**bimodal image** (*in simple words, a bimodal image is an image whose histogram has two peaks*).
For that image, we can approximately take a value in the middle of those peaks as the threshold
value, right? That is what Otsu binarization does. So in simple words, it automatically calculates
a threshold value from the image histogram for a bimodal image. (For images which are not bimodal,
binarization won't be accurate.)
|
||||||
|
|
||||||
|
For this, our cv2.threshold() function is used, but we pass an extra flag, cv2.THRESH_OTSU. **For
the threshold value, simply pass zero**. Then the algorithm finds the optimal threshold value and
returns it as the second output, retVal. If Otsu thresholding is not used, retVal is the same as
the threshold value you used.
|
||||||
|
|
||||||
|
Check out the example below. The input image is a noisy image. In the first case, I applied global
thresholding with a value of 127. In the second case, I applied Otsu's thresholding directly. In
the third case, I filtered the image with a 5x5 Gaussian kernel to remove the noise, then applied
Otsu thresholding. See how noise filtering improves the result.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('noisy2.png',0)
|
||||||
|
|
||||||
|
# global thresholding
|
||||||
|
ret1,th1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
|
||||||
|
|
||||||
|
# Otsu's thresholding
|
||||||
|
ret2,th2 = cv2.threshold(img,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
|
||||||
|
|
||||||
|
# Otsu's thresholding after Gaussian filtering
|
||||||
|
blur = cv2.GaussianBlur(img,(5,5),0)
|
||||||
|
ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
|
||||||
|
|
||||||
|
# plot all the images and their histograms
|
||||||
|
images = [img, 0, th1,
|
||||||
|
img, 0, th2,
|
||||||
|
blur, 0, th3]
|
||||||
|
titles = ['Original Noisy Image','Histogram','Global Thresholding (v=127)',
|
||||||
|
'Original Noisy Image','Histogram',"Otsu's Thresholding",
|
||||||
|
'Gaussian filtered Image','Histogram',"Otsu's Thresholding"]
|
||||||
|
|
||||||
|
for i in xrange(3):
|
||||||
|
plt.subplot(3,3,i*3+1),plt.imshow(images[i*3],'gray')
|
||||||
|
plt.title(titles[i*3]), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.subplot(3,3,i*3+2),plt.hist(images[i*3].ravel(),256)
|
||||||
|
plt.title(titles[i*3+1]), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.subplot(3,3,i*3+3),plt.imshow(images[i*3+2],'gray')
|
||||||
|
plt.title(titles[i*3+2]), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
Result :
|
||||||
|
|
||||||
|
![image](images/otsu.jpg)
|
||||||
|
|
||||||
|
### How Otsu's Binarization Works?
|
||||||
|
|
||||||
|
This section demonstrates a Python implementation of Otsu's binarization to show how it actually
works. If you are not interested, you can skip this.
|
||||||
|
|
||||||
|
Since we are working with bimodal images, Otsu's algorithm tries to find a threshold value (t) which
|
||||||
|
minimizes the **weighted within-class variance** given by the relation :
|
||||||
|
|
||||||
|
\f[\sigma_w^2(t) = q_1(t)\sigma_1^2(t)+q_2(t)\sigma_2^2(t)\f]
|
||||||
|
|
||||||
|
where
|
||||||
|
|
||||||
|
\f[q_1(t) = \sum_{i=1}^{t} P(i) \quad \& \quad q_2(t) = \sum_{i=t+1}^{I} P(i)\f]\f[\mu_1(t) = \sum_{i=1}^{t} \frac{iP(i)}{q_1(t)} \quad \& \quad \mu_2(t) = \sum_{i=t+1}^{I} \frac{iP(i)}{q_2(t)}\f]\f[\sigma_1^2(t) = \sum_{i=1}^{t} [i-\mu_1(t)]^2 \frac{P(i)}{q_1(t)} \quad \& \quad \sigma_2^2(t) = \sum_{i=t+1}^{I} [i-\mu_2(t)]^2 \frac{P(i)}{q_2(t)}\f]
|
||||||
|
|
||||||
|
It actually finds a value of t which lies in between the two peaks such that the variances of both
classes are minimal. It can be simply implemented in Python as follows:
|
||||||
|
@code{.py}
|
||||||
|
img = cv2.imread('noisy2.png',0)
|
||||||
|
blur = cv2.GaussianBlur(img,(5,5),0)
|
||||||
|
|
||||||
|
# find normalized_histogram, and its cumulative distribution function
|
||||||
|
hist = cv2.calcHist([blur],[0],None,[256],[0,256])
|
||||||
|
hist_norm = hist.ravel()/hist.max()
|
||||||
|
Q = hist_norm.cumsum()
|
||||||
|
|
||||||
|
bins = np.arange(256)
|
||||||
|
|
||||||
|
fn_min = np.inf
|
||||||
|
thresh = -1
|
||||||
|
|
||||||
|
for i in xrange(1,256):
|
||||||
|
p1,p2 = np.hsplit(hist_norm,[i]) # probabilities
|
||||||
|
q1,q2 = Q[i],Q[255]-Q[i] # cum sum of classes
|
||||||
|
b1,b2 = np.hsplit(bins,[i]) # weights
|
||||||
|
|
||||||
|
# finding means and variances
|
||||||
|
m1,m2 = np.sum(p1*b1)/q1, np.sum(p2*b2)/q2
|
||||||
|
v1,v2 = np.sum(((b1-m1)**2)*p1)/q1,np.sum(((b2-m2)**2)*p2)/q2
|
||||||
|
|
||||||
|
# calculates the minimization function
|
||||||
|
fn = v1*q1 + v2*q2
|
||||||
|
if fn < fn_min:
|
||||||
|
fn_min = fn
|
||||||
|
thresh = i
|
||||||
|
|
||||||
|
# find otsu's threshold value with OpenCV function
|
||||||
|
ret, otsu = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
|
||||||
|
print thresh,ret
|
||||||
|
@endcode
|
||||||
|
*(Some of the functions may be new here, but we will cover them in coming chapters)*
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# Digital Image Processing, Rafael C. Gonzalez
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
||||||
|
|
||||||
|
-# There are some optimizations available for Otsu's binarization. You can search and implement it.
|
@ -0,0 +1,293 @@
|
|||||||
|
Fourier Transform {#tutorial_py_fourier_transform}
|
||||||
|
=================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this section, we will learn
|
||||||
|
- To find the Fourier Transform of images using OpenCV
|
||||||
|
- To utilize the FFT functions available in Numpy
|
||||||
|
- Some applications of Fourier Transform
|
||||||
|
- We will see following functions : **cv2.dft()**, **cv2.idft()** etc
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
The Fourier Transform is used to analyze the frequency characteristics of various filters. For
images, the **2D Discrete Fourier Transform (DFT)** is used to find the frequency domain. A fast
algorithm called the **Fast Fourier Transform (FFT)** is used for calculation of the DFT. Details
about these can be found in any image processing or signal processing textbook. Please see the
Additional Resources section.
|
||||||
|
|
||||||
|
For a sinusoidal signal, \f$x(t) = A \sin(2 \pi ft)\f$, we can say \f$f\f$ is the frequency of the signal,
and if its frequency domain is taken, we can see a spike at \f$f\f$. If the signal is sampled to form a
discrete signal, we get the same frequency domain, but it is periodic in the range \f$[- \pi, \pi]\f$ or
\f$[0,2\pi]\f$ (or \f$[0,N]\f$ for an N-point DFT). You can consider an image as a signal which is sampled
in two directions. So taking the Fourier transform in both the X and Y directions gives you the
frequency representation of the image.
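
As a quick sanity check of the claim above, here is a small 1D sketch using Numpy (the sample rate,
frequency and length are made-up values for illustration):
@code{.py}
import numpy as np

fs, f, N = 1000, 50, 1000          # sample rate (Hz), signal frequency (Hz), number of samples
t = np.arange(N) / float(fs)
x = np.sin(2*np.pi*f*t)            # sampled sinusoid
X = np.abs(np.fft.rfft(x))         # magnitude of its frequency domain
freqs = np.fft.rfftfreq(N, d=1.0/fs)
print(freqs[np.argmax(X)])         # -> 50.0, the spike sits at f
@endcode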
|
||||||
|
|
||||||
|
More intuitively, for the sinusoidal signal, if the amplitude varies very fast over a short time,
you can say it is a high frequency signal. If it varies slowly, it is a low frequency signal. You
can extend the same idea to images. Where does the amplitude vary drastically in images? At edge
points and noise. So we can say edges and noise are high frequency content in an image. If there
are no big changes in amplitude, it is a low frequency component. (Some links are added to
Additional Resources which explain the frequency transform intuitively with examples.)
|
||||||
|
|
||||||
|
Now we will see how to find the Fourier Transform.
|
||||||
|
|
||||||
|
Fourier Transform in Numpy
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
First we will see how to find Fourier Transform using Numpy. Numpy has an FFT package to do this.
|
||||||
|
**np.fft.fft2()** provides us the frequency transform, which will be a complex array. Its first
argument is the input image, which is grayscale. The second argument is optional and decides the
size of the output array. If it is greater than the size of the input image, the input image is
padded with zeros before the calculation of the FFT. If it is less than the input image, the input
image will be cropped. If no arguments are passed, the output array size will be the same as the
input.
|
||||||
|
|
||||||
|
Once you get the result, the zero frequency component (DC component) will be at the top-left
corner. If you want to bring it to the center, you need to shift the result by \f$\frac{N}{2}\f$ in
both directions. This is simply done by the function **np.fft.fftshift()** (it makes the result
easier to analyze). Once you have found the frequency transform, you can find the magnitude
spectrum.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('messi5.jpg',0)
|
||||||
|
f = np.fft.fft2(img)
|
||||||
|
fshift = np.fft.fftshift(f)
|
||||||
|
magnitude_spectrum = 20*np.log(np.abs(fshift))
|
||||||
|
|
||||||
|
plt.subplot(121),plt.imshow(img, cmap = 'gray')
|
||||||
|
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.subplot(122),plt.imshow(magnitude_spectrum, cmap = 'gray')
|
||||||
|
plt.title('Magnitude Spectrum'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
The result looks like below:
|
||||||
|
|
||||||
|
![image](images/fft1.jpg)
|
||||||
|
|
||||||
|
You can see a whiter region at the center, showing that low frequency content is dominant.
|
||||||
|
|
||||||
|
So you found the frequency transform. Now you can do some operations in the frequency domain, like
high pass filtering, and reconstruct the image, i.e. find the inverse DFT. For that, you simply
remove the low frequencies by masking with a rectangular window of size 60x60. Then apply the
inverse shift using **np.fft.ifftshift()** so that the DC component again comes at the top-left
corner. Then find the inverse FFT using the **np.fft.ifft2()** function. The result, again, will be
a complex number. You can take its absolute value.
|
||||||
|
@code{.py}
|
||||||
|
rows, cols = img.shape
|
||||||
|
crow,ccol = rows/2 , cols/2
|
||||||
|
fshift[crow-30:crow+30, ccol-30:ccol+30] = 0
|
||||||
|
f_ishift = np.fft.ifftshift(fshift)
|
||||||
|
img_back = np.fft.ifft2(f_ishift)
|
||||||
|
img_back = np.abs(img_back)
|
||||||
|
|
||||||
|
plt.subplot(131),plt.imshow(img, cmap = 'gray')
|
||||||
|
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.subplot(132),plt.imshow(img_back, cmap = 'gray')
|
||||||
|
plt.title('Image after HPF'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.subplot(133),plt.imshow(img_back)
|
||||||
|
plt.title('Result in JET'), plt.xticks([]), plt.yticks([])
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
The result looks like below:
|
||||||
|
|
||||||
|
![image](images/fft2.jpg)
|
||||||
|
|
||||||
|
The result shows High Pass Filtering is an edge detection operation. This is what we have seen in
|
||||||
|
Image Gradients chapter. This also shows that most of the image data is present in the Low frequency
|
||||||
|
region of the spectrum. Anyway we have seen how to find DFT, IDFT etc in Numpy. Now let's see how to
|
||||||
|
do it in OpenCV.
|
||||||
|
|
||||||
|
If you closely watch the result, especially the last image in JET color, you can see some artifacts
(I have marked one instance with a red arrow). It shows some ripple-like structures there, which are
called **ringing effects**. They are caused by the rectangular window we used for masking. The mask
is converted to a sinc shape, which causes this problem. So rectangular windows are not used for
filtering; a better option is a Gaussian window.
|
||||||
|
|
||||||
|
Fourier Transform in OpenCV
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
OpenCV provides the functions **cv2.dft()** and **cv2.idft()** for this. It returns the same result
|
||||||
|
as previous, but with two channels. First channel will have the real part of the result and second
|
||||||
|
channel will have the imaginary part of the result. The input image should be converted to
|
||||||
|
np.float32 first. We will see how to do it.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('messi5.jpg',0)
|
||||||
|
|
||||||
|
dft = cv2.dft(np.float32(img),flags = cv2.DFT_COMPLEX_OUTPUT)
|
||||||
|
dft_shift = np.fft.fftshift(dft)
|
||||||
|
|
||||||
|
magnitude_spectrum = 20*np.log(cv2.magnitude(dft_shift[:,:,0],dft_shift[:,:,1]))
|
||||||
|
|
||||||
|
plt.subplot(121),plt.imshow(img, cmap = 'gray')
|
||||||
|
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.subplot(122),plt.imshow(magnitude_spectrum, cmap = 'gray')
|
||||||
|
plt.title('Magnitude Spectrum'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
@note You can also use **cv2.cartToPolar()** which returns both magnitude and phase in a single shot
|
||||||
|
|
||||||
|
So, now we have to do the inverse DFT. In the previous session, we created an HPF; this time we
will see how to remove the high frequency contents in the image, i.e. we apply an LPF to the image.
It actually blurs the image. For this, we first create a mask with a high value (1) at the low
frequencies, i.e. we pass the LF content, and 0 at the HF region.
|
||||||
|
|
||||||
|
@code{.py}
|
||||||
|
rows, cols = img.shape
|
||||||
|
crow,ccol = rows/2 , cols/2
|
||||||
|
|
||||||
|
# create a mask first, center square is 1, remaining all zeros
|
||||||
|
mask = np.zeros((rows,cols,2),np.uint8)
|
||||||
|
mask[crow-30:crow+30, ccol-30:ccol+30] = 1
|
||||||
|
|
||||||
|
# apply mask and inverse DFT
|
||||||
|
fshift = dft_shift*mask
|
||||||
|
f_ishift = np.fft.ifftshift(fshift)
|
||||||
|
img_back = cv2.idft(f_ishift)
|
||||||
|
img_back = cv2.magnitude(img_back[:,:,0],img_back[:,:,1])
|
||||||
|
|
||||||
|
plt.subplot(121),plt.imshow(img, cmap = 'gray')
|
||||||
|
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.subplot(122),plt.imshow(img_back, cmap = 'gray')
|
||||||
|
plt.title('Magnitude Spectrum'), plt.xticks([]), plt.yticks([])
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
See the result:
|
||||||
|
|
||||||
|
![image](images/fft4.jpg)
|
||||||
|
|
||||||
|
@note As usual, OpenCV functions **cv2.dft()** and **cv2.idft()** are faster than Numpy
|
||||||
|
counterparts. But Numpy functions are more user-friendly. For more details about performance issues,
|
||||||
|
see below section.
|
||||||
|
|
||||||
|
Performance Optimization of DFT
|
||||||
|
===============================
|
||||||
|
|
||||||
|
The performance of the DFT calculation is better for some array sizes. It is fastest when the array
size is a power of two. Arrays whose size is a product of 2's, 3's, and 5's are also processed
quite efficiently. So if you are worried about the performance of your code, you can modify the
size of the array to an optimal size (by padding zeros) before finding the DFT. For OpenCV, you
have to manually pad the zeros. But for Numpy, you specify the new size of the FFT calculation, and
it will automatically pad the zeros for you.
|
||||||
|
|
||||||
|
So how do we find this optimal size ? OpenCV provides a function, **cv2.getOptimalDFTSize()** for
|
||||||
|
this. It is applicable to both **cv2.dft()** and **np.fft.fft2()**. Let's check their performance
|
||||||
|
using IPython magic command %timeit.
|
||||||
|
@code{.py}
|
||||||
|
In [16]: img = cv2.imread('messi5.jpg',0)
|
||||||
|
In [17]: rows,cols = img.shape
|
||||||
|
In [18]: print rows,cols
|
||||||
|
342 548
|
||||||
|
|
||||||
|
In [19]: nrows = cv2.getOptimalDFTSize(rows)
|
||||||
|
In [20]: ncols = cv2.getOptimalDFTSize(cols)
|
||||||
|
In [21]: print nrows, ncols
|
||||||
|
360 576
|
||||||
|
@endcode
|
||||||
|
See, the size (342,548) is modified to (360, 576). Now let's pad it with zeros (for OpenCV) and find
|
||||||
|
their DFT calculation performance. You can do it by creating a new big zero array and copy the data
|
||||||
|
to it, or use **cv2.copyMakeBorder()**.
|
||||||
|
@code{.py}
|
||||||
|
nimg = np.zeros((nrows,ncols))
|
||||||
|
nimg[:rows,:cols] = img
|
||||||
|
@endcode
|
||||||
|
OR:
|
||||||
|
@code{.py}
|
||||||
|
right = ncols - cols
|
||||||
|
bottom = nrows - rows
|
||||||
|
bordertype = cv2.BORDER_CONSTANT #just to avoid line breakup in PDF file
|
||||||
|
nimg = cv2.copyMakeBorder(img,0,bottom,0,right,bordertype, value = 0)
|
||||||
|
@endcode
|
||||||
|
Now we calculate the DFT performance comparison of Numpy function:
|
||||||
|
@code{.py}
|
||||||
|
In [22]: %timeit fft1 = np.fft.fft2(img)
|
||||||
|
10 loops, best of 3: 40.9 ms per loop
|
||||||
|
In [23]: %timeit fft2 = np.fft.fft2(img,[nrows,ncols])
|
||||||
|
100 loops, best of 3: 10.4 ms per loop
|
||||||
|
@endcode
|
||||||
|
It shows a 4x speedup. Now we will try the same with OpenCV functions.
|
||||||
|
@code{.py}
|
||||||
|
In [24]: %timeit dft1= cv2.dft(np.float32(img),flags=cv2.DFT_COMPLEX_OUTPUT)
|
||||||
|
100 loops, best of 3: 13.5 ms per loop
|
||||||
|
In [27]: %timeit dft2= cv2.dft(np.float32(nimg),flags=cv2.DFT_COMPLEX_OUTPUT)
|
||||||
|
100 loops, best of 3: 3.11 ms per loop
|
||||||
|
@endcode
|
||||||
|
It also shows a 4x speed-up. You can also see that OpenCV functions are around 3x faster than Numpy
|
||||||
|
functions. This can be tested for inverse FFT also, and that is left as an exercise for you.
|
||||||
|
|
||||||
|
Why Laplacian is a High Pass Filter?
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
A similar question was asked in a forum: why is the Laplacian a high pass filter? Why is Sobel an
HPF? The first answer given was in terms of the Fourier Transform: just take the Fourier Transform
of each kernel at a larger FFT size and analyze it:
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
# simple averaging filter without scaling parameter
|
||||||
|
mean_filter = np.ones((3,3))
|
||||||
|
|
||||||
|
# creating a guassian filter
|
||||||
|
x = cv2.getGaussianKernel(5,10)
|
||||||
|
gaussian = x*x.T
|
||||||
|
|
||||||
|
# different edge detecting filters
|
||||||
|
# scharr in x-direction
|
||||||
|
scharr = np.array([[-3, 0, 3],
|
||||||
|
[-10,0,10],
|
||||||
|
[-3, 0, 3]])
|
||||||
|
# sobel in x direction
|
||||||
|
sobel_x= np.array([[-1, 0, 1],
|
||||||
|
[-2, 0, 2],
|
||||||
|
[-1, 0, 1]])
|
||||||
|
# sobel in y direction
|
||||||
|
sobel_y= np.array([[-1,-2,-1],
|
||||||
|
[0, 0, 0],
|
||||||
|
[1, 2, 1]])
|
||||||
|
# laplacian
|
||||||
|
laplacian=np.array([[0, 1, 0],
|
||||||
|
[1,-4, 1],
|
||||||
|
[0, 1, 0]])
|
||||||
|
|
||||||
|
filters = [mean_filter, gaussian, laplacian, sobel_x, sobel_y, scharr]
|
||||||
|
filter_name = ['mean_filter', 'gaussian','laplacian', 'sobel_x', \
|
||||||
|
'sobel_y', 'scharr_x']
|
||||||
|
fft_filters = [np.fft.fft2(x) for x in filters]
|
||||||
|
fft_shift = [np.fft.fftshift(y) for y in fft_filters]
|
||||||
|
mag_spectrum = [np.log(np.abs(z)+1) for z in fft_shift]
|
||||||
|
|
||||||
|
for i in xrange(6):
|
||||||
|
plt.subplot(2,3,i+1),plt.imshow(mag_spectrum[i],cmap = 'gray')
|
||||||
|
plt.title(filter_name[i]), plt.xticks([]), plt.yticks([])
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
See the result:
|
||||||
|
|
||||||
|
![image](images/fft5.jpg)
|
||||||
|
|
||||||
|
From the image, you can see what frequency region each kernel blocks and what region it passes.
From that information, we can tell why each kernel is an HPF or an LPF.
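A quick numerical check of the same idea: a kernel's response at zero frequency is simply the sum of
its coefficients, so a kernel whose coefficients sum to zero suppresses the DC (lowest-frequency)
component. A minimal sketch reusing the kernels defined above:
@code{.py}
# DC response of each kernel = value of its transform at zero frequency = sum of its coefficients
for name, kern in zip(filter_name, filters):
    print name, np.sum(kern)
# mean_filter and gaussian have a non-zero sum (they pass low frequencies), while
# laplacian, sobel and scharr sum to zero (they suppress low frequencies)
@endcode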
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# [An Intuitive Explanation of Fourier
   Theory](http://cns-alumni.bu.edu/~slehar/fourier/fourier.html) by Steven Lehar
-# [Fourier Transform](http://homepages.inf.ed.ac.uk/rbf/HIPR2/fourier.htm) at HIPR
-# [What does frequency domain denote in case of images?](http://dsp.stackexchange.com/q/1637/818)
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
@ -0,0 +1,5 @@
|
|||||||
|
Image Transforms in OpenCV {#tutorial_py_table_of_contents_transforms}
|
||||||
|
==========================
|
||||||
|
|
||||||
|
- @subpage tutorial_py_fourier_transform
|
||||||
|
Learn to find the Fourier Transform of images
|
148
doc/py_tutorials/py_imgproc/py_watershed/py_watershed.markdown
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
Image Segmentation with Watershed Algorithm {#tutorial_py_watershed}
|
||||||
|
===========================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
- We will learn to use marker-based image segmentation using watershed algorithm
|
||||||
|
- We will see: **cv2.watershed()**
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
Any grayscale image can be viewed as a topographic surface where high intensity denotes peaks and
|
||||||
|
hills while low intensity denotes valleys. You start filling every isolated valleys (local minima)
|
||||||
|
with different colored water (labels). As the water rises, depending on the peaks (gradients)
|
||||||
|
nearby, water from different valleys, obviously with different colors will start to merge. To avoid
|
||||||
|
that, you build barriers in the locations where water merges. You continue the work of filling water
|
||||||
|
and building barriers until all the peaks are under water. Then the barriers you created gives you
|
||||||
|
the segmentation result. This is the "philosophy" behind the watershed. You can visit the [CMM
|
||||||
|
webpage on watershed](http://cmm.ensmp.fr/~beucher/wtshed.html) to understand it with the help of
|
||||||
|
some animations.
|
||||||
|
|
||||||
|
But this approach gives you an oversegmented result due to noise or other irregularities in the
image. So OpenCV implements a marker-based watershed algorithm where you specify which valley
points are to be merged and which are not. It is an interactive image segmentation. What we do is
give different labels to the objects we know. Label the region which we are sure is the foreground
or object with one color (or intensity), label the region which we are sure is the background or
non-object with another color, and finally label the region we are not sure about with 0. That is
our marker. Then apply the watershed algorithm. Our marker will be updated with the labels we gave,
and the boundaries of objects will have a value of -1.
|
||||||
|
|
||||||
|
Code
|
||||||
|
----
|
||||||
|
|
||||||
|
Below we will see an example on how to use the Distance Transform along with watershed to segment
|
||||||
|
mutually touching objects.
|
||||||
|
|
||||||
|
Consider the coins image below, the coins are touching each other. Even if you threshold it, it will
|
||||||
|
be touching each other.
|
||||||
|
|
||||||
|
![image](images/water_coins.jpg)
|
||||||
|
|
||||||
|
We start with finding an approximate estimate of the coins. For that, we can use the Otsu's
|
||||||
|
binarization.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('coins.png')
|
||||||
|
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
|
||||||
|
ret, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
|
||||||
|
@endcode
|
||||||
|
Result:
|
||||||
|
|
||||||
|
![image](images/water_thresh.jpg)
|
||||||
|
|
||||||
|
Now we need to remove any small white noise in the image. For that we can use morphological
opening. To remove any small holes in the objects, we can use morphological closing. So now we know
for sure that the regions near the centers of the objects are foreground and the regions far away
from the objects are background. The only region we are not sure about is the boundary region of
the coins.
|
||||||
|
|
||||||
|
So we need to extract the area which we are sure is coins. Erosion removes the boundary pixels, so
whatever remains, we can be sure it is a coin. That would work if the objects were not touching
each other. But since they are touching, another good option is to find the distance transform and
apply a proper threshold. Next we need to find the area which we are sure is not coins. For that,
we dilate the result. Dilation grows the object boundary towards the background. This way, we can
make sure that whatever is background in the result is really background, since the boundary region
has been removed. See the image below.
|
||||||
|
|
||||||
|
![image](images/water_fgbg.jpg)
|
||||||
|
|
||||||
|
The remaining regions are those which we don't have any idea, whether it is coins or background.
|
||||||
|
Watershed algorithm should find it. These areas are normally around the boundaries of coins where
|
||||||
|
foreground and background meet (Or even two different coins meet). We call it border. It can be
|
||||||
|
obtained from subtracting sure_fg area from sure_bg area.
|
||||||
|
@code{.py}
|
||||||
|
# noise removal
|
||||||
|
kernel = np.ones((3,3),np.uint8)
|
||||||
|
opening = cv2.morphologyEx(thresh,cv2.MORPH_OPEN,kernel, iterations = 2)
|
||||||
|
|
||||||
|
# sure background area
|
||||||
|
sure_bg = cv2.dilate(opening,kernel,iterations=3)
|
||||||
|
|
||||||
|
# Finding sure foreground area
|
||||||
|
dist_transform = cv2.distanceTransform(opening,cv2.DIST_L2,5)
|
||||||
|
ret, sure_fg = cv2.threshold(dist_transform,0.7*dist_transform.max(),255,0)
|
||||||
|
|
||||||
|
# Finding unknown region
|
||||||
|
sure_fg = np.uint8(sure_fg)
|
||||||
|
unknown = cv2.subtract(sure_bg,sure_fg)
|
||||||
|
@endcode
|
||||||
|
See the result. In the thresholded image, we get some regions of coins which we are sure of coins
|
||||||
|
and they are detached now. (In some cases, you may be interested in only foreground segmentation,
|
||||||
|
not in separating the mutually touching objects. In that case, you need not use distance transform,
|
||||||
|
just erosion is sufficient. Erosion is just another method to extract sure foreground area, that's
|
||||||
|
all.)
|
||||||
|
|
||||||
|
![image](images/water_dt.jpg)
|
||||||
|
|
||||||
|
Now we know for sure which are region of coins, which are background and all. So we create marker
|
||||||
|
(it is an array of same size as that of original image, but with int32 datatype) and label the
|
||||||
|
regions inside it. The regions we know for sure (whether foreground or background) are labelled with
|
||||||
|
any positive integers, but different integers, and the area we don't know for sure are just left as
|
||||||
|
zero. For this we use **cv2.connectedComponents()**. It labels background of the image with 0, then
|
||||||
|
other objects are labelled with integers starting from 1.
|
||||||
|
|
||||||
|
But we know that if background is marked with 0, watershed will consider it as unknown area. So we
|
||||||
|
want to mark it with different integer. Instead, we will mark unknown region, defined by unknown,
|
||||||
|
with 0.
|
||||||
|
@code{.py}
|
||||||
|
# Marker labelling
|
||||||
|
ret, markers = cv2.connectedComponents(sure_fg)
|
||||||
|
|
||||||
|
# Add one to all labels so that sure background is not 0, but 1
|
||||||
|
markers = markers+1
|
||||||
|
|
||||||
|
# Now, mark the region of unknown with zero
|
||||||
|
markers[unknown==255] = 0
|
||||||
|
@endcode
|
||||||
|
See the result shown in JET colormap. The dark blue region shows unknown region. Sure coins are
|
||||||
|
colored with different values. Remaining area which are sure background are shown in lighter blue
|
||||||
|
compared to unknown region.
|
||||||
|
|
||||||
|
![image](images/water_marker.jpg)
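If you want to reproduce this visualization yourself, a minimal Matplotlib sketch (assuming the
`markers` array from the code above) is:
@code{.py}
# show the int32 marker image in the JET colormap
plt.imshow(markers, cmap='jet')
plt.colorbar(), plt.xticks([]), plt.yticks([])
plt.show()
@endcode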
|
||||||
|
|
||||||
|
Now our marker is ready. It is time for final step, apply watershed. Then marker image will be
|
||||||
|
modified. The boundary region will be marked with -1.
|
||||||
|
@code{.py}
|
||||||
|
markers = cv2.watershed(img,markers)
|
||||||
|
img[markers == -1] = [255,0,0]
|
||||||
|
@endcode
|
||||||
|
See the result below. For some coins, the region where they touch are segmented properly and for
|
||||||
|
some, they are not.
|
||||||
|
|
||||||
|
![image](images/water_result.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# CMM page on [Watershed Tranformation](http://cmm.ensmp.fr/~beucher/wtshed.html)
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
||||||
|
|
||||||
|
-# OpenCV samples has an interactive sample on watershed segmentation, watershed.py. Run it, Enjoy
|
||||||
|
it, then learn it.
|
10
doc/py_tutorials/py_ml/py_kmeans/py_kmeans_index.markdown
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
K-Means Clustering {#tutorial_py_kmeans_index}
|
||||||
|
==================
|
||||||
|
|
||||||
|
- @subpage tutorial_py_kmeans_understanding
|
||||||
|
|
||||||
|
Read to get an intuitive understanding of K-Means Clustering
|
||||||
|
|
||||||
|
- @subpage tutorial_py_kmeans_opencv
|
||||||
|
|
||||||
|
Now let's try K-Means functions in OpenCV
|
@ -0,0 +1,194 @@
|
|||||||
|
K-Means Clustering in OpenCV {#tutorial_py_kmeans_opencv}
|
||||||
|
============================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
- Learn to use **cv2.kmeans()** function in OpenCV for data clustering
|
||||||
|
|
||||||
|
Understanding Parameters
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
### Input parameters
|
||||||
|
|
||||||
|
-# **samples** : It should be of **np.float32** data type, and each feature should be put in a
|
||||||
|
single column.
|
||||||
|
-# **nclusters(K)** : Number of clusters required at end
|
||||||
|
-# **criteria** : It is the iteration termination criteria. When this criteria is satisfied, algorithm iteration stops. Actually, it should be a tuple of 3 parameters. They are \`( type, max_iter, epsilon )\`:
|
||||||
|
-# type of termination criteria. It has 3 flags as below:
|
||||||
|
- **cv2.TERM_CRITERIA_EPS** - stop the algorithm iteration if specified accuracy, *epsilon*, is reached.
|
||||||
|
- **cv2.TERM_CRITERIA_MAX_ITER** - stop the algorithm after the specified number of iterations, *max_iter*.
|
||||||
|
- **cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER** - stop the iteration when any of the above condition is met.
|
||||||
|
-# max_iter - An integer specifying maximum number of iterations.
|
||||||
|
-# epsilon - Required accuracy
|
||||||
|
|
||||||
|
-# **attempts** : Number of times the algorithm is executed using different initial labellings.
The algorithm returns the labels that yield the best compactness. This compactness is returned as
output.
|
||||||
|
-# **flags** : This flag is used to specify how initial centers are taken. Normally two flags are
|
||||||
|
used for this : **cv2.KMEANS_PP_CENTERS** and **cv2.KMEANS_RANDOM_CENTERS**.
|
||||||
|
|
||||||
|
### Output parameters
|
||||||
|
|
||||||
|
-# **compactness** : It is the sum of squared distance from each point to their corresponding
|
||||||
|
centers.
|
||||||
|
-# **labels** : This is the label array (same as 'code' in previous article) where each element
|
||||||
|
marked '0', '1'.....
|
||||||
|
-# **centers** : This is array of centers of clusters.
|
||||||
|
|
||||||
|
Now we will see how to apply K-Means algorithm with three examples.
|
||||||
|
|
||||||
|
1. Data with Only One Feature
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
Consider, you have a set of data with only one feature, ie one-dimensional. For eg, we can take our
|
||||||
|
t-shirt problem where you use only height of people to decide the size of t-shirt.
|
||||||
|
|
||||||
|
So we start by creating data and plot it in Matplotlib
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
x = np.random.randint(25,100,25)
|
||||||
|
y = np.random.randint(175,255,25)
|
||||||
|
z = np.hstack((x,y))
|
||||||
|
z = z.reshape((50,1))
|
||||||
|
z = np.float32(z)
|
||||||
|
plt.hist(z,256,[0,256]),plt.show()
|
||||||
|
@endcode
|
||||||
|
So we have 'z' which is an array of size 50, and values ranging from 0 to 255. I have reshaped 'z'
|
||||||
|
to a column vector. It will be more useful when more than one features are present. Then I made data
|
||||||
|
of np.float32 type.
|
||||||
|
|
||||||
|
We get following image :
|
||||||
|
|
||||||
|
![image](images/oc_1d_testdata.png)
|
||||||
|
|
||||||
|
Now we apply the KMeans function. Before that we need to specify the criteria. My criteria is such
|
||||||
|
that, whenever 10 iterations of algorithm is ran, or an accuracy of epsilon = 1.0 is reached, stop
|
||||||
|
the algorithm and return the answer.
|
||||||
|
@code{.py}
|
||||||
|
# Define criteria = ( type, max_iter = 10 , epsilon = 1.0 )
|
||||||
|
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
|
||||||
|
|
||||||
|
# Set flags (Just to avoid line break in the code)
|
||||||
|
flags = cv2.KMEANS_RANDOM_CENTERS
|
||||||
|
|
||||||
|
# Apply KMeans
|
||||||
|
compactness,labels,centers = cv2.kmeans(z,2,None,criteria,10,flags)
|
||||||
|
@endcode
|
||||||
|
This gives us the compactness, labels and centers. In this case, I got centers as 60 and 207. Labels
|
||||||
|
will have the same size as that of test data where each data will be labelled as '0','1','2' etc.
|
||||||
|
depending on their centroids. Now we split the data to different clusters depending on their labels.
|
||||||
|
@code{.py}
|
||||||
|
A = z[labels==0]
|
||||||
|
B = z[labels==1]
|
||||||
|
@endcode
|
||||||
|
Now we plot A in Red color and B in Blue color and their centroids in Yellow color.
|
||||||
|
@code{.py}
|
||||||
|
# Now plot 'A' in red, 'B' in blue, 'centers' in yellow
|
||||||
|
plt.hist(A,256,[0,256],color = 'r')
|
||||||
|
plt.hist(B,256,[0,256],color = 'b')
|
||||||
|
plt.hist(centers,32,[0,256],color = 'y')
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
Below is the output we got:
|
||||||
|
|
||||||
|
![image](images/oc_1d_clustered.png)
|
||||||
|
|
||||||
|
2. Data with Multiple Features
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
In previous example, we took only height for t-shirt problem. Here, we will take both height and
|
||||||
|
weight, ie two features.
|
||||||
|
|
||||||
|
Remember, in previous case, we made our data to a single column vector. Each feature is arranged in
|
||||||
|
a column, while each row corresponds to an input test sample.
|
||||||
|
|
||||||
|
For example, in this case, we set a test data of size 50x2, which are heights and weights of 50
|
||||||
|
people. First column corresponds to height of all the 50 people and second column corresponds to
|
||||||
|
their weights. First row contains two elements where first one is the height of first person and
|
||||||
|
second one his weight. Similarly remaining rows corresponds to heights and weights of other people.
|
||||||
|
Check image below:
|
||||||
|
|
||||||
|
![image](images/oc_feature_representation.jpg)
|
||||||
|
|
||||||
|
Now I am directly moving to the code:
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
X = np.random.randint(25,50,(25,2))
|
||||||
|
Y = np.random.randint(60,85,(25,2))
|
||||||
|
Z = np.vstack((X,Y))
|
||||||
|
|
||||||
|
# convert to np.float32
|
||||||
|
Z = np.float32(Z)
|
||||||
|
|
||||||
|
# define criteria and apply kmeans()
|
||||||
|
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
|
||||||
|
ret,label,center=cv2.kmeans(Z,2,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)
|
||||||
|
|
||||||
|
# Now separate the data, Note the flatten()
|
||||||
|
A = Z[label.ravel()==0]
|
||||||
|
B = Z[label.ravel()==1]
|
||||||
|
|
||||||
|
# Plot the data
|
||||||
|
plt.scatter(A[:,0],A[:,1])
|
||||||
|
plt.scatter(B[:,0],B[:,1],c = 'r')
|
||||||
|
plt.scatter(center[:,0],center[:,1],s = 80,c = 'y', marker = 's')
|
||||||
|
plt.xlabel('Height'),plt.ylabel('Weight')
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
Below is the output we get:
|
||||||
|
|
||||||
|
![image](images/oc_2d_clustered.jpg)
|
||||||
|
|
||||||
|
3. Color Quantization
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Color Quantization is the process of reducing number of colors in an image. One reason to do so is
|
||||||
|
to reduce the memory. Sometimes, some devices may have limitation such that it can produce only
|
||||||
|
limited number of colors. In those cases also, color quantization is performed. Here we use k-means
|
||||||
|
clustering for color quantization.
|
||||||
|
|
||||||
|
There is nothing new to be explained here. There are 3 features, say, R,G,B. So we need to reshape
|
||||||
|
the image to an array of Mx3 size (M is number of pixels in image). And after the clustering, we
|
||||||
|
apply centroid values (it is also R,G,B) to all pixels, such that resulting image will have
|
||||||
|
specified number of colors. And again we need to reshape it back to the shape of original image.
|
||||||
|
Below is the code:
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
img = cv2.imread('home.jpg')
|
||||||
|
Z = img.reshape((-1,3))
|
||||||
|
|
||||||
|
# convert to np.float32
|
||||||
|
Z = np.float32(Z)
|
||||||
|
|
||||||
|
# define criteria, number of clusters(K) and apply kmeans()
|
||||||
|
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
|
||||||
|
K = 8
|
||||||
|
ret,label,center=cv2.kmeans(Z,K,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)
|
||||||
|
|
||||||
|
# Now convert back into uint8, and make original image
|
||||||
|
center = np.uint8(center)
|
||||||
|
res = center[label.flatten()]
|
||||||
|
res2 = res.reshape((img.shape))
|
||||||
|
|
||||||
|
cv2.imshow('res2',res2)
|
||||||
|
cv2.waitKey(0)
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
@endcode
|
||||||
|
See the result below for K=8:
|
||||||
|
|
||||||
|
![image](images/oc_color_quantization.jpg)
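As a follow-up, the same steps can be wrapped into a small helper to compare several values of K
side by side (a sketch reusing the code above; the output filenames are just for illustration):
@code{.py}
def quantize(img, K):
    Z = np.float32(img.reshape((-1,3)))
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    ret,label,center = cv2.kmeans(Z,K,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)
    center = np.uint8(center)
    return center[label.flatten()].reshape(img.shape)

# write one quantized image per K for visual comparison
for K in (2, 4, 8, 16):
    cv2.imwrite('quantized_%d.jpg' % K, quantize(img, K))
@endcode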
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
@ -0,0 +1,85 @@
|
|||||||
|
Understanding K-Means Clustering {#tutorial_py_kmeans_understanding}
|
||||||
|
================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter, we will understand the concepts of K-Means Clustering, how it works etc.
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
We will deal this with an example which is commonly used.
|
||||||
|
|
||||||
|
### T-shirt size problem
|
||||||
|
|
||||||
|
Consider a company, which is going to release a new model of T-shirt to market. Obviously they will
|
||||||
|
have to manufacture models in different sizes to satisfy people of all sizes. So the company make a
|
||||||
|
data of people's height and weight, and plot them on to a graph, as below:
|
||||||
|
|
||||||
|
![image](images/tshirt.jpg)
|
||||||
|
|
||||||
|
Company can't create t-shirts with all the sizes. Instead, they divide people to Small, Medium and
|
||||||
|
Large, and manufacture only these 3 models which will fit into all the people. This grouping of
|
||||||
|
people into three groups can be done by k-means clustering, and algorithm provides us best 3 sizes,
|
||||||
|
which will satisfy all the people. And if it doesn't, company can divide people to more groups, may
|
||||||
|
be five, and so on. Check image below :
|
||||||
|
|
||||||
|
![image](images/tshirt_grouped.jpg)
|
||||||
|
|
||||||
|
### How does it work ?
|
||||||
|
|
||||||
|
This algorithm is an iterative process. We will explain it step-by-step with the help of images.
|
||||||
|
|
||||||
|
Consider a set of data as below ( You can consider it as t-shirt problem). We need to cluster this
|
||||||
|
data into two groups.
|
||||||
|
|
||||||
|
![image](images/testdata.jpg)
|
||||||
|
|
||||||
|
**Step : 1** - Algorithm randomly chooses two centroids, \f$C1\f$ and \f$C2\f$ (sometimes, any two data are
|
||||||
|
taken as the centroids).
|
||||||
|
|
||||||
|
**Step : 2** - It calculates the distance from each point to both centroids. If a test data is more
|
||||||
|
closer to \f$C1\f$, then that data is labelled with '0'. If it is closer to \f$C2\f$, then labelled as '1'
|
||||||
|
(If more centroids are there, labelled as '2','3' etc).
|
||||||
|
|
||||||
|
In our case, we will color all '0' labelled with red, and '1' labelled with blue. So we get
|
||||||
|
following image after above operations.
|
||||||
|
|
||||||
|
![image](images/initial_labelling.jpg)
|
||||||
|
|
||||||
|
**Step : 3** - Next we calculate the average of all blue points and red points separately and that
|
||||||
|
will be our new centroids. That is \f$C1\f$ and \f$C2\f$ shift to newly calculated centroids. (Remember, the
|
||||||
|
images shown are not true values and not to true scale, it is just for demonstration only).
|
||||||
|
|
||||||
|
And again, perform step 2 with new centroids and label data to '0' and '1'.
|
||||||
|
|
||||||
|
So we get result as below :
|
||||||
|
|
||||||
|
![image](images/update_centroid.jpg)
|
||||||
|
|
||||||
|
Now **Step - 2** and **Step - 3** are iterated until both centroids are converged to fixed points.
|
||||||
|
*(Or it may be stopped depending on the criteria we provide, like maximum number of iterations, or a
|
||||||
|
specific accuracy is reached etc.)* **These points are such that sum of distances between test data
|
||||||
|
and their corresponding centroids are minimum**. Or simply, sum of distances between
|
||||||
|
\f$C1 \leftrightarrow Red\_Points\f$ and \f$C2 \leftrightarrow Blue\_Points\f$ is minimum.
|
||||||
|
|
||||||
|
\f[minimize \;\bigg[J = \sum_{All\: Red\_Points}distance(C1,Red\_Point) + \sum_{All\: Blue\_Points}distance(C2,Blue\_Point)\bigg]\f]
|
||||||
|
|
||||||
|
Final result almost looks like below :
|
||||||
|
|
||||||
|
![image](images/final_clusters.jpg)
|
||||||
|
|
||||||
|
So this is just an intuitive understanding of K-Means Clustering. For more details and mathematical
|
||||||
|
explanation, please read any standard machine learning textbooks or check links in additional
|
||||||
|
resources. It is just a top layer of K-Means clustering. There are a lot of modifications to this
|
||||||
|
algorithm like, how to choose the initial centroids, how to speed up the iteration process etc.
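To make Steps 2 and 3 concrete, here is a minimal NumPy sketch of the assignment/update loop on
some made-up one-dimensional data (an illustration of the idea only, not OpenCV's implementation):
@code{.py}
import numpy as np

# made-up 1-D data and two initial centroids
data = np.array([1., 2., 3., 10., 11., 12.])
C = np.array([2., 5.])

for i in range(10):
    # Step 2 : label each point with the index of its nearest centroid
    labels = np.argmin(np.abs(data[:,None] - C[None,:]), axis=1)
    # Step 3 : move each centroid to the mean of the points assigned to it
    C = np.array([data[labels==k].mean() for k in range(len(C))])

print C    # converges to roughly [ 2. 11.]
@endcode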
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# [Machine Learning Course](https://www.coursera.org/course/ml), Video lectures by Prof. Andrew Ng
|
||||||
|
(Some of the images are taken from this)
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
10
doc/py_tutorials/py_ml/py_knn/py_knn_index.markdown
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
K-Nearest Neighbour {#tutorial_py_knn_index}
|
||||||
|
===================
|
||||||
|
|
||||||
|
- @subpage tutorial_py_knn_understanding
|
||||||
|
|
||||||
|
Get a basic understanding of what kNN is
|
||||||
|
|
||||||
|
- @subpage tutorial_py_knn_opencv
|
||||||
|
|
||||||
|
Now let's use kNN in OpenCV for digit recognition OCR
|
@ -0,0 +1,121 @@
|
|||||||
|
OCR of Hand-written Data using kNN {#tutorial_py_knn_opencv}
|
||||||
|
==================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter
|
||||||
|
- We will use our knowledge on kNN to build a basic OCR application.
|
||||||
|
- We will try with Digits and Alphabets data available that comes with OpenCV.
|
||||||
|
|
||||||
|
OCR of Hand-written Digits
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Our goal is to build an application which can read the handwritten digits. For this we need some
|
||||||
|
train_data and test_data. OpenCV comes with an image digits.png (in the folder
|
||||||
|
opencv/samples/python2/data/) which has 5000 handwritten digits (500 for each digit). Each digit is
|
||||||
|
a 20x20 image. So our first step is to split this image into 5000 different digits. For each digit,
|
||||||
|
we flatten it into a single row with 400 pixels. That is our feature set, ie intensity values of all
|
||||||
|
pixels. It is the simplest feature set we can create. We use first 250 samples of each digit as
|
||||||
|
train_data, and next 250 samples as test_data. So let's prepare them first.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
img = cv2.imread('digits.png')
|
||||||
|
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
|
# Now we split the image to 5000 cells, each 20x20 size
|
||||||
|
cells = [np.hsplit(row,100) for row in np.vsplit(gray,50)]
|
||||||
|
|
||||||
|
# Make it into a Numpy array. It size will be (50,100,20,20)
|
||||||
|
x = np.array(cells)
|
||||||
|
|
||||||
|
# Now we prepare train_data and test_data.
|
||||||
|
train = x[:,:50].reshape(-1,400).astype(np.float32) # Size = (2500,400)
|
||||||
|
test = x[:,50:100].reshape(-1,400).astype(np.float32) # Size = (2500,400)
|
||||||
|
|
||||||
|
# Create labels for train and test data
|
||||||
|
k = np.arange(10)
|
||||||
|
train_labels = np.repeat(k,250)[:,np.newaxis]
|
||||||
|
test_labels = train_labels.copy()
|
||||||
|
|
||||||
|
# Initiate kNN, train the data, then test it with test data for k=1
|
||||||
|
knn = cv2.KNearest()
|
||||||
|
knn.train(train,train_labels)
|
||||||
|
ret,result,neighbours,dist = knn.find_nearest(test,k=5)
|
||||||
|
|
||||||
|
# Now we check the accuracy of classification
|
||||||
|
# For that, compare the result with test_labels and check which are wrong
|
||||||
|
matches = result==test_labels
|
||||||
|
correct = np.count_nonzero(matches)
|
||||||
|
accuracy = correct*100.0/result.size
|
||||||
|
print accuracy
|
||||||
|
@endcode
|
||||||
|
So our basic OCR app is ready. This particular example gave me an accuracy of 91%. One option to
improve accuracy is to add more data for training, especially the misclassified samples. Instead of
building this training data every time I start the application, it is better to save it, so that
next time I can read it directly from a file and start classification. You can do this with Numpy
functions like np.savetxt, np.savez, np.load, etc. Please check their docs for more details.
|
||||||
|
@code{.py}
|
||||||
|
# save the data
|
||||||
|
np.savez('knn_data.npz',train=train, train_labels=train_labels)
|
||||||
|
|
||||||
|
# Now load the data
|
||||||
|
with np.load('knn_data.npz') as data:
|
||||||
|
print data.files
|
||||||
|
train = data['train']
|
||||||
|
train_labels = data['train_labels']
|
||||||
|
@endcode
|
||||||
|
In my system, it takes around 4.4 MB of memory. Since we are using intensity values (uint8 data) as
|
||||||
|
features, it would be better to convert the data to np.uint8 first and then save it. It takes only
|
||||||
|
1.1 MB in this case. Then while loading, you can convert back into float32.
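A possible sketch of that space-saving variant, assuming the `train` and `train_labels` arrays from
the code above:
@code{.py}
# save as uint8 - the pixel intensities fit in 8 bits, so the file is much smaller
np.savez('knn_data.npz', train=np.uint8(train), train_labels=train_labels)

# load and convert back to float32 before passing it to kNN
with np.load('knn_data.npz') as data:
    train = data['train'].astype(np.float32)
    train_labels = data['train_labels']
@endcode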
|
||||||
|
|
||||||
|
OCR of English Alphabets
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
Next we will do the same for English alphabets, but there is a slight change in data and feature
|
||||||
|
set. Here, instead of images, OpenCV comes with a data file, letter-recognition.data in
|
||||||
|
opencv/samples/cpp/ folder. If you open it, you will see 20000 lines which may, on first sight, look
|
||||||
|
like garbage. Actually, in each row, first column is an alphabet which is our label. Next 16 numbers
|
||||||
|
following it are its different features. These features are obtained from [UCI Machine Learning
|
||||||
|
Repository](http://archive.ics.uci.edu/ml/). You can find the details of these features in [this
|
||||||
|
page](http://archive.ics.uci.edu/ml/datasets/Letter+Recognition).
|
||||||
|
|
||||||
|
There are 20000 samples available, so we take first 10000 data as training samples and remaining
|
||||||
|
10000 as test samples. We should change the alphabets to ascii characters because we can't work with
|
||||||
|
alphabets directly.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Load the data, converters convert the letter to a number
|
||||||
|
data= np.loadtxt('letter-recognition.data', dtype= 'float32', delimiter = ',',
|
||||||
|
converters= {0: lambda ch: ord(ch)-ord('A')})
|
||||||
|
|
||||||
|
# split the data to two, 10000 each for train and test
|
||||||
|
train, test = np.vsplit(data,2)
|
||||||
|
|
||||||
|
# split trainData and testData to features and responses
|
||||||
|
responses, trainData = np.hsplit(train,[1])
|
||||||
|
labels, testData = np.hsplit(test,[1])
|
||||||
|
|
||||||
|
# Initiate the kNN, classify, measure accuracy.
|
||||||
|
knn = cv2.KNearest()
|
||||||
|
knn.train(trainData, responses)
|
||||||
|
ret, result, neighbours, dist = knn.find_nearest(testData, k=5)
|
||||||
|
|
||||||
|
correct = np.count_nonzero(result == labels)
|
||||||
|
accuracy = correct*100.0/10000
|
||||||
|
print accuracy
|
||||||
|
@endcode
|
||||||
|
It gives me an accuracy of 93.22%. Again, if you want to increase accuracy, you can iteratively add
|
||||||
|
error data in each level.
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
@ -0,0 +1,153 @@
|
|||||||
|
Understanding k-Nearest Neighbour {#tutorial_py_knn_understanding}
|
||||||
|
=================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter, we will understand the concepts of k-Nearest Neighbour (kNN) algorithm.
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
kNN is one of the simplest of classification algorithms available for supervised learning. The idea
|
||||||
|
is to search for closest match of the test data in feature space. We will look into it with below
|
||||||
|
image.
|
||||||
|
|
||||||
|
![image](images/knn_theory.png)
|
||||||
|
|
||||||
|
In the image, there are two families, Blue Squares and Red Triangles. We call each family as
|
||||||
|
**Class**. Their houses are shown in their town map which we call feature space. *(You can consider
|
||||||
|
a feature space as a space where all data are projected. For example, consider a 2D coordinate
|
||||||
|
space. Each data has two features, x and y coordinates. You can represent this data in your 2D
|
||||||
|
coordinate space, right? Now imagine if there are three features, you need 3D space. Now consider N
|
||||||
|
features, where you need N-dimensional space, right? This N-dimensional space is its feature space.
|
||||||
|
In our image, you can consider it as a 2D case with two features)*.
|
||||||
|
|
||||||
|
Now a new member comes into the town and creates a new home, which is shown as green circle. He
|
||||||
|
should be added to one of these Blue/Red families. We call that process, **Classification**. What we
|
||||||
|
do? Since we are dealing with kNN, let us apply this algorithm.
|
||||||
|
|
||||||
|
One method is to check who is his nearest neighbour. From the image, it is clear it is the Red
|
||||||
|
Triangle family. So he is also added into Red Triangle. This method is called simply **Nearest
|
||||||
|
Neighbour**, because classification depends only on the nearest neighbour.
|
||||||
|
|
||||||
|
But there is a problem with that. Red Triangle may be the nearest. But what if there are lot of Blue
|
||||||
|
Squares near to him? Then Blue Squares have more strength in that locality than Red Triangle. So
|
||||||
|
just checking nearest one is not sufficient. Instead we check some k nearest families. Then whoever
|
||||||
|
is majority in them, the new guy belongs to that family. In our image, let's take k=3, ie 3 nearest
|
||||||
|
families. He has two Red and one Blue (there are two Blues equidistant, but since k=3, we take only
|
||||||
|
one of them), so again he should be added to Red family. But what if we take k=7? Then he has 5 Blue
|
||||||
|
families and 2 Red families. Great!! Now he should be added to Blue family. So it all changes with
|
||||||
|
the value of k. More interestingly, what if k = 4? He has 2 Red and 2 Blue neighbours. It is a tie!
So it is better to take k as an odd number. This method is called **k-Nearest Neighbour**, since
classification depends on the k nearest neighbours.
|
||||||
|
|
||||||
|
Again, in kNN, it is true we are considering k neighbours, but we are giving equal importance to
|
||||||
|
all, right? Is it justice? For example, take the case of k=4. We told it is a tie. But see, the 2
|
||||||
|
Red families are more closer to him than the other 2 Blue families. So he is more eligible to be
|
||||||
|
added to Red. So how do we mathematically explain that? We give some weights to each family
|
||||||
|
depending on their distance to the new-comer. For those who are near to him get higher weights while
|
||||||
|
those are far away get lower weights. Then we add total weights of each family separately. Whoever
|
||||||
|
gets highest total weights, new-comer goes to that family. This is called **modified kNN**.
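As a rough illustration of that weighting idea (made-up distances, not an OpenCV function), the
vote could be computed like this:
@code{.py}
import numpy as np

# hypothetical distances and labels of the k=4 nearest neighbours
dist   = np.array([1.0, 1.5, 4.0, 5.0])   # the two Reds are closer
labels = np.array([0,   0,   1,   1  ])   # 0 = Red, 1 = Blue

weights = 1.0/dist                         # nearer neighbours get larger weights
red_total  = weights[labels==0].sum()
blue_total = weights[labels==1].sum()
print 'Red' if red_total > blue_total else 'Blue'    # prints: Red
@endcode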
|
||||||
|
|
||||||
|
So what are some important things you see here?
|
||||||
|
|
||||||
|
- You need to have information about all the houses in town, right? Because, we have to check
|
||||||
|
the distance from new-comer to all the existing houses to find the nearest neighbour. If there
|
||||||
|
are plenty of houses and families, it takes lots of memory, and more time for calculation
|
||||||
|
also.
|
||||||
|
- There is almost zero time for any kind of training or preparation.
|
||||||
|
|
||||||
|
Now let's see it in OpenCV.
|
||||||
|
|
||||||
|
kNN in OpenCV
|
||||||
|
-------------
|
||||||
|
|
||||||
|
We will do a simple example here, with two families (classes), just like above. Then in the next
|
||||||
|
chapter, we will do an even better example.
|
||||||
|
|
||||||
|
So here, we label the Red family as **Class-0** (so denoted by 0) and Blue family as **Class-1**
|
||||||
|
(denoted by 1). We create 25 families or 25 training data, and label them either Class-0 or Class-1.
|
||||||
|
We do all these with the help of Random Number Generator in Numpy.
|
||||||
|
|
||||||
|
Then we plot it with the help of Matplotlib. Red families are shown as Red Triangles and Blue
|
||||||
|
families are shown as Blue Squares.
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Feature set containing (x,y) values of 25 known/training data
|
||||||
|
trainData = np.random.randint(0,100,(25,2)).astype(np.float32)
|
||||||
|
|
||||||
|
# Labels each one either Red or Blue with numbers 0 and 1
|
||||||
|
responses = np.random.randint(0,2,(25,1)).astype(np.float32)
|
||||||
|
|
||||||
|
# Take Red families and plot them
|
||||||
|
red = trainData[responses.ravel()==0]
|
||||||
|
plt.scatter(red[:,0],red[:,1],80,'r','^')
|
||||||
|
|
||||||
|
# Take Blue families and plot them
|
||||||
|
blue = trainData[responses.ravel()==1]
|
||||||
|
plt.scatter(blue[:,0],blue[:,1],80,'b','s')
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
You will get something similar to our first image. Since you are using random number generator, you
|
||||||
|
will be getting different data each time you run the code.
|
||||||
|
|
||||||
|
Next initiate the kNN algorithm and pass the trainData and responses to train the kNN (It constructs
|
||||||
|
a search tree).
|
||||||
|
|
||||||
|
Then we will bring one new-comer and classify him to a family with the help of kNN in OpenCV. Before
|
||||||
|
going to kNN, we need to know something on our test data (data of new comers). Our data should be a
|
||||||
|
floating point array with size \f$number \; of \; testdata \times number \; of \; features\f$. Then we
|
||||||
|
find the nearest neighbours of new-comer. We can specify how many neighbours we want. It returns:
|
||||||
|
|
||||||
|
-# The label given to new-comer depending upon the kNN theory we saw earlier. If you want Nearest
   Neighbour algorithm, just specify k=1 where k is the number of neighbours.
-# The labels of the k nearest neighbours.
-# Corresponding distances from new-comer to each nearest neighbour.
|
||||||
|
|
||||||
|
So let's see how it works. New comer is marked in green color.
|
||||||
|
@code{.py}
|
||||||
|
newcomer = np.random.randint(0,100,(1,2)).astype(np.float32)
|
||||||
|
plt.scatter(newcomer[:,0],newcomer[:,1],80,'g','o')
|
||||||
|
|
||||||
|
knn = cv2.KNearest()
|
||||||
|
knn.train(trainData,responses)
|
||||||
|
ret, results, neighbours ,dist = knn.find_nearest(newcomer, 3)
|
||||||
|
|
||||||
|
print "result: ", results,"\n"
|
||||||
|
print "neighbours: ", neighbours,"\n"
|
||||||
|
print "distance: ", dist
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
@endcode
|
||||||
|
I got the result as follows:
|
||||||
|
@code{.py}
|
||||||
|
result: [[ 1.]]
|
||||||
|
neighbours: [[ 1. 1. 1.]]
|
||||||
|
distance: [[ 53. 58. 61.]]
|
||||||
|
@endcode
|
||||||
|
It says our new-comer got 3 neighbours, all from Blue family. Therefore, he is labelled as Blue
|
||||||
|
family. It is obvious from plot below:
|
||||||
|
|
||||||
|
![image](images/knn_simple.png)
|
||||||
|
|
||||||
|
If you have large number of data, you can just pass it as array. Corresponding results are also
|
||||||
|
obtained as arrays.
|
||||||
|
@code{.py}
|
||||||
|
# 10 new comers
|
||||||
|
newcomers = np.random.randint(0,100,(10,2)).astype(np.float32)
|
||||||
|
ret, results, neighbours, dist = knn.find_nearest(newcomers, 3)
|
||||||
|
# The results also will contain 10 labels.
|
||||||
|
@endcode
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# [NPTEL notes on Pattern Recognition, Chapter
|
||||||
|
11](http://www.nptel.iitm.ac.in/courses/106108057/12)
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
@ -0,0 +1,135 @@
|
|||||||
|
Understanding SVM {#tutorial_py_svm_basics}
|
||||||
|
=================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter
|
||||||
|
- We will see an intuitive understanding of SVM
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
### Linearly Separable Data
|
||||||
|
|
||||||
|
Consider the image below which has two types of data, red and blue. In kNN, for a test data, we used
|
||||||
|
to measure its distance to all the training samples and take the one with minimum distance. It takes
|
||||||
|
plenty of time to measure all the distances and plenty of memory to store all the training-samples.
|
||||||
|
But considering the data given in image, should we need that much?
|
||||||
|
|
||||||
|
![image](images/svm_basics1.png)
|
||||||
|
|
||||||
|
Consider another idea. We find a line, \f$f(x)=ax_1+bx_2+c\f$ which divides both the data to two
|
||||||
|
regions. When we get a new test_data \f$X\f$, just substitute it in \f$f(x)\f$. If \f$f(X) > 0\f$, it belongs
|
||||||
|
to blue group, else it belongs to red group. We can call this line as **Decision Boundary**. It is
|
||||||
|
very simple and memory-efficient. Such data which can be divided into two with a straight line (or
|
||||||
|
hyperplanes in higher dimensions) is called **Linearly Separable**.
|
||||||
|
|
||||||
|
So in above image, you can see plenty of such lines are possible. Which one we will take? Very
|
||||||
|
intuitively we can say that the line should be passing as far as possible from all the points. Why?
|
||||||
|
Because there can be noise in the incoming data. This data should not affect the classification
|
||||||
|
accuracy. So taking a farthest line will provide more immunity against noise. So what SVM does is to
|
||||||
|
find a straight line (or hyperplane) with largest minimum distance to the training samples. See the
|
||||||
|
bold line in below image passing through the center.
|
||||||
|
|
||||||
|
![image](images/svm_basics2.png)
|
||||||
|
|
||||||
|
So to find this Decision Boundary, you need training data. Do you need all? NO. Just the ones which
|
||||||
|
are close to the opposite group are sufficient. In our image, they are the one blue filled circle
|
||||||
|
and two red filled squares. We can call them **Support Vectors** and the lines passing through them
|
||||||
|
are called **Support Planes**. They are adequate for finding our decision boundary. We need not
|
||||||
|
worry about all the data. It helps in data reduction.
|
||||||
|
|
||||||
|
What happened is, first two hyperplanes are found which best represents the data. For eg, blue data
|
||||||
|
is represented by \f$w^Tx+b_0 > 1\f$ while red data is represented by \f$w^Tx+b_0 < -1\f$ where \f$w\f$ is
|
||||||
|
**weight vector** ( \f$w=[w_1, w_2,..., w_n]\f$) and \f$x\f$ is the feature vector
|
||||||
|
(\f$x = [x_1,x_2,..., x_n]\f$). \f$b_0\f$ is the **bias**. Weight vector decides the orientation of decision
|
||||||
|
boundary while bias point decides its location. Now decision boundary is defined to be midway
|
||||||
|
between these hyperplanes, so expressed as \f$w^Tx+b_0 = 0\f$. The minimum distance from support vector
|
||||||
|
to the decision boundary is given by, \f$distance_{support \, vectors}=\frac{1}{||w||}\f$. Margin is
|
||||||
|
twice this distance, and we need to maximize this margin. i.e. we need to minimize a new function
|
||||||
|
\f$L(w, b_0)\f$ with some constraints which can expressed below:
|
||||||
|
|
||||||
|
\f[\min_{w, b_0} L(w, b_0) = \frac{1}{2}||w||^2 \; \text{subject to} \; t_i(w^Tx+b_0) \geq 1 \; \forall i\f]
|
||||||
|
|
||||||
|
where \f$t_i\f$ is the label of each class, \f$t_i \in [-1,1]\f$.
|
||||||
|
|
||||||
|
### Non-Linearly Separable Data
|
||||||
|
|
||||||
|
Consider some data which can't be divided in two with a straight line. For example, consider
one-dimensional data where 'X' is at -3 & +3 and 'O' is at -1 & +1. Clearly it is not linearly
separable. But there are methods to solve these kinds of problems. If we map this data set with the
function \f$f(x) = x^2\f$, we get 'X' at 9 and 'O' at 1, which are linearly separable.
|
||||||
|
|
||||||
|
Otherwise we can convert this one-dimensional to two-dimensional data. We can use \f$f(x)=(x,x^2)\f$
|
||||||
|
function to map this data. Then 'X' becomes (-3,9) and (3,9) while 'O' becomes (-1,1) and (1,1).
|
||||||
|
This is also linearly separable. In short, non-linearly separable data in a lower-dimensional space
has a better chance of becoming linearly separable in a higher-dimensional space.
|
||||||
|
|
||||||
|
In general, it is possible to map points in a d-dimensional space to some D-dimensional space
|
||||||
|
\f$(D>d)\f$ to check the possibility of linear separability. There is an idea which helps to compute the
|
||||||
|
dot product in the high-dimensional (kernel) space by performing computations in the low-dimensional
|
||||||
|
input (feature) space. We can illustrate with following example.
|
||||||
|
|
||||||
|
Consider two points in two-dimensional space, \f$p=(p_1,p_2)\f$ and \f$q=(q_1,q_2)\f$. Let \f$\phi\f$ be a
|
||||||
|
mapping function which maps a two-dimensional point to three-dimensional space as follows:
|
||||||
|
|
||||||
|
\f[\phi (p) = (p_{1}^2,p_{2}^2,\sqrt{2} p_1 p_2)
|
||||||
|
\phi (q) = (q_{1}^2,q_{2}^2,\sqrt{2} q_1 q_2)\f]
|
||||||
|
|
||||||
|
Let us define a kernel function \f$K(p,q)\f$ which does a dot product between two points, shown below:
|
||||||
|
|
||||||
|
\f[
|
||||||
|
\begin{aligned}
|
||||||
|
K(p,q) = \phi(p).\phi(q) &= \phi(p)^T \phi(q) \\
|
||||||
|
&= (p_{1}^2,p_{2}^2,\sqrt{2} p_1 p_2).(q_{1}^2,q_{2}^2,\sqrt{2} q_1 q_2) \\
|
||||||
|
&= p_{1}^{2} q_{1}^{2} + p_{2}^{2} q_{2}^{2} + 2 p_1 q_1 p_2 q_2 \\
|
||||||
|
&= (p_1 q_1 + p_2 q_2)^2 \\
|
||||||
|
\phi(p).\phi(q) &= (p.q)^2
|
||||||
|
\end{aligned}
|
||||||
|
\f]
|
||||||
|
|
||||||
|
It means, a dot product in three-dimensional space can be achieved using squared dot product in
|
||||||
|
two-dimensional space. This can be applied to higher dimensional space. So we can calculate higher
|
||||||
|
dimensional features from lower dimensions itself. Once we map them, we get a higher dimensional
|
||||||
|
space.
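You can verify the identity \f$\phi(p).\phi(q) = (p.q)^2\f$ numerically with a couple of lines of
NumPy (just a sanity check, not part of any SVM API):
@code{.py}
import numpy as np

p = np.array([1., 2.])
q = np.array([3., 4.])
phi = lambda v: np.array([v[0]**2, v[1]**2, np.sqrt(2)*v[0]*v[1]])

print np.dot(phi(p), phi(q))    # 121.0
print np.dot(p, q)**2           # 121.0
@endcode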
|
||||||
|
|
||||||
|
In addition to all these concepts, there comes the problem of misclassification. So just finding
|
||||||
|
decision boundary with maximum margin is not sufficient. We need to consider the problem of
|
||||||
|
misclassification errors also. Sometimes, it may be possible to find a decision boundary with less
|
||||||
|
margin, but with reduced misclassification. Anyway we need to modify our model such that it should
|
||||||
|
find decision boundary with maximum margin, but with less misclassification. The minimization
|
||||||
|
criteria is modified as:
|
||||||
|
|
||||||
|
\f[min \; ||w||^2 + C(distance \; of \; misclassified \; samples \; to \; their \; correct \; regions)\f]
|
||||||
|
|
||||||
|
Below image shows this concept. For each sample of the training data a new parameter \f$\xi_i\f$ is
|
||||||
|
defined. It is the distance from its corresponding training sample to their correct decision region.
|
||||||
|
For those who are not misclassified, they fall on their corresponding support planes, so their
|
||||||
|
distance is zero.
|
||||||
|
|
||||||
|
![image](images/svm_basics3.png)
|
||||||
|
|
||||||
|
So the new optimization problem is :
|
||||||
|
|
||||||
|
\f[\min_{w, b_{0}} L(w,b_0) = ||w||^{2} + C \sum_{i} {\xi_{i}} \text{ subject to } y_{i}(w^{T} x_{i} + b_{0}) \geq 1 - \xi_{i} \text{ and } \xi_{i} \geq 0 \text{ } \forall i\f]
|
||||||
|
|
||||||
|
How should the parameter C be chosen? It is obvious that the answer to this question depends on how
|
||||||
|
the training data is distributed. Although there is no general answer, it is useful to take into
|
||||||
|
account these rules:
|
||||||
|
|
||||||
|
- Large values of C give solutions with less misclassification errors but a smaller margin.
|
||||||
|
Consider that in this case it is expensive to make misclassification errors. Since the aim of
|
||||||
|
the optimization is to minimize the argument, few misclassifications errors are allowed.
|
||||||
|
- Small values of C give solutions with bigger margin and more classification errors. In this
|
||||||
|
case the minimization does not consider that much the term of the sum so it focuses more on
|
||||||
|
finding a hyperplane with big margin.
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# [NPTEL notes on Statistical Pattern Recognition, Chapters
|
||||||
|
25-29](http://www.nptel.iitm.ac.in/courses/106108057/26).
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
10
doc/py_tutorials/py_ml/py_svm/py_svm_index.markdown
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
Support Vector Machines (SVM) {#tutorial_py_svm_index}
|
||||||
|
=============================
|
||||||
|
|
||||||
|
- @subpage tutorial_py_svm_basics
|
||||||
|
|
||||||
|
Get a basic understanding of what SVM is
|
||||||
|
|
||||||
|
- @subpage tutorial_py_svm_opencv
|
||||||
|
|
||||||
|
Let's use SVM functionalities in OpenCV
|
@ -0,0 +1,137 @@
|
|||||||
|
OCR of Hand-written Data using SVM {#tutorial_py_svm_opencv}
|
||||||
|
==================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter
|
||||||
|
|
||||||
|
- We will revisit the hand-written data OCR, but, with SVM instead of kNN.
|
||||||
|
|
||||||
|
OCR of Hand-written Digits
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
In kNN, we directly used pixel intensity as the feature vector. This time we will use [Histogram of
|
||||||
|
Oriented Gradients](http://en.wikipedia.org/wiki/Histogram_of_oriented_gradients) (HOG) as feature
|
||||||
|
vectors.
|
||||||
|
|
||||||
|
Here, before finding the HOG, we deskew the image using its second order moments. So we first define
|
||||||
|
a function **deskew()** which takes a digit image and deskew it. Below is the deskew() function:
|
||||||
|
@code{.py}
|
||||||
|
def deskew(img):
|
||||||
|
m = cv2.moments(img)
|
||||||
|
if abs(m['mu02']) < 1e-2:
|
||||||
|
return img.copy()
|
||||||
|
skew = m['mu11']/m['mu02']
|
||||||
|
M = np.float32([[1, skew, -0.5*SZ*skew], [0, 1, 0]])
|
||||||
|
img = cv2.warpAffine(img,M,(SZ, SZ),flags=affine_flags)
|
||||||
|
return img
|
||||||
|
@endcode
|
||||||
|
Below image shows above deskew function applied to an image of zero. Left image is the original
|
||||||
|
image and right image is the deskewed image.
|
||||||
|
|
||||||
|
![image](images/deskew.jpg)
|
||||||
|
|
||||||
|
Next we have to find the HOG Descriptor of each cell. For that, we find Sobel derivatives of each
|
||||||
|
cell in X and Y direction. Then find their magnitude and direction of gradient at each pixel. This
|
||||||
|
gradient is quantized to 16 integer values. Divide this image to four sub-squares. For each
|
||||||
|
sub-square, calculate the histogram of direction (16 bins) weighted with their magnitude. So each
|
||||||
|
sub-square gives you a vector containing 16 values. Four such vectors (of four sub-squares) together
|
||||||
|
gives us a feature vector containing 64 values. This is the feature vector we use to train our data.
|
||||||
|
@code{.py}
|
||||||
|
def hog(img):
|
||||||
|
gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
|
||||||
|
gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
|
||||||
|
mag, ang = cv2.cartToPolar(gx, gy)
|
||||||
|
|
||||||
|
# quantizing binvalues in (0...16)
|
||||||
|
bins = np.int32(bin_n*ang/(2*np.pi))
|
||||||
|
|
||||||
|
# Divide to 4 sub-squares
|
||||||
|
bin_cells = bins[:10,:10], bins[10:,:10], bins[:10,10:], bins[10:,10:]
|
||||||
|
mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]
|
||||||
|
hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
|
||||||
|
hist = np.hstack(hists)
|
||||||
|
return hist
|
||||||
|
@endcode
|
||||||
|
Finally, as in the previous case, we start by splitting our big dataset into individual cells. For
|
||||||
|
every digit, 250 cells are reserved for training data and remaining 250 data is reserved for
|
||||||
|
testing. Full code is given below:
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
SZ=20
|
||||||
|
bin_n = 16 # Number of bins
|
||||||
|
|
||||||
|
svm_params = dict( kernel_type = cv2.SVM_LINEAR,
|
||||||
|
svm_type = cv2.SVM_C_SVC,
|
||||||
|
C=2.67, gamma=5.383 )
|
||||||
|
|
||||||
|
affine_flags = cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR
|
||||||
|
|
||||||
|
def deskew(img):
|
||||||
|
m = cv2.moments(img)
|
||||||
|
if abs(m['mu02']) < 1e-2:
|
||||||
|
return img.copy()
|
||||||
|
skew = m['mu11']/m['mu02']
|
||||||
|
M = np.float32([[1, skew, -0.5*SZ*skew], [0, 1, 0]])
|
||||||
|
img = cv2.warpAffine(img,M,(SZ, SZ),flags=affine_flags)
|
||||||
|
return img
|
||||||
|
|
||||||
|
def hog(img):
|
||||||
|
gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
|
||||||
|
gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
|
||||||
|
mag, ang = cv2.cartToPolar(gx, gy)
|
||||||
|
bins = np.int32(bin_n*ang/(2*np.pi)) # quantizing binvalues in (0...16)
|
||||||
|
bin_cells = bins[:10,:10], bins[10:,:10], bins[:10,10:], bins[10:,10:]
|
||||||
|
mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]
|
||||||
|
hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
|
||||||
|
hist = np.hstack(hists) # hist is a 64 bit vector
|
||||||
|
return hist
|
||||||
|
|
||||||
|
img = cv2.imread('digits.png',0)
|
||||||
|
|
||||||
|
cells = [np.hsplit(row,100) for row in np.vsplit(img,50)]
|
||||||
|
|
||||||
|
# First half is trainData, remaining is testData
|
||||||
|
train_cells = [ i[:50] for i in cells ]
|
||||||
|
test_cells = [ i[50:] for i in cells]
|
||||||
|
|
||||||
|
###### Now training ########################
|
||||||
|
|
||||||
|
deskewed = [map(deskew,row) for row in train_cells]
|
||||||
|
hogdata = [map(hog,row) for row in deskewed]
|
||||||
|
trainData = np.float32(hogdata).reshape(-1,64)
|
||||||
|
responses = np.float32(np.repeat(np.arange(10),250)[:,np.newaxis])
|
||||||
|
|
||||||
|
svm = cv2.SVM()
|
||||||
|
svm.train(trainData,responses, params=svm_params)
|
||||||
|
svm.save('svm_data.dat')
|
||||||
|
|
||||||
|
###### Now testing ########################
|
||||||
|
|
||||||
|
deskewed = [map(deskew,row) for row in test_cells]
|
||||||
|
hogdata = [map(hog,row) for row in deskewed]
|
||||||
|
testData = np.float32(hogdata).reshape(-1,bin_n*4)
|
||||||
|
result = svm.predict_all(testData)
|
||||||
|
|
||||||
|
####### Check Accuracy ########################
|
||||||
|
mask = result==responses
|
||||||
|
correct = np.count_nonzero(mask)
|
||||||
|
print correct*100.0/result.size
|
||||||
|
@endcode
|
||||||
|
This particular technique gave me nearly 94% accuracy. You can try different values for the various SVM parameters to check if higher accuracy is possible. Or you can read technical papers in this area and try to implement them.
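For instance, a small parameter sweep could look like the sketch below. It is only an illustration and reuses trainData, responses and testData from the script above; the kernel is switched to RBF here so that gamma actually has an effect, and the value grids are arbitrary choices.
@code{.py}
best = (0, None)
for C in [0.1, 1, 2.67, 10, 100]:
    for gamma in [0.1, 1, 5.383, 10]:
        params = dict( kernel_type = cv2.SVM_RBF,
                       svm_type = cv2.SVM_C_SVC,
                       C=C, gamma=gamma )
        model = cv2.SVM()
        model.train(trainData, responses, params=params)
        result = model.predict_all(testData)
        accuracy = np.count_nonzero(result==responses)*100.0/result.size
        if accuracy > best[0]:
            best = (accuracy, params)

print best   # best accuracy found and the parameters that produced it
@endcode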

Additional Resources
--------------------

-#  [Histograms of Oriented Gradients Video](www.youtube.com/watch?v=0Zib1YEE4LU)

Exercises
---------

-#  OpenCV samples contain digits.py, which applies a slight improvement of the above method to get an improved result. It also contains the reference. Check it and understand it.
@ -0,0 +1,16 @@
Machine Learning {#tutorial_py_table_of_contents_ml}
================

-   @subpage tutorial_py_knn_index

    Learn to use kNN for classification
    Plus learn about handwritten digit recognition using kNN

-   @subpage tutorial_py_svm_index

    Understand concepts of SVM

-   @subpage tutorial_py_kmeans_index

    Learn to use K-Means Clustering to group data to a number of clusters.
    Plus learn to do color quantization using K-Means Clustering
@ -0,0 +1,135 @@
Face Detection using Haar Cascades {#tutorial_py_face_detection}
==================================

Goal
----

In this session,

-   We will see the basics of face detection using Haar Feature-based Cascade Classifiers
-   We will extend the same for eye detection etc.

Basics
------

Object Detection using Haar feature-based cascade classifiers is an effective object detection method proposed by Paul Viola and Michael Jones in their paper, "Rapid Object Detection using a Boosted Cascade of Simple Features" in 2001. It is a machine learning based approach where a cascade function is trained from a lot of positive and negative images. It is then used to detect objects in other images.

Here we will work with face detection. Initially, the algorithm needs a lot of positive images (images of faces) and negative images (images without faces) to train the classifier. Then we need to extract features from them. For this, the Haar features shown in the image below are used. They are just like our convolutional kernel. Each feature is a single value obtained by subtracting the sum of pixels under the white rectangle from the sum of pixels under the black rectangle.

![image](images/haar_features.jpg)

Now, all possible sizes and locations of each kernel are used to calculate plenty of features. (Just imagine how much computation it needs: even a 24x24 window results in over 160000 features). For each feature calculation, we need to find the sum of pixels under the white and black rectangles. To solve this, they introduced the integral image. It simplifies the calculation of the sum of pixels, however large the number of pixels may be, to an operation involving just four pixels. Nice, isn't it? It makes things super-fast.
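To make the four-pixel lookup concrete, here is a small NumPy sketch (not part of the original tutorial) that builds a padded integral image and evaluates a rectangle sum, and then a simple two-rectangle Haar-like feature, from four lookups each:
@code{.py}
import numpy as np

img = (np.random.rand(24, 24) * 255).astype(np.uint8)   # a toy 24x24 window

# Integral image padded with a leading row/column of zeros: ii[y, x] = sum of img[:y, :x]
ii = np.zeros((img.shape[0] + 1, img.shape[1] + 1), dtype=np.int64)
ii[1:, 1:] = img.cumsum(axis=0).cumsum(axis=1)

def rect_sum(ii, x, y, w, h):
    # Sum of pixels in the rectangle with top-left (x, y), width w and height h,
    # using only four lookups in the integral image.
    return ii[y + h, x + w] - ii[y, x + w] - ii[y + h, x] + ii[y, x]

# A two-rectangle Haar-like feature: one strip minus the strip next to it
feature = rect_sum(ii, 4, 4, 6, 12) - rect_sum(ii, 10, 4, 6, 12)
print(feature)
@endcode
(OpenCV's own cv2.integral() computes the same kind of padded integral image.)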

But among all these features we calculate, most of them are irrelevant. For example, consider the image below. The top row shows two good features. The first feature selected seems to focus on the property that the region of the eyes is often darker than the region of the nose and cheeks. The second feature selected relies on the property that the eyes are darker than the bridge of the nose. But the same windows applied to the cheeks or any other region are irrelevant. So how do we select the best features out of 160000+ features? This is achieved by **Adaboost**.

![image](images/haar.png)

For this, we apply each and every feature on all the training images. For each feature, it finds the best threshold which will classify the faces as positive and negative. Obviously, there will be errors or misclassifications. We select the features with the minimum error rate, which means they are the features that best classify the face and non-face images. (The process is not as simple as this. Each image is given an equal weight in the beginning. After each classification, the weights of misclassified images are increased. Then the same process is repeated: new error rates and new weights are calculated. The process continues until the required accuracy or error rate is reached, or the required number of features is found).
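The weight-update loop described in the parenthesis can be sketched roughly as follows. This is only an illustrative outline, not the tutorial's code: `stumps` and their `evaluate` method are hypothetical placeholders for the pool of candidate single-feature classifiers.
@code{.py}
import numpy as np

def adaboost_select(stumps, X, y, n_rounds):
    # y is +1 for faces and -1 for non-faces; each stump maps a sample to +1/-1
    w = np.ones(len(X)) / len(X)          # every image starts with an equal weight
    chosen = []
    for _ in range(n_rounds):
        # pick the weak classifier with the lowest weighted error
        errors = [np.sum(w * (stump.evaluate(X) != y)) for stump in stumps]
        best = int(np.argmin(errors))
        err = max(errors[best], 1e-10)
        alpha = 0.5 * np.log((1 - err) / err)
        # increase the weight of misclassified images, decrease the rest
        pred = stumps[best].evaluate(X)
        w *= np.exp(-alpha * y * pred)
        w /= w.sum()
        chosen.append((alpha, stumps[best]))
    return chosen   # final classifier: sign(sum(alpha * stump(x)))
@endcode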

The final classifier is a weighted sum of these weak classifiers. They are called weak because each one alone can't classify the image, but together they form a strong classifier. The paper says even 200 features provide detection with 95% accuracy. Their final setup had around 6000 features. (Imagine a reduction from 160000+ features to 6000 features. That is a big gain).

So now you take an image. Take each 24x24 window. Apply 6000 features to it. Check if it is a face or not. Wow.. Wow.. Isn't it a little inefficient and time consuming? Yes, it is. The authors have a good solution for that.

In an image, most of the image region is non-face region. So it is a better idea to have a simple method to check whether a window is not a face region. If it is not, discard it in a single shot and don't process it again. Instead, focus on regions where there can be a face. This way, we spend more time checking possible face regions.

For this they introduced the concept of a **Cascade of Classifiers**. Instead of applying all 6000 features on a window, the features are grouped into different stages of classifiers and applied one-by-one. (Normally the first few stages contain very few features). If a window fails the first stage, discard it; we don't consider the remaining features on it. If it passes, apply the second stage of features and continue the process. A window which passes all stages is a face region. How is the plan !!!
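The early-rejection idea can be written down in a few lines. Again this is just an illustrative sketch; `stages` is a hypothetical list of callables where each stage returns True if the window may still contain a face.
@code{.py}
def is_face(window, stages):
    # Evaluate a 24x24 window against a cascade of stages.
    # Most non-face windows are rejected by the cheap early stages.
    for stage in stages:
        if not stage(window):
            return False      # rejected: skip the remaining (more expensive) stages
    return True               # survived every stage: report a face
@endcode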

The authors' detector had 6000+ features with 38 stages, with 1, 10, 25, 25 and 50 features in the first five stages. (The two features in the above image are actually obtained as the best two features from Adaboost). According to the authors, on average 10 features out of the 6000+ are evaluated per sub-window.

So this is a simple intuitive explanation of how Viola-Jones face detection works. Read the paper for more details or check out the references in the Additional Resources section.

Haar-cascade Detection in OpenCV
--------------------------------

OpenCV comes with a trainer as well as a detector. If you want to train your own classifier for any object like cars, planes etc. you can use OpenCV to create one. Its full details are given here: [Cascade Classifier Training.](http://docs.opencv.org/doc/user_guide/ug_traincascade.html)

Here we will deal with detection. OpenCV already contains many pre-trained classifiers for faces, eyes, smiles etc. Those XML files are stored in the opencv/data/haarcascades/ folder. Let's create a face and eye detector with OpenCV.

First we need to load the required XML classifiers. Then load our input image (or video) in grayscale mode.
@code{.py}
import numpy as np
import cv2

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

img = cv2.imread('sachin.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
@endcode
Now we find the faces in the image. If faces are found, it returns the positions of the detected faces as Rect(x,y,w,h). Once we get these locations, we can create a ROI for the face and apply eye detection on this ROI (since eyes are always on the face !!! ).
@code{.py}
faces = face_cascade.detectMultiScale(gray, 1.3, 5)
for (x,y,w,h) in faces:
    cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = img[y:y+h, x:x+w]
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex,ey,ew,eh) in eyes:
        cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2)

cv2.imshow('img',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode
The result looks like below:

![image](images/face.jpg)

Additional Resources
--------------------

-#  Video Lecture on [Face Detection and Tracking](http://www.youtube.com/watch?v=WfdYYNamHZ8)
-#  An interesting interview regarding Face Detection by [Adam Harvey](http://www.makematics.com/research/viola-jones/)

Exercises
---------
@ -0,0 +1,7 @@
Object Detection {#tutorial_py_table_of_contents_objdetect}
================

-   @subpage tutorial_py_face_detection

    Face detection using haar-cascades
@ -0,0 +1,89 @@
Image Inpainting {#tutorial_py_inpainting}
================

Goal
----

In this chapter,

-   We will learn how to remove small noise, strokes etc. in old photographs by a method called inpainting
-   We will see the inpainting functionality in OpenCV.

Basics
------

Most of you will have some old degraded photos at home with black spots, strokes etc. on them. Have you ever thought of restoring them? We can't simply erase the marks in a paint tool because that would just replace black structures with white structures, which is of no use. In these cases, a technique called image inpainting is used. The basic idea is simple: replace those bad marks with their neighbouring pixels so that the patch looks like its neighbourhood. Consider the image shown below (taken from [Wikipedia](http://en.wikipedia.org/wiki/Inpainting)):

![image](images/inpaint_basics.jpg)

Several algorithms were designed for this purpose and OpenCV provides two of them. Both can be accessed by the same function, **cv2.inpaint()**

The first algorithm is based on the paper **"An Image Inpainting Technique Based on the Fast Marching Method"** by Alexandru Telea in 2004. It is based on the Fast Marching Method. Consider a region in the image to be inpainted. The algorithm starts from the boundary of this region and goes inside, gradually filling everything in the boundary first. It takes a small neighbourhood around the pixel to be inpainted, and the pixel is replaced by a normalized weighted sum of all the known pixels in that neighbourhood. The selection of the weights is an important matter: more weight is given to pixels lying near the point, near the normal of the boundary, and on the boundary contours. Once a pixel is inpainted, the algorithm moves to the next nearest pixel using the Fast Marching Method. FMM ensures that pixels near the known pixels are inpainted first, so that it works just like a manual heuristic operation. This algorithm is enabled by using the flag, cv2.INPAINT_TELEA.

The second algorithm is based on the paper **"Navier-Stokes, Fluid Dynamics, and Image and Video Inpainting"** by Bertalmio, Marcelo, Andrea L. Bertozzi, and Guillermo Sapiro in 2001. This algorithm is based on fluid dynamics and utilizes partial differential equations. The basic principle is heuristic. It first travels along the edges from known regions to unknown regions (because edges are meant to be continuous). It continues isophotes (lines joining points with the same intensity, just like contours join points with the same elevation) while matching gradient vectors at the boundary of the inpainting region. For this, some methods from fluid dynamics are used. Once they are obtained, color is filled in so as to minimize the variance in that area. This algorithm is enabled by using the flag, cv2.INPAINT_NS.

Code
----

We need to create a mask of the same size as the input image, where non-zero pixels correspond to the area which is to be inpainted. Everything else is simple. My image is degraded with some black strokes (which I added manually). I created corresponding strokes with the Paint tool.
@code{.py}
import numpy as np
import cv2

img = cv2.imread('messi_2.jpg')
mask = cv2.imread('mask2.png',0)

dst = cv2.inpaint(img,mask,3,cv2.INPAINT_TELEA)

cv2.imshow('dst',dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode
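The snippet above uses cv2.INPAINT_TELEA; the Navier-Stokes based result shown below is obtained from the same call with the other flag (same image and mask):
@code{.py}
# Same call, but using the fluid-dynamics based algorithm
dst2 = cv2.inpaint(img,mask,3,cv2.INPAINT_NS)
@endcode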
See the result below. The first image shows the degraded input. The second image is the mask. The third image is the result of the first algorithm and the last image is the result of the second algorithm.

![image](images/inpaint_result.jpg)

Additional Resources
--------------------

-#  Bertalmio, Marcelo, Andrea L. Bertozzi, and Guillermo Sapiro. "Navier-stokes, fluid dynamics, and image and video inpainting." In Computer Vision and Pattern Recognition, 2001. CVPR 2001. Proceedings of the 2001 IEEE Computer Society Conference on, vol. 1, pp. I-355. IEEE, 2001.
-#  Telea, Alexandru. "An image inpainting technique based on the fast marching method." Journal of graphics tools 9.1 (2004): 23-34.

Exercises
---------

-#  OpenCV comes with an interactive sample on inpainting, samples/python2/inpaint.py, try it.
-#  A few months ago, I watched a video on [Content-Aware Fill](http://www.youtube.com/watch?v=ZtoUiplKa2A), an advanced inpainting technique used in Adobe Photoshop. On further searching, I found that the same technique is already available in GIMP under a different name, "Resynthesizer" (you need to install a separate plugin). I am sure you will enjoy the technique.
@ -0,0 +1,152 @@
Image Denoising {#tutorial_py_non_local_means}
===============

Goal
----

In this chapter,

-   You will learn about the Non-local Means Denoising algorithm to remove noise in an image.
-   You will see different functions like **cv2.fastNlMeansDenoising()**, **cv2.fastNlMeansDenoisingColored()** etc.

Theory
------

In earlier chapters, we have seen many image smoothing techniques like Gaussian blurring, median blurring etc., and they were good to some extent in removing small quantities of noise. In those techniques, we took a small neighbourhood around a pixel and did some operation, like a Gaussian weighted average or the median of the values, to replace the central element. In short, noise removal at a pixel was local to its neighbourhood.

Noise has a useful property: it is generally considered to be a random variable with zero mean. Consider a noisy pixel, \f$p = p_0 + n\f$ where \f$p_0\f$ is the true value of the pixel and \f$n\f$ is the noise in that pixel. You can take a large number of samples of the same pixel (say \f$N\f$) from different images and compute their average. Ideally, you should get \f$p = p_0\f$ since the mean of the noise is zero.

You can verify it yourself with a simple setup. Hold a static camera at a certain location for a couple of seconds. This will give you plenty of frames, that is, a lot of images of the same scene. Then write a piece of code to find the average of all the frames in the video (this should be too simple for you by now). Compare the final result with the first frame, and you can see the reduction in noise. Unfortunately this simple method is not robust to camera and scene motion, and often there is only one noisy image available.
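A minimal sketch of that experiment could look like this (assuming a short clip `static_scene.avi` captured with a static camera; the filename is just a placeholder):
@code{.py}
import numpy as np
import cv2

cap = cv2.VideoCapture('static_scene.avi')   # hypothetical clip of a static scene

frames = []
while True:
    ret, frame = cap.read()
    if not ret:
        break
    frames.append(np.float64(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)))
cap.release()

# Averaging cancels out zero-mean noise: the result approaches the true scene
avg = np.uint8(np.clip(sum(frames) / len(frames), 0, 255))

cv2.imshow('first frame', np.uint8(frames[0]))
cv2.imshow('average of all frames', avg)
cv2.waitKey(0)
cv2.destroyAllWindows()
@endcode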

So the idea is simple: we need a set of similar images to average out the noise. Consider a small window (say a 5x5 window) in the image. There is a large chance that the same patch appears somewhere else in the image, sometimes in a small neighbourhood around it. What about using these similar patches together and finding their average? For that particular window, that is fine. See the example image below:

![image](images/nlm_patch.jpg)

The blue patches in the image look similar. The green patches look similar. So we take a pixel, take a small window around it, search for similar windows in the image, average all the windows and replace the pixel with the result we got. This method is Non-Local Means Denoising. It takes more time compared to the blurring techniques we saw earlier, but its result is very good. More details and an online demo can be found at the first link in Additional Resources.

For color images, the image is converted to the CIELAB colorspace and then the L and AB components are denoised separately.

Image Denoising in OpenCV
-------------------------

OpenCV provides four variations of this technique.

-#  **cv2.fastNlMeansDenoising()** - works with a single grayscale image
-#  **cv2.fastNlMeansDenoisingColored()** - works with a color image.
-#  **cv2.fastNlMeansDenoisingMulti()** - works with an image sequence captured in a short period of time (grayscale images)
-#  **cv2.fastNlMeansDenoisingColoredMulti()** - same as above, but for color images.

Common arguments are:

-   h : parameter deciding filter strength. A higher h value removes noise better, but removes image detail as well. (10 is ok)
-   hForColorComponents : same as h, but for color images only. (normally same as h)
-   templateWindowSize : should be odd. (recommended 7)
-   searchWindowSize : should be odd. (recommended 21)

Please visit the first link in Additional Resources for more details on these parameters.

We will demonstrate 2 and 3 here. The rest is left for you.
### 1. cv2.fastNlMeansDenoisingColored()

As mentioned above, it is used to remove noise from color images. (The noise is expected to be Gaussian). See the example below:
@code{.py}
import numpy as np
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('die.png')

dst = cv2.fastNlMeansDenoisingColored(img,None,10,10,7,21)

plt.subplot(121),plt.imshow(img)
plt.subplot(122),plt.imshow(dst)
plt.show()
@endcode
Below is a zoomed version of the result. My input image has Gaussian noise of \f$\sigma = 25\f$. See the result:

![image](images/nlm_result1.jpg)

### 2. cv2.fastNlMeansDenoisingMulti()

Now we will apply the same method to a video. The first argument is the list of noisy frames. The second argument, imgToDenoiseIndex, specifies which frame we need to denoise; for that we pass the index of the frame in our input list. The third is temporalWindowSize, which specifies the number of nearby frames to be used for denoising; it should be odd. In that case, a total of temporalWindowSize frames are used, where the central frame is the frame to be denoised. For example, suppose you passed a list of 5 frames as input, with imgToDenoiseIndex = 2 and temporalWindowSize = 3. Then frame-1, frame-2 and frame-3 are used to denoise frame-2. Let's see an example.
@code{.py}
import numpy as np
import cv2
from matplotlib import pyplot as plt

cap = cv2.VideoCapture('vtest.avi')

# create a list of first 5 frames
img = [cap.read()[1] for i in xrange(5)]

# convert all to grayscale
gray = [cv2.cvtColor(i, cv2.COLOR_BGR2GRAY) for i in img]

# convert all to float64
gray = [np.float64(i) for i in gray]

# create Gaussian noise with standard deviation 10
noise = np.random.randn(*gray[1].shape)*10

# Add this noise to images
noisy = [i+noise for i in gray]

# Convert back to uint8
noisy = [np.uint8(np.clip(i,0,255)) for i in noisy]

# Denoise 3rd frame considering all the 5 frames
dst = cv2.fastNlMeansDenoisingMulti(noisy, 2, 5, None, 4, 7, 35)

plt.subplot(131),plt.imshow(gray[2],'gray')
plt.subplot(132),plt.imshow(noisy[2],'gray')
plt.subplot(133),plt.imshow(dst,'gray')
plt.show()
@endcode
The image below shows a zoomed version of the result we got:

![image](images/nlm_multi.jpg)

It takes a considerable amount of time for computation. In the result, the first image is the original frame, the second is the noisy one, and the third is the denoised image.

Additional Resources
--------------------

-#  <http://www.ipol.im/pub/art/2011/bcm_nlm/> (It has the details, an online demo etc. Highly recommended to visit. Our test image is generated from this link)
-#  [Online course at coursera](https://www.coursera.org/course/images) (First image taken from here)

Exercises
---------
@ -0,0 +1,16 @@
Computational Photography {#tutorial_py_table_of_contents_photo}
=========================

Here you will learn different OpenCV functionalities related to Computational Photography, like image denoising etc.

-   @subpage tutorial_py_non_local_means

    See a good technique to remove noise in images, called Non-Local Means Denoising

-   @subpage tutorial_py_inpainting

    Do you have an old degraded photo with many black spots and strokes on it? Take it. Let's try to restore it with a technique called image inpainting.
86
doc/py_tutorials/py_setup/py_intro/py_intro.markdown
Normal file
@ -0,0 +1,86 @@
Introduction to OpenCV-Python Tutorials {#tutorial_py_intro}
=======================================

OpenCV
------

OpenCV was started at Intel in 1999 by **Gary Bradsky**, and the first release came out in 2000. **Vadim Pisarevsky** joined Gary Bradsky to manage Intel's Russian software OpenCV team. In 2005, OpenCV was used on Stanley, the vehicle that won the 2005 DARPA Grand Challenge. Later, its active development continued under the support of Willow Garage with Gary Bradsky and Vadim Pisarevsky leading the project. OpenCV now supports a multitude of algorithms related to Computer Vision and Machine Learning and is expanding day by day.

OpenCV supports a wide variety of programming languages such as C++, Python, Java, etc., and is available on different platforms including Windows, Linux, OS X, Android, and iOS. Interfaces for high-speed GPU operations based on CUDA and OpenCL are also under active development.

OpenCV-Python is the Python API for OpenCV, combining the best qualities of the OpenCV C++ API and the Python language.

OpenCV-Python
-------------

OpenCV-Python is a library of Python bindings designed to solve computer vision problems.

Python is a general purpose programming language started by **Guido van Rossum** that became very popular very quickly, mainly because of its simplicity and code readability. It enables the programmer to express ideas in fewer lines of code without reducing readability.

Compared to languages like C/C++, Python is slower. That said, Python can be easily extended with C/C++, which allows us to write computationally intensive code in C/C++ and create Python wrappers that can be used as Python modules. This gives us two advantages: first, the code is as fast as the original C/C++ code (since it is the actual C++ code working in the background) and second, it is easier to code in Python than in C/C++. OpenCV-Python is a Python wrapper for the original OpenCV C++ implementation.

OpenCV-Python makes use of **Numpy**, which is a highly optimized library for numerical operations with a MATLAB-style syntax. All the OpenCV array structures are converted to and from Numpy arrays. This also makes it easier to integrate with other libraries that use Numpy, such as SciPy and Matplotlib.

OpenCV-Python Tutorials
-----------------------

OpenCV introduces a new set of tutorials which will guide you through various functions available in OpenCV-Python. **This guide is mainly focused on the OpenCV 3.x version** (although most of the tutorials will also work with OpenCV 2.x).

Prior knowledge of Python and Numpy is recommended as they won't be covered in this guide. **Proficiency with Numpy is a must in order to write optimized code using OpenCV-Python.**

This tutorial was originally started by *Abid Rahman K.* as part of the Google Summer of Code 2013 program under the guidance of *Alexander Mordvintsev*.

OpenCV Needs You !!!
--------------------

Since OpenCV is an open source initiative, all are welcome to make contributions to the library, documentation, and tutorials. If you find any mistake in this tutorial (from a small spelling mistake to an egregious error in code or concept), feel free to correct it by cloning OpenCV in [GitHub](https://github.com/Itseez/opencv) and submitting a pull request. OpenCV developers will check your pull request, give you important feedback and (once it passes the approval of the reviewer) it will be merged into OpenCV. You will then become an open source contributor :-)

As new modules are added to OpenCV-Python, this tutorial will have to be expanded. If you are familiar with a particular algorithm and can write up a tutorial including basic theory of the algorithm and code showing example usage, please do so.

Remember, we **together** can make this project a great success !!!

Contributors
------------

Below is the list of contributors who submitted tutorials to OpenCV-Python.

-#  Alexander Mordvintsev (GSoC-2013 mentor)
-#  Abid Rahman K. (GSoC-2013 intern)

Additional Resources
--------------------

-#  A Quick guide to Python - [A Byte of Python](http://swaroopch.com/notes/python/)
-#  [Basic Numpy Tutorials](http://wiki.scipy.org/Tentative_NumPy_Tutorial)
-#  [Numpy Examples List](http://wiki.scipy.org/Numpy_Example_List)
-#  [OpenCV Documentation](http://docs.opencv.org/)
-#  [OpenCV Forum](http://answers.opencv.org/questions/)
@ -0,0 +1,258 @@
Install OpenCV-Python in Fedora {#tutorial_py_setup_in_fedora}
===============================

Goals
-----

In this tutorial
-   We will learn to set up OpenCV-Python in your Fedora system. The steps below are tested for Fedora 18 (64-bit) and Fedora 19 (32-bit).

Introduction
------------

OpenCV-Python can be installed in Fedora in two ways: 1) install from pre-built binaries available in the Fedora repositories, 2) compile from source. In this section, we will see both.

Another important thing is the additional libraries required. OpenCV-Python requires only **Numpy** (in addition to other dependencies, which we will see later). But in these tutorials, we also use **Matplotlib** for some easy and nice plotting purposes (which I find much better compared to OpenCV). Matplotlib is optional, but highly recommended. Similarly we will also see **IPython**, an Interactive Python Terminal, which is also highly recommended.

Installing OpenCV-Python from Pre-built Binaries
------------------------------------------------

Install all packages with the following command in a terminal as root.
@code{.sh}
$ yum install numpy opencv*
@endcode
Open Python IDLE (or IPython) and type the following in a Python terminal.
@code{.py}
>>> import cv2
>>> print cv2.__version__
@endcode
If the results are printed out without any errors, congratulations !!! You have installed OpenCV-Python successfully.

It is quite easy. But there is a problem with this: the yum repositories may not always contain the latest version of OpenCV. For example, at the time of writing this tutorial, the yum repository contains 2.4.5 while the latest OpenCV version is 2.4.6. With respect to the Python API, the latest version will always have much better support. Also, there may be problems with camera support, video playback etc. depending upon the drivers, ffmpeg, gstreamer packages present etc.

So my personal preference is the next method, i.e. compiling from source. Also, at some point of time, if you want to contribute to OpenCV, you will need this.
Installing OpenCV from source
-----------------------------

Compiling from source may seem a little complicated at first, but once you have succeeded in it, there is nothing complicated about it.

First we will install some dependencies. Some are compulsory, some are optional. You can skip the optional dependencies if you don't want them.

### Compulsory Dependencies

We need **CMake** to configure the installation, **GCC** for compilation, and **Python-devel** and **Numpy** for creating Python extensions etc.
@code{.sh}
yum install cmake
yum install python-devel numpy
yum install gcc gcc-c++
@endcode
Next we need **GTK** support for GUI features, camera support (libdc1394, libv4l), media support (ffmpeg, gstreamer) etc.
@code{.sh}
yum install gtk2-devel
yum install libdc1394-devel
yum install libv4l-devel
yum install ffmpeg-devel
yum install gstreamer-plugins-base-devel
@endcode
### Optional Dependencies

The above dependencies are sufficient to install OpenCV on your Fedora machine. But depending upon your requirements, you may need some extra dependencies. A list of such optional dependencies is given below. You can either leave them out or install them, your call :)

OpenCV comes with supporting files for image formats like PNG, JPEG, JPEG2000, TIFF, WebP etc. But they may be a little old. If you want the latest libraries, you can install development files for these formats.
@code{.sh}
yum install libpng-devel
yum install libjpeg-turbo-devel
yum install jasper-devel
yum install openexr-devel
yum install libtiff-devel
yum install libwebp-devel
@endcode
Several OpenCV functions are parallelized with **Intel's Threading Building Blocks** (TBB). But if you want to enable it, you need to install TBB first. (Also, while configuring the installation with CMake, don't forget to pass -D WITH_TBB=ON. More details below.)
@code{.sh}
yum install tbb-devel
@endcode
OpenCV uses another library, **Eigen**, for optimized mathematical operations. So if you have Eigen installed in your system, you can exploit it. (Also, while configuring the installation with CMake, don't forget to pass -D WITH_EIGEN=ON. More details below.)
@code{.sh}
yum install eigen3-devel
@endcode
If you want to build the **documentation** (*yes, you can create an offline version of OpenCV's complete official documentation in your system in HTML with full search facility, so that you need not always access the internet if you have a question, and it is quite FAST!!!*), you need to install **Sphinx** (a documentation generation tool) and **pdflatex** (if you want to create a PDF version of it). (Also, while configuring the installation with CMake, don't forget to pass -D BUILD_DOCS=ON. More details below.)
@code{.sh}
yum install python-sphinx
yum install texlive
@endcode
### Downloading OpenCV

Next we have to download OpenCV. You can download the latest release of OpenCV from the [sourceforge site](http://sourceforge.net/projects/opencvlibrary/). Then extract the folder.

Or you can download the latest source from OpenCV's github repo. (If you want to contribute to OpenCV, choose this. It always keeps your OpenCV up-to-date). For that, you need to install **Git** first.
@code{.sh}
yum install git
git clone https://github.com/Itseez/opencv.git
@endcode
It will create a folder opencv in the current directory (or the directory you specify). The cloning may take some time depending upon your internet connection.

Now open a terminal window and navigate to the downloaded OpenCV folder. Create a new build folder and navigate to it.
@code{.sh}
mkdir build
cd build
@endcode
### Configuring and Installing

Now that we have installed all the required dependencies, let's install OpenCV. The installation has to be configured with CMake. It specifies which modules are to be installed, the installation path, which additional libraries are to be used, whether documentation and examples are to be compiled, etc. The command below is normally used for configuration (executed from the build folder).
@code{.sh}
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local ..
@endcode
It specifies that the build type is "Release Mode" and the installation path is /usr/local. Observe the -D before each option and the .. at the end. In short, this is the format:
@code{.sh}
cmake [-D <flag>] [-D <flag>] ..
@endcode
You can specify as many flags as you want, but each flag should be preceded by -D.

So in this tutorial, we are installing OpenCV with TBB and Eigen support. We also build the documentation, but we exclude performance tests and building samples. We also disable GPU related modules (since we use OpenCV-Python, we don't need GPU related modules; this saves us some time).

*(All the below commands can be done in a single cmake statement, but it is split here for better understanding.)*

-   Enable TBB and Eigen support:
    @code{.sh}
    cmake -D WITH_TBB=ON -D WITH_EIGEN=ON ..
    @endcode
-   Enable documentation and disable tests and samples
    @code{.sh}
    cmake -D BUILD_DOCS=ON -D BUILD_TESTS=OFF -D BUILD_PERF_TESTS=OFF -D BUILD_EXAMPLES=OFF ..
    @endcode
-   Disable all GPU related modules.
    @code{.sh}
    cmake -D WITH_OPENCL=OFF -D WITH_CUDA=OFF -D BUILD_opencv_gpu=OFF -D BUILD_opencv_gpuarithm=OFF -D BUILD_opencv_gpubgsegm=OFF -D BUILD_opencv_gpucodec=OFF -D BUILD_opencv_gpufeatures2d=OFF -D BUILD_opencv_gpufilters=OFF -D BUILD_opencv_gpuimgproc=OFF -D BUILD_opencv_gpulegacy=OFF -D BUILD_opencv_gpuoptflow=OFF -D BUILD_opencv_gpustereo=OFF -D BUILD_opencv_gpuwarping=OFF ..
    @endcode
-   Set the installation path and build type
    @code{.sh}
    cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local ..
    @endcode

Each time you enter a cmake statement, it prints out the resulting configuration setup. In the final setup, make sure that the following fields are filled (below are some important parts of the configuration I got). These fields should be filled appropriately in your system as well; otherwise something has gone wrong, so check whether you have correctly performed the above steps.
@code{.sh}
--   GUI:
--     GTK+ 2.x:    YES (ver 2.24.19)
--     GThread :    YES (ver 2.36.3)

--   Video I/O:
--     DC1394 2.x:  YES (ver 2.2.0)
--     FFMPEG:      YES
--       codec:     YES (ver 54.92.100)
--       format:    YES (ver 54.63.104)
--       util:      YES (ver 52.18.100)
--       swscale:   YES (ver 2.2.100)
--       gentoo-style: YES
--     GStreamer:
--       base:      YES (ver 0.10.36)
--       video:     YES (ver 0.10.36)
--       app:       YES (ver 0.10.36)
--       riff:      YES (ver 0.10.36)
--       pbutils:   YES (ver 0.10.36)

--     V4L/V4L2:    Using libv4l (ver 1.0.0)

--   Other third-party libraries:
--     Use Eigen:   YES (ver 3.1.4)
--     Use TBB:     YES (ver 4.0 interface 6004)

--   Python:
--     Interpreter:   /usr/bin/python2 (ver 2.7.5)
--     Libraries:     /lib/libpython2.7.so (ver 2.7.5)
--     numpy:         /usr/lib/python2.7/site-packages/numpy/core/include (ver 1.7.1)
--     packages path: lib/python2.7/site-packages

--   Documentation:
--     Build Documentation: YES
--     Sphinx:              /usr/bin/sphinx-build (ver 1.1.3)
--     PdfLaTeX compiler:   /usr/bin/pdflatex
--
--   Tests and samples:
--     Tests:             NO
--     Performance tests: NO
--     C/C++ Examples:    NO
@endcode
There are many other flags and settings; they are left for you to explore further.

Now build the files using the make command and install them using the make install command. make install should be executed as root.
@code{.sh}
make
su
make install
@endcode
Installation is over. All files are installed in the /usr/local/ folder. But to use it, your Python should be able to find the OpenCV module. You have two options for that.

-#  **Move the module to any folder in the Python path**: The Python path can be found out by entering import sys; print sys.path in a Python terminal. It will print out many locations. Move /usr/local/lib/python2.7/site-packages/cv2.so to any of these folders. For example,
    @code{.sh}
    su
    mv /usr/local/lib/python2.7/site-packages/cv2.so /usr/lib/python2.7/site-packages
    @endcode
    But you will have to do this every time you install OpenCV.

-#  **Add /usr/local/lib/python2.7/site-packages to the PYTHONPATH**: This is to be done only once. Just open \~/.bashrc, add the following line to it, then log out and come back.
    @code{.sh}
    export PYTHONPATH=$PYTHONPATH:/usr/local/lib/python2.7/site-packages
    @endcode
    Thus the OpenCV installation is finished. Open a terminal and try import cv2.

To build the documentation, just enter the following commands:
@code{.sh}
make docs
make html_docs
@endcode
Then open opencv/build/doc/_html/index.html and bookmark it in the browser.

Additional Resources
--------------------

Exercises
---------

-#  Compile OpenCV from source on your Fedora machine.
@ -0,0 +1,151 @@
Install OpenCV-Python in Windows {#tutorial_py_setup_in_windows}
================================

Goals
-----

In this tutorial
-   We will learn to set up OpenCV-Python in your Windows system.

The steps below are tested on a Windows 7 (64-bit) machine with Visual Studio 2010 and Visual Studio 2012. The screenshots show VS2012.

Installing OpenCV from prebuilt binaries
----------------------------------------

-#  The following Python packages are to be downloaded and installed to their default locations.

    -#  [Python-2.7.x](http://python.org/ftp/python/2.7.5/python-2.7.5.msi).
    -#  [Numpy](http://sourceforge.net/projects/numpy/files/NumPy/1.7.1/numpy-1.7.1-win32-superpack-python2.7.exe/download).
    -#  [Matplotlib](https://downloads.sourceforge.net/project/matplotlib/matplotlib/matplotlib-1.3.0/matplotlib-1.3.0.win32-py2.7.exe) (*Matplotlib is optional, but recommended since we use it a lot in our tutorials*).

-#  Install all packages into their default locations. Python will be installed to `C:/Python27/`.
-#  After installation, open Python IDLE. Enter import numpy and make sure Numpy is working fine.
-#  Download the latest OpenCV release from the [sourceforge site](http://sourceforge.net/projects/opencvlibrary/files/opencv-win/2.4.6/OpenCV-2.4.6.0.exe/download) and double-click to extract it.
-#  Go to the **opencv/build/python/2.7** folder.
-#  Copy **cv2.pyd** to **C:/Python27/lib/site-packages**.
-#  Open Python IDLE and type the following in a Python terminal.
    @code
    >>> import cv2
    >>> print cv2.__version__
    @endcode

If the results are printed out without any errors, congratulations !!! You have installed OpenCV-Python successfully.

Building OpenCV from source
---------------------------

-#  Download and install Visual Studio and CMake.

    -#  [Visual Studio 2012](http://go.microsoft.com/?linkid=9816768)
    -#  [CMake](http://www.cmake.org/files/v2.8/cmake-2.8.11.2-win32-x86.exe)

-#  Download and install the necessary Python packages to their default locations

    -#  [Python 2.7.x](http://python.org/ftp/python/2.7.5/python-2.7.5.msi)
    -#  [Numpy](http://sourceforge.net/projects/numpy/files/NumPy/1.7.1/numpy-1.7.1-win32-superpack-python2.7.exe/download)
    -#  [Matplotlib](https://downloads.sourceforge.net/project/matplotlib/matplotlib/matplotlib-1.3.0/matplotlib-1.3.0.win32-py2.7.exe) (*Matplotlib is optional, but recommended since we use it a lot in our tutorials.*)

@note In this case, we are using 32-bit binaries of the Python packages. But if you want to use OpenCV for x64, 64-bit binaries of the Python packages have to be installed. The problem is that there are no official 64-bit binaries of Numpy; you have to build it on your own. For that, you have to use the same compiler used to build Python. When you start Python IDLE, it shows the compiler details. You can get more [information here](http://stackoverflow.com/q/2676763/1134940). So your system must have the same Visual Studio version, and you build Numpy from source.

@note Another method to get 64-bit Python packages is to use ready-made Python distributions from third parties like [Anaconda](http://www.continuum.io/downloads), [Enthought](https://www.enthought.com/downloads/) etc. They are bigger in size, but have everything you need, everything in a single shell. You can also download 32-bit versions.

-#  Make sure Python and Numpy are working fine.
-#  Download the OpenCV source. It can be from [Sourceforge](http://sourceforge.net/projects/opencvlibrary/) (for the official release version) or from [Github](https://github.com/Itseez/opencv) (for the latest source).
-#  Extract it to a folder, opencv, and create a new folder build in it.
-#  Open CMake-gui (*Start \> All Programs \> CMake-gui*)
-#  Fill the fields as follows (see the image below):

    -#  Click on **Browse Source...** and locate the opencv folder.
    -#  Click on **Browse Build...** and locate the build folder we created.
    -#  Click on **Configure**.

    ![image](images/Capture1.jpg)

-#  It will open a new window to select the compiler. Choose the appropriate compiler (here, Visual Studio 11) and click **Finish**.

    ![image](images/Capture2.png)

-#  Wait until the analysis is finished.
-#  You will see all the fields are marked in red. Click on the **WITH** field to expand it. It decides what extra features you need. So mark the appropriate fields. See the image below:

    ![image](images/Capture3.png)

-#  Now click on the **BUILD** field to expand it. The first few fields configure the build method. See the image below:

    ![image](images/Capture5.png)

-#  The remaining fields specify which modules are to be built. Since GPU modules are not yet supported by OpenCV-Python, you can completely avoid them to save time (but if you work with them, keep them there). See the image below:

    ![image](images/Capture6.png)

-#  Now click on the **ENABLE** field to expand it. Make sure **ENABLE_SOLUTION_FOLDERS** is unchecked (solution folders are not supported by the Visual Studio Express edition). See the image below:

    ![image](images/Capture7.png)

-#  Also make sure that in the **PYTHON** field, everything is filled. (Ignore PYTHON_DEBUG_LIBRARY). See the image below:

    ![image](images/Capture80.png)

-#  Finally, click the **Generate** button.
-#  Now go to our **opencv/build** folder. There you will find the **OpenCV.sln** file. Open it with Visual Studio.
-#  Change the build mode to **Release** instead of **Debug**.
-#  In the solution explorer, right-click on the **Solution** (or **ALL_BUILD**) and build it. It will take some time to finish.
-#  Again, right-click on **INSTALL** and build it. Now OpenCV-Python will be installed.

    ![image](images/Capture8.png)

-#  Open Python IDLE and enter import cv2. If there is no error, it is installed correctly.

@note We have installed with no other support like TBB, Eigen, Qt, Documentation etc. It would be difficult to explain it all here. A more detailed video will be added soon, or you can just hack around.

Additional Resources
--------------------

Exercises
---------

If you have a Windows machine, compile OpenCV from source. Do all kinds of hacks. If you meet any problem, visit the OpenCV forum and explain your problem.
@ -0,0 +1,17 @@
Introduction to OpenCV {#tutorial_py_table_of_contents_setup}
======================

-   @subpage tutorial_py_intro

    Getting Started with OpenCV-Python

-   @subpage tutorial_py_setup_in_windows

    Set Up OpenCV-Python in Windows

-   @subpage tutorial_py_setup_in_fedora

    Set Up OpenCV-Python in Fedora
55
doc/py_tutorials/py_tutorials.markdown
Normal file
@ -0,0 +1,55 @@
OpenCV-Python Tutorials {#tutorial_py_root}
=======================

-   @subpage tutorial_py_table_of_contents_setup

    Learn how to set up OpenCV-Python on your computer!

-   @subpage tutorial_py_table_of_contents_gui

    Here you will learn how to display and save images and videos, control mouse events and create trackbars.

-   @subpage tutorial_py_table_of_contents_core

    In this section you will learn basic operations on images like pixel editing, geometric transformations, code optimization, some mathematical tools etc.

-   @subpage tutorial_py_table_of_contents_imgproc

    In this section you will learn different image processing functions inside OpenCV.

-   @subpage tutorial_py_table_of_contents_feature2d

    In this section you will learn about feature detectors and descriptors

-   @subpage tutorial_py_table_of_contents_video

    In this section you will learn different techniques to work with videos like object tracking etc.

-   @subpage tutorial_py_table_of_contents_calib3d

    In this section we will learn about camera calibration, stereo imaging etc.

-   @subpage tutorial_py_table_of_contents_ml

    In this section you will learn different machine learning functions inside OpenCV.

-   @subpage tutorial_py_table_of_contents_photo

    In this section you will learn different computational photography techniques like image denoising etc.

-   @subpage tutorial_py_table_of_contents_objdetect

    In this section you will learn object detection techniques like face detection etc.

-   @subpage tutorial_py_table_of_contents_bindings

    In this section, we will see how OpenCV-Python bindings are generated
@ -0,0 +1,173 @@
Background Subtraction {#tutorial_py_bg_subtraction}
======================

Goal
----

In this chapter,

-   We will familiarize ourselves with the background subtraction methods available in OpenCV.

Basics
------

Background subtraction is a major preprocessing step in many vision-based applications. For example, consider cases like a visitor counter where a static camera counts the number of visitors entering or leaving a room, or a traffic camera extracting information about vehicles etc. In all these cases, first you need to extract the person or the vehicles alone. Technically, you need to extract the moving foreground from the static background.

If you have an image of the background alone, like an image of the room without visitors or an image of the road without vehicles etc., it is an easy job: just subtract the new image from the background and you get the foreground objects alone. But in most cases, you may not have such an image, so we need to extract the background from whatever images we have. It becomes more complicated when there are shadows of the vehicles. Since shadows also move, simple subtraction will mark them as foreground as well. It complicates things.

Several algorithms were introduced for this purpose. OpenCV has implemented three such algorithms which are very easy to use. We will see them one-by-one.

### BackgroundSubtractorMOG

It is a Gaussian Mixture-based Background/Foreground Segmentation Algorithm. It was introduced in the paper "An improved adaptive background mixture model for real-time tracking with shadow detection" by P. KadewTraKuPong and R. Bowden in 2001. It models each background pixel by a mixture of K Gaussian distributions (K = 3 to 5). The weights of the mixture represent the time proportions that those colours stay in the scene. The probable background colours are the ones which stay longer and are more static.

While coding, we need to create a background subtractor object using the function **cv2.createBackgroundSubtractorMOG()**. It has some optional parameters like the length of history, the number of gaussian mixtures, the threshold etc. They are all set to some default values. Then, inside the video loop, use the backgroundsubtractor.apply() method to get the foreground mask.

See a simple example below:
@code{.py}
import numpy as np
import cv2

cap = cv2.VideoCapture('vtest.avi')

fgbg = cv2.createBackgroundSubtractorMOG()

while(1):
    ret, frame = cap.read()

    fgmask = fgbg.apply(frame)

    cv2.imshow('frame',fgmask)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()
@endcode
(All the results are shown at the end for comparison).
|
||||||
|
|
||||||
|
### BackgroundSubtractorMOG2
|
||||||
|
|
||||||
|
It is also a Gaussian Mixture-based Background/Foreground Segmentation Algorithm. It is based on two
|
||||||
|
papers by Z.Zivkovic, "Improved adaptive Gausian mixture model for background subtraction" in 2004
|
||||||
|
and "Efficient Adaptive Density Estimation per Image Pixel for the Task of Background Subtraction"
|
||||||
|
in 2006. One important feature of this algorithm is that it selects the appropriate number of Gaussian distributions for each pixel. (Remember, in the last case, we took K Gaussian distributions throughout the algorithm.) It provides better adaptability to varying scenes due to illumination changes etc.
|
||||||
|
|
||||||
|
As in the previous case, we have to create a background subtractor object. Here, you have the option of selecting whether shadows should be detected or not. If detectShadows = True (which is the default), it detects and marks shadows, but this decreases the speed. Shadows will be marked in gray.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
cap = cv2.VideoCapture('vtest.avi')
|
||||||
|
|
||||||
|
fgbg = cv2.createBackgroundSubtractorMOG2()
|
||||||
|
|
||||||
|
while(1):
|
||||||
|
ret, frame = cap.read()
|
||||||
|
|
||||||
|
fgmask = fgbg.apply(frame)
|
||||||
|
|
||||||
|
cv2.imshow('frame',fgmask)
|
||||||
|
k = cv2.waitKey(30) & 0xff
|
||||||
|
if k == 27:
|
||||||
|
break
|
||||||
|
|
||||||
|
cap.release()
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
@endcode
|
||||||
|
(Results are given at the end.)
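If you only need a binary foreground mask, the gray shadow pixels can be removed by thresholding the mask returned by apply(). This is a minimal sketch, assuming shadows are marked with a gray value lower than the foreground value of 255 (127 by default):
@code{.py}
import numpy as np
import cv2

cap = cv2.VideoCapture('vtest.avi')
fgbg = cv2.createBackgroundSubtractorMOG2()   # detectShadows is True by default

while(1):
    ret, frame = cap.read()
    if not ret:
        break

    fgmask = fgbg.apply(frame)

    # Keep only definite foreground (255); drop the gray shadow pixels
    _, fgmask_bin = cv2.threshold(fgmask, 200, 255, cv2.THRESH_BINARY)

    cv2.imshow('frame', fgmask_bin)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()
@endcode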
|
||||||
|
|
||||||
|
### BackgroundSubtractorGMG
|
||||||
|
|
||||||
|
This algorithm combines statistical background image estimation and per-pixel Bayesian segmentation.
|
||||||
|
It was introduced by Andrew B. Godbehere, Akihiro Matsukawa, Ken Goldberg in their paper "Visual
|
||||||
|
Tracking of Human Visitors under Variable-Lighting Conditions for a Responsive Audio Art
|
||||||
|
Installation" in 2012. As per the paper, the system ran a successful interactive audio art
|
||||||
|
installation called “Are We There Yet?” from March 31 - July 31 2011 at the Contemporary Jewish
|
||||||
|
Museum in San Francisco, California.
|
||||||
|
|
||||||
|
It uses the first few (120 by default) frames for background modelling. It employs a probabilistic foreground segmentation algorithm that identifies possible foreground objects using Bayesian inference. The estimates are adaptive; newer observations are weighted more heavily than older ones to accommodate variable illumination. Several morphological filtering operations such as closing and opening are applied to remove unwanted noise. You will get a black window during the first few frames.
|
||||||
|
|
||||||
|
It is a good idea to apply morphological opening to the result to remove the noise.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
cap = cv2.VideoCapture('vtest.avi')
|
||||||
|
|
||||||
|
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
|
||||||
|
fgbg = cv2.createBackgroundSubtractorGMG()
|
||||||
|
|
||||||
|
while(1):
|
||||||
|
ret, frame = cap.read()
|
||||||
|
|
||||||
|
fgmask = fgbg.apply(frame)
|
||||||
|
fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel)
|
||||||
|
|
||||||
|
cv2.imshow('frame',fgmask)
|
||||||
|
k = cv2.waitKey(30) & 0xff
|
||||||
|
if k == 27:
|
||||||
|
break
|
||||||
|
|
||||||
|
cap.release()
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
@endcode
|
||||||
|
Results
|
||||||
|
-------
|
||||||
|
|
||||||
|
**Original Frame**
|
||||||
|
|
||||||
|
The image below shows the 200th frame of a video:
|
||||||
|
|
||||||
|
![image](images/resframe.jpg)
|
||||||
|
|
||||||
|
**Result of BackgroundSubtractorMOG**
|
||||||
|
|
||||||
|
![image](images/resmog.jpg)
|
||||||
|
|
||||||
|
**Result of BackgroundSubtractorMOG2**
|
||||||
|
|
||||||
|
The gray regions show the detected shadows.
|
||||||
|
|
||||||
|
![image](images/resmog2.jpg)
|
||||||
|
|
||||||
|
**Result of BackgroundSubtractorGMG**
|
||||||
|
|
||||||
|
Noise is removed with morphological opening.
|
||||||
|
|
||||||
|
![image](images/resgmg.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
@ -0,0 +1,225 @@
|
|||||||
|
Optical Flow {#tutorial_py_lucas_kanade}
|
||||||
|
============
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
- We will understand the concepts of optical flow and its estimation using the Lucas-Kanade method.
|
||||||
|
- We will use functions like **cv2.calcOpticalFlowPyrLK()** to track feature points in a
|
||||||
|
video.
|
||||||
|
|
||||||
|
Optical Flow
|
||||||
|
------------
|
||||||
|
|
||||||
|
Optical flow is the pattern of apparent motion of image objects between two consecutive frames
|
||||||
|
caused by the movement of the object or the camera. It is a 2D vector field where each vector is a displacement vector showing the movement of points from the first frame to the second. Consider the image
|
||||||
|
below (Image Courtesy: [Wikipedia article on Optical
|
||||||
|
Flow](http://en.wikipedia.org/wiki/Optical_flow)).
|
||||||
|
|
||||||
|
![image](images/optical_flow_basic1.jpg)
|
||||||
|
|
||||||
|
It shows a ball moving in 5 consecutive frames. The arrow shows its displacement vector. Optical
|
||||||
|
flow has many applications in areas like:
|
||||||
|
|
||||||
|
- Structure from Motion
|
||||||
|
- Video Compression
|
||||||
|
- Video Stabilization ...
|
||||||
|
|
||||||
|
Optical flow works on several assumptions:
|
||||||
|
|
||||||
|
-# The pixel intensities of an object do not change between consecutive frames.
|
||||||
|
2. Neighbouring pixels have similar motion.
|
||||||
|
|
||||||
|
Consider a pixel \f$I(x,y,t)\f$ in the first frame (note that a new dimension, time, is added here; earlier we were working with images only, so there was no need for time). It moves by a distance \f$(dx,dy)\f$ in the next frame, taken \f$dt\f$ time later. Since those pixels are the same and the intensity does not change, we can say,
|
||||||
|
|
||||||
|
\f[I(x,y,t) = I(x+dx, y+dy, t+dt)\f]
|
||||||
|
|
||||||
|
Then take the Taylor series approximation of the right-hand side, remove common terms and divide by \f$dt\f$ to get the following equation:
|
||||||
|
|
||||||
|
\f[f_x u + f_y v + f_t = 0 \;\f]
|
||||||
|
|
||||||
|
where:
|
||||||
|
|
||||||
|
\f[f_x = \frac{\partial f}{\partial x} \; ; \; f_y = \frac{\partial f}{\partial y}\f]\f[u = \frac{dx}{dt} \; ; \; v = \frac{dy}{dt}\f]
|
||||||
|
|
||||||
|
The above equation is called the Optical Flow equation. In it, \f$f_x\f$ and \f$f_y\f$ are the image gradients, and \f$f_t\f$ is the gradient along time. But \f$(u,v)\f$ is unknown: we cannot solve one equation with two unknown variables. Several methods have been proposed to solve this problem, and one of them is Lucas-Kanade.
|
||||||
|
|
||||||
|
### Lucas-Kanade method
|
||||||
|
|
||||||
|
We have seen an assumption before, that all the neighbouring pixels will have similar motion. The Lucas-Kanade method takes a 3x3 patch around the point, so all 9 points have the same motion. We can find \f$(f_x, f_y, f_t)\f$ for these 9 points, so our problem becomes solving 9 equations with two unknown variables, which is over-determined. A better solution is obtained with the least-squares fit method. Below is the final solution, a two-equation, two-unknown problem; solve it to get the answer:
|
||||||
|
|
||||||
|
\f[\begin{bmatrix} u \\ v \end{bmatrix} =
|
||||||
|
\begin{bmatrix}
|
||||||
|
\sum_{i}{f_{x_i}}^2 & \sum_{i}{f_{x_i} f_{y_i} } \\
|
||||||
|
\sum_{i}{f_{x_i} f_{y_i}} & \sum_{i}{f_{y_i}}^2
|
||||||
|
\end{bmatrix}^{-1}
|
||||||
|
\begin{bmatrix}
|
||||||
|
- \sum_{i}{f_{x_i} f_{t_i}} \\
|
||||||
|
- \sum_{i}{f_{y_i} f_{t_i}}
|
||||||
|
\end{bmatrix}\f]
|
||||||
|
|
||||||
|
(Note the similarity of the inverse matrix with the Harris corner detector. It indicates that corners are better points to be tracked.)
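To see what this least-squares solution looks like in code, here is a minimal NumPy sketch (not OpenCV's implementation) that estimates \f$(u,v)\f$ for a single pixel from two grayscale frames given as arrays; the function name and window size are illustrative only:
@code{.py}
import numpy as np

def lucas_kanade_point(prev, nxt, x, y, win=7):
    """Estimate (u, v) at pixel (x, y) from two grayscale frames (float arrays)."""
    half = win // 2
    # np.gradient returns derivatives along rows (y) first, then columns (x)
    fy, fx = np.gradient(prev.astype(np.float64))
    ft = nxt.astype(np.float64) - prev.astype(np.float64)   # temporal gradient

    # Collect the derivatives inside the window around (x, y)
    Ix = fx[y-half:y+half+1, x-half:x+half+1].ravel()
    Iy = fy[y-half:y+half+1, x-half:x+half+1].ravel()
    It = ft[y-half:y+half+1, x-half:x+half+1].ravel()

    # Over-determined system [Ix Iy] [u v]^T = -It, solved in the least-squares sense
    A = np.stack([Ix, Iy], axis=1)
    uv, *_ = np.linalg.lstsq(A, -It, rcond=None)
    return uv   # (u, v)
@endcode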
|
||||||
|
|
||||||
|
So from the user's point of view the idea is simple: we give some points to track and we receive the optical flow vectors of those points. But again there are some problems. Until now, we were dealing with small motions, so the method fails when there is large motion. To handle this we go for pyramids: when we go up in the pyramid, small motions are removed and large motions become small motions. So applying Lucas-Kanade there, we get optical flow along with the scale.
|
||||||
|
|
||||||
|
Lucas-Kanade Optical Flow in OpenCV
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
|
OpenCV provides all this in a single function, **cv2.calcOpticalFlowPyrLK()**. Here, we create a simple application which tracks some points in a video. To decide the points, we use **cv2.goodFeaturesToTrack()**. We take the first frame, detect some Shi-Tomasi corner points in it, then we iteratively track those points using Lucas-Kanade optical flow. To the function **cv2.calcOpticalFlowPyrLK()** we pass the previous frame, the previous points and the next frame. It returns the next points along with status values that are 1 if the corresponding point was found, else 0. We iteratively pass these next points as previous points in the next step. See the code below:
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
cap = cv2.VideoCapture('slow.flv')
|
||||||
|
|
||||||
|
# params for ShiTomasi corner detection
|
||||||
|
feature_params = dict( maxCorners = 100,
|
||||||
|
qualityLevel = 0.3,
|
||||||
|
minDistance = 7,
|
||||||
|
blockSize = 7 )
|
||||||
|
|
||||||
|
# Parameters for lucas kanade optical flow
|
||||||
|
lk_params = dict( winSize = (15,15),
|
||||||
|
maxLevel = 2,
|
||||||
|
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
|
||||||
|
|
||||||
|
# Create some random colors
|
||||||
|
color = np.random.randint(0,255,(100,3))
|
||||||
|
|
||||||
|
# Take first frame and find corners in it
|
||||||
|
ret, old_frame = cap.read()
|
||||||
|
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
|
||||||
|
p0 = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)
|
||||||
|
|
||||||
|
# Create a mask image for drawing purposes
|
||||||
|
mask = np.zeros_like(old_frame)
|
||||||
|
|
||||||
|
while(1):
|
||||||
|
ret,frame = cap.read()
|
||||||
|
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
|
# calculate optical flow
|
||||||
|
p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
|
||||||
|
|
||||||
|
# Select good points
|
||||||
|
good_new = p1[st==1]
|
||||||
|
good_old = p0[st==1]
|
||||||
|
|
||||||
|
# draw the tracks
|
||||||
|
for i,(new,old) in enumerate(zip(good_new,good_old)):
|
||||||
|
a,b = new.ravel()
|
||||||
|
c,d = old.ravel()
|
||||||
|
mask = cv2.line(mask, (a,b),(c,d), color[i].tolist(), 2)
|
||||||
|
frame = cv2.circle(frame,(a,b),5,color[i].tolist(),-1)
|
||||||
|
img = cv2.add(frame,mask)
|
||||||
|
|
||||||
|
cv2.imshow('frame',img)
|
||||||
|
k = cv2.waitKey(30) & 0xff
|
||||||
|
if k == 27:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Now update the previous frame and previous points
|
||||||
|
old_gray = frame_gray.copy()
|
||||||
|
p0 = good_new.reshape(-1,1,2)
|
||||||
|
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
cap.release()
|
||||||
|
@endcode
|
||||||
|
(This code doesn't check how correct the next keypoints are. So even if a feature point disappears from the image, there is a chance that the optical flow finds a next point which merely looks close to it. For robust tracking, corner points should therefore be re-detected at regular intervals. The OpenCV samples include such an example, which re-detects feature points every 5 frames and also runs a backward check on the computed optical flow points to keep only the good ones. Check samples/python2/lk_track.py.)
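The backward check mentioned above can be sketched as follows, reusing old_gray, frame_gray, p0 and lk_params from the code above (the 1-pixel tolerance is an arbitrary choice): track the points forward, track the result backward, and keep only the points whose round trip ends close to where it started.
@code{.py}
# Forward pass: old frame -> new frame
p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
# Backward pass: new frame -> old frame
p0r, st_b, err_b = cv2.calcOpticalFlowPyrLK(frame_gray, old_gray, p1, None, **lk_params)

# Round-trip error per point; a small error means the track is reliable
fb_err = abs(p0 - p0r).reshape(-1, 2).max(-1)
good = fb_err < 1.0

good_new = p1.reshape(-1, 2)[good]
good_old = p0.reshape(-1, 2)[good]
@endcode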
|
||||||
|
|
||||||
|
See the results we got:
|
||||||
|
|
||||||
|
![image](images/opticalflow_lk.jpg)
|
||||||
|
|
||||||
|
Dense Optical Flow in OpenCV
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
The Lucas-Kanade method computes optical flow for a sparse feature set (in our example, corners detected using the Shi-Tomasi algorithm). OpenCV provides another algorithm to find the dense optical flow: it computes the optical flow for all the points in the frame. It is based on Gunnar Farneback's algorithm, which is explained in "Two-Frame Motion Estimation Based on Polynomial Expansion" by Gunnar Farneback in 2003.
|
||||||
|
|
||||||
|
The sample below shows how to find the dense optical flow using the above algorithm. We get a 2-channel array with optical flow vectors, \f$(u,v)\f$. We find their magnitude and direction and color-code the result for better visualization: direction corresponds to the Hue value of the image and magnitude to the Value plane. See the code below:
|
||||||
|
@code{.py}
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
cap = cv2.VideoCapture("vtest.avi")
|
||||||
|
|
||||||
|
ret, frame1 = cap.read()
|
||||||
|
prvs = cv2.cvtColor(frame1,cv2.COLOR_BGR2GRAY)
|
||||||
|
hsv = np.zeros_like(frame1)
|
||||||
|
hsv[...,1] = 255
|
||||||
|
|
||||||
|
while(1):
|
||||||
|
ret, frame2 = cap.read()
|
||||||
|
next = cv2.cvtColor(frame2,cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
|
flow = cv2.calcOpticalFlowFarneback(prvs,next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
|
||||||
|
|
||||||
|
mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
|
||||||
|
hsv[...,0] = ang*180/np.pi/2
|
||||||
|
hsv[...,2] = cv2.normalize(mag,None,0,255,cv2.NORM_MINMAX)
|
||||||
|
rgb = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)
|
||||||
|
|
||||||
|
cv2.imshow('frame2',rgb)
|
||||||
|
k = cv2.waitKey(30) & 0xff
|
||||||
|
if k == 27:
|
||||||
|
break
|
||||||
|
elif k == ord('s'):
|
||||||
|
cv2.imwrite('opticalfb.png',frame2)
|
||||||
|
cv2.imwrite('opticalhsv.png',rgb)
|
||||||
|
prvs = next
|
||||||
|
|
||||||
|
cap.release()
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
@endcode
|
||||||
|
See the result below:
|
||||||
|
|
||||||
|
![image](images/opticalfb.jpg)
|
||||||
|
|
||||||
|
OpenCV comes with a more advanced sample on dense optical flow; please see samples/python2/opt_flow.py.
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
||||||
|
|
||||||
|
-# Check the code in samples/python2/lk_track.py. Try to understand the code.
|
||||||
|
2. Check the code in samples/python2/opt_flow.py. Try to understand the code.
|
185
doc/py_tutorials/py_video/py_meanshift/py_meanshift.markdown
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
Meanshift and Camshift {#tutorial_py_meanshift}
|
||||||
|
======================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this chapter,
|
||||||
|
|
||||||
|
- We will learn about Meanshift and Camshift algorithms to find and track objects in videos.
|
||||||
|
|
||||||
|
Meanshift
|
||||||
|
---------
|
||||||
|
|
||||||
|
The intuition behind meanshift is simple. Consider you have a set of points (it can be a pixel distribution like a histogram backprojection). You are given a small window (maybe a circle) and you have to move that window to the area of maximum pixel density (or maximum number of points). It is illustrated in the simple image given below:
|
||||||
|
|
||||||
|
![image](images/meanshift_basics.jpg)
|
||||||
|
|
||||||
|
The initial window is shown as the blue circle named "C1". Its original center is marked by the blue rectangle named "C1_o". But if you find the centroid of the points inside that window, you will get the point "C1_r" (marked by the small blue circle), which is the real centroid of the window. Surely they don't match. So move the window such that the center of the new window matches the previous centroid. Again find the new centroid. Most probably, it won't match. So move it again, and continue the iterations until the center of the window and its centroid fall on the same location (or within a small desired error). What you finally obtain is a window with maximum pixel distribution, marked with the green circle named "C2". As you can see in the image, it has the maximum number of points. The whole process is demonstrated on a static image below:
|
||||||
|
|
||||||
|
![image](images/meanshift_face.gif)
|
||||||
|
|
||||||
|
So we normally pass the histogram-backprojected image and the initial target location. When the object moves, the movement is obviously reflected in the histogram-backprojected image. As a result, the meanshift algorithm moves our window to the new location with maximum density.
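The window-shifting idea itself is easy to write down. Below is a minimal NumPy sketch (not OpenCV's implementation) that moves a circular window over a 2D point set until its center coincides with the centroid of the points inside it:
@code{.py}
import numpy as np

def meanshift_window(points, center, radius, n_iter=20, eps=1e-3):
    """Shift a circular window towards the densest region of a 2D point set."""
    center = np.asarray(center, dtype=np.float64)
    for _ in range(n_iter):
        inside = points[np.linalg.norm(points - center, axis=1) <= radius]
        if len(inside) == 0:
            break
        new_center = inside.mean(axis=0)              # centroid of points in the window
        if np.linalg.norm(new_center - center) < eps:
            break                                     # converged
        center = new_center                           # move the window to the centroid
    return center
@endcode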
|
||||||
|
|
||||||
|
### Meanshift in OpenCV
|
||||||
|
|
||||||
|
To use meanshift in OpenCV, we first need to set up the target and find its histogram so that we can backproject the target on each frame for the meanshift calculation. We also need to provide the initial location of the window. For the histogram, only Hue is considered here. Also, to avoid false values due to low light, low-light values are discarded using the **cv2.inRange()** function.
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
cap = cv2.VideoCapture('slow.flv')
|
||||||
|
|
||||||
|
# take first frame of the video
|
||||||
|
ret,frame = cap.read()
|
||||||
|
|
||||||
|
# setup initial location of window
|
||||||
|
r,h,c,w = 250,90,400,125 # simply hardcoded the values
|
||||||
|
track_window = (c,r,w,h)
|
||||||
|
|
||||||
|
# set up the ROI for tracking
|
||||||
|
roi = frame[r:r+h, c:c+w]
|
||||||
|
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
|
||||||
|
mask = cv2.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
|
||||||
|
roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180])
|
||||||
|
cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX)
|
||||||
|
|
||||||
|
# Setup the termination criteria, either 10 iteration or move by atleast 1 pt
|
||||||
|
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )
|
||||||
|
|
||||||
|
while(1):
|
||||||
|
ret ,frame = cap.read()
|
||||||
|
|
||||||
|
if ret == True:
|
||||||
|
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
|
||||||
|
dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1)
|
||||||
|
|
||||||
|
# apply meanshift to get the new location
|
||||||
|
ret, track_window = cv2.meanShift(dst, track_window, term_crit)
|
||||||
|
|
||||||
|
# Draw it on image
|
||||||
|
x,y,w,h = track_window
|
||||||
|
img2 = cv2.rectangle(frame, (x,y), (x+w,y+h), 255,2)
|
||||||
|
cv2.imshow('img2',img2)
|
||||||
|
|
||||||
|
k = cv2.waitKey(60) & 0xff
|
||||||
|
if k == 27:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
cv2.imwrite(chr(k)+".jpg",img2)
|
||||||
|
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
cap.release()
|
||||||
|
@endcode
|
||||||
|
Three frames of the video I used are given below:
|
||||||
|
|
||||||
|
![image](images/meanshift_result.jpg)
|
||||||
|
|
||||||
|
Camshift
|
||||||
|
--------
|
||||||
|
|
||||||
|
Did you closely watch the last result? There is a problem: our window always has the same size, whether the car is far away or very close to the camera. That is not good. We need to adapt the window size to the size and rotation of the target. Once again, the solution came from "OpenCV Labs", and it is called CAMshift (Continuously Adaptive Meanshift), published by Gary Bradski in his paper "Computer Vision Face Tracking for Use in a Perceptual User Interface" in 1998.
|
||||||
|
|
||||||
|
It applies meanshift first. Once meanshift converges, it updates the size of the window as \f$s = 2 \times \sqrt{\frac{M_{00}}{256}}\f$. It also calculates the orientation of the best-fitting ellipse. It then applies meanshift again with the newly scaled search window and the previous window location. The process continues until the required accuracy is met.
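For intuition, the size update can be sketched from the image moments of the backprojection inside the current window. This is only an illustration of the formula above, not the library's internal code; `dst` and `track_window` are assumed to come from the meanshift example:
@code{.py}
import numpy as np
import cv2

def camshift_window_size(dst, track_window):
    """Window side length from the zeroth moment of the backprojection in the window."""
    c, r, w, h = track_window
    roi = dst[r:r+h, c:c+w]
    M00 = cv2.moments(roi)['m00']        # total "mass" inside the window
    return 2 * np.sqrt(M00 / 256.0)      # s = 2 * sqrt(M00 / 256)
@endcode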
|
||||||
|
|
||||||
|
![image](images/camshift_face.gif)
|
||||||
|
|
||||||
|
### Camshift in OpenCV
|
||||||
|
|
||||||
|
It is almost the same as meanshift, but it returns a rotated rectangle (that is our result) and the box parameters (to be passed as the search window in the next iteration). See the code below:
|
||||||
|
@code{.py}
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
cap = cv2.VideoCapture('slow.flv')
|
||||||
|
|
||||||
|
# take first frame of the video
|
||||||
|
ret,frame = cap.read()
|
||||||
|
|
||||||
|
# setup initial location of window
|
||||||
|
r,h,c,w = 250,90,400,125 # simply hardcoded the values
|
||||||
|
track_window = (c,r,w,h)
|
||||||
|
|
||||||
|
# set up the ROI for tracking
|
||||||
|
roi = frame[r:r+h, c:c+w]
|
||||||
|
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
|
||||||
|
mask = cv2.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
|
||||||
|
roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180])
|
||||||
|
cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX)
|
||||||
|
|
||||||
|
# Setup the termination criteria, either 10 iteration or move by atleast 1 pt
|
||||||
|
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )
|
||||||
|
|
||||||
|
while(1):
|
||||||
|
ret ,frame = cap.read()
|
||||||
|
|
||||||
|
if ret == True:
|
||||||
|
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
|
||||||
|
dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1)
|
||||||
|
|
||||||
|
# apply meanshift to get the new location
|
||||||
|
ret, track_window = cv2.CamShift(dst, track_window, term_crit)
|
||||||
|
|
||||||
|
# Draw it on image
|
||||||
|
pts = cv2.boxPoints(ret)
|
||||||
|
pts = np.int0(pts)
|
||||||
|
img2 = cv2.polylines(frame,[pts],True, 255,2)
|
||||||
|
cv2.imshow('img2',img2)
|
||||||
|
|
||||||
|
k = cv2.waitKey(60) & 0xff
|
||||||
|
if k == 27:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
cv2.imwrite(chr(k)+".jpg",img2)
|
||||||
|
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
cap.release()
|
||||||
|
@endcode
|
||||||
|
Three frames of the result are shown below:
|
||||||
|
|
||||||
|
![image](images/camshift_result.jpg)
|
||||||
|
|
||||||
|
Additional Resources
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
-# French Wikipedia page on [Camshift](http://fr.wikipedia.org/wiki/Camshift). (The two animations
|
||||||
|
are taken from here)
|
||||||
|
2. Bradski, G.R., "Real time face and object tracking as a component of a perceptual user
|
||||||
|
interface," Applications of Computer Vision, 1998. WACV '98. Proceedings., Fourth IEEE Workshop
|
||||||
|
on , vol., no., pp.214,219, 19-21 Oct 1998
|
||||||
|
|
||||||
|
Exercises
|
||||||
|
---------
|
||||||
|
|
||||||
|
-# OpenCV comes with a Python sample on interactive demo of camshift. Use it, hack it, understand
|
||||||
|
it.
|
@ -16,7 +16,7 @@ In this chapter,
|
|||||||
Meanshift
|
Meanshift
|
||||||
============
|
============
|
||||||
|
|
||||||
The intuition behind the meanshift is simple. Consider you have a set of points. (It can be a pixel distribution like histogram backprojection). You are given a small window ( may be a circle) and you have to move that window to the area of maximum pixel density (or maximum number of points). It is illustrated in the simple image given below:
|
The intuition behind the meanshift is simple. Consider you have a set of points. (It can be a pixel distribution like histogram backprojection). You are given a small window (maybe a circle) and you have to move that window to the area of maximum pixel density (or maximum number of points). It is illustrated in the simple image given below:
|
||||||
|
|
||||||
.. image:: images/meanshift_basics.jpg
|
.. image:: images/meanshift_basics.jpg
|
||||||
:alt: Intuition behind meanshift
|
:alt: Intuition behind meanshift
|
||||||
|
@ -0,0 +1,16 @@
|
|||||||
|
Video Analysis {#tutorial_py_table_of_contents_video}
|
||||||
|
==============
|
||||||
|
|
||||||
|
- @subpage tutorial_py_meanshift
|
||||||
|
|
||||||
|
We have already seen
|
||||||
|
an example of color-based tracking. It is simpler. This time, we will see significantly better algorithms like "Meanshift", and its upgraded version "Camshift", to find and track objects.
|
||||||
|
|
||||||
|
- @subpage tutorial_py_lucas_kanade
|
||||||
|
|
||||||
|
Now let's discuss an important concept, "Optical Flow", which is related to videos and has many applications.
|
||||||
|
|
||||||
|
- @subpage tutorial_py_bg_subtraction
|
||||||
|
|
||||||
|
In several applications, we need to extract foreground for further operations like object tracking. Background Subtraction is a well-known method in those cases.
|
@ -1,16 +1,13 @@
|
|||||||
OpenCV modules {#mainpage}
|
OpenCV modules {#mainpage}
|
||||||
==============
|
==============
|
||||||
|
|
||||||
@subpage intro
|
- @ref intro
|
||||||
|
- @ref tutorial_root
|
||||||
|
- @ref tutorial_py_root
|
||||||
|
- @ref tutorial_user_guide
|
||||||
|
- @ref faq
|
||||||
|
- @ref citelist
|
||||||
|
|
||||||
### Main modules
|
|
||||||
|
|
||||||
Module name | Folder
|
|
||||||
-------------- | -------------
|
|
||||||
@CMAKE_DOXYGEN_MAIN_REFERENCE@
|
@CMAKE_DOXYGEN_MAIN_REFERENCE@
|
||||||
|
|
||||||
### Extra modules
|
|
||||||
|
|
||||||
Module name | Folder
|
|
||||||
-------------- | -------------
|
|
||||||
@CMAKE_DOXYGEN_EXTRA_REFERENCE@
|
@CMAKE_DOXYGEN_EXTRA_REFERENCE@
|
||||||
|
@ -0,0 +1,490 @@
|
|||||||
|
Camera calibration With OpenCV {#tutorial_camera_calibration}
|
||||||
|
==============================
|
||||||
|
|
||||||
|
Cameras have been around for a long time. However, with the introduction of cheap *pinhole* cameras in the late 20th century, they became a common occurrence in our everyday life. Unfortunately, this cheapness comes at a price: significant distortion. Luckily, these distortions are constant, and with a calibration and some remapping we can correct them. Furthermore, with calibration you may also determine the relation between the camera's natural units (pixels) and real-world units (for example millimeters).
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
For the distortion OpenCV takes into account the radial and tangential factors. For the radial
|
||||||
|
factor one uses the following formula:
|
||||||
|
|
||||||
|
\f[x_{corrected} = x( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) \\
|
||||||
|
y_{corrected} = y( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6)\f]
|
||||||
|
|
||||||
|
So for an old pixel point at \f$(x,y)\f$ coordinates in the input image, its position in the corrected output image will be \f$(x_{corrected}, y_{corrected})\f$. The presence of radial distortion manifests itself in the form of the "barrel" or "fish-eye" effect.
|
||||||
|
|
||||||
|
Tangential distortion occurs because the image-taking lens is not perfectly parallel to the
|
||||||
|
imaging plane. It can be corrected via the formulas:
|
||||||
|
|
||||||
|
\f[x_{corrected} = x + [ 2p_1xy + p_2(r^2+2x^2)] \\
|
||||||
|
y_{corrected} = y + [ p_1(r^2+ 2y^2)+ 2p_2xy]\f]
|
||||||
|
|
||||||
|
So we have five distortion parameters which in OpenCV are presented as one row matrix with 5
|
||||||
|
columns:
|
||||||
|
|
||||||
|
\f[Distortion_{coefficients}=(k_1 \hspace{10pt} k_2 \hspace{10pt} p_1 \hspace{10pt} p_2 \hspace{10pt} k_3)\f]
|
||||||
|
|
||||||
|
Now for the unit conversion we use the following formula:
|
||||||
|
|
||||||
|
\f[\left [ \begin{matrix} x \\ y \\ w \end{matrix} \right ] = \left [ \begin{matrix} f_x & 0 & c_x \\ 0 & f_y & c_y \\ 0 & 0 & 1 \end{matrix} \right ] \left [ \begin{matrix} X \\ Y \\ Z \end{matrix} \right ]\f]
|
||||||
|
|
||||||
|
Here the presence of \f$w\f$ is explained by the use of a homogeneous coordinate system (and \f$w=Z\f$). The unknown parameters are \f$f_x\f$ and \f$f_y\f$ (the camera focal lengths) and \f$(c_x, c_y)\f$, the optical center expressed in pixel coordinates. If for both axes a common focal length is used with a given aspect ratio \f$a\f$ (usually 1), then \f$f_y=f_x*a\f$ and in the upper formula we will have a single focal length \f$f\f$. The matrix containing these four parameters is referred to as the *camera matrix*. While the distortion coefficients are the same regardless of the camera resolution used, the camera matrix should be scaled from the calibrated resolution to the current resolution.
|
||||||
|
|
||||||
|
The process of determining these two matrices is the calibration. Calculation of these parameters is
|
||||||
|
done through basic geometrical equations. The equations used depend on the chosen calibrating
|
||||||
|
objects. Currently OpenCV supports three types of objects for calibration:
|
||||||
|
|
||||||
|
- Classical black-white chessboard
|
||||||
|
- Symmetrical circle pattern
|
||||||
|
- Asymmetrical circle pattern
|
||||||
|
|
||||||
|
Basically, you need to take snapshots of these patterns with your camera and let OpenCV find them.
|
||||||
|
Each found pattern results in a new equation. To solve the equation you need at least a
|
||||||
|
predetermined number of pattern snapshots to form a well-posed equation system. This number is
|
||||||
|
higher for the chessboard pattern and lower for the circle patterns. For example, in theory the
|
||||||
|
chessboard pattern requires at least two snapshots. However, in practice we have a good amount of
|
||||||
|
noise present in our input images, so for good results you will probably need at least 10 good
|
||||||
|
snapshots of the input pattern in different positions.
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
The sample application will:
|
||||||
|
|
||||||
|
- Determine the distortion matrix
|
||||||
|
- Determine the camera matrix
|
||||||
|
- Take input from Camera, Video and Image file list
|
||||||
|
- Read configuration from XML/YAML file
|
||||||
|
- Save the results into XML/YAML file
|
||||||
|
- Calculate re-projection error
|
||||||
|
|
||||||
|
Source code
|
||||||
|
-----------
|
||||||
|
|
||||||
|
You may also find the source code in the `samples/cpp/tutorial_code/calib3d/camera_calibration/`
|
||||||
|
folder of the OpenCV source library or [download it from here
|
||||||
|
](samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp). The program has a
|
||||||
|
single argument: the name of its configuration file. If none is given then it will try to open the
|
||||||
|
one named "default.xml". [Here's a sample configuration file
|
||||||
|
](samples/cpp/tutorial_code/calib3d/camera_calibration/in_VID5.xml) in XML format. In the
|
||||||
|
configuration file you may choose to use a camera as the input, a video file or an image list. If you
|
||||||
|
opt for the last one, you will need to create a configuration file where you enumerate the images to
|
||||||
|
use. Here's [an example of this ](samples/cpp/tutorial_code/calib3d/camera_calibration/VID5.xml).
|
||||||
|
The important part to remember is that the images need to be specified using the absolute path or
|
||||||
|
the relative one from your application's working directory. You may find all this in the samples
|
||||||
|
directory mentioned above.
|
||||||
|
|
||||||
|
The application starts up by reading the settings from the configuration file. Although this is an important part of the application, it has nothing to do with the subject of this tutorial: *camera calibration*. Therefore, I've chosen not to post the code for that part here. You can find the technical background on how to do this in the @ref tutorial_file_input_output_with_xml_yml tutorial.
|
||||||
|
|
||||||
|
Explanation
|
||||||
|
-----------
|
||||||
|
|
||||||
|
-# **Read the settings.**
|
||||||
|
@code{.cpp}
|
||||||
|
Settings s;
|
||||||
|
const string inputSettingsFile = argc > 1 ? argv[1] : "default.xml";
|
||||||
|
FileStorage fs(inputSettingsFile, FileStorage::READ); // Read the settings
|
||||||
|
if (!fs.isOpened())
|
||||||
|
{
|
||||||
|
cout << "Could not open the configuration file: \"" << inputSettingsFile << "\"" << endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
fs["Settings"] >> s;
|
||||||
|
fs.release(); // close Settings file
|
||||||
|
|
||||||
|
if (!s.goodInput)
|
||||||
|
{
|
||||||
|
cout << "Invalid input detected. Application stopping. " << endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
For this I've used the simple OpenCV class input operation. After reading the file I have an additional post-processing function that checks the validity of the input. Only if all inputs are good will the *goodInput* variable be true.
|
||||||
|
|
||||||
|
-# **Get the next input; if it fails or we have enough of them, calibrate**. After this we have a big loop where we do the following operations: get the next image from the image list, camera or video file. If this fails, or we have enough images, then we run the calibration process. In the case of an image list we step out of the loop; otherwise the remaining frames will be undistorted (if the option is set) by changing from *DETECTION* mode to the *CALIBRATED* one.
|
||||||
|
@code{.cpp}
|
||||||
|
for(int i = 0;;++i)
|
||||||
|
{
|
||||||
|
Mat view;
|
||||||
|
bool blinkOutput = false;
|
||||||
|
|
||||||
|
view = s.nextImage();
|
||||||
|
|
||||||
|
//----- If no more image, or got enough, then stop calibration and show result -------------
|
||||||
|
if( mode == CAPTURING && imagePoints.size() >= (unsigned)s.nrFrames )
|
||||||
|
{
|
||||||
|
if( runCalibrationAndSave(s, imageSize, cameraMatrix, distCoeffs, imagePoints))
|
||||||
|
mode = CALIBRATED;
|
||||||
|
else
|
||||||
|
mode = DETECTION;
|
||||||
|
}
|
||||||
|
if(view.empty()) // If no more images then run calibration, save and stop loop.
|
||||||
|
{
|
||||||
|
if( imagePoints.size() > 0 )
|
||||||
|
runCalibrationAndSave(s, imageSize, cameraMatrix, distCoeffs, imagePoints);
|
||||||
|
            break;
      }
      imageSize = view.size();  // Format input image.
      if( s.flipVertical )    flip( view, view, 0 );
|
||||||
|
@endcode
|
||||||
|
For some cameras we may need to flip the input image. Here we do this too.
|
||||||
|
|
||||||
|
-# **Find the pattern in the current input**. The formation of the equations I mentioned above requires finding the major patterns in the input: in the case of the chessboard these are the corners of the squares, and for the circles, well, the circles themselves. The positions of these will form the result, which will be written into the *pointBuf* vector.
|
||||||
|
@code{.cpp}
|
||||||
|
vector<Point2f> pointBuf;
|
||||||
|
|
||||||
|
bool found;
|
||||||
|
switch( s.calibrationPattern ) // Find feature points on the input format
|
||||||
|
{
|
||||||
|
case Settings::CHESSBOARD:
|
||||||
|
found = findChessboardCorners( view, s.boardSize, pointBuf,
|
||||||
|
CALIB_CB_ADAPTIVE_THRESH | CALIB_CB_FAST_CHECK | CALIB_CB_NORMALIZE_IMAGE);
|
||||||
|
break;
|
||||||
|
case Settings::CIRCLES_GRID:
|
||||||
|
found = findCirclesGrid( view, s.boardSize, pointBuf );
|
||||||
|
break;
|
||||||
|
case Settings::ASYMMETRIC_CIRCLES_GRID:
|
||||||
|
found = findCirclesGrid( view, s.boardSize, pointBuf, CALIB_CB_ASYMMETRIC_GRID );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
Depending on the type of the input pattern you use either the @ref cv::findChessboardCorners or
|
||||||
|
the @ref cv::findCirclesGrid function. For both of them you pass the current image and the size
|
||||||
|
of the board and you'll get the positions of the patterns. Furthermore, they return a boolean
|
||||||
|
variable which states if the pattern was found in the input (we only need to take into account
|
||||||
|
those images where this is true!).
|
||||||
|
|
||||||
|
Then again, in the case of cameras we only take camera images after an input delay time has passed. This is done in order to allow the user to move the chessboard around and get different images. Similar images result in similar equations, and similar equations at the calibration step will form an ill-posed problem, so the calibration will fail. For square patterns the positions of the corners are only approximate. We may improve this by calling the @ref cv::cornerSubPix function, which will produce a better calibration result. After this we add the valid input's result to the *imagePoints* vector to collect all of the equations into a single container. Finally, for visualization feedback purposes we will draw the found points on the input image using the @ref cv::drawChessboardCorners function.
|
||||||
|
@code{.cpp}
|
||||||
|
if ( found) // If done with success,
|
||||||
|
{
|
||||||
|
// improve the found corners' coordinate accuracy for chessboard
|
||||||
|
if( s.calibrationPattern == Settings::CHESSBOARD)
|
||||||
|
{
|
||||||
|
Mat viewGray;
|
||||||
|
cvtColor(view, viewGray, COLOR_BGR2GRAY);
|
||||||
|
cornerSubPix( viewGray, pointBuf, Size(11,11),
|
||||||
|
Size(-1,-1), TermCriteria( TermCriteria::EPS+TermCriteria::MAX_ITER, 30, 0.1 ));
|
||||||
|
}
|
||||||
|
|
||||||
|
if( mode == CAPTURING && // For camera only take new samples after delay time
|
||||||
|
(!s.inputCapture.isOpened() || clock() - prevTimestamp > s.delay*1e-3*CLOCKS_PER_SEC) )
|
||||||
|
{
|
||||||
|
imagePoints.push_back(pointBuf);
|
||||||
|
prevTimestamp = clock();
|
||||||
|
blinkOutput = s.inputCapture.isOpened();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Draw the corners.
|
||||||
|
drawChessboardCorners( view, s.boardSize, Mat(pointBuf), found );
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
-# **Show state and result to the user, plus command line control of the application**. This part
|
||||||
|
shows text output on the image.
|
||||||
|
@code{.cpp}
|
||||||
|
//----------------------------- Output Text ------------------------------------------------
|
||||||
|
string msg = (mode == CAPTURING) ? "100/100" :
|
||||||
|
mode == CALIBRATED ? "Calibrated" : "Press 'g' to start";
|
||||||
|
int baseLine = 0;
|
||||||
|
Size textSize = getTextSize(msg, 1, 1, 1, &baseLine);
|
||||||
|
Point textOrigin(view.cols - 2*textSize.width - 10, view.rows - 2*baseLine - 10);
|
||||||
|
|
||||||
|
if( mode == CAPTURING )
|
||||||
|
{
|
||||||
|
if(s.showUndistorsed)
|
||||||
|
msg = format( "%d/%d Undist", (int)imagePoints.size(), s.nrFrames );
|
||||||
|
else
|
||||||
|
msg = format( "%d/%d", (int)imagePoints.size(), s.nrFrames );
|
||||||
|
}
|
||||||
|
|
||||||
|
putText( view, msg, textOrigin, 1, 1, mode == CALIBRATED ? GREEN : RED);
|
||||||
|
|
||||||
|
if( blinkOutput )
|
||||||
|
bitwise_not(view, view);
|
||||||
|
@endcode
|
||||||
|
If we ran the calibration and obtained the camera matrix with the distortion coefficients, we may want to correct the image using the @ref cv::undistort function:
|
||||||
|
@code{.cpp}
|
||||||
|
//------------------------- Video capture output undistorted ------------------------------
|
||||||
|
if( mode == CALIBRATED && s.showUndistorsed )
|
||||||
|
{
|
||||||
|
Mat temp = view.clone();
|
||||||
|
undistort(temp, view, cameraMatrix, distCoeffs);
|
||||||
|
}
|
||||||
|
//------------------------------ Show image and check for input commands -------------------
|
||||||
|
imshow("Image View", view);
|
||||||
|
@endcode
|
||||||
|
Then we wait for an input key and if this is *u* we toggle the distortion removal, if it is *g*
|
||||||
|
we start again the detection process, and finally for the *ESC* key we quit the application:
|
||||||
|
@code{.cpp}
|
||||||
|
char key = waitKey(s.inputCapture.isOpened() ? 50 : s.delay);
|
||||||
|
if( key == ESC_KEY )
|
||||||
|
break;
|
||||||
|
|
||||||
|
if( key == 'u' && mode == CALIBRATED )
|
||||||
|
s.showUndistorsed = !s.showUndistorsed;
|
||||||
|
|
||||||
|
if( s.inputCapture.isOpened() && key == 'g' )
|
||||||
|
{
|
||||||
|
mode = CAPTURING;
|
||||||
|
imagePoints.clear();
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
-# **Show the distortion removal for the images too**. When you work with an image list it is not possible to remove the distortion inside the loop. Therefore, you must do this after the loop. Taking advantage of this, I'll now expand the @ref cv::undistort function, which in fact first calls @ref cv::initUndistortRectifyMap to find the transformation matrices and then performs the transformation using the @ref cv::remap function. Because after a successful calibration the map calculation needs to be done only once, using this expanded form may speed up your application:
|
||||||
|
@code{.cpp}
|
||||||
|
if( s.inputType == Settings::IMAGE_LIST && s.showUndistorsed )
|
||||||
|
{
|
||||||
|
Mat view, rview, map1, map2;
|
||||||
|
initUndistortRectifyMap(cameraMatrix, distCoeffs, Mat(),
|
||||||
|
getOptimalNewCameraMatrix(cameraMatrix, distCoeffs, imageSize, 1, imageSize, 0),
|
||||||
|
imageSize, CV_16SC2, map1, map2);
|
||||||
|
|
||||||
|
for(int i = 0; i < (int)s.imageList.size(); i++ )
|
||||||
|
{
|
||||||
|
view = imread(s.imageList[i], 1);
|
||||||
|
if(view.empty())
|
||||||
|
continue;
|
||||||
|
remap(view, rview, map1, map2, INTER_LINEAR);
|
||||||
|
imshow("Image View", rview);
|
||||||
|
char c = waitKey();
|
||||||
|
if( c == ESC_KEY || c == 'q' || c == 'Q' )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
The calibration and save
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
Because the calibration needs to be done only once per camera, it makes sense to save it after a
|
||||||
|
successful calibration. This way later on you can just load these values into your program. Due to
|
||||||
|
this we first make the calibration, and if it succeeds we save the result into an OpenCV style XML
|
||||||
|
or YAML file, depending on the extension you give in the configuration file.
|
||||||
|
|
||||||
|
Therefore in the first function we just split up these two processes. Because we want to save many
|
||||||
|
of the calibration variables we'll create these variables here and pass on both of them to the
|
||||||
|
calibration and saving function. Again, I'll not show the saving part as that has little in common
|
||||||
|
with the calibration. Explore the source file in order to find out how and what:
|
||||||
|
@code{.cpp}
|
||||||
|
bool runCalibrationAndSave(Settings& s, Size imageSize, Mat& cameraMatrix, Mat& distCoeffs,vector<vector<Point2f> > imagePoints )
|
||||||
|
{
|
||||||
|
vector<Mat> rvecs, tvecs;
|
||||||
|
vector<float> reprojErrs;
|
||||||
|
double totalAvgErr = 0;
|
||||||
|
|
||||||
|
bool ok = runCalibration(s,imageSize, cameraMatrix, distCoeffs, imagePoints, rvecs, tvecs,
|
||||||
|
reprojErrs, totalAvgErr);
|
||||||
|
cout << (ok ? "Calibration succeeded" : "Calibration failed")
|
||||||
|
<< ". avg re projection error = " << totalAvgErr ;
|
||||||
|
|
||||||
|
if( ok ) // save only if the calibration was done with success
|
||||||
|
saveCameraParams( s, imageSize, cameraMatrix, distCoeffs, rvecs ,tvecs, reprojErrs,
|
||||||
|
imagePoints, totalAvgErr);
|
||||||
|
return ok;
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
We do the calibration with the help of the @ref cv::calibrateCamera function. It has the following
|
||||||
|
parameters:
|
||||||
|
|
||||||
|
- The object points. This is a vector of *Point3f* vectors that for each input image describes how the pattern should look. If we have a planar pattern (like a chessboard) then we can simply set all Z coordinates to zero. This is a collection of the points where these important points are present. Because we use a single pattern for all the input images, we can calculate this just once and replicate it for all the other input views. We calculate the corner points with the *calcBoardCornerPositions* function as:
|
||||||
|
@code{.cpp}
|
||||||
|
void calcBoardCornerPositions(Size boardSize, float squareSize, vector<Point3f>& corners,
|
||||||
|
Settings::Pattern patternType /*= Settings::CHESSBOARD*/)
|
||||||
|
{
|
||||||
|
corners.clear();
|
||||||
|
|
||||||
|
switch(patternType)
|
||||||
|
{
|
||||||
|
case Settings::CHESSBOARD:
|
||||||
|
case Settings::CIRCLES_GRID:
|
||||||
|
for( int i = 0; i < boardSize.height; ++i )
|
||||||
|
for( int j = 0; j < boardSize.width; ++j )
|
||||||
|
corners.push_back(Point3f(float( j*squareSize ), float( i*squareSize ), 0));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Settings::ASYMMETRIC_CIRCLES_GRID:
|
||||||
|
for( int i = 0; i < boardSize.height; i++ )
|
||||||
|
for( int j = 0; j < boardSize.width; j++ )
|
||||||
|
corners.push_back(Point3f(float((2*j + i % 2)*squareSize), float(i*squareSize), 0));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
And then replicate it for all the other views as:
|
||||||
|
@code{.cpp}
|
||||||
|
vector<vector<Point3f> > objectPoints(1);
|
||||||
|
calcBoardCornerPositions(s.boardSize, s.squareSize, objectPoints[0], s.calibrationPattern);
|
||||||
|
objectPoints.resize(imagePoints.size(),objectPoints[0]);
|
||||||
|
@endcode
|
||||||
|
- The image points. This is a vector of *Point2f* vectors which for each input image contains
|
||||||
|
coordinates of the important points (corners for chessboard and centers of the circles for the
|
||||||
|
circle pattern). We have already collected this from @ref cv::findChessboardCorners or @ref
|
||||||
|
cv::findCirclesGrid function. We just need to pass it on.
|
||||||
|
- The size of the image acquired from the camera, video file or the images.
|
||||||
|
- The camera matrix. If we used the fixed aspect ratio option we need to set the \f$f_x\f$ to zero:
|
||||||
|
@code{.cpp}
|
||||||
|
cameraMatrix = Mat::eye(3, 3, CV_64F);
|
||||||
|
if( s.flag & CALIB_FIX_ASPECT_RATIO )
|
||||||
|
cameraMatrix.at<double>(0,0) = 1.0;
|
||||||
|
@endcode
|
||||||
|
- The distortion coefficient matrix. Initialize with zero.
|
||||||
|
@code{.cpp}
|
||||||
|
distCoeffs = Mat::zeros(8, 1, CV_64F);
|
||||||
|
@endcode
|
||||||
|
- For all the views the function will calculate rotation and translation vectors which transform
|
||||||
|
the object points (given in the model coordinate space) to the image points (given in the world
|
||||||
|
coordinate space). The 7-th and 8-th parameters are the output vector of matrices containing in
|
||||||
|
the i-th position the rotation and translation vector for the i-th object point to the i-th
|
||||||
|
image point.
|
||||||
|
- The final argument is the flag. You need to specify here options like fix the aspect ratio for
|
||||||
|
the focal length, assume zero tangential distortion or to fix the principal point.
|
||||||
|
@code{.cpp}
|
||||||
|
double rms = calibrateCamera(objectPoints, imagePoints, imageSize, cameraMatrix,
|
||||||
|
distCoeffs, rvecs, tvecs, s.flag|CV_CALIB_FIX_K4|CV_CALIB_FIX_K5);
|
||||||
|
@endcode
|
||||||
|
- The function returns the average re-projection error. This number gives a good estimation of
|
||||||
|
precision of the found parameters. This should be as close to zero as possible. Given the
|
||||||
|
intrinsic, distortion, rotation and translation matrices we may calculate the error for one view
|
||||||
|
by using the @ref cv::projectPoints to first transform the object point to image point. Then we
|
||||||
|
calculate the absolute norm between what we got with our transformation and the corner/circle
|
||||||
|
finding algorithm. To find the average error we calculate the arithmetical mean of the errors
|
||||||
|
calculated for all the calibration images.
|
||||||
|
@code{.cpp}
|
||||||
|
double computeReprojectionErrors( const vector<vector<Point3f> >& objectPoints,
|
||||||
|
const vector<vector<Point2f> >& imagePoints,
|
||||||
|
const vector<Mat>& rvecs, const vector<Mat>& tvecs,
|
||||||
|
const Mat& cameraMatrix , const Mat& distCoeffs,
|
||||||
|
vector<float>& perViewErrors)
|
||||||
|
{
|
||||||
|
vector<Point2f> imagePoints2;
|
||||||
|
int i, totalPoints = 0;
|
||||||
|
double totalErr = 0, err;
|
||||||
|
perViewErrors.resize(objectPoints.size());
|
||||||
|
|
||||||
|
for( i = 0; i < (int)objectPoints.size(); ++i )
|
||||||
|
{
|
||||||
|
projectPoints( Mat(objectPoints[i]), rvecs[i], tvecs[i], cameraMatrix, // project
|
||||||
|
distCoeffs, imagePoints2);
|
||||||
|
err = norm(Mat(imagePoints[i]), Mat(imagePoints2), NORM_L2); // difference
|
||||||
|
|
||||||
|
int n = (int)objectPoints[i].size();
|
||||||
|
perViewErrors[i] = (float) std::sqrt(err*err/n); // save for this view
|
||||||
|
totalErr += err*err; // sum it up
|
||||||
|
totalPoints += n;
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::sqrt(totalErr/totalPoints); // calculate the arithmetical mean
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
Results
|
||||||
|
-------
|
||||||
|
|
||||||
|
Let there be [this input chessboard pattern ](pattern.png) which has a size of 9 X 6. I've used an
|
||||||
|
AXIS IP camera to create a couple of snapshots of the board and saved them into a VID5 directory. I've
|
||||||
|
put this inside the `images/CameraCalibration` folder of my working directory and created the
|
||||||
|
following `VID5.XML` file that describes which images to use:
|
||||||
|
@code{.xml}
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<opencv_storage>
|
||||||
|
<images>
|
||||||
|
images/CameraCalibration/VID5/xx1.jpg
|
||||||
|
images/CameraCalibration/VID5/xx2.jpg
|
||||||
|
images/CameraCalibration/VID5/xx3.jpg
|
||||||
|
images/CameraCalibration/VID5/xx4.jpg
|
||||||
|
images/CameraCalibration/VID5/xx5.jpg
|
||||||
|
images/CameraCalibration/VID5/xx6.jpg
|
||||||
|
images/CameraCalibration/VID5/xx7.jpg
|
||||||
|
images/CameraCalibration/VID5/xx8.jpg
|
||||||
|
</images>
|
||||||
|
</opencv_storage>
|
||||||
|
@endcode
|
||||||
|
Then passed `images/CameraCalibration/VID5/VID5.XML` as an input in the configuration file. Here's a
|
||||||
|
chessboard pattern found during the runtime of the application:
|
||||||
|
|
||||||
|
![](images/fileListImage.jpg)
|
||||||
|
|
||||||
|
After applying the distortion removal we get:
|
||||||
|
|
||||||
|
![](images/fileListImageUnDist.jpg)
|
||||||
|
|
||||||
|
The same works for [this asymmetrical circle pattern ](acircles_pattern.png) by setting the input
|
||||||
|
width to 4 and height to 11. This time I've used a live camera feed by specifying its ID ("1") for
|
||||||
|
the input. Here's how a detected pattern should look:
|
||||||
|
|
||||||
|
![](images/asymetricalPattern.jpg)
|
||||||
|
|
||||||
|
In both cases in the specified output XML/YAML file you'll find the camera and distortion
|
||||||
|
coefficients matrices:
|
||||||
|
@code{.xml}
|
||||||
|
<Camera_Matrix type_id="opencv-matrix">
|
||||||
|
<rows>3</rows>
|
||||||
|
<cols>3</cols>
|
||||||
|
<dt>d</dt>
|
||||||
|
<data>
|
||||||
|
6.5746697944293521e+002 0. 3.1950000000000000e+002 0.
|
||||||
|
6.5746697944293521e+002 2.3950000000000000e+002 0. 0. 1.</data></Camera_Matrix>
|
||||||
|
<Distortion_Coefficients type_id="opencv-matrix">
|
||||||
|
<rows>5</rows>
|
||||||
|
<cols>1</cols>
|
||||||
|
<dt>d</dt>
|
||||||
|
<data>
|
||||||
|
-4.1802327176423804e-001 5.0715244063187526e-001 0. 0.
|
||||||
|
-5.7843597214487474e-001</data></Distortion_Coefficients>
|
||||||
|
@endcode
|
||||||
|
Add these values as constants to your program, call the @ref cv::initUndistortRectifyMap and the
|
||||||
|
@ref cv::remap function to remove distortion and enjoy distortion-free inputs even from cheap and low-quality cameras.
|
||||||
|
|
||||||
|
You may observe a runtime instance of this on the [YouTube
|
||||||
|
here](https://www.youtube.com/watch?v=ViPN810E0SU).
|
||||||
|
|
||||||
|
\htmlonly
|
||||||
|
<div align="center">
|
||||||
|
<iframe title=" Camera calibration With OpenCV - Chessboard or asymmetrical circle pattern." width="560" height="349" src="http://www.youtube.com/embed/ViPN810E0SU?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
|
||||||
|
</div>
|
||||||
|
\endhtmlonly
|
@ -0,0 +1,54 @@
|
|||||||
|
Camera calibration with square chessboard {#tutorial_camera_calibration_square_chess}
|
||||||
|
=========================================
|
||||||
|
|
||||||
|
The goal of this tutorial is to learn how to calibrate a camera given a set of chessboard images.
|
||||||
|
|
||||||
|
*Test data*: use images in your data/chess folder.
|
||||||
|
|
||||||
|
- Compile opencv with samples by setting BUILD_EXAMPLES to ON in cmake configuration.
|
||||||
|
|
||||||
|
- Go to bin folder and use imagelist_creator to create an XML/YAML list of your images.
|
||||||
|
|
||||||
|
- Then, run calibration sample to get camera parameters. Use square size equal to 3cm.
|
||||||
|
|
||||||
|
Pose estimation
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Now, let us write a code that detects a chessboard in a new image and finds its distance from the
|
||||||
|
camera. You can apply the same method to any object with known 3D geometry that you can detect in an
|
||||||
|
image.
|
||||||
|
|
||||||
|
*Test data*: use chess_test\*.jpg images from your data folder.
|
||||||
|
|
||||||
|
- Create an empty console project. Load a test image:
|
||||||
|
|
||||||
|
Mat img = imread(argv[1], IMREAD_GRAYSCALE);
|
||||||
|
|
||||||
|
- Detect a chessboard in this image using the findChessboardCorners function:
|
||||||
|
|
||||||
|
bool found = findChessboardCorners( img, boardSize, ptvec, CALIB_CB_ADAPTIVE_THRESH );
|
||||||
|
|
||||||
|
- Now, write a function that generates a vector\<Point3f\> array of 3d coordinates of a chessboard
|
||||||
|
in any coordinate system. For simplicity, let us choose a system such that one of the chessboard
|
||||||
|
corners is in the origin and the board is in the plane *z = 0*.
|
||||||
|
|
||||||
|
- Read camera parameters from an XML/YAML file:
|
||||||
|
|
||||||
|
FileStorage fs(filename, FileStorage::READ);
|
||||||
|
Mat intrinsics, distortion;
|
||||||
|
fs["camera_matrix"] >> intrinsics;
|
||||||
|
fs["distortion_coefficients"] >> distortion;
|
||||||
|
|
||||||
|
- Now we are ready to find the chessboard pose by running `solvePnP`:
|
||||||
|
|
||||||
|
vector<Point3f> boardPoints;
|
||||||
|
// fill the array
|
||||||
|
...
|
||||||
|
|
||||||
|
solvePnP(Mat(boardPoints), Mat(foundBoardCorners), cameraMatrix,
|
||||||
|
distCoeffs, rvec, tvec, false);
|
||||||
|
|
||||||
|
- Calculate reprojection error like it is done in calibration sample (see
|
||||||
|
opencv/samples/cpp/calibration.cpp, function computeReprojectionErrors).
|
||||||
|
|
||||||
|
Question: how to calculate the distance from the camera origin to any of the corners?
|
803
doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown
Normal file
@ -0,0 +1,803 @@
|
|||||||
|
Real Time pose estimation of a textured object {#tutorial_real_time_pose}
|
||||||
|
==============================================
|
||||||
|
|
||||||
|
Nowadays, augmented reality is one of the top research topics in the computer vision and robotics fields. The most elemental problem in augmented reality is the estimation of the camera pose with respect to an object: in computer vision, in order to do some 3D rendering later, or in robotics, to obtain the object pose in order to grasp it and perform some manipulation. However, this is not a trivial problem to solve, because the most common issue in image processing is the computational cost of applying many algorithms or mathematical operations to solve a problem which is basic and immediate for humans.
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
This tutorial explains how to build a real-time application that estimates the camera pose in order to track a textured object with six degrees of freedom, given a 2D image and its 3D textured model.
|
||||||
|
|
||||||
|
The application will have the following parts:
|
||||||
|
|
||||||
|
- Read 3D textured object model and object mesh.
|
||||||
|
- Take input from Camera or Video.
|
||||||
|
- Extract ORB features and descriptors from the scene.
|
||||||
|
- Match scene descriptors with model descriptors using Flann matcher.
|
||||||
|
- Pose estimation using PnP + Ransac.
|
||||||
|
- Linear Kalman Filter for bad poses rejection.
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
In computer vision, estimating the camera pose from *n* 3D-to-2D point correspondences is a fundamental and well-understood problem. The most general version of the problem requires estimating the six degrees of freedom of the pose and five calibration parameters: focal length, principal point, aspect ratio and skew. It can be solved with a minimum of 6 correspondences, using the well-known Direct Linear Transform (DLT) algorithm. There are, though, several simplifications to the problem which turn into an extensive list of different algorithms that improve the accuracy of the DLT.
|
||||||
|
|
||||||
|
The most common simplification is to assume known calibration parameters; this is the so-called
|
||||||
|
Perspective-*n*-Point problem:
|
||||||
|
|
||||||
|
![](images/pnp.jpg)
|
||||||
|
|
||||||
|
**Problem Formulation:** Given a set of correspondences between 3D points \f$p_i\f$ expressed in a world
|
||||||
|
reference frame, and their 2D projections \f$u_i\f$ onto the image, we seek to retrieve the pose (\f$R\f$
|
||||||
|
and \f$t\f$) of the camera w.r.t. the world and the focal length \f$f\f$.
|
||||||
|
|
||||||
|
OpenCV provides four different approaches to solve the Perspective-*n*-Point problem which return
|
||||||
|
\f$R\f$ and \f$t\f$. Then, using the following formula it's possible to project 3D points into the image
|
||||||
|
plane:
|
||||||
|
|
||||||
|
\f[s\ \left [ \begin{matrix} u \\ v \\ 1 \end{matrix} \right ] = \left [ \begin{matrix} f_x & 0 & c_x \\ 0 & f_y & c_y \\ 0 & 0 & 1 \end{matrix} \right ] \left [ \begin{matrix} r_{11} & r_{12} & r_{13} & t_1 \\ r_{21} & r_{22} & r_{23} & t_2 \\ r_{31} & r_{32} & r_{33} & t_3 \end{matrix} \right ] \left [ \begin{matrix} X \\ Y \\ Z\\ 1 \end{matrix} \right ]\f]
|
||||||
|
|
||||||
|
The complete documentation of how to work with these equations is in @ref calib3d .
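As an illustration only, once \f$R\f$ and \f$t\f$ are known (for example as a rotation vector `rvec` and a translation vector `tvec`), a 3D point can be projected with @ref cv::projectPoints; the variable names below are assumptions for this sketch:
@code{.cpp}
// Sketch: project a 3D world point (X, Y, Z) into the image plane.
std::vector<cv::Point3f> object_points(1, cv::Point3f(X, Y, Z));
std::vector<cv::Point2f> image_points;
cv::projectPoints(object_points, rvec, tvec, camera_matrix, dist_coeffs, image_points);
// image_points[0] now holds the pixel coordinates (u, v) given by the formula above
@endcode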
|
||||||
|
|
||||||
|
Source code
|
||||||
|
-----------
|
||||||
|
|
||||||
|
You can find the source code of this tutorial in the
|
||||||
|
`samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/` folder of the OpenCV source library.
|
||||||
|
|
||||||
|
The tutorial consists of two main programs:
|
||||||
|
|
||||||
|
-# **Model registration**
|
||||||
|
|
||||||
|
This application is intended for users who do not have a 3D textured model of the object to be detected.
You can use it to create your own textured 3D model. It only works for planar
objects; if you want to model an object with a complex shape, you should use more sophisticated
software to create it.
|
||||||
|
|
||||||
|
The application needs an input image of the object to be registered and its 3D mesh. We also have
to provide the intrinsic parameters of the camera with which the input image was taken. All the
files have to be specified using an absolute path or a path relative to your application's
working directory. If no files are specified, the program tries to open the provided default
parameters.
|
||||||
|
|
||||||
|
The application starts by extracting the ORB features and descriptors from the input image and
then uses the mesh along with the [Möller–Trumbore intersection
algorithm](http://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm/)
to compute the 3D coordinates of the found features. Finally, the 3D points and the descriptors
are stored in different lists in a YAML-format file in which each row is a different point. The
technical background on how to store the files can be found in the @ref tutorial_file_input_output_with_xml_yml
tutorial.
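The writing side of this storage could look roughly like the sketch below; the keys `points_3d` and `descriptors` are the same ones read back by *Model::load()* further below:
@code{.cpp}
// Sketch: save the registered 3D points and their ORB descriptors to a YAML file.
cv::FileStorage storage(path, cv::FileStorage::WRITE);
storage << "points_3d" << cv::Mat(list_points3d_in_);  // one row per 3D point
storage << "descriptors" << descriptors_;              // one row per descriptor
storage.release();
@endcode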
|
||||||
|
|
||||||
|
![](images/registration.png)
|
||||||
|
|
||||||
|
-# **Model detection**
|
||||||
|
|
||||||
|
The aim of this application is to estimate the object pose in real time, given its 3D textured model.
|
||||||
|
|
||||||
|
The application starts by loading the 3D textured model in the YAML file format with the same
structure explained in the model registration program. From the scene, the ORB features and
descriptors are detected and extracted. Then, @ref cv::FlannBasedMatcher is used together with
@ref cv::flann::GenericIndex to match the scene descriptors against the model descriptors.
Using the found matches along with the @ref cv::solvePnPRansac function, the `R` and `t` of
the camera are computed. Finally, a KalmanFilter is applied in order to reject bad poses.
|
||||||
|
|
||||||
|
If you compiled OpenCV with the samples, you can find the application in `opencv/build/bin/cpp-tutorial-pnp_detection`.
Then you can run it and change some parameters:
|
||||||
|
@code{.cpp}
|
||||||
|
This program shows how to detect an object given its 3D textured model. You can choose to use a recorded video or the webcam.
|
||||||
|
Usage:
|
||||||
|
./cpp-tutorial-pnp_detection -help
|
||||||
|
Keys:
|
||||||
|
'esc' - to quit.
|
||||||
|
--------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Usage: cpp-tutorial-pnp_detection [params]
|
||||||
|
|
||||||
|
-c, --confidence (value:0.95)
|
||||||
|
RANSAC confidence
|
||||||
|
-e, --error (value:2.0)
|
||||||
|
RANSAC reprojection error
|
||||||
|
-f, --fast (value:true)
|
||||||
|
use of robust fast match
|
||||||
|
-h, --help (value:true)
|
||||||
|
print this message
|
||||||
|
--in, --inliers (value:30)
|
||||||
|
minimum inliers for Kalman update
|
||||||
|
--it, --iterations (value:500)
|
||||||
|
RANSAC maximum iterations count
|
||||||
|
-k, --keypoints (value:2000)
|
||||||
|
number of keypoints to detect
|
||||||
|
--mesh
|
||||||
|
path to ply mesh
|
||||||
|
--method, --pnp (value:0)
|
||||||
|
PnP method: (0) ITERATIVE - (1) EPNP - (2) P3P - (3) DLS
|
||||||
|
--model
|
||||||
|
path to yml model
|
||||||
|
-r, --ratio (value:0.7)
|
||||||
|
threshold for ratio test
|
||||||
|
-v, --video
|
||||||
|
path to recorded video
|
||||||
|
@endcode
|
||||||
|
For example, you can run the application changing the PnP method:
|
||||||
|
@code{.cpp}
|
||||||
|
./cpp-tutorial-pnp_detection --method=2
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
Explanation
|
||||||
|
-----------
|
||||||
|
|
||||||
|
The code for the real-time application is explained here in detail:
|
||||||
|
|
||||||
|
-# **Read 3D textured object model and object mesh.**
|
||||||
|
|
||||||
|
In order to load the textured model I implemented the *class* **Model** which has the function
*load()* that opens a YAML file and takes the stored 3D points with their corresponding descriptors.
|
||||||
|
You can find an example of a 3D textured model in
|
||||||
|
`samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/Data/cookies_ORB.yml`.
|
||||||
|
@code{.cpp}
|
||||||
|
/* Load a YAML file using OpenCV */
|
||||||
|
void Model::load(const std::string path)
|
||||||
|
{
|
||||||
|
cv::Mat points3d_mat;
|
||||||
|
|
||||||
|
cv::FileStorage storage(path, cv::FileStorage::READ);
|
||||||
|
storage["points_3d"] >> points3d_mat;
|
||||||
|
storage["descriptors"] >> descriptors_;
|
||||||
|
|
||||||
|
points3d_mat.copyTo(list_points3d_in_);
|
||||||
|
|
||||||
|
storage.release();
|
||||||
|
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
In the main program the model is loaded as follows:
|
||||||
|
@code{.cpp}
|
||||||
|
Model model; // instantiate Model object
|
||||||
|
model.load(yml_read_path); // load a 3D textured object model
|
||||||
|
@endcode
|
||||||
|
In order to read the model mesh I implemented a *class* **Mesh** which has a function *load()*
that opens a `*.ply` file and stores the 3D points of the object and also the triangles that compose it.
|
||||||
|
You can find an example of a model mesh in
|
||||||
|
`samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/Data/box.ply`.
|
||||||
|
@code{.cpp}
|
||||||
|
/* Load a CSV with *.ply format */
|
||||||
|
void Mesh::load(const std::string path)
|
||||||
|
{
|
||||||
|
|
||||||
|
// Create the reader
|
||||||
|
CsvReader csvReader(path);
|
||||||
|
|
||||||
|
// Clear previous data
|
||||||
|
list_vertex_.clear();
|
||||||
|
list_triangles_.clear();
|
||||||
|
|
||||||
|
// Read from .ply file
|
||||||
|
csvReader.readPLY(list_vertex_, list_triangles_);
|
||||||
|
|
||||||
|
// Update mesh attributes
|
||||||
|
num_vertexs_ = list_vertex_.size();
|
||||||
|
num_triangles_ = list_triangles_.size();
|
||||||
|
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
In the main program the mesh is loaded as follows:
|
||||||
|
@code{.cpp}
|
||||||
|
Mesh mesh; // instantiate Mesh object
|
||||||
|
mesh.load(ply_read_path); // load an object mesh
|
||||||
|
@endcode
|
||||||
|
You can also load a different model and mesh:
|
||||||
|
@code{.cpp}
|
||||||
|
./cpp-tutorial-pnp_detection --mesh=/absolute_path_to_your_mesh.ply --model=/absolute_path_to_your_model.yml
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
-# **Take input from Camera or Video**
|
||||||
|
|
||||||
|
In order to detect, it is necessary to capture video. This is done by loading a recorded video,
passing the absolute path where it is located on your machine. In order to test the application
you can find a recorded video in `samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/Data/box.mp4`.
|
||||||
|
@code{.cpp}
|
||||||
|
cv::VideoCapture cap; // instantiate VideoCapture
|
||||||
|
cap.open(video_read_path); // open a recorded video
|
||||||
|
|
||||||
|
if(!cap.isOpened()) // check if we succeeded
|
||||||
|
{
|
||||||
|
std::cout << "Could not open the camera device" << std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
Then the algorithm is computed frame by frame:
|
||||||
|
@code{.cpp}
|
||||||
|
cv::Mat frame, frame_vis;
|
||||||
|
|
||||||
|
while(cap.read(frame) && cv::waitKey(30) != 27) // capture frame until ESC is pressed
|
||||||
|
{
|
||||||
|
|
||||||
|
frame_vis = frame.clone(); // refresh visualisation frame
|
||||||
|
|
||||||
|
// MAIN ALGORITHM
|
||||||
|
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
You can also load a different recorded video:
|
||||||
|
@code{.cpp}
|
||||||
|
./cpp-tutorial-pnp_detection --video=/absolute_path_to_your_video.mp4
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
-# **Extract ORB features and descriptors from the scene**
|
||||||
|
|
||||||
|
The next step is to detect the scene features and extract their descriptors. For this task I
implemented a *class* **RobustMatcher** which has functions for keypoint detection and feature
extraction. You can find it in
`samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/src/RobusMatcher.cpp`. In your
*RobustMatcher* object you can use any of the 2D feature detectors of OpenCV. In this case I used
@ref cv::ORB features because it is based on @ref cv::FAST to detect the keypoints and on cv::xfeatures2d::BriefDescriptorExtractor
to extract the descriptors, which means that it is fast and robust to rotations. You can find more
detailed information about *ORB* in the documentation.
|
||||||
|
|
||||||
|
The following code shows how to instantiate and set the feature detector and the descriptor
extractor:
|
||||||
|
@code{.cpp}
|
||||||
|
RobustMatcher rmatcher; // instantiate RobustMatcher
|
||||||
|
|
||||||
|
cv::FeatureDetector * detector = new cv::OrbFeatureDetector(numKeyPoints); // instatiate ORB feature detector
|
||||||
|
cv::DescriptorExtractor * extractor = new cv::OrbDescriptorExtractor(); // instatiate ORB descriptor extractor
|
||||||
|
|
||||||
|
rmatcher.setFeatureDetector(detector); // set feature detector
|
||||||
|
rmatcher.setDescriptorExtractor(extractor); // set descriptor extractor
|
||||||
|
@endcode
|
||||||
|
The features and descriptors will be computed by the *RobustMatcher* inside the matching function.
|
||||||
|
|
||||||
|
-# **Match scene descriptors with model descriptors using Flann matcher**
|
||||||
|
|
||||||
|
It is the first step in our detection algorithm. The main idea is to match the scene descriptors
|
||||||
|
with our model descriptors in order to know the 3D coordinates of the found features in the
|
||||||
|
current scene.
|
||||||
|
|
||||||
|
Firstly, we have to set which matcher we want to use. In this case @ref cv::FlannBasedMatcher is
used, which in terms of computational cost becomes faster than the @ref cv::BFMatcher as the
trained collection of features grows. For the FlannBased matcher, the index created is
*Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity Search*, because *ORB*
descriptors are binary.
|
||||||
|
|
||||||
|
You can tune the *LSH* and search parameters to improve the matching efficiency:
|
||||||
|
@code{.cpp}
|
||||||
|
cv::Ptr<cv::flann::IndexParams> indexParams = cv::makePtr<cv::flann::LshIndexParams>(6, 12, 1); // instantiate LSH index parameters
|
||||||
|
cv::Ptr<cv::flann::SearchParams> searchParams = cv::makePtr<cv::flann::SearchParams>(50); // instantiate flann search parameters
|
||||||
|
|
||||||
|
cv::DescriptorMatcher * matcher = new cv::FlannBasedMatcher(indexParams, searchParams); // instantiate FlannBased matcher
|
||||||
|
rmatcher.setDescriptorMatcher(matcher); // set matcher
|
||||||
|
@endcode
|
||||||
|
Secondly, we have to call the matcher by using the *robustMatch()* or *fastRobustMatch()* function.
The difference between these two functions is their computational cost. The first method is slower
but more robust at filtering good matches because it uses two ratio tests and a symmetry test. In
contrast, the second method is faster but less robust because it only applies a single ratio test to
the matches.
|
||||||
|
|
||||||
|
The following code gets the model 3D points and their descriptors and then calls the matcher in
the main program:
|
||||||
|
@code{.cpp}
|
||||||
|
// Get the MODEL INFO
|
||||||
|
|
||||||
|
std::vector<cv::Point3f> list_points3d_model = model.get_points3d(); // list with model 3D coordinates
|
||||||
|
cv::Mat descriptors_model = model.get_descriptors(); // list with descriptors of each 3D coordinate
|
||||||
|
@endcode
|
||||||
|
@code{.cpp}
|
||||||
|
// -- Step 1: Robust matching between model descriptors and scene descriptors
|
||||||
|
|
||||||
|
std::vector<cv::DMatch> good_matches; // to obtain the model 3D points in the scene
|
||||||
|
std::vector<cv::KeyPoint> keypoints_scene; // to obtain the 2D points of the scene
|
||||||
|
|
||||||
|
if(fast_match)
|
||||||
|
{
|
||||||
|
rmatcher.fastRobustMatch(frame, good_matches, keypoints_scene, descriptors_model);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
rmatcher.robustMatch(frame, good_matches, keypoints_scene, descriptors_model);
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
The following code corresponds to the *robustMatch()* function which belongs to the
*RobustMatcher* class. This function uses the given image to detect the keypoints and extract the
descriptors, then matches the extracted descriptors against the given model descriptors (and vice
versa) using a *two Nearest Neighbour* search. Next, a ratio test is applied to the matches in both
directions in order to remove those matches whose distance ratio between the first and second best
match is larger than a given threshold. Finally, a symmetry test is applied in order to remove
non-symmetrical matches.
|
||||||
|
@code{.cpp}
|
||||||
|
void RobustMatcher::robustMatch( const cv::Mat& frame, std::vector<cv::DMatch>& good_matches,
|
||||||
|
std::vector<cv::KeyPoint>& keypoints_frame,
|
||||||
|
const std::vector<cv::KeyPoint>& keypoints_model, const cv::Mat& descriptors_model )
|
||||||
|
{
|
||||||
|
|
||||||
|
// 1a. Detection of the ORB features
|
||||||
|
this->computeKeyPoints(frame, keypoints_frame);
|
||||||
|
|
||||||
|
// 1b. Extraction of the ORB descriptors
|
||||||
|
cv::Mat descriptors_frame;
|
||||||
|
this->computeDescriptors(frame, keypoints_frame, descriptors_frame);
|
||||||
|
|
||||||
|
// 2. Match the two image descriptors
|
||||||
|
std::vector<std::vector<cv::DMatch> > matches12, matches21;
|
||||||
|
|
||||||
|
// 2a. From image 1 to image 2
|
||||||
|
matcher_->knnMatch(descriptors_frame, descriptors_model, matches12, 2); // return 2 nearest neighbours
|
||||||
|
|
||||||
|
// 2b. From image 2 to image 1
|
||||||
|
matcher_->knnMatch(descriptors_model, descriptors_frame, matches21, 2); // return 2 nearest neighbours
|
||||||
|
|
||||||
|
// 3. Remove matches for which NN ratio is > than threshold
|
||||||
|
// clean image 1 -> image 2 matches
|
||||||
|
int removed1 = ratioTest(matches12);
|
||||||
|
// clean image 2 -> image 1 matches
|
||||||
|
int removed2 = ratioTest(matches21);
|
||||||
|
|
||||||
|
// 4. Remove non-symmetrical matches
|
||||||
|
symmetryTest(matches12, matches21, good_matches);
|
||||||
|
|
||||||
|
}
|
||||||
|
@endcode
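The *ratioTest()* and *symmetryTest()* helpers are not reproduced here; as a rough sketch (assuming a `ratio_` threshold member, see the *RobustMatcher* sources for the actual implementation), the ratio test boils down to:
@code{.cpp}
// Sketch of a ratio test: keep a match only if its best distance is clearly
// smaller than the distance to the second-best candidate.
int RobustMatcher::ratioTest(std::vector<std::vector<cv::DMatch> > &matches)
{
    int removed = 0;
    for (std::vector<std::vector<cv::DMatch> >::iterator it = matches.begin();
         it != matches.end(); ++it)
    {
        if (it->size() < 2 || (*it)[0].distance / (*it)[1].distance > ratio_)
        {
            it->clear();   // discard this match
            removed++;
        }
    }
    return removed;
}
@endcode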
|
||||||
|
After filtering the matches we have to extract the 2D and 3D correspondences from the found scene
keypoints and our 3D model using the obtained *DMatches* vector. For more information about
@ref cv::DMatch check the documentation.
|
||||||
|
@code{.cpp}
|
||||||
|
// -- Step 2: Find out the 2D/3D correspondences
|
||||||
|
|
||||||
|
std::vector<cv::Point3f> list_points3d_model_match; // container for the model 3D coordinates found in the scene
|
||||||
|
std::vector<cv::Point2f> list_points2d_scene_match; // container for the model 2D coordinates found in the scene
|
||||||
|
|
||||||
|
for(unsigned int match_index = 0; match_index < good_matches.size(); ++match_index)
|
||||||
|
{
|
||||||
|
cv::Point3f point3d_model = list_points3d_model[ good_matches[match_index].trainIdx ]; // 3D point from model
|
||||||
|
cv::Point2f point2d_scene = keypoints_scene[ good_matches[match_index].queryIdx ].pt; // 2D point from the scene
|
||||||
|
list_points3d_model_match.push_back(point3d_model); // add 3D point
|
||||||
|
list_points2d_scene_match.push_back(point2d_scene); // add 2D point
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
You can also change the ratio test threshold and the number of keypoints to detect, as well as
choose whether or not to use the robust matcher:
|
||||||
|
@code{.cpp}
|
||||||
|
./cpp-tutorial-pnp_detection --ratio=0.8 --keypoints=1000 --fast=false
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
-# **Pose estimation using PnP + Ransac**
|
||||||
|
|
||||||
|
Once we have the 2D and 3D correspondences we have to apply a PnP algorithm in order to estimate the
camera pose. The reason why we have to use @ref cv::solvePnPRansac instead of @ref cv::solvePnP is
that, after the matching, not all the found correspondences are correct: as likely as not, there
are false correspondences, also called *outliers*. [Random Sample
Consensus](http://en.wikipedia.org/wiki/RANSAC), or *Ransac*, is a non-deterministic iterative
method which estimates the parameters of a mathematical model from observed data, producing an
increasingly accurate result as the number of iterations grows. After applying *Ransac* all the
*outliers* are eliminated, and the camera pose is then estimated with a certain probability of
obtaining a good solution.
|
||||||
|
|
||||||
|
For the camera pose estimation I have implemented a *class* **PnPProblem**. This *class* has 4
attributes: a given calibration matrix, the rotation matrix, the translation matrix and the
rotation-translation matrix. The intrinsic calibration parameters of the camera which you are
using to estimate the pose are necessary. In order to obtain them you can check the
@ref tutorial_camera_calibration_square_chess and @ref tutorial_camera_calibration tutorials.
|
||||||
|
|
||||||
|
The following code shows how to declare the *PnPProblem class* in the main program:
|
||||||
|
@code{.cpp}
|
||||||
|
// Intrinsic camera parameters: UVC WEBCAM
|
||||||
|
|
||||||
|
double f = 55; // focal length in mm
|
||||||
|
double sx = 22.3, sy = 14.9; // sensor size
|
||||||
|
double width = 640, height = 480; // image size
|
||||||
|
|
||||||
|
double params_WEBCAM[] = { width*f/sx, // fx
|
||||||
|
height*f/sy, // fy
|
||||||
|
width/2, // cx
|
||||||
|
height/2}; // cy
|
||||||
|
|
||||||
|
PnPProblem pnp_detection(params_WEBCAM); // instantiate PnPProblem class
|
||||||
|
@endcode
|
||||||
|
The following code shows how the *PnPProblem class* initialises its attributes:
|
||||||
|
@code{.cpp}
|
||||||
|
// Custom constructor given the intrinsic camera parameters
|
||||||
|
|
||||||
|
PnPProblem::PnPProblem(const double params[])
|
||||||
|
{
|
||||||
|
_A_matrix = cv::Mat::zeros(3, 3, CV_64FC1); // intrinsic camera parameters
|
||||||
|
_A_matrix.at<double>(0, 0) = params[0]; // [ fx 0 cx ]
|
||||||
|
_A_matrix.at<double>(1, 1) = params[1]; // [ 0 fy cy ]
|
||||||
|
_A_matrix.at<double>(0, 2) = params[2]; // [ 0 0 1 ]
|
||||||
|
_A_matrix.at<double>(1, 2) = params[3];
|
||||||
|
_A_matrix.at<double>(2, 2) = 1;
|
||||||
|
_R_matrix = cv::Mat::zeros(3, 3, CV_64FC1); // rotation matrix
|
||||||
|
_t_matrix = cv::Mat::zeros(3, 1, CV_64FC1); // translation matrix
|
||||||
|
_P_matrix = cv::Mat::zeros(3, 4, CV_64FC1); // rotation-translation matrix
|
||||||
|
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
OpenCV provides four PnP methods: ITERATIVE, EPNP, P3P and DLS. The most suitable estimation method
depends on the type of application. For a real-time application, the more suitable methods are
EPNP and P3P, since they are faster than ITERATIVE and DLS at finding an optimal solution. However,
EPNP and P3P are not especially robust in front of planar surfaces and sometimes the pose
estimation seems to have a mirror effect. Therefore, this tutorial uses the ITERATIVE method,
since the object to be detected has planar surfaces.
|
||||||
|
|
||||||
|
The OpenCV Ransac implementation requires you to provide three parameters: the maximum number of
iterations before the algorithm stops, the maximum allowed distance between the observed and
computed point projections for a correspondence to be considered an inlier, and the confidence
with which a good result is obtained. You can tune these parameters in order to improve the
performance of your algorithm. Increasing the number of iterations gives you a more accurate
solution, but it takes more time to find it. Increasing the reprojection error reduces the
computation time, but makes your solution less accurate. Decreasing the confidence makes your
algorithm faster, but the obtained solution is also less accurate.
|
||||||
|
|
||||||
|
The following parameters work for this application:
|
||||||
|
@code{.cpp}
|
||||||
|
// RANSAC parameters
|
||||||
|
|
||||||
|
int iterationsCount = 500; // number of Ransac iterations.
|
||||||
|
float reprojectionError = 2.0; // maximum allowed distance to consider it an inlier.
|
||||||
|
float confidence = 0.95; // ransac successful confidence.
|
||||||
|
@endcode
|
||||||
|
The following code corresponds to the *estimatePoseRANSAC()* function which belongs to the
|
||||||
|
*PnPProblem class*. This function estimates the rotation and translation matrix given a set of
|
||||||
|
2D/3D correspondences, the desired PnP method to use, the output inliers container and the Ransac
|
||||||
|
parameters:
|
||||||
|
@code{.cpp}
|
||||||
|
// Estimate the pose given a list of 2D/3D correspondences with RANSAC and the method to use
|
||||||
|
|
||||||
|
void PnPProblem::estimatePoseRANSAC( const std::vector<cv::Point3f> &list_points3d, // list with model 3D coordinates
|
||||||
|
const std::vector<cv::Point2f> &list_points2d, // list with scene 2D coordinates
|
||||||
|
int flags, cv::Mat &inliers, int iterationsCount, // PnP method; inliers container
|
||||||
|
float reprojectionError, float confidence ) // Ransac parameters
|
||||||
|
{
|
||||||
|
cv::Mat distCoeffs = cv::Mat::zeros(4, 1, CV_64FC1); // vector of distortion coefficients
|
||||||
|
cv::Mat rvec = cv::Mat::zeros(3, 1, CV_64FC1); // output rotation vector
|
||||||
|
cv::Mat tvec = cv::Mat::zeros(3, 1, CV_64FC1); // output translation vector
|
||||||
|
|
||||||
|
bool useExtrinsicGuess = false; // if true the function uses the provided rvec and tvec values as
|
||||||
|
// initial approximations of the rotation and translation vectors
|
||||||
|
|
||||||
|
cv::solvePnPRansac( list_points3d, list_points2d, _A_matrix, distCoeffs, rvec, tvec,
|
||||||
|
useExtrinsicGuess, iterationsCount, reprojectionError, confidence,
|
||||||
|
inliers, flags );
|
||||||
|
|
||||||
|
Rodrigues(rvec,_R_matrix); // converts Rotation Vector to Matrix
|
||||||
|
_t_matrix = tvec; // set translation matrix
|
||||||
|
|
||||||
|
this->set_P_matrix(_R_matrix, _t_matrix); // set rotation-translation matrix
|
||||||
|
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
The following code contains the 3rd and 4th steps of the main algorithm: the first is calling the
above function, and the second is taking the output inliers vector from Ransac to get the 2D scene
points for drawing purposes. As seen in the code, we must make sure to apply Ransac only if we have
matches; otherwise, the function @ref cv::solvePnPRansac crashes due to an OpenCV *bug*.
|
||||||
|
@code{.cpp}
|
||||||
|
if(good_matches.size() > 0) // None matches, then RANSAC crashes
|
||||||
|
{
|
||||||
|
|
||||||
|
// -- Step 3: Estimate the pose using RANSAC approach
|
||||||
|
pnp_detection.estimatePoseRANSAC( list_points3d_model_match, list_points2d_scene_match,
|
||||||
|
pnpMethod, inliers_idx, iterationsCount, reprojectionError, confidence );
|
||||||
|
|
||||||
|
|
||||||
|
// -- Step 4: Catch the inliers keypoints to draw
|
||||||
|
for(int inliers_index = 0; inliers_index < inliers_idx.rows; ++inliers_index)
|
||||||
|
{
|
||||||
|
int n = inliers_idx.at<int>(inliers_index); // i-inlier
|
||||||
|
cv::Point2f point2d = list_points2d_scene_match[n]; // i-inlier point 2D
|
||||||
|
list_points2d_inliers.push_back(point2d); // add i-inlier to list
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
Finally, once the camera pose has been estimated we can use \f$R\f$ and \f$t\f$ to compute
the 2D projection onto the image of a given 3D point expressed in a world reference frame, using
the formula shown in the *Theory* section.
|
||||||
|
|
||||||
|
The following code corresponds to the *backproject3DPoint()* function which belongs to the
|
||||||
|
*PnPProblem class*. The function backprojects a given 3D point expressed in a world reference frame
|
||||||
|
onto a 2D image:
|
||||||
|
@code{.cpp}
|
||||||
|
// Backproject a 3D point to 2D using the estimated pose parameters
|
||||||
|
|
||||||
|
cv::Point2f PnPProblem::backproject3DPoint(const cv::Point3f &point3d)
|
||||||
|
{
|
||||||
|
// 3D point vector [x y z 1]'
|
||||||
|
cv::Mat point3d_vec = cv::Mat(4, 1, CV_64FC1);
|
||||||
|
point3d_vec.at<double>(0) = point3d.x;
|
||||||
|
point3d_vec.at<double>(1) = point3d.y;
|
||||||
|
point3d_vec.at<double>(2) = point3d.z;
|
||||||
|
point3d_vec.at<double>(3) = 1;
|
||||||
|
|
||||||
|
// 2D point vector [u v 1]'
|
||||||
|
cv::Mat point2d_vec = cv::Mat(4, 1, CV_64FC1);
|
||||||
|
point2d_vec = _A_matrix * _P_matrix * point3d_vec;
|
||||||
|
|
||||||
|
// Normalization of [u v]'
|
||||||
|
cv::Point2f point2d;
|
||||||
|
point2d.x = point2d_vec.at<double>(0) / point2d_vec.at<double>(2);
|
||||||
|
point2d.y = point2d_vec.at<double>(1) / point2d_vec.at<double>(2);
|
||||||
|
|
||||||
|
return point2d;
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
The above function is used to compute all the 3D points of the object *Mesh* to show the pose of
|
||||||
|
the object.
|
||||||
|
|
||||||
|
You can also change RANSAC parameters and PnP method:
|
||||||
|
@code{.cpp}
|
||||||
|
./cpp-tutorial-pnp_detection --error=0.25 --confidence=0.90 --iterations=250 --method=3
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
-# **Linear Kalman Filter for bad poses rejection**
|
||||||
|
|
||||||
|
It is common in computer vision and robotics that, after applying detection or tracking
techniques, bad results are obtained due to sensor errors. In order to avoid these bad
detections, this tutorial explains how to implement a Linear Kalman Filter. The Kalman
Filter is applied after a given number of inliers has been detected.
|
||||||
|
|
||||||
|
You can find more information about what a [Kalman
Filter](http://en.wikipedia.org/wiki/Kalman_filter) is at the linked page. This tutorial uses the
OpenCV implementation, @ref cv::KalmanFilter, based on
[Linear Kalman Filter for position and orientation tracking](http://campar.in.tum.de/Chair/KalmanFilter)
to set up the dynamics and measurement models.
|
||||||
|
|
||||||
|
Firstly, we have to define our state vector, which will have 18 states: the positional data (x,y,z)
with its first and second derivatives (velocity and acceleration), followed by the rotation in the
form of three Euler angles (roll, pitch, yaw) together with their first and second derivatives
(angular velocity and acceleration):
|
||||||
|
|
||||||
|
\f[X = (x,y,z,\dot x,\dot y,\dot z,\ddot x,\ddot y,\ddot z,\psi,\theta,\phi,\dot \psi,\dot \theta,\dot \phi,\ddot \psi,\ddot \theta,\ddot \phi)^T\f]
|
||||||
|
|
||||||
|
Secondly, we have to define the number of measurements, which will be 6: from \f$R\f$ and \f$t\f$ we can
|
||||||
|
extract \f$(x,y,z)\f$ and \f$(\psi,\theta,\phi)\f$. In addition, we have to define the number of control
|
||||||
|
actions to apply to the system which in this case will be *zero*. Finally, we have to define the
|
||||||
|
differential time between measurements which in this case is \f$1/T\f$, where *T* is the frame rate of
|
||||||
|
the video.
|
||||||
|
@code{.cpp}
|
||||||
|
cv::KalmanFilter KF; // instantiate Kalman Filter
|
||||||
|
|
||||||
|
int nStates = 18; // the number of states
|
||||||
|
int nMeasurements = 6; // the number of measured states
|
||||||
|
int nInputs = 0; // the number of action control
|
||||||
|
|
||||||
|
double dt = 0.125; // time between measurements (1/FPS)
|
||||||
|
|
||||||
|
initKalmanFilter(KF, nStates, nMeasurements, nInputs, dt); // init function
|
||||||
|
@endcode
|
||||||
|
The following code corresponds to the *Kalman Filter* initialisation. Firstly, the process noise,
the measurement noise and the error covariance matrix are set. Secondly, the transition matrix,
which is the dynamic model, is set, and finally the measurement matrix, which is the measurement
model.
|
||||||
|
|
||||||
|
You can tune the process and measurement noise to improve the *Kalman Filter* performance. The
smaller the measurement noise, the faster the filter converges, but it also becomes more sensitive
to bad measurements.
|
||||||
|
@code{.cpp}
|
||||||
|
void initKalmanFilter(cv::KalmanFilter &KF, int nStates, int nMeasurements, int nInputs, double dt)
|
||||||
|
{
|
||||||
|
|
||||||
|
KF.init(nStates, nMeasurements, nInputs, CV_64F); // init Kalman Filter
|
||||||
|
|
||||||
|
cv::setIdentity(KF.processNoiseCov, cv::Scalar::all(1e-5)); // set process noise
|
||||||
|
cv::setIdentity(KF.measurementNoiseCov, cv::Scalar::all(1e-4)); // set measurement noise
|
||||||
|
cv::setIdentity(KF.errorCovPost, cv::Scalar::all(1)); // error covariance
|
||||||
|
|
||||||
|
|
||||||
|
/* DYNAMIC MODEL */
|
||||||
|
|
||||||
|
// [1 0 0 dt 0 0 dt2 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 1 0 0 dt 0 0 dt2 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 1 0 0 dt 0 0 dt2 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 1 0 0 dt 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 0 1 0 0 dt 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 0 0 1 0 0 dt 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 1 0 0 dt 0 0 dt2 0 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 1 0 0 dt 0 0 dt2 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 0 1 0 0 dt 0 0 dt2]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 dt 0 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 dt 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 dt]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
|
||||||
|
|
||||||
|
// position
|
||||||
|
KF.transitionMatrix.at<double>(0,3) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(1,4) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(2,5) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(3,6) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(4,7) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(5,8) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(0,6) = 0.5*pow(dt,2);
|
||||||
|
KF.transitionMatrix.at<double>(1,7) = 0.5*pow(dt,2);
|
||||||
|
KF.transitionMatrix.at<double>(2,8) = 0.5*pow(dt,2);
|
||||||
|
|
||||||
|
// orientation
|
||||||
|
KF.transitionMatrix.at<double>(9,12) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(10,13) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(11,14) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(12,15) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(13,16) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(14,17) = dt;
|
||||||
|
KF.transitionMatrix.at<double>(9,15) = 0.5*pow(dt,2);
|
||||||
|
KF.transitionMatrix.at<double>(10,16) = 0.5*pow(dt,2);
|
||||||
|
KF.transitionMatrix.at<double>(11,17) = 0.5*pow(dt,2);
|
||||||
|
|
||||||
|
|
||||||
|
/* MEASUREMENT MODEL */
|
||||||
|
|
||||||
|
// [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
|
||||||
|
// [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
|
||||||
|
|
||||||
|
KF.measurementMatrix.at<double>(0,0) = 1; // x
|
||||||
|
KF.measurementMatrix.at<double>(1,1) = 1; // y
|
||||||
|
KF.measurementMatrix.at<double>(2,2) = 1; // z
|
||||||
|
KF.measurementMatrix.at<double>(3,9) = 1; // roll
|
||||||
|
KF.measurementMatrix.at<double>(4,10) = 1; // pitch
|
||||||
|
KF.measurementMatrix.at<double>(5,11) = 1; // yaw
|
||||||
|
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
The following code contains the 5th step of the main algorithm. When the number of inliers obtained
after *Ransac* is over the threshold, the measurements matrix is filled and then the *Kalman
Filter* is updated:
|
||||||
|
@code{.cpp}
|
||||||
|
// -- Step 5: Kalman Filter
|
||||||
|
|
||||||
|
// GOOD MEASUREMENT
|
||||||
|
if( inliers_idx.rows >= minInliersKalman )
|
||||||
|
{
|
||||||
|
|
||||||
|
// Get the measured translation
|
||||||
|
cv::Mat translation_measured(3, 1, CV_64F);
|
||||||
|
translation_measured = pnp_detection.get_t_matrix();
|
||||||
|
|
||||||
|
// Get the measured rotation
|
||||||
|
cv::Mat rotation_measured(3, 3, CV_64F);
|
||||||
|
rotation_measured = pnp_detection.get_R_matrix();
|
||||||
|
|
||||||
|
// fill the measurements vector
|
||||||
|
fillMeasurements(measurements, translation_measured, rotation_measured);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instantiate estimated translation and rotation
|
||||||
|
cv::Mat translation_estimated(3, 1, CV_64F);
|
||||||
|
cv::Mat rotation_estimated(3, 3, CV_64F);
|
||||||
|
|
||||||
|
// update the Kalman filter with good measurements
|
||||||
|
updateKalmanFilter( KF, measurements,
|
||||||
|
translation_estimated, rotation_estimated);
|
||||||
|
@endcode
|
||||||
|
The following code corresponds to the *fillMeasurements()* function which converts the measured
[Rotation Matrix to Euler
angles](http://euclideanspace.com/maths/geometry/rotations/conversions/matrixToEuler/index.htm)
and fills the measurements matrix along with the measured translation vector:
|
||||||
|
@code{.cpp}
|
||||||
|
void fillMeasurements( cv::Mat &measurements,
|
||||||
|
const cv::Mat &translation_measured, const cv::Mat &rotation_measured)
|
||||||
|
{
|
||||||
|
// Convert rotation matrix to euler angles
|
||||||
|
cv::Mat measured_eulers(3, 1, CV_64F);
|
||||||
|
measured_eulers = rot2euler(rotation_measured);
|
||||||
|
|
||||||
|
// Set measurement to predict
|
||||||
|
measurements.at<double>(0) = translation_measured.at<double>(0); // x
|
||||||
|
measurements.at<double>(1) = translation_measured.at<double>(1); // y
|
||||||
|
measurements.at<double>(2) = translation_measured.at<double>(2); // z
|
||||||
|
measurements.at<double>(3) = measured_eulers.at<double>(0); // roll
|
||||||
|
measurements.at<double>(4) = measured_eulers.at<double>(1); // pitch
|
||||||
|
measurements.at<double>(5) = measured_eulers.at<double>(2); // yaw
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
The following code corresponds to the *updateKalmanFilter()* function which updates the Kalman
Filter and sets the estimated Rotation Matrix and translation vector. The estimated Rotation Matrix
is built from the estimated [Euler angles converted to a Rotation
Matrix](http://euclideanspace.com/maths/geometry/rotations/conversions/eulerToMatrix/index.htm).
|
||||||
|
@code{.cpp}
|
||||||
|
void updateKalmanFilter( cv::KalmanFilter &KF, cv::Mat &measurement,
|
||||||
|
cv::Mat &translation_estimated, cv::Mat &rotation_estimated )
|
||||||
|
{
|
||||||
|
|
||||||
|
// First predict, to update the internal statePre variable
|
||||||
|
cv::Mat prediction = KF.predict();
|
||||||
|
|
||||||
|
// The "correct" phase that is going to use the predicted value and our measurement
|
||||||
|
cv::Mat estimated = KF.correct(measurement);
|
||||||
|
|
||||||
|
// Estimated translation
|
||||||
|
translation_estimated.at<double>(0) = estimated.at<double>(0);
|
||||||
|
translation_estimated.at<double>(1) = estimated.at<double>(1);
|
||||||
|
translation_estimated.at<double>(2) = estimated.at<double>(2);
|
||||||
|
|
||||||
|
// Estimated euler angles
|
||||||
|
cv::Mat eulers_estimated(3, 1, CV_64F);
|
||||||
|
eulers_estimated.at<double>(0) = estimated.at<double>(9);
|
||||||
|
eulers_estimated.at<double>(1) = estimated.at<double>(10);
|
||||||
|
eulers_estimated.at<double>(2) = estimated.at<double>(11);
|
||||||
|
|
||||||
|
// Convert estimated Euler angles to rotation matrix
|
||||||
|
rotation_estimated = euler2rot(eulers_estimated);
|
||||||
|
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
The 6th step is to set the estimated rotation-translation matrix:
|
||||||
|
@code{.cpp}
|
||||||
|
// -- Step 6: Set estimated projection matrix
|
||||||
|
pnp_detection_est.set_P_matrix(rotation_estimated, translation_estimated);
|
||||||
|
@endcode
|
||||||
|
The last and optional step is to draw the found pose. To do so, I implemented a function that draws all
the mesh 3D points and an extra reference axis:
|
||||||
|
@code{.cpp}
|
||||||
|
// -- Step X: Draw pose
|
||||||
|
|
||||||
|
drawObjectMesh(frame_vis, &mesh, &pnp_detection, green); // draw current pose
|
||||||
|
drawObjectMesh(frame_vis, &mesh, &pnp_detection_est, yellow); // draw estimated pose
|
||||||
|
|
||||||
|
double l = 5;
|
||||||
|
std::vector<cv::Point2f> pose_points2d;
|
||||||
|
pose_points2d.push_back(pnp_detection_est.backproject3DPoint(cv::Point3f(0,0,0))); // axis center
|
||||||
|
pose_points2d.push_back(pnp_detection_est.backproject3DPoint(cv::Point3f(l,0,0))); // axis x
|
||||||
|
pose_points2d.push_back(pnp_detection_est.backproject3DPoint(cv::Point3f(0,l,0))); // axis y
|
||||||
|
pose_points2d.push_back(pnp_detection_est.backproject3DPoint(cv::Point3f(0,0,l))); // axis z
|
||||||
|
draw3DCoordinateAxes(frame_vis, pose_points2d); // draw axes
|
||||||
|
@endcode
|
||||||
|
You can also modify the minimum number of inliers required to update the Kalman Filter:
|
||||||
|
@code{.cpp}
|
||||||
|
./cpp-tutorial-pnp_detection --inliers=20
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
Results
|
||||||
|
-------
|
||||||
|
|
||||||
|
The following videos show the results of real-time pose estimation using the explained detection
algorithm with the following parameters:
|
||||||
|
@code{.cpp}
|
||||||
|
// Robust Matcher parameters
|
||||||
|
|
||||||
|
int numKeyPoints = 2000; // number of detected keypoints
|
||||||
|
float ratio = 0.70f; // ratio test
|
||||||
|
bool fast_match = true; // fastRobustMatch() or robustMatch()
|
||||||
|
|
||||||
|
|
||||||
|
// RANSAC parameters
|
||||||
|
|
||||||
|
int iterationsCount = 500; // number of Ransac iterations.
|
||||||
|
float reprojectionError = 2.0; // maximum allowed distance to consider it an inlier.
|
||||||
|
float confidence = 0.95; // ransac successful confidence.
|
||||||
|
|
||||||
|
|
||||||
|
// Kalman Filter parameters
|
||||||
|
|
||||||
|
int minInliersKalman = 30; // Kalman threshold updating
|
||||||
|
@endcode
|
||||||
|
You can watch the real time pose estimation on the [YouTube
|
||||||
|
here](http://www.youtube.com/user/opencvdev/videos).
|
||||||
|
|
||||||
|
\htmlonly
|
||||||
|
<div align="center">
|
||||||
|
<iframe title="Pose estimation of textured object using OpenCV" width="560" height="349" src="http://www.youtube.com/embed/XNATklaJlSQ?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
|
||||||
|
</div>
|
||||||
|
\endhtmlonly
|
||||||
|
\htmlonly
|
||||||
|
<div align="center">
|
||||||
|
<iframe title="Pose estimation of textured object using OpenCV in cluttered background" width="560" height="349" src="http://www.youtube.com/embed/YLS9bWek78k?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
|
||||||
|
</div>
|
||||||
|
\endhtmlonly
|
@ -126,7 +126,7 @@ Here is explained in detail the code for the real time application:
|
|||||||
|
|
||||||
.. code-block:: cpp
|
.. code-block:: cpp
|
||||||
|
|
||||||
/** Load a YAML file using OpenCV **/
|
/* Load a YAML file using OpenCV */
|
||||||
void Model::load(const std::string path)
|
void Model::load(const std::string path)
|
||||||
{
|
{
|
||||||
cv::Mat points3d_mat;
|
cv::Mat points3d_mat;
|
||||||
@ -152,7 +152,7 @@ Here is explained in detail the code for the real time application:
|
|||||||
|
|
||||||
.. code-block:: cpp
|
.. code-block:: cpp
|
||||||
|
|
||||||
/** Load a CSV with *.ply format **/
|
/* Load a CSV with *.ply format */
|
||||||
void Mesh::load(const std::string path)
|
void Mesh::load(const std::string path)
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -535,7 +535,7 @@ Here is explained in detail the code for the real time application:
|
|||||||
cv::setIdentity(KF.errorCovPost, cv::Scalar::all(1)); // error covariance
|
cv::setIdentity(KF.errorCovPost, cv::Scalar::all(1)); // error covariance
|
||||||
|
|
||||||
|
|
||||||
/** DYNAMIC MODEL **/
|
/* DYNAMIC MODEL */
|
||||||
|
|
||||||
// [1 0 0 dt 0 0 dt2 0 0 0 0 0 0 0 0 0 0 0]
|
// [1 0 0 dt 0 0 dt2 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
// [0 1 0 0 dt 0 0 dt2 0 0 0 0 0 0 0 0 0 0]
|
// [0 1 0 0 dt 0 0 dt2 0 0 0 0 0 0 0 0 0 0]
|
||||||
@ -579,7 +579,7 @@ Here is explained in detail the code for the real time application:
|
|||||||
KF.transitionMatrix.at<double>(11,17) = 0.5*pow(dt,2);
|
KF.transitionMatrix.at<double>(11,17) = 0.5*pow(dt,2);
|
||||||
|
|
||||||
|
|
||||||
/** MEASUREMENT MODEL **/
|
/* MEASUREMENT MODEL */
|
||||||
|
|
||||||
// [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
// [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
// [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
// [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
@ -744,7 +744,6 @@ You can watch the real time pose estimation on the `YouTube here <http://www.you
|
|||||||
<div align="center">
|
<div align="center">
|
||||||
<iframe title="Pose estimation of textured object using OpenCV" width="560" height="349" src="http://www.youtube.com/embed/XNATklaJlSQ?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
|
<iframe title="Pose estimation of textured object using OpenCV" width="560" height="349" src="http://www.youtube.com/embed/XNATklaJlSQ?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
|
||||||
</div>
|
</div>
|
||||||
</br></br>
|
|
||||||
<div align="center">
|
<div align="center">
|
||||||
<iframe title="Pose estimation of textured object using OpenCV in cluttered background" width="560" height="349" src="http://www.youtube.com/embed/YLS9bWek78k?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
|
<iframe title="Pose estimation of textured object using OpenCV in cluttered background" width="560" height="349" src="http://www.youtube.com/embed/YLS9bWek78k?rel=0&loop=1" frameborder="0" allowfullscreen align="middle"></iframe>
|
||||||
</div>
|
</div>
|
||||||
|
@ -0,0 +1,32 @@
|
|||||||
|
Camera calibration and 3D reconstruction (calib3d module) {#tutorial_table_of_content_calib3d}
|
||||||
|
==========================================================
|
||||||
|
|
||||||
|
Although we got most of our images in a 2D format they do come from a 3D world. Here you will learn
|
||||||
|
how to find out from the 2D images information about the 3D world.
|
||||||
|
|
||||||
|
- @subpage tutorial_camera_calibration_square_chess
|
||||||
|
|
||||||
|
*Compatibility:* \> OpenCV 2.0
|
||||||
|
|
||||||
|
*Author:* Victor Eruhimov
|
||||||
|
|
||||||
|
You will use some chessboard images to calibrate your camera.
|
||||||
|
|
||||||
|
- @subpage tutorial_camera_calibration
|
||||||
|
|
||||||
|
*Compatibility:* \> OpenCV 2.0
|
||||||
|
|
||||||
|
*Author:* Bernát Gábor
|
||||||
|
|
||||||
|
Camera calibration by using either the chessboard, circle or the asymmetrical circle
|
||||||
|
pattern. Get the images either from a camera attached, a video file or from an image
|
||||||
|
collection.
|
||||||
|
|
||||||
|
- @subpage tutorial_real_time_pose
|
||||||
|
|
||||||
|
*Compatibility:* \> OpenCV 2.0
|
||||||
|
|
||||||
|
*Author:* Edgar Riba
|
||||||
|
|
||||||
|
Real time pose estimation of a textured object using ORB features, FlannBased matcher, PnP
|
||||||
|
approach plus Ransac and Linear Kalman Filter to reject possible bad poses.
|
104
doc/tutorials/core/adding_images/adding_images.markdown
Normal file
104
doc/tutorials/core/adding_images/adding_images.markdown
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
Adding (blending) two images using OpenCV {#tutorial_adding_images}
|
||||||
|
=========================================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this tutorial you will learn:
|
||||||
|
|
||||||
|
- what is *linear blending* and why it is useful;
|
||||||
|
- how to add two images using @ref cv::addWeighted
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
@note
|
||||||
|
The explanation below belongs to the book [Computer Vision: Algorithms and
|
||||||
|
Applications](http://szeliski.org/Book/) by Richard Szeliski
|
||||||
|
|
||||||
|
From our previous tutorial, we know already a bit of *Pixel operators*. An interesting dyadic
|
||||||
|
(two-input) operator is the *linear blend operator*:
|
||||||
|
|
||||||
|
\f[g(x) = (1 - \alpha)f_{0}(x) + \alpha f_{1}(x)\f]
|
||||||
|
|
||||||
|
By varying \f$\alpha\f$ from \f$0 \rightarrow 1\f$ this operator can be used to perform a temporal
|
||||||
|
*cross-dissolve* between two images or videos, as seen in slide shows and film productions (cool,
|
||||||
|
eh?)
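For instance, a temporal cross-dissolve can be sketched by sweeping \f$\alpha\f$ over successive frames (a minimal illustration, assuming `src1` and `src2` are two already loaded images of the same size and type):
@code{.cpp}
// Sketch: sweep alpha from 0 to 1 to cross-dissolve src1 into src2.
for( int i = 0; i <= 100; i++ )
{
    double alpha = i / 100.0;
    Mat dst;
    addWeighted( src1, 1.0 - alpha, src2, alpha, 0.0, dst );  // g(x) = (1 - alpha)*f0(x) + alpha*f1(x)
    imshow( "Cross-dissolve", dst );
    waitKey( 30 );  // roughly 30 ms per step
}
@endcode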
|
||||||
|
|
||||||
|
Code
|
||||||
|
----
|
||||||
|
|
||||||
|
As usual, after the not-so-lengthy explanation, let's go to the code:
|
||||||
|
@code{.cpp}
|
||||||
|
#include <opencv2/opencv.hpp>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
|
||||||
|
int main( int argc, char** argv )
|
||||||
|
{
|
||||||
|
double alpha = 0.5; double beta; double input;
|
||||||
|
|
||||||
|
Mat src1, src2, dst;
|
||||||
|
|
||||||
|
/// Ask the user enter alpha
|
||||||
|
std::cout<<" Simple Linear Blender "<<std::endl;
|
||||||
|
std::cout<<"-----------------------"<<std::endl;
|
||||||
|
std::cout<<"* Enter alpha [0-1]: ";
|
||||||
|
std::cin>>input;
|
||||||
|
|
||||||
|
/// We use the alpha provided by the user if it is between 0 and 1
|
||||||
|
if( input >= 0.0 && input <= 1.0 )
|
||||||
|
{ alpha = input; }
|
||||||
|
|
||||||
|
/// Read image ( same size, same type )
|
||||||
|
src1 = imread("../../images/LinuxLogo.jpg");
|
||||||
|
src2 = imread("../../images/WindowsLogo.jpg");
|
||||||
|
|
||||||
|
if( !src1.data ) { printf("Error loading src1 \n"); return -1; }
|
||||||
|
if( !src2.data ) { printf("Error loading src2 \n"); return -1; }
|
||||||
|
|
||||||
|
/// Create Windows
|
||||||
|
namedWindow("Linear Blend", 1);
|
||||||
|
|
||||||
|
beta = ( 1.0 - alpha );
|
||||||
|
addWeighted( src1, alpha, src2, beta, 0.0, dst);
|
||||||
|
|
||||||
|
imshow( "Linear Blend", dst );
|
||||||
|
|
||||||
|
waitKey(0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
Explanation
|
||||||
|
-----------
|
||||||
|
|
||||||
|
-# Since we are going to perform:
|
||||||
|
|
||||||
|
\f[g(x) = (1 - \alpha)f_{0}(x) + \alpha f_{1}(x)\f]
|
||||||
|
|
||||||
|
We need two source images (\f$f_{0}(x)\f$ and \f$f_{1}(x)\f$). So, we load them in the usual way:
|
||||||
|
@code{.cpp}
|
||||||
|
src1 = imread("../../images/LinuxLogo.jpg");
|
||||||
|
src2 = imread("../../images/WindowsLogo.jpg");
|
||||||
|
@endcode
|
||||||
|
@warning
|
||||||
|
|
||||||
|
Since we are *adding* *src1* and *src2*, they both have to be of the same size (width and
|
||||||
|
height) and type.
|
||||||
|
|
||||||
|
-# Now we need to generate the `g(x)` image. For this, the function @ref cv::addWeighted comes quite handy:
|
||||||
|
@code{.cpp}
|
||||||
|
beta = ( 1.0 - alpha );
|
||||||
|
addWeighted( src1, alpha, src2, beta, 0.0, dst);
|
||||||
|
@endcode
|
||||||
|
since @ref cv::addWeighted produces:
|
||||||
|
\f[dst = \alpha \cdot src1 + \beta \cdot src2 + \gamma\f]
|
||||||
|
In this case, `gamma` is the argument \f$0.0\f$ in the code above.
|
||||||
|
|
||||||
|
-# Create windows, show the images and wait for the user to end the program.
|
||||||
|
|
||||||
|
Result
|
||||||
|
------
|
||||||
|
|
||||||
|
![](images/Adding_Images_Tutorial_Result_Big.jpg)
|
@ -115,6 +115,6 @@ Explanation
|
|||||||
Result
|
Result
|
||||||
=======
|
=======
|
||||||
|
|
||||||
.. image:: images/Adding_Images_Tutorial_Result_0.jpg
|
.. image:: images/Adding_Images_Tutorial_Result_Big.jpg
|
||||||
:alt: Blending Images Tutorial - Final Result
|
:alt: Blending Images Tutorial - Final Result
|
||||||
:align: center
|
:align: center
|
||||||
|
Before Width: | Height: | Size: 6.4 KiB After Width: | Height: | Size: 6.4 KiB |
@ -0,0 +1,243 @@
|
|||||||
|
Basic Drawing {#tutorial_basic_geometric_drawing}
|
||||||
|
=============
|
||||||
|
|
||||||
|
Goals
|
||||||
|
-----
|
||||||
|
|
||||||
|
In this tutorial you will learn how to:
|
||||||
|
|
||||||
|
- Use @ref cv::Point to define 2D points in an image.
|
||||||
|
- Use @ref cv::Scalar and why it is useful
|
||||||
|
- Draw a **line** by using the OpenCV function @ref cv::line
|
||||||
|
- Draw an **ellipse** by using the OpenCV function @ref cv::ellipse
|
||||||
|
- Draw a **rectangle** by using the OpenCV function @ref cv::rectangle
|
||||||
|
- Draw a **circle** by using the OpenCV function @ref cv::circle
|
||||||
|
- Draw a **filled polygon** by using the OpenCV function @ref cv::fillPoly
|
||||||
|
|
||||||
|
OpenCV Theory
|
||||||
|
-------------
|
||||||
|
|
||||||
|
For this tutorial, we will heavily use two structures: @ref cv::Point and @ref cv::Scalar :
|
||||||
|
|
||||||
|
### Point
|
||||||
|
|
||||||
|
It represents a 2D point, specified by its image coordinates \f$x\f$ and \f$y\f$. We can define it as:
|
||||||
|
@code{.cpp}
|
||||||
|
Point pt;
|
||||||
|
pt.x = 10;
|
||||||
|
pt.y = 8;
|
||||||
|
@endcode
|
||||||
|
or
|
||||||
|
@code{.cpp}
|
||||||
|
Point pt = Point(10, 8);
|
||||||
|
@endcode
|
||||||
|
### Scalar
|
||||||
|
|
||||||
|
- Represents a 4-element vector. The type Scalar is widely used in OpenCV for passing pixel
|
||||||
|
values.
|
||||||
|
- In this tutorial, we will use it extensively to represent BGR color values (3 parameters). It is
|
||||||
|
not necessary to define the last argument if it is not going to be used.
|
||||||
|
- Let's see an example, if we are asked for a color argument and we give:
|
||||||
|
@code{.cpp}
|
||||||
|
Scalar( a, b, c )
|
||||||
|
@endcode
|
||||||
|
We would be defining a BGR color: *Blue = a*, *Green = b* and *Red = c*
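For example, a minimal illustration (remember that OpenCV stores color images in BGR order; `img` is assumed to be an existing 8-bit 3-channel image):
@code{.cpp}
Scalar blue( 255, 0, 0 );      // B = 255, G = 0,   R = 0
Scalar yellow( 0, 255, 255 );  // B = 0,   G = 255, R = 255
line( img, Point( 10, 10 ), Point( 100, 10 ), blue, 2 );   // a blue line
circle( img, Point( 50, 50 ), 20, yellow, -1 );            // a filled yellow circle
@endcode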
|
||||||
|
|
||||||
|
Code
|
||||||
|
----
|
||||||
|
|
||||||
|
- This code is in your OpenCV sample folder. Otherwise you can grab it from
|
||||||
|
[here](https://github.com/Itseez/opencv/tree/master/samples/cpp/tutorial_code/core/Matrix/Drawing_1.cpp)
|
||||||
|
|
||||||
|
Explanation
|
||||||
|
-----------
|
||||||
|
|
||||||
|
-# Since we plan to draw two examples (an atom and a rook), we have to create two images and two
|
||||||
|
windows to display them.
|
||||||
|
@code{.cpp}
|
||||||
|
/// Windows names
|
||||||
|
char atom_window[] = "Drawing 1: Atom";
|
||||||
|
char rook_window[] = "Drawing 2: Rook";
|
||||||
|
|
||||||
|
/// Create black empty images
|
||||||
|
Mat atom_image = Mat::zeros( w, w, CV_8UC3 );
|
||||||
|
Mat rook_image = Mat::zeros( w, w, CV_8UC3 );
|
||||||
|
@endcode
|
||||||
|
-# We created functions to draw different geometric shapes. For instance, to draw the atom we used
|
||||||
|
*MyEllipse* and *MyFilledCircle*:
|
||||||
|
@code{.cpp}
|
||||||
|
/// 1. Draw a simple atom:
|
||||||
|
|
||||||
|
/// 1.a. Creating ellipses
|
||||||
|
MyEllipse( atom_image, 90 );
|
||||||
|
MyEllipse( atom_image, 0 );
|
||||||
|
MyEllipse( atom_image, 45 );
|
||||||
|
MyEllipse( atom_image, -45 );
|
||||||
|
|
||||||
|
/// 1.b. Creating circles
|
||||||
|
MyFilledCircle( atom_image, Point( w/2.0, w/2.0) );
|
||||||
|
@endcode
|
||||||
|
-# And to draw the rook we employed *MyLine*, *rectangle* and a *MyPolygon*:
|
||||||
|
@code{.cpp}
|
||||||
|
/// 2. Draw a rook
|
||||||
|
|
||||||
|
/// 2.a. Create a convex polygon
|
||||||
|
MyPolygon( rook_image );
|
||||||
|
|
||||||
|
/// 2.b. Creating rectangles
|
||||||
|
rectangle( rook_image,
|
||||||
|
Point( 0, 7*w/8.0 ),
|
||||||
|
Point( w, w),
|
||||||
|
Scalar( 0, 255, 255 ),
|
||||||
|
-1,
|
||||||
|
8 );
|
||||||
|
|
||||||
|
/// 2.c. Create a few lines
|
||||||
|
MyLine( rook_image, Point( 0, 15*w/16 ), Point( w, 15*w/16 ) );
|
||||||
|
MyLine( rook_image, Point( w/4, 7*w/8 ), Point( w/4, w ) );
|
||||||
|
MyLine( rook_image, Point( w/2, 7*w/8 ), Point( w/2, w ) );
|
||||||
|
MyLine( rook_image, Point( 3*w/4, 7*w/8 ), Point( 3*w/4, w ) );
|
||||||
|
@endcode
|
||||||
|
-# Let's check what is inside each of these functions:
|
||||||
|
- *MyLine*
|
||||||
|
@code{.cpp}
|
||||||
|
void MyLine( Mat img, Point start, Point end )
|
||||||
|
{
|
||||||
|
int thickness = 2;
|
||||||
|
int lineType = 8;
|
||||||
|
line( img, start, end,
|
||||||
|
Scalar( 0, 0, 0 ),
|
||||||
|
thickness,
|
||||||
|
lineType );
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
As we can see, *MyLine* just calls the function @ref cv::line , which does the following:
|
||||||
|
|
||||||
|
- Draw a line from Point **start** to Point **end**
|
||||||
|
- The line is displayed in the image **img**
|
||||||
|
- The line color is defined by **Scalar( 0, 0, 0)** which is the BGR value corresponding
|
||||||
|
to **Black**
|
||||||
|
- The line thickness is set to **thickness** (in this case 2)
|
||||||
|
- The line is an 8-connected one (**lineType** = 8)
|
||||||
|
- *MyEllipse*
|
||||||
|
@code{.cpp}
|
||||||
|
void MyEllipse( Mat img, double angle )
|
||||||
|
{
|
||||||
|
int thickness = 2;
|
||||||
|
int lineType = 8;
|
||||||
|
|
||||||
|
ellipse( img,
|
||||||
|
Point( w/2.0, w/2.0 ),
|
||||||
|
Size( w/4.0, w/16.0 ),
|
||||||
|
angle,
|
||||||
|
0,
|
||||||
|
360,
|
||||||
|
Scalar( 255, 0, 0 ),
|
||||||
|
thickness,
|
||||||
|
lineType );
|
||||||
|
}
|
||||||
|
@endcode
|
||||||
|
From the code above, we can observe that the function @ref cv::ellipse draws an ellipse such
|
||||||
|
that:
|
||||||
|
|
||||||
|
- The ellipse is displayed in the image **img**
|
||||||
|
- The ellipse center is located in the point **(w/2.0, w/2.0)** and is enclosed in a box
|
||||||
|
of size **(w/4.0, w/16.0)**
|
||||||
|
- The ellipse is rotated **angle** degrees
|
||||||
|
- The ellipse extends an arc between **0** and **360** degrees
|
||||||
|
- The color of the figure will be **Scalar( 255, 0, 0 )** which means blue in BGR.
|
||||||
|
- The ellipse's **thickness** is 2.
|
||||||
|
    -   *MyFilledCircle*
        @code{.cpp}
        void MyFilledCircle( Mat img, Point center )
        {
          int thickness = -1;
          int lineType = 8;

          circle( img,
                  center,
                  w/32.0,
                  Scalar( 0, 0, 255 ),
                  thickness,
                  lineType );
        }
        @endcode
        Similar to the ellipse function, we can observe that *circle* receives as arguments:

        -   The image where the circle will be displayed (**img**)
        -   The center of the circle denoted as the Point **center**
        -   The radius of the circle: **w/32.0**
        -   The color of the circle: **Scalar(0, 0, 255)** which means *Red* in BGR
        -   Since **thickness** = -1, the circle will be drawn filled.
    -   *MyPolygon*
        @code{.cpp}
        void MyPolygon( Mat img )
        {
          int lineType = 8;

          /* Create some points */
          Point rook_points[1][20];
          rook_points[0][0] = Point( w/4.0, 7*w/8.0 );
          rook_points[0][1] = Point( 3*w/4.0, 7*w/8.0 );
          rook_points[0][2] = Point( 3*w/4.0, 13*w/16.0 );
          rook_points[0][3] = Point( 11*w/16.0, 13*w/16.0 );
          rook_points[0][4] = Point( 19*w/32.0, 3*w/8.0 );
          rook_points[0][5] = Point( 3*w/4.0, 3*w/8.0 );
          rook_points[0][6] = Point( 3*w/4.0, w/8.0 );
          rook_points[0][7] = Point( 26*w/40.0, w/8.0 );
          rook_points[0][8] = Point( 26*w/40.0, w/4.0 );
          rook_points[0][9] = Point( 22*w/40.0, w/4.0 );
          rook_points[0][10] = Point( 22*w/40.0, w/8.0 );
          rook_points[0][11] = Point( 18*w/40.0, w/8.0 );
          rook_points[0][12] = Point( 18*w/40.0, w/4.0 );
          rook_points[0][13] = Point( 14*w/40.0, w/4.0 );
          rook_points[0][14] = Point( 14*w/40.0, w/8.0 );
          rook_points[0][15] = Point( w/4.0, w/8.0 );
          rook_points[0][16] = Point( w/4.0, 3*w/8.0 );
          rook_points[0][17] = Point( 13*w/32.0, 3*w/8.0 );
          rook_points[0][18] = Point( 5*w/16.0, 13*w/16.0 );
          rook_points[0][19] = Point( w/4.0, 13*w/16.0 );

          const Point* ppt[1] = { rook_points[0] };
          int npt[] = { 20 };

          fillPoly( img,
                    ppt,
                    npt,
                    1,
                    Scalar( 255, 255, 255 ),
                    lineType );
        }
        @endcode
        To draw a filled polygon we use the function @ref cv::fillPoly . We note that:

        -   The polygon will be drawn on **img**
        -   The vertices of the polygon are the set of points in **ppt**
        -   The total number of vertices to be drawn is **npt**
        -   The number of polygons to be drawn is only **1**
        -   The color of the polygon is defined by **Scalar( 255, 255, 255)**, which is the BGR
            value for *white*
    -   *rectangle*
        @code{.cpp}
        rectangle( rook_image,
                   Point( 0, 7*w/8.0 ),
                   Point( w, w),
                   Scalar( 0, 255, 255 ),
                   -1, 8 );
        @endcode
        Finally we have the @ref cv::rectangle function (we did not create a special function for
        this one). We note that:

        -   The rectangle will be drawn on **rook_image**
        -   Two opposite vertices of the rectangle are defined by **Point( 0, 7\*w/8.0 )**
            and **Point( w, w )**
        -   The color of the rectangle is given by **Scalar(0, 255, 255)** which is the BGR value
            for *yellow*
        -   Since the thickness value is given by **-1**, the rectangle will be filled; the sketch
            right after this list shows the same calls with a positive thickness.
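
For reference, here is a small standalone sketch (not part of the tutorial's sample code) that reuses
the same drawing functions with different parameter values: a positive **thickness** produces outlines
instead of filled shapes, a partial start/end angle produces an ellipse arc, @ref cv::polylines draws
only the contour of a polygon, and an anti-aliased line type smooths the edges. The canvas size, window
name and shape coordinates below are arbitrary choices for illustration, and the snippet assumes
OpenCV 3 or newer so that `LINE_AA` is available:
@code{.cpp}
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

using namespace cv;

int main( void )
{
  const int w = 400;                                   // canvas size, as in the tutorial
  Mat variant_image = Mat::zeros( w, w, CV_8UC3 );

  int thickness = 2;                                   // positive: draw outlines, not filled shapes
  int lineType  = LINE_AA;                             // anti-aliased instead of 8-connected

  /// Outlined, anti-aliased circle (compare with thickness = -1 in MyFilledCircle)
  circle( variant_image, Point( w/2, w/2 ), w/4, Scalar( 0, 0, 255 ), thickness, lineType );

  /// Half ellipse: the arc spans only 0..180 degrees instead of the full 0..360
  ellipse( variant_image, Point( w/2, w/2 ), Size( w/3, w/6 ), 0, 0, 180,
           Scalar( 255, 0, 0 ), thickness, lineType );

  /// Outlined rectangle (positive thickness) instead of a filled one (-1)
  rectangle( variant_image, Point( w/8, w/8 ), Point( 7*w/8, 7*w/8 ),
             Scalar( 0, 255, 255 ), thickness, lineType );

  /// Anti-aliased diagonal line
  line( variant_image, Point( 0, 0 ), Point( w, w ), Scalar( 255, 255, 255 ), thickness, lineType );

  /// polylines draws only the closed contour of a polygon, unlike fillPoly which fills it
  Point triangle[1][3];
  triangle[0][0] = Point(   w/4, 3*w/4 );
  triangle[0][1] = Point( 3*w/4, 3*w/4 );
  triangle[0][2] = Point(   w/2,   w/4 );
  const Point* ppt[1] = { triangle[0] };
  int npt[] = { 3 };
  polylines( variant_image, ppt, npt, 1, true, Scalar( 0, 255, 0 ), thickness, lineType );

  imshow( "Drawing variants", variant_image );
  waitKey( 0 );
  return 0;
}
@endcode
An 8-connected line (**lineType** = 8, as in the functions above) is faster to draw, while `LINE_AA`
looks smoother; all of the drawing functions used in this tutorial accept either value.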

Result
------

Compiling and running your program should give you a result like this:

![](images/Drawing_1_Tutorial_Result_0.png)