831 lines
		
	
	
		
			31 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			831 lines
		
	
	
		
			31 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
\section{Feature detection and description}
 | 
						|
 | 
						|
\ifCpp
 | 
						|
\cvCppFunc{FAST}
 | 
						|
Detects corners using the FAST algorithm by E. Rosten (``Machine learning for high-speed corner detection'', 2006).
 | 
						|
\fi
 | 
						|
 | 
						|
\cvdefCpp{
 | 
						|
void FAST( const Mat\& image, vector<KeyPoint>\& keypoints,
 | 
						|
           int threshold, bool nonmaxSupression=true );
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
\begin{description}
 | 
						|
\cvarg{image}{The image. Keypoints (corners) will be detected on this.}
 | 
						|
\cvarg{keypoints}{Keypoints detected on the image.}
 | 
						|
\cvarg{threshold}{Threshold on difference between intensity of center pixel and 
 | 
						|
	pixels on circle around this pixel. See description of the algorithm.}
 | 
						|
\cvarg{nonmaxSupression}{If it is true, non-maximum suppression will be applied to the detected corners (keypoints).}
 | 
						|
\end{description}
 | 
						|
 | 
						|
\ifCPy
 | 
						|
\ifPy
 | 
						|
\cvclass{CvSURFPoint}
 | 
						|
A SURF keypoint, represented as a tuple \texttt{((x, y), laplacian, size, dir, hessian)}.
 | 
						|
 | 
						|
\begin{description}
 | 
						|
\cvarg{x}{x-coordinate of the feature within the image}
 | 
						|
\cvarg{y}{y-coordinate of the feature within the image}
 | 
						|
\cvarg{laplacian}{-1, 0 or +1. Sign of the laplacian at the point. Can be used to speed up feature comparison, since features with laplacians of different signs cannot match}
 | 
						|
\cvarg{size}{size of the feature}
 | 
						|
\cvarg{dir}{orientation of the feature: 0..360 degrees}
 | 
						|
\cvarg{hessian}{value of the hessian (can be used to approximately estimate the feature strengths; see also params.hessianThreshold)}
 | 
						|
\end{description}
 | 
						|
\fi
 | 
						|
 | 
						|
\cvCPyFunc{ExtractSURF}
 | 
						|
Extracts Speeded Up Robust Features from an image.
 | 
						|
 | 
						|
\cvdefC{
 | 
						|
void cvExtractSURF( \par const CvArr* image,\par const CvArr* mask,\par CvSeq** keypoints,\par CvSeq** descriptors,\par CvMemStorage* storage,\par CvSURFParams params );
 | 
						|
}
 | 
						|
\cvdefPy{ExtractSURF(image,mask,storage,params)-> (keypoints,descriptors)}
 | 
						|
 | 
						|
\begin{description}
 | 
						|
\cvarg{image}{The input 8-bit grayscale image}
 | 
						|
\cvarg{mask}{The optional input 8-bit mask. The features are only found in the areas that contain more than 50\% of non-zero mask pixels}
 | 
						|
\ifC
 | 
						|
\cvarg{keypoints}{The output parameter; double pointer to the sequence of keypoints. The sequence consists of CvSURFPoint structures, defined as follows:}
 | 
						|
\begin{lstlisting}
 | 
						|
 typedef struct CvSURFPoint
 | 
						|
 {
 | 
						|
    CvPoint2D32f pt; // position of the feature within the image
 | 
						|
    int laplacian;   // -1, 0 or +1. sign of the laplacian at the point.
 | 
						|
                     // can be used to speedup feature comparison
 | 
						|
                     // (normally features with laplacians of different 
 | 
						|
             // signs can not match)
 | 
						|
    int size;        // size of the feature
 | 
						|
    float dir;       // orientation of the feature: 0..360 degrees
 | 
						|
    float hessian;   // value of the hessian (can be used to 
 | 
						|
             // approximately estimate the feature strengths;
 | 
						|
                     // see also params.hessianThreshold)
 | 
						|
 }
 | 
						|
 CvSURFPoint;
 | 
						|
\end{lstlisting}
 | 
						|
\cvarg{descriptors}{The optional output parameter; double pointer to the sequence of descriptors. Depending on the params.extended value, each element of the sequence will be either a 64-element or a 128-element floating-point (\texttt{CV\_32F}) vector. If the parameter is NULL, the descriptors are not computed}
 | 
						|
\else
 | 
						|
\cvarg{keypoints}{sequence of keypoints.}
 | 
						|
\cvarg{descriptors}{sequence of descriptors.  Each SURF descriptor is a list of floats, of length 64 or 128.}
 | 
						|
\fi
 | 
						|
\cvarg{storage}{Memory storage where keypoints and descriptors will be stored}
 | 
						|
\ifC
 | 
						|
\cvarg{params}{Various algorithm parameters stored in the structure CvSURFParams:}
 | 
						|
\begin{lstlisting}
 | 
						|
 typedef struct CvSURFParams
 | 
						|
 {
 | 
						|
    int extended; // 0 means basic descriptors (64 elements each),
 | 
						|
                  // 1 means extended descriptors (128 elements each)
 | 
						|
    double hessianThreshold; // only features with keypoint.hessian 
 | 
						|
          // larger than that are extracted.
 | 
						|
                  // good default value is ~300-500 (can depend on the 
 | 
						|
          // average local contrast and sharpness of the image).
 | 
						|
                  // user can further filter out some features based on 
 | 
						|
          // their hessian values and other characteristics.
 | 
						|
    int nOctaves; // the number of octaves to be used for extraction.
 | 
						|
                  // With each next octave the feature size is doubled 
 | 
						|
          // (3 by default)
 | 
						|
    int nOctaveLayers; // The number of layers within each octave 
 | 
						|
          // (4 by default)
 | 
						|
 }
 | 
						|
 CvSURFParams;
 | 
						|
 | 
						|
 CvSURFParams cvSURFParams(double hessianThreshold, int extended=0); 
 | 
						|
          // returns default parameters
 | 
						|
\end{lstlisting}
 | 
						|
\else
 | 
						|
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(extended, hessianThreshold, nOctaves, nOctaveLayers)}:
 | 
						|
\begin{description}
 | 
						|
\cvarg{extended}{0 means basic descriptors (64 elements each), 1 means extended descriptors (128 elements each)}
 | 
						|
\cvarg{hessianThreshold}{Only features with hessian larger than that are extracted. A good default value is $\sim$300--500 (can depend on the average local contrast and sharpness of the image). The user can further filter out some features based on their hessian values and other characteristics.}
 | 
						|
\cvarg{nOctaves}{the number of octaves to be used for extraction.  With each next octave the feature size is doubled (3 by default)}
 | 
						|
\cvarg{nOctaveLayers}{The number of layers within each octave (4 by default)}
 | 
						|
\end{description}}
 | 
						|
\fi
 | 
						|
\end{description}
 | 
						|
 | 
						|
The function cvExtractSURF finds robust features in the image, as
 | 
						|
described in \cite{Bay06}. For each feature it returns its location, size,
 | 
						|
orientation and optionally the descriptor, basic or extended. The function
 | 
						|
can be used for object tracking and localization, image stitching etc.
 | 
						|
 | 
						|
\ifC
 | 
						|
See the
 | 
						|
\texttt{find\_obj.cpp} demo in OpenCV samples directory.
 | 
						|
\else
 | 
						|
To extract strong SURF features from an image:
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
>>> import cv
 | 
						|
>>> im = cv.LoadImageM("building.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
 | 
						|
>>> (keypoints, descriptors) = cv.ExtractSURF(im, None, cv.CreateMemStorage(), (0, 30000, 3, 1))
 | 
						|
>>> print len(keypoints), len(descriptors)
 | 
						|
6 6
 | 
						|
>>> for ((x, y), laplacian, size, dir, hessian) in keypoints:
 | 
						|
...     print "x=\%d y=\%d laplacian=\%d size=\%d dir=\%f hessian=\%f" \% (x, y, laplacian, size, dir, hessian)
 | 
						|
x=30 y=27 laplacian=-1 size=31 dir=69.778503 hessian=36979.789062
 | 
						|
x=296 y=197 laplacian=1 size=33 dir=111.081039 hessian=31514.349609
 | 
						|
x=296 y=266 laplacian=1 size=32 dir=107.092300 hessian=31477.908203
 | 
						|
x=254 y=284 laplacian=1 size=31 dir=279.137360 hessian=34169.800781
 | 
						|
x=498 y=525 laplacian=-1 size=33 dir=278.006592 hessian=31002.759766
 | 
						|
x=777 y=281 laplacian=1 size=70 dir=167.940964 hessian=35538.363281
 | 
						|
\end{lstlisting}
 | 
						|
 | 
						|
\fi
 | 
						|
 | 
						|
\cvCPyFunc{GetStarKeypoints}
 | 
						|
Retrieves keypoints using the StarDetector algorithm.
 | 
						|
 | 
						|
\cvdefC{
 | 
						|
CvSeq* cvGetStarKeypoints( \par const CvArr* image,\par CvMemStorage* storage,\par CvStarDetectorParams params=cvStarDetectorParams() );
 | 
						|
}
 | 
						|
\cvdefPy{GetStarKeypoints(image,storage,params)-> keypoints}
 | 
						|
 | 
						|
\begin{description}
 | 
						|
\cvarg{image}{The input 8-bit grayscale image}
 | 
						|
\cvarg{storage}{Memory storage where the keypoints will be stored}
 | 
						|
\ifC
 | 
						|
\cvarg{params}{Various algorithm parameters stored in the structure CvStarDetectorParams:}
 | 
						|
\begin{lstlisting}
 | 
						|
 typedef struct CvStarDetectorParams
 | 
						|
 {
 | 
						|
    int maxSize; // maximal size of the features detected. The following 
 | 
						|
                 // values of the parameter are supported:
 | 
						|
                 // 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
 | 
						|
    int responseThreshold; // threshold for the approximated laplacian,
 | 
						|
                           // used to eliminate weak features
 | 
						|
    int lineThresholdProjected; // another threshold for laplacian to 
 | 
						|
                // eliminate edges
 | 
						|
    int lineThresholdBinarized; // another threshold for the feature 
 | 
						|
                // scale to eliminate edges
 | 
						|
    int suppressNonmaxSize; // linear size of a pixel neighborhood 
 | 
						|
                // for non-maxima suppression
 | 
						|
 }
 | 
						|
 CvStarDetectorParams;
 | 
						|
\end{lstlisting}
 | 
						|
\else
 | 
						|
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(maxSize, responseThreshold, lineThresholdProjected, lineThresholdBinarized, suppressNonmaxSize)}:
 | 
						|
\begin{description}
 | 
						|
\cvarg{maxSize}{maximal size of the features detected. The following values of the parameter are supported: 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128}
 | 
						|
\cvarg{responseThreshold}{threshold for the approximated laplacian, used to eliminate weak features}
 | 
						|
\cvarg{lineThresholdProjected}{another threshold for laplacian to eliminate edges}
 | 
						|
\cvarg{lineThresholdBinarized}{another threshold for the feature scale to eliminate edges}
 | 
						|
\cvarg{suppressNonmaxSize}{linear size of a pixel neighborhood for non-maxima suppression}
 | 
						|
\end{description}
 | 
						|
}
 | 
						|
\fi
 | 
						|
\end{description}
 | 
						|
 | 
						|
The function GetStarKeypoints extracts keypoints that are local
 | 
						|
scale-space extrema. The scale space is constructed by computing
 | 
						|
approximate values of laplacians with different sigmas at each
 | 
						|
pixel. Instead of using pyramids, a popular approach to save computing
 | 
						|
time, all of the laplacians are computed at each pixel of the original
 | 
						|
high-resolution image. But each approximate laplacian value is computed
 | 
						|
in O(1) time regardless of the sigma, thanks to the use of integral
 | 
						|
images. The algorithm is based on the paper \cite{Agrawal08}, but instead
 | 
						|
of a square, hexagon or octagon it uses an 8-end star shape, hence the name,
 | 
						|
consisting of overlapping upright and tilted squares.
 | 
						|
 | 
						|
\ifC
 | 
						|
Each computed feature is represented by the following structure:
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
typedef struct CvStarKeypoint
 | 
						|
{
 | 
						|
    CvPoint pt; // coordinates of the feature
 | 
						|
    int size; // feature size, see CvStarDetectorParams::maxSize
 | 
						|
    float response; // the approximated laplacian value at that point.
 | 
						|
}
 | 
						|
CvStarKeypoint;
 | 
						|
 | 
						|
inline CvStarKeypoint cvStarKeypoint(CvPoint pt, int size, float response);
 | 
						|
\end{lstlisting}
 | 
						|
\else
 | 
						|
Each keypoint is represented by a tuple \texttt{((x, y), size, response)}:
 | 
						|
\begin{description}
 | 
						|
\cvarg{x, y}{Coordinates of the keypoint within the image}
 | 
						|
\cvarg{size}{feature size, up to \texttt{maxSize}}
 | 
						|
\cvarg{response}{approximated laplacian value for the keypoint}
 | 
						|
\end{description}
 | 
						|
\fi
 | 
						|
 | 
						|
\ifC
 | 
						|
Below is a small usage sample:
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
#include "cv.h"
 | 
						|
#include "highgui.h"
 | 
						|
 | 
						|
int main(int argc, char** argv)
 | 
						|
{
 | 
						|
    const char* filename = argc > 1 ? argv[1] : "lena.jpg";
 | 
						|
    IplImage* img = cvLoadImage( filename, 0 ), *cimg;
 | 
						|
    CvMemStorage* storage = cvCreateMemStorage(0);
 | 
						|
    CvSeq* keypoints = 0;
 | 
						|
    int i;
 | 
						|
 | 
						|
    if( !img )
 | 
						|
        return 0;
 | 
						|
    cvNamedWindow( "image", 1 );
 | 
						|
    cvShowImage( "image", img );
 | 
						|
    cvNamedWindow( "features", 1 );
 | 
						|
    cimg = cvCreateImage( cvGetSize(img), 8, 3 );
 | 
						|
    cvCvtColor( img, cimg, CV_GRAY2BGR );
 | 
						|
 | 
						|
    keypoints = cvGetStarKeypoints( img, storage, cvStarDetectorParams(45) );
 | 
						|
 | 
						|
    for( i = 0; i < (keypoints ? keypoints->total : 0); i++ )
 | 
						|
    {
 | 
						|
        CvStarKeypoint kpt = *(CvStarKeypoint*)cvGetSeqElem(keypoints, i);
 | 
						|
        int r = kpt.size/2;
 | 
						|
        cvCircle( cimg, kpt.pt, r, CV_RGB(0,255,0));
 | 
						|
        cvLine( cimg, cvPoint(kpt.pt.x + r, kpt.pt.y + r),
 | 
						|
            cvPoint(kpt.pt.x - r, kpt.pt.y - r), CV_RGB(0,255,0));
 | 
						|
        cvLine( cimg, cvPoint(kpt.pt.x - r, kpt.pt.y + r),
 | 
						|
            cvPoint(kpt.pt.x + r, kpt.pt.y - r), CV_RGB(0,255,0));
 | 
						|
    }
 | 
						|
    cvShowImage( "features", cimg );
 | 
						|
    cvWaitKey();
 | 
						|
}
 | 
						|
\end{lstlisting}
 | 
						|
\fi
 | 
						|
 | 
						|
\fi
 | 
						|
\ifCpp
 | 
						|
 | 
						|
\cvclass{MSER}
 | 
						|
Maximally-Stable Extremal Region Extractor
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
class MSER : public CvMSERParams
 | 
						|
{
 | 
						|
public:
 | 
						|
    // default constructor
 | 
						|
    MSER();
 | 
						|
    // constructor that initializes all the algorithm parameters
 | 
						|
    MSER( int _delta, int _min_area, int _max_area,
 | 
						|
          float _max_variation, float _min_diversity,
 | 
						|
          int _max_evolution, double _area_threshold,
 | 
						|
          double _min_margin, int _edge_blur_size );
 | 
						|
    // runs the extractor on the specified image; returns the MSERs,
 | 
						|
    // each encoded as a contour (vector<Point>, see findContours)
 | 
						|
    // the optional mask marks the area where MSERs are searched for
 | 
						|
    void operator()( const Mat& image, vector<vector<Point> >& msers, const Mat& mask ) const;
 | 
						|
};
 | 
						|
\end{lstlisting}
 | 
						|
 | 
						|
The class encapsulates all the parameters of the MSER extraction algorithm (see \url{http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions}).
 | 
						|
 | 
						|
\cvclass{StarDetector}
 | 
						|
Implements Star keypoint detector
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
class StarDetector : CvStarDetectorParams
 | 
						|
{
 | 
						|
public:
 | 
						|
    // default constructor
 | 
						|
    StarDetector();
 | 
						|
    // the full constructor initializes all the algorithm parameters:
 | 
						|
    // maxSize - maximum size of the features. The following 
 | 
						|
    //      values of the parameter are supported:
 | 
						|
    //      4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
 | 
						|
    // responseThreshold - threshold for the approximated laplacian,
 | 
						|
    //      used to eliminate weak features. The larger it is,
 | 
						|
    //      the fewer features will be retrieved
 | 
						|
    // lineThresholdProjected - another threshold for the laplacian to 
 | 
						|
    //      eliminate edges
 | 
						|
    // lineThresholdBinarized - another threshold for the feature 
 | 
						|
    //      size to eliminate edges.
 | 
						|
    // The larger the 2 thresholds, the more points you get.
 | 
						|
    StarDetector(int maxSize, int responseThreshold,
 | 
						|
                 int lineThresholdProjected,
 | 
						|
                 int lineThresholdBinarized,
 | 
						|
                 int suppressNonmaxSize);
 | 
						|
 | 
						|
    // finds keypoints in an image
 | 
						|
    void operator()(const Mat& image, vector<KeyPoint>& keypoints) const;
 | 
						|
};
 | 
						|
\end{lstlisting}
 | 
						|
 | 
						|
The class implements a modified version of the CenSurE keypoint detector described in
 | 
						|
\cite{Agrawal08}.
 | 
						|
 | 
						|
\cvclass{SIFT}
 | 
						|
Class for extracting keypoints and computing descriptors using the approach named Scale Invariant Feature Transform (SIFT).
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
class CV_EXPORTS SIFT
 | 
						|
{
 | 
						|
public:
 | 
						|
    struct CommonParams
 | 
						|
    {
 | 
						|
        static const int DEFAULT_NOCTAVES = 4;
 | 
						|
        static const int DEFAULT_NOCTAVE_LAYERS = 3;
 | 
						|
        static const int DEFAULT_FIRST_OCTAVE = -1;
 | 
						|
        enum{ FIRST_ANGLE = 0, AVERAGE_ANGLE = 1 };
 | 
						|
 | 
						|
        CommonParams();
 | 
						|
        CommonParams( int _nOctaves, int _nOctaveLayers, int _firstOctave, 
 | 
						|
					  int _angleMode );
 | 
						|
        int nOctaves, nOctaveLayers, firstOctave;
 | 
						|
        int angleMode;
 | 
						|
    };
 | 
						|
 | 
						|
    struct DetectorParams
 | 
						|
    {
 | 
						|
        static double GET_DEFAULT_THRESHOLD() 
 | 
						|
          { return 0.04 / SIFT::CommonParams::DEFAULT_NOCTAVE_LAYERS / 2.0; }
 | 
						|
        static double GET_DEFAULT_EDGE_THRESHOLD() { return 10.0; }
 | 
						|
 | 
						|
        DetectorParams();
 | 
						|
        DetectorParams( double _threshold, double _edgeThreshold );
 | 
						|
        double threshold, edgeThreshold;
 | 
						|
    };
 | 
						|
 | 
						|
    struct DescriptorParams
 | 
						|
    {
 | 
						|
        static double GET_DEFAULT_MAGNIFICATION() { return 3.0; }
 | 
						|
        static const bool DEFAULT_IS_NORMALIZE = true;
 | 
						|
        static const int DESCRIPTOR_SIZE = 128;
 | 
						|
 | 
						|
        DescriptorParams();
 | 
						|
        DescriptorParams( double _magnification, bool _isNormalize, 
 | 
						|
						  bool _recalculateAngles );
 | 
						|
        double magnification;
 | 
						|
        bool isNormalize;
 | 
						|
        bool recalculateAngles;
 | 
						|
    };
 | 
						|
 | 
						|
    SIFT();
 | 
						|
    //! sift-detector constructor
 | 
						|
    SIFT( double _threshold, double _edgeThreshold,
 | 
						|
          int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
 | 
						|
          int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
 | 
						|
          int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
 | 
						|
          int _angleMode=CommonParams::FIRST_ANGLE );
 | 
						|
    //! sift-descriptor constructor
 | 
						|
    SIFT( double _magnification, bool _isNormalize=true,
 | 
						|
          bool _recalculateAngles = true,
 | 
						|
          int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
 | 
						|
          int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
 | 
						|
          int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
 | 
						|
          int _angleMode=CommonParams::FIRST_ANGLE );
 | 
						|
    SIFT( const CommonParams& _commParams,
 | 
						|
          const DetectorParams& _detectorParams = DetectorParams(),
 | 
						|
          const DescriptorParams& _descriptorParams = DescriptorParams() );
 | 
						|
 | 
						|
    //! returns the descriptor size in floats (128)
 | 
						|
    int descriptorSize() const { return DescriptorParams::DESCRIPTOR_SIZE; }
 | 
						|
    //! finds the keypoints using SIFT algorithm
 | 
						|
    void operator()(const Mat& img, const Mat& mask,
 | 
						|
                    vector<KeyPoint>& keypoints) const;
 | 
						|
    //! finds the keypoints and computes descriptors for them using SIFT algorithm. 
 | 
						|
    //! Optionally it can compute descriptors for the user-provided keypoints
 | 
						|
    void operator()(const Mat& img, const Mat& mask,
 | 
						|
                    vector<KeyPoint>& keypoints,
 | 
						|
                    Mat& descriptors,
 | 
						|
                    bool useProvidedKeypoints=false) const;
 | 
						|
 | 
						|
    CommonParams getCommonParams () const { return commParams; }
 | 
						|
    DetectorParams getDetectorParams () const { return detectorParams; }
 | 
						|
    DescriptorParams getDescriptorParams () const { return descriptorParams; }
 | 
						|
protected:
 | 
						|
    ...
 | 
						|
};
 | 
						|
\end{lstlisting}
 | 
						|
 | 
						|
\cvclass{SURF}\label{cv.class.SURF}
 | 
						|
Class for extracting Speeded Up Robust Features from an image.
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
class SURF : public CvSURFParams
 | 
						|
{
 | 
						|
public:
 | 
						|
    // default constructor
 | 
						|
    SURF();
 | 
						|
    // constructor that initializes all the algorithm parameters
 | 
						|
    SURF(double _hessianThreshold, int _nOctaves=4,
 | 
						|
         int _nOctaveLayers=2, bool _extended=false);
 | 
						|
    // returns the number of elements in each descriptor (64 or 128)
 | 
						|
    int descriptorSize() const;
 | 
						|
    // detects keypoints using fast multi-scale Hessian detector
 | 
						|
    void operator()(const Mat& img, const Mat& mask,
 | 
						|
                    vector<KeyPoint>& keypoints) const;
 | 
						|
    // detects keypoints and computes the SURF descriptors for them;
 | 
						|
    // output vector "descriptors" stores elements of descriptors and has size 
 | 
						|
    // equal to descriptorSize()*keypoints.size(), as each descriptor is
 | 
						|
    // descriptorSize() elements of this vector.
 | 
						|
    void operator()(const Mat& img, const Mat& mask,
 | 
						|
                    vector<KeyPoint>& keypoints,
 | 
						|
                    vector<float>& descriptors,
 | 
						|
                    bool useProvidedKeypoints=false) const;
 | 
						|
};
 | 
						|
\end{lstlisting}
 | 
						|
 | 
						|
The class \texttt{SURF} implements the Speeded Up Robust Features descriptor \cite{Bay06}.
 | 
						|
There is a fast multi-scale Hessian keypoint detector that can be used to find the keypoints
 | 
						|
(which is the default option), but the descriptors can be also computed for the user-specified keypoints.
 | 
						|
The class can be used for object tracking and localization, image stitching etc. See the
 | 
						|
\texttt{find\_obj.cpp} demo in OpenCV samples directory.
 | 
						|
 | 
						|
\cvclass{RandomizedTree}
 | 
						|
The class contains the base structure for \texttt{RTreeClassifier}.
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
class CV_EXPORTS RandomizedTree
 | 
						|
{  
 | 
						|
public:
 | 
						|
	friend class RTreeClassifier;  
 | 
						|
 | 
						|
	RandomizedTree();
 | 
						|
	~RandomizedTree();
 | 
						|
 | 
						|
	void train(std::vector<BaseKeypoint> const& base_set,
 | 
						|
		 cv::RNG &rng, int depth, int views,
 | 
						|
		 size_t reduced_num_dim, int num_quant_bits);
 | 
						|
	void train(std::vector<BaseKeypoint> const& base_set,
 | 
						|
		 cv::RNG &rng, PatchGenerator &make_patch, int depth,
 | 
						|
		 int views, size_t reduced_num_dim, int num_quant_bits);
 | 
						|
 | 
						|
	// following two funcs are EXPERIMENTAL 
 | 
						|
	//(do not use unless you know exactly what you do)
 | 
						|
	static void quantizeVector(float *vec, int dim, int N, float bnds[2],
 | 
						|
		 int clamp_mode=0);
 | 
						|
	static void quantizeVector(float *src, int dim, int N, float bnds[2],
 | 
						|
		 uchar *dst);  
 | 
						|
 | 
						|
	// patch_data must be a 32x32 array (no row padding)
 | 
						|
	float* getPosterior(uchar* patch_data);
 | 
						|
	const float* getPosterior(uchar* patch_data) const;
 | 
						|
	uchar* getPosterior2(uchar* patch_data);
 | 
						|
 | 
						|
	void read(const char* file_name, int num_quant_bits);
 | 
						|
	void read(std::istream &is, int num_quant_bits);
 | 
						|
	void write(const char* file_name) const;
 | 
						|
	void write(std::ostream &os) const;
 | 
						|
 | 
						|
	int classes() { return classes_; }
 | 
						|
	int depth() { return depth_; }
 | 
						|
 | 
						|
	void discardFloatPosteriors() { freePosteriors(1); }
 | 
						|
 | 
						|
	inline void applyQuantization(int num_quant_bits)
 | 
						|
		 { makePosteriors2(num_quant_bits); }
 | 
						|
 | 
						|
private:
 | 
						|
	int classes_;
 | 
						|
	int depth_;
 | 
						|
	int num_leaves_;  
 | 
						|
	std::vector<RTreeNode> nodes_;  
 | 
						|
	float **posteriors_;        // 16-bytes aligned posteriors
 | 
						|
	uchar **posteriors2_;     // 16-bytes aligned posteriors
 | 
						|
	std::vector<int> leaf_counts_;
 | 
						|
 | 
						|
	void createNodes(int num_nodes, cv::RNG &rng);
 | 
						|
	void allocPosteriorsAligned(int num_leaves, int num_classes);
 | 
						|
	void freePosteriors(int which);   
 | 
						|
		 // which: 1=posteriors_, 2=posteriors2_, 3=both
 | 
						|
	void init(int classes, int depth, cv::RNG &rng);
 | 
						|
	void addExample(int class_id, uchar* patch_data);
 | 
						|
	void finalize(size_t reduced_num_dim, int num_quant_bits);  
 | 
						|
	int getIndex(uchar* patch_data) const;
 | 
						|
	inline float* getPosteriorByIndex(int index);
 | 
						|
	inline uchar* getPosteriorByIndex2(int index);
 | 
						|
	inline const float* getPosteriorByIndex(int index) const;
 | 
						|
	void convertPosteriorsToChar();
 | 
						|
	void makePosteriors2(int num_quant_bits);
 | 
						|
	void compressLeaves(size_t reduced_num_dim);  
 | 
						|
	void estimateQuantPercForPosteriors(float perc[2]);
 | 
						|
};
 | 
						|
\end{lstlisting}
 | 
						|
 | 
						|
\cvCppFunc{RandomizedTree::train}
 | 
						|
Trains a randomized tree using an input set of keypoints
 | 
						|
 | 
						|
\cvdefCpp{
 | 
						|
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
 | 
						|
			int depth, int views, size\_t reduced\_num\_dim,
 | 
						|
			int num\_quant\_bits);	
 | 
						|
			}
 | 
						|
\cvdefCpp{
 | 
						|
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
 | 
						|
			PatchGenerator \&make\_patch, int depth, int views, size\_t reduced\_num\_dim,
 | 
						|
			int num\_quant\_bits);	
 | 
						|
			}				
 | 
						|
\begin{description}
 | 
						|
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
 | 
						|
\cvarg{rng} {Random number generator used for training}
 | 
						|
\cvarg{make\_patch} {Patch generator used for training}
 | 
						|
\cvarg{depth} {Maximum tree depth}
 | 
						|
%\cvarg{views} {}
 | 
						|
\cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
 | 
						|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
 | 
						|
\end{description}		
 | 
						|
 | 
						|
\cvCppFunc{RandomizedTree::read}
 | 
						|
Reads pre-saved randomized tree from file or stream
 | 
						|
\cvdefCpp{void read(const char* file\_name, int num\_quant\_bits);}
 | 
						|
\cvdefCpp{void read(std::istream \&is, int num\_quant\_bits);}
 | 
						|
\begin{description}
 | 
						|
\cvarg{file\_name}{Name of the file that contains the randomized tree data}
 | 
						|
\cvarg{is}{Input stream associated with the file that contains the randomized tree data}
 | 
						|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
 | 
						|
\end{description}
 | 
						|
 | 
						|
\cvCppFunc{RandomizedTree::write}
 | 
						|
Writes current randomized tree to a file or stream
 | 
						|
\cvdefCpp{void write(const char* file\_name) const;}	
 | 
						|
\cvdefCpp{void write(std::ostream \&os) const;}	
 | 
						|
\begin{description}
 | 
						|
\cvarg{file\_name}{Name of the file where the randomized tree data will be stored}
 | 
						|
\cvarg{os}{Output stream associated with the file where the randomized tree data will be stored}
 | 
						|
\end{description}
 | 
						|
 | 
						|
 | 
						|
\cvCppFunc{RandomizedTree::applyQuantization}
 | 
						|
Applies quantization to the current randomized tree
 | 
						|
\cvdefCpp{void applyQuantization(int num\_quant\_bits)}
 | 
						|
\begin{description}
 | 
						|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
 | 
						|
\end{description}
 | 
						|
 | 
						|
\cvclass{RTreeNode}
 | 
						|
The class contains the base structure for \texttt{RandomizedTree}.
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
struct RTreeNode
 | 
						|
{
 | 
						|
	short offset1, offset2;
 | 
						|
 | 
						|
	RTreeNode() {}
 | 
						|
 | 
						|
	RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2)
 | 
						|
		: offset1(y1*PATCH_SIZE + x1),
 | 
						|
		offset2(y2*PATCH_SIZE + x2)
 | 
						|
	{}
 | 
						|
 | 
						|
	//! Left child on 0, right child on 1
 | 
						|
	inline bool operator() (uchar* patch_data) const
 | 
						|
	{
 | 
						|
		return patch_data[offset1] > patch_data[offset2];
 | 
						|
	}
 | 
						|
};
 | 
						|
\end{lstlisting}
 | 
						|
 | 
						|
 | 
						|
\cvclass{RTreeClassifier}
 | 
						|
The class \texttt{RTreeClassifier} represents the Calonder descriptor, which was originally introduced by Michael Calonder.
 | 
						|
 | 
						|
\begin{lstlisting}
 | 
						|
class CV_EXPORTS RTreeClassifier
 | 
						|
{   
 | 
						|
public:
 | 
						|
	static const int DEFAULT_TREES = 48;
 | 
						|
	static const size_t DEFAULT_NUM_QUANT_BITS = 4;  
 | 
						|
 | 
						|
	RTreeClassifier();
 | 
						|
 | 
						|
	void train(std::vector<BaseKeypoint> const& base_set, 
 | 
						|
		cv::RNG &rng,
 | 
						|
		int num_trees = RTreeClassifier::DEFAULT_TREES,
 | 
						|
		int depth = DEFAULT_DEPTH,
 | 
						|
		int views = DEFAULT_VIEWS,
 | 
						|
		size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
 | 
						|
		int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
 | 
						|
			 bool print_status = true);
 | 
						|
	void train(std::vector<BaseKeypoint> const& base_set,
 | 
						|
		cv::RNG &rng, 
 | 
						|
		PatchGenerator &make_patch,
 | 
						|
		int num_trees = RTreeClassifier::DEFAULT_TREES,
 | 
						|
		int depth = DEFAULT_DEPTH,
 | 
						|
		int views = DEFAULT_VIEWS,
 | 
						|
		size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
 | 
						|
		int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
 | 
						|
		 bool print_status = true);
 | 
						|
 | 
						|
	// sig must point to a memory block of at least 
 | 
						|
	//classes()*sizeof(float|uchar) bytes
 | 
						|
	void getSignature(IplImage *patch, uchar *sig);
 | 
						|
	void getSignature(IplImage *patch, float *sig);
 | 
						|
	void getSparseSignature(IplImage *patch, float *sig,
 | 
						|
		 float thresh);
 | 
						|
		 
 | 
						|
	static int countNonZeroElements(float *vec, int n, double tol=1e-10);
 | 
						|
	static inline void safeSignatureAlloc(uchar **sig, int num_sig=1,
 | 
						|
			int sig_len=176);
 | 
						|
	static inline uchar* safeSignatureAlloc(int num_sig=1,
 | 
						|
			 int sig_len=176);  
 | 
						|
 | 
						|
	inline int classes() { return classes_; }
 | 
						|
	inline int original_num_classes()
 | 
						|
		 { return original_num_classes_; }
 | 
						|
 | 
						|
	void setQuantization(int num_quant_bits);
 | 
						|
	void discardFloatPosteriors();
 | 
						|
 | 
						|
	void read(const char* file_name);
 | 
						|
	void read(std::istream &is);
 | 
						|
	void write(const char* file_name) const;
 | 
						|
	void write(std::ostream &os) const;
 | 
						|
 | 
						|
	std::vector<RandomizedTree> trees_;
 | 
						|
 | 
						|
private:    
 | 
						|
	int classes_;
 | 
						|
	int num_quant_bits_;
 | 
						|
	uchar **posteriors_;
 | 
						|
	ushort *ptemp_;
 | 
						|
	int original_num_classes_;  
 | 
						|
	bool keep_floats_;
 | 
						|
};
 | 
						|
\end{lstlisting}
 | 
						|
 | 
						|
\cvCppFunc{RTreeClassifier::train}
 | 
						|
Trains a randomized tree classifier using an input set of keypoints
 | 
						|
\cvdefCpp{
 | 
						|
		void train(std::vector<BaseKeypoint> const\& base\_set, 
 | 
						|
			cv::RNG \&rng,
 | 
						|
			int num\_trees = RTreeClassifier::DEFAULT\_TREES,
 | 
						|
			int depth = DEFAULT\_DEPTH,
 | 
						|
			int views = DEFAULT\_VIEWS,
 | 
						|
			size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
 | 
						|
			int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
 | 
						|
			}
 | 
						|
\cvdefCpp{
 | 
						|
		void train(std::vector<BaseKeypoint> const\& base\_set,
 | 
						|
			cv::RNG \&rng, 
 | 
						|
			PatchGenerator \&make\_patch,
 | 
						|
			int num\_trees = RTreeClassifier::DEFAULT\_TREES,
 | 
						|
			int depth = DEFAULT\_DEPTH,
 | 
						|
			int views = DEFAULT\_VIEWS,
 | 
						|
			size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
 | 
						|
			int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
 | 
						|
}			
 | 
						|
\begin{description}
 | 
						|
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
 | 
						|
\cvarg{rng} {Random number generator used for training}
 | 
						|
\cvarg{make\_patch} {Patch generator used for training}
 | 
						|
\cvarg{num\_trees} {Number of randomized trees used in \texttt{RTreeClassifier}}
 | 
						|
\cvarg{depth} {Maximum tree depth}
 | 
						|
%\cvarg{views} {}
 | 
						|
\cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
 | 
						|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
 | 
						|
\cvarg{print\_status} {Print current status of training on the console}
 | 
						|
\end{description}		
 | 
						|
 | 
						|
\cvCppFunc{RTreeClassifier::getSignature}
 | 
						|
Returns signature for image patch 
 | 
						|
\cvdefCpp{
 | 
						|
void getSignature(IplImage *patch, uchar *sig)
 | 
						|
}
 | 
						|
\cvdefCpp{
 | 
						|
void getSignature(IplImage *patch, float *sig)
 | 
						|
}
 | 
						|
\begin{description}
 | 
						|
\cvarg{patch} {Image patch to calculate signature for}
 | 
						|
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
 | 
						|
\end{description}
 | 
						|
 | 
						|
\cvCppFunc{RTreeClassifier::getSparseSignature}
 | 
						|
The function is similar to \texttt{getSignature}, but it uses the threshold to remove all signature elements that are less than the threshold, so that the signature is compressed.
 | 
						|
\cvdefCpp{
 | 
						|
	void getSparseSignature(IplImage *patch, float *sig,
 | 
						|
		 float thresh);
 | 
						|
}
 | 
						|
\begin{description}
 | 
						|
\cvarg{patch} {Image patch to calculate signature for}
 | 
						|
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
 | 
						|
\cvarg{thresh} {The threshold that is used for compressing the signature}
 | 
						|
\end{description}
 | 
						|
 | 
						|
\cvCppFunc{RTreeClassifier::countNonZeroElements}
 | 
						|
The function returns the number of non-zero elements in the input array. 
 | 
						|
\cvdefCpp{
 | 
						|
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
 | 
						|
}
 | 
						|
\begin{description}
 | 
						|
\cvarg{vec}{Input vector contains float elements}
 | 
						|
\cvarg{n}{Input vector size}
 | 
						|
\cvarg{tol} {The threshold used for counting elements. All elements that are less than \texttt{tol} are treated as zero elements}
 | 
						|
\end{description}
 | 
						|
 | 
						|
\cvCppFunc{RTreeClassifier::read}
 | 
						|
Reads pre-saved RTreeClassifier from file or stream
 | 
						|
\cvdefCpp{void read(const char* file\_name);}	
 | 
						|
\cvdefCpp{void read(std::istream \&is);}	
 | 
						|
\begin{description}
 | 
						|
\cvarg{file\_name}{Name of the file that contains the randomized tree data}
 | 
						|
\cvarg{is}{Input stream associated with the file that contains the randomized tree data}
 | 
						|
\end{description}
 | 
						|
 | 
						|
\cvCppFunc{RTreeClassifier::write}
 | 
						|
Writes current RTreeClassifier to a file or stream
 | 
						|
\cvdefCpp{void write(const char* file\_name) const;}	
 | 
						|
\cvdefCpp{void write(std::ostream \&os) const;}	
 | 
						|
\begin{description}
 | 
						|
\cvarg{file\_name}{Filename of file where randomized tree data will be stored}
 | 
						|
\cvarg{os}{Output stream associated with the file where the randomized tree data will be stored}
 | 
						|
\end{description}
 | 
						|
 | 
						|
 | 
						|
\cvCppFunc{RTreeClassifier::setQuantization}
 | 
						|
Applies quantization to the current randomized tree
 | 
						|
\cvdefCpp{void setQuantization(int num\_quant\_bits)}
 | 
						|
\begin{description}
 | 
						|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
 | 
						|
\end{description}		
 | 
						|
 | 
						|
Below is an example of using \texttt{RTreeClassifier} for feature matching. There are a test image and a train image, and features are extracted from both with SURF. The output is the \texttt{best\_corr} and \texttt{best\_corr\_idx} arrays, which keep the best probability and the index of the corresponding train feature for every feature of the test image.
 | 
						|
% ===== Example. Using RTreeClassifier for features matching =====
 | 
						|
\begin{lstlisting}
 | 
						|
CvMemStorage* storage = cvCreateMemStorage(0);
 | 
						|
CvSeq *objectKeypoints = 0, *objectDescriptors = 0;
 | 
						|
CvSeq *imageKeypoints = 0, *imageDescriptors = 0;
 | 
						|
CvSURFParams params = cvSURFParams(500, 1);
 | 
						|
cvExtractSURF( test_image, 0, &imageKeypoints, &imageDescriptors,
 | 
						|
		 storage, params );
 | 
						|
cvExtractSURF( train_image, 0, &objectKeypoints, &objectDescriptors,
 | 
						|
		 storage, params );
 | 
						|
 | 
						|
cv::RTreeClassifier detector;
 | 
						|
int patch_width = cv::PATCH_SIZE;
 | 
						|
int patch_height = cv::PATCH_SIZE;
 | 
						|
vector<cv::BaseKeypoint> base_set;
 | 
						|
int i=0;
 | 
						|
CvSURFPoint* point;
 | 
						|
for (i=0;i<(n_points > 0 ? n_points : objectKeypoints->total);i++)
 | 
						|
{
 | 
						|
	point=(CvSURFPoint*)cvGetSeqElem(objectKeypoints,i);
 | 
						|
	base_set.push_back(
 | 
						|
		cv::BaseKeypoint(point->pt.x,point->pt.y,train_image));
 | 
						|
}
 | 
						|
 | 
						|
	//Detector training
 | 
						|
 cv::RNG rng( cvGetTickCount() );
 | 
						|
cv::PatchGenerator gen(0,255,2,false,0.7,1.3,-CV_PI/3,CV_PI/3,
 | 
						|
			-CV_PI/3,CV_PI/3);
 | 
						|
 | 
						|
printf("RTree Classifier training...\n");
 | 
						|
detector.train(base_set,rng,gen,24,cv::DEFAULT_DEPTH,2000,
 | 
						|
	(int)base_set.size(), detector.DEFAULT_NUM_QUANT_BITS);
 | 
						|
printf("Done\n");
 | 
						|
 | 
						|
float* signature = new float[detector.original_num_classes()];
 | 
						|
float* best_corr;
 | 
						|
int* best_corr_idx;
 | 
						|
if (imageKeypoints->total > 0)
 | 
						|
{
 | 
						|
	best_corr = new float[imageKeypoints->total];
 | 
						|
	best_corr_idx = new int[imageKeypoints->total];
 | 
						|
}
 | 
						|
 | 
						|
for(i=0; i < imageKeypoints->total; i++)
 | 
						|
{
 | 
						|
	point=(CvSURFPoint*)cvGetSeqElem(imageKeypoints,i);
 | 
						|
	int part_idx = -1;
 | 
						|
	float prob = 0.0f;
 | 
						|
 | 
						|
	CvRect roi = cvRect((int)(point->pt.x) - patch_width/2,
 | 
						|
		(int)(point->pt.y) - patch_height/2,
 | 
						|
		 patch_width, patch_height);
 | 
						|
	cvSetImageROI(test_image, roi);
 | 
						|
	roi = cvGetImageROI(test_image);
 | 
						|
	if(roi.width != patch_width || roi.height != patch_height)
 | 
						|
	{
 | 
						|
		best_corr_idx[i] = part_idx;
 | 
						|
		best_corr[i] = prob;
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		cvSetImageROI(test_image, roi);
 | 
						|
		IplImage* roi_image =
 | 
						|
			 cvCreateImage(cvSize(roi.width, roi.height),
 | 
						|
			 test_image->depth, test_image->nChannels);
 | 
						|
		cvCopy(test_image,roi_image);
 | 
						|
 | 
						|
		detector.getSignature(roi_image, signature);
 | 
						|
		for (int j = 0; j< detector.original_num_classes();j++)
 | 
						|
		{
 | 
						|
			if (prob < signature[j])
 | 
						|
			{
 | 
						|
				part_idx = j;
 | 
						|
				prob = signature[j];
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		best_corr_idx[i] = part_idx;
 | 
						|
		best_corr[i] = prob;
 | 
						|
 | 
						|
			
 | 
						|
		if (roi_image)
 | 
						|
			cvReleaseImage(&roi_image);
 | 
						|
	}
 | 
						|
	cvResetImageROI(test_image);
 | 
						|
}
 | 
						|
	
 | 
						|
\end{lstlisting}
 | 
						|
\fi
 |