added documentation on LatentSVM algorithm
This commit is contained in:
parent
b1e0f2a45e
commit
da0cb51916
@ -609,8 +609,8 @@ vector<Point2f> corners; //this will be filled by the detected corners
|
|||||||
//CALIB_CB_FAST_CHECK saves a lot of time on images
|
//CALIB_CB_FAST_CHECK saves a lot of time on images
|
||||||
//that don't contain any chessboard corners
|
//that don't contain any chessboard corners
|
||||||
bool patternfound = findChessboardCorners(gray, patternsize, corners,
|
bool patternfound = findChessboardCorners(gray, patternsize, corners,
|
||||||
CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE
|
CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE
|
||||||
+ CALIB_CB_FAST_CHECK);
|
+ CALIB_CB_FAST_CHECK);
|
||||||
|
|
||||||
if(patternfound)
|
if(patternfound)
|
||||||
cornerSubPix(gray, corners, Size(11, 11), Size(-1, -1),
|
cornerSubPix(gray, corners, Size(11, 11), Size(-1, -1),
|
||||||
@ -661,7 +661,7 @@ drawChessboardCorners(img, patternsize, Mat(centers), patternfound);
|
|||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
|
||||||
You can find a printable asymmetric pattern of circles in the OpenCV
|
You can find a printable asymmetric pattern of circles in the OpenCV
|
||||||
documentation folder (doc/acircles_pattern.png).
|
documentation folder (doc/acircles\_pattern.png).
|
||||||
|
|
||||||
\textbf{Note:} the function requires some white space (like a circle-thick
|
\textbf{Note:} the function requires some white space (like a circle-thick
|
||||||
border, the wider the better) around the board to make the detection more robust in various environments.
|
border, the wider the better) around the board to make the detection more robust in various environments.
|
||||||
|
@ -477,7 +477,7 @@ The cascade classifier class for object detection.
|
|||||||
class CascadeClassifier
|
class CascadeClassifier
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
// structure for storing tree node
|
// structure for storing tree node
|
||||||
struct CV_EXPORTS DTreeNode
|
struct CV_EXPORTS DTreeNode
|
||||||
{
|
{
|
||||||
int featureIdx; // feature index on which is a split
|
int featureIdx; // feature index on which is a split
|
||||||
@ -518,7 +518,7 @@ public:
|
|||||||
|
|
||||||
void detectMultiScale( const Mat& image, vector<Rect>& objects,
|
void detectMultiScale( const Mat& image, vector<Rect>& objects,
|
||||||
double scaleFactor=1.1, int minNeighbors=3,
|
double scaleFactor=1.1, int minNeighbors=3,
|
||||||
int flags=0, Size minSize=Size());
|
int flags=0, Size minSize=Size());
|
||||||
|
|
||||||
bool setImage( Ptr<FeatureEvaluator>&, const Mat& );
|
bool setImage( Ptr<FeatureEvaluator>&, const Mat& );
|
||||||
int runAt( Ptr<FeatureEvaluator>&, Point );
|
int runAt( Ptr<FeatureEvaluator>&, Point );
|
||||||
@ -637,3 +637,148 @@ Groups the object candidate rectangles
|
|||||||
|
|
||||||
The function is a wrapper for a generic function \cvCppCross{partition}. It clusters all the input rectangles using the rectangle equivalence criteria, that combines rectangles that have similar sizes and similar locations (the similarity is defined by \texttt{eps}). When \texttt{eps=0}, no clustering is done at all. If $\texttt{eps}\rightarrow +\infty$, all the rectangles will be put in one cluster. Then, the small clusters, containing less than or equal to \texttt{groupThreshold} rectangles, will be rejected. In each other cluster the average rectangle will be computed and put into the output rectangle list.
|
The function is a wrapper for a generic function \cvCppCross{partition}. It clusters all the input rectangles using the rectangle equivalence criteria, that combines rectangles that have similar sizes and similar locations (the similarity is defined by \texttt{eps}). When \texttt{eps=0}, no clustering is done at all. If $\texttt{eps}\rightarrow +\infty$, all the rectangles will be put in one cluster. Then, the small clusters, containing less than or equal to \texttt{groupThreshold} rectangles, will be rejected. In each other cluster the average rectangle will be computed and put into the output rectangle list.
|
||||||
\fi
|
\fi
|
||||||
|
|
||||||
|
\ifC
|
||||||
|
|
||||||
|
\section{Discriminatively Trained Part Based Models for Object Detection}
|
||||||
|
|
||||||
|
\subsection{Discriminatively Trained Part Based Models for Object Detection}
|
||||||
|
|
||||||
|
The object detector described below has been initially proposed by
|
||||||
|
P.F. Felzenszwalb in \cvCPyCross{Felzenszwalb10}. It is based on a
|
||||||
|
Dalal-Triggs detector that uses a single filter on histogram of
|
||||||
|
oriented gradients (HOG) features to represent an object category.
|
||||||
|
This detector uses a sliding window approach, where a filter is
|
||||||
|
applied at all positions and scales of an image. The first
|
||||||
|
innovation is enriching the Dalal-Triggs model using a
|
||||||
|
star-structured part-based model defined by a ``root'' filter
|
||||||
|
(analogous to the Dalal-Triggs filter) plus a set of parts filters
|
||||||
|
and associated deformation models. The score of one of the star models
|
||||||
|
at a particular position and scale within an image is the score of
|
||||||
|
the root filter at the given location plus the sum over parts of the
|
||||||
|
maximum, over placements of that part, of the part filter score on
|
||||||
|
its location minus a deformation cost measuring the deviation of the
|
||||||
|
part from its ideal location relative to the root. Both root and
|
||||||
|
part filter scores are defined by the dot product between a filter
|
||||||
|
(a set of weights) and a subwindow of a feature pyramid computed
|
||||||
|
from the input image. Another improvement is a representation of the
|
||||||
|
class of models by a mixture of star models. The score of a mixture
|
||||||
|
model at a particular position and scale is the maximum over
|
||||||
|
components, of the score of that component model at the given
|
||||||
|
location.
|
||||||
|
|
||||||
|
\fi
|
||||||
|
|
||||||
|
\ifC
|
||||||
|
|
||||||
|
\cvclass{CvLSVMFilterPosition, CvLSVMFilterObject,
|
||||||
|
CvLatentSvmDetector, CvObjectDetection}
|
||||||
|
|
||||||
|
\begin{lstlisting}
|
||||||
|
/* DataType: STRUCT position
|
||||||
|
Structure describes the position of the filter in the feature pyramid
|
||||||
|
l - level in the feature pyramid
|
||||||
|
(x, y) - coordinate in level l */
|
||||||
|
typedef struct {
|
||||||
|
unsigned int x;
|
||||||
|
unsigned int y;
|
||||||
|
unsigned int l;
|
||||||
|
} CvLSVMFilterPosition;
|
||||||
|
|
||||||
|
/* DataType: STRUCT filterObject
|
||||||
|
Description of the filter, which corresponds to the part of the object
|
||||||
|
V - ideal (penalty = 0) position of the partial filter
|
||||||
|
from the root filter position (V_i in the paper)
|
||||||
|
penaltyFunction - vector describes penalty function (d_i in the paper)
|
||||||
|
pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
|
||||||
|
FILTER DESCRIPTION
|
||||||
|
Rectangular map (sizeX x sizeY),
|
||||||
|
every cell stores feature vector (dimension = p)
|
||||||
|
H - matrix of feature vectors
|
||||||
|
to set and get feature vectors (i,j)
|
||||||
|
used formula H[(j * sizeX + i) * p + k], where
|
||||||
|
k - component of feature vector in cell (i, j)
|
||||||
|
END OF FILTER DESCRIPTION
|
||||||
|
xp - auxiliary parameter for internal use
|
||||||
|
size of row in feature vectors
|
||||||
|
(yp = (int) (p / xp); p = xp * yp) */
|
||||||
|
typedef struct{
|
||||||
|
CvLSVMFilterPosition V;
|
||||||
|
float fineFunction[4];
|
||||||
|
unsigned int sizeX;
|
||||||
|
unsigned int sizeY;
|
||||||
|
unsigned int p;
|
||||||
|
unsigned int xp;
|
||||||
|
float *H;
|
||||||
|
} CvLSVMFilterObject;
|
||||||
|
|
||||||
|
/* data type: STRUCT CvLatentSvmDetector
|
||||||
|
structure contains internal representation of trained Latent SVM detector
|
||||||
|
num_filters - total number of filters (root plus part) in model
|
||||||
|
num_components - number of components in model
|
||||||
|
num_part_filters - array containing number of part filters for each component
|
||||||
|
filters - root and part filters for all model components
|
||||||
|
b - biases for all model components
|
||||||
|
score_threshold - confidence level threshold */
|
||||||
|
typedef struct CvLatentSvmDetector {
|
||||||
|
int num_filters;
|
||||||
|
int num_components;
|
||||||
|
int* num_part_filters;
|
||||||
|
CvLSVMFilterObject** filters;
|
||||||
|
float* b;
|
||||||
|
float score_threshold;
|
||||||
|
} CvLatentSvmDetector;
|
||||||
|
|
||||||
|
/* data type: STRUCT CvObjectDetection
|
||||||
|
structure contains the bounding box and confidence level for detected object
|
||||||
|
rect - bounding box for a detected object
|
||||||
|
score - confidence level */
|
||||||
|
typedef struct CvObjectDetection {
|
||||||
|
CvRect rect;
|
||||||
|
float score;
|
||||||
|
} CvObjectDetection;
|
||||||
|
|
||||||
|
\end{lstlisting}
|
||||||
|
|
||||||
|
\fi
|
||||||
|
|
||||||
|
|
||||||
|
\ifC
|
||||||
|
|
||||||
|
\cvCPyFunc{LoadLatentSvmDetector} Loads trained detector from a file
|
||||||
|
|
||||||
|
\cvdefC{ CvLatentSvmDetector* cvLoadLatentSvmDetector( \par const
|
||||||
|
char* filename); }
|
||||||
|
|
||||||
|
\begin{description}
|
||||||
|
\cvarg{filename}{Name of the file containing the description of a
|
||||||
|
trained detector}
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
\fi
|
||||||
|
|
||||||
|
\cvCPyFunc{LatentSvmDetectObjects} Detects objects in the image.
|
||||||
|
|
||||||
|
\cvdefC{ void cvLatentSvmDetectObjects( \par IplImage* image,
|
||||||
|
\par CvLatentSvmDetector* detector,
|
||||||
|
\par CvMemStorage* storage,
|
||||||
|
\par float overlap\_threshold CV\_DEFAULT(0.5f),
|
||||||
|
\par int numThreads CV\_DEFAULT(-1));
|
||||||
|
}
|
||||||
|
|
||||||
|
\begin{description}
|
||||||
|
\cvarg{image}{Image to detect objects in} \cvarg{detector}{LatentSVM
|
||||||
|
detector in internal representation} \cvarg{storage}{Memory storage
|
||||||
|
to store the resultant sequence of the object candidate rectangles}
|
||||||
|
\cvarg{overlap\_threshold}{Threshold for the non-maximum suppression
|
||||||
|
algorithm} \cvarg{numThreads}{Number of threads used in parallel
|
||||||
|
version of the algorithm}
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
% \begin{description}
|
||||||
|
% \cvarg{directory}{Name of the directory containing the description
|
||||||
|
% of a trained cascade classifier} \cvarg{orig\_window\_size}{Original
|
||||||
|
% size of the objects the cascade has been trained on. Note that it is
|
||||||
|
% not stored in the cascade and therefore must be specified
|
||||||
|
% separately}
|
||||||
|
% \end{description}
|
||||||
|
@ -82,6 +82,18 @@
|
|||||||
year = {2004}
|
year = {2004}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@article{Felzenszwalb10,
|
||||||
|
author = {Felzenszwalb, P.F. and Girshick, R.B. and McAllester, D. and Ramanan, D.},
|
||||||
|
title = {Object Detection with Discriminatively Trained Part Based Models},
|
||||||
|
journal = {PAMI},
|
||||||
|
volume = {32},
|
||||||
|
year = {2010},
|
||||||
|
number = {9},
|
||||||
|
month = {September},
|
||||||
|
pages = {1627--1645},
|
||||||
|
bibsource = {http://www.visionbib.com/bibliography/bib/457.html#BB45794}
|
||||||
|
}
|
||||||
|
|
||||||
@article{Hartley99,
|
@article{Hartley99,
|
||||||
author = {Hartley, R.I.},
|
author = {Hartley, R.I.},
|
||||||
title = {Theory and Practice of Projective Rectification},
|
title = {Theory and Practice of Projective Rectification},
|
||||||
@ -311,4 +323,3 @@
|
|||||||
# '''[Zhang96]''' Z. Zhang. Parameter Estimation Techniques: A Tutorial with Application to Conic Fitting, Image and Vision Computing Journal, 1996.
|
# '''[Zhang96]''' Z. Zhang. Parameter Estimation Techniques: A Tutorial with Application to Conic Fitting, Image and Vision Computing Journal, 1996.
|
||||||
# '''[Zhang99]''' Z. Zhang. Flexible Camera Calibration By Viewing a Plane From Unknown Orientations. International Conference on Computer Vision (ICCV'99), Corfu, Greece, pages 666-673, September 1999.
|
# '''[Zhang99]''' Z. Zhang. Flexible Camera Calibration By Viewing a Plane From Unknown Orientations. International Conference on Computer Vision (ICCV'99), Corfu, Greece, pages 666-673, September 1999.
|
||||||
# '''[Zhang00]''' Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.
|
# '''[Zhang00]''' Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user