added documentation on LatentSVM algorithm

2011-04-03 10:16:28 +00:00
parent b1e0f2a45e
commit da0cb51916
3 changed files with 224 additions and 68 deletions
--- a/doc/calib3d.tex
+++ b/doc/calib3d.tex
@@ -661,7 +661,7 @@ drawChessboardCorners(img, patternsize, Mat(centers), patternfound);
 \end{lstlisting}

 You can find a printable asymmetric pattern of circles in the OpenCV
-documentation folder (doc/acircles_pattern.png).
+documentation folder (doc/acircles\_pattern.png).

 \textbf{Note:} the function requires some white space (like a circle-thick
 border, the wider the better) around the board to make the detection more robust in various environment.
--- a/doc/objdetect.tex
+++ b/doc/objdetect.tex
@@ -637,3 +637,148 @@ Groups the object candidate rectangles

 The function is a wrapper for a generic function \cvCppCross{partition}. It clusters all the input rectangles using the rectangle equivalence criteria, that combines rectangles that have similar sizes and similar locations (the similarity is defined by \texttt{eps}). When \texttt{eps=0}, no clustering is done at all. If $\texttt{eps}\rightarrow +\inf$, all the rectangles will be put in one cluster. Then, the small clusters, containing less than or equal to \texttt{groupThreshold} rectangles, will be rejected. In each other cluster the average rectangle will be computed and put into the output rectangle list.
 \fi
+
+\ifC
+
+\section{Discriminatively Trained Part Based Models for Object Detection}
+
+\subsection{Discriminatively Trained Part Based Models for Object Detection}
+
+The object detector described below was originally proposed by
+P.F. Felzenszwalb in \cvCPyCross{Felzenszwalb10}.  It is based on a
+Dalal-Triggs detector that uses a single filter on histogram of
+oriented gradients (HOG) features to represent an object category.
+This detector uses a sliding window approach, where a filter is
+applied at all positions and scales of an image. The first
+innovation is enriching the Dalal-Triggs model using a
+star-structured part-based model defined by a ``root'' filter
+(analogous to the Dalal-Triggs filter) plus a set of part filters
+and associated deformation models. The score of one of the star models
+at a particular position and scale within an image is the score of
+the root filter at the given location plus the sum over parts of the
+maximum, over placements of that part, of the part filter score on
+its location minus a deformation cost measuring the deviation of the
+part from its ideal location relative to the root. Both root and
+part filter scores are defined by the dot product between a filter
+(a set of weights) and a subwindow of a feature pyramid computed
+from the input image. Another improvement is a representation of the
+class of models by a mixture of star models. The score of a mixture
+model at a particular position and scale is the maximum over
+components, of the score of that component model at the given
+location.
+
+\fi
+
+\ifC
+
+\cvclass{CvLSVMFilterPosition, CvLSVMFilterObject,
+CvLatentSvmDetector, CvObjectDetection}
+
+\begin{lstlisting}
+/* DataType: STRUCT position
+   Structure describes the position of the filter in the feature pyramid
+   l - level in the feature pyramid
+   (x, y) - coordinate in level l */
+typedef struct {
+    unsigned int x;
+    unsigned int y;
+    unsigned int l;
+} CvLSVMFilterPosition;
+
+/* DataType: STRUCT filterObject
+   Description of the filter, which corresponds to the part of the object
+   V               - ideal (penalty = 0) position of the partial filter
+                     from the root filter position (V_i in the paper)
+   fineFunction    - vector describes penalty function (d_i in the paper)
+                     pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
+   FILTER DESCRIPTION
+     Rectangular map (sizeX x sizeY),
+     every cell stores feature vector (dimension = p)
+   H               - matrix of feature vectors
+                     to set and get feature vectors (i,j)
+                     used formula H[(j * sizeX + i) * p + k], where
+                     k - component of feature vector in cell (i, j)
+   END OF FILTER DESCRIPTION
+   xp              - auxiliary parameter for internal use
+                     size of row in feature vectors
+                     (yp = (int) (p / xp); p = xp * yp) */
+typedef struct{
+    CvLSVMFilterPosition V;
+    float fineFunction[4];
+    unsigned int sizeX;
+    unsigned int sizeY;
+    unsigned int p;
+    unsigned int xp;
+    float *H;
+} CvLSVMFilterObject;
+
+/* data type: STRUCT CvLatentSvmDetector
+   structure contains internal representation of trained Latent SVM detector
+   num_filters          - total number of filters (root plus part) in model
+   num_components       - number of components in model
+   num_part_filters     - array containing number of part filters for each component
+   filters              - root and part filters for all model components
+   b                    - biases for all model components
+   score_threshold      - confidence level threshold */
+typedef struct CvLatentSvmDetector {
+    int num_filters;
+    int num_components;
+    int* num_part_filters;
+    CvLSVMFilterObject** filters;
+    float* b;
+    float score_threshold;
+} CvLatentSvmDetector;
+
+/* data type: STRUCT CvObjectDetection
+   structure contains the bounding box and confidence level for detected object
+   rect                 - bounding box for a detected object
+   score                - confidence level */
+typedef struct CvObjectDetection {
+    CvRect rect;
+    float score;
+} CvObjectDetection;
+
+\end{lstlisting}
+
+\fi
+
+
+\ifC
+
+\cvCPyFunc{LoadLatentSvmDetector} Loads trained detector from a file.
+
+\cvdefC{ CvLatentSvmDetector* cvLoadLatentSvmDetector( \par const
+char* filename); }
+
+\begin{description}
+\cvarg{filename}{Name of the file containing the description of a
+trained detector}
+\end{description}
+
+\fi
+
+\cvCPyFunc{LatentSvmDetectObjects} Detects objects in the image.
+
+\cvdefC{ void cvLatentSvmDetectObjects( \par IplImage* image,
+                                \par CvLatentSvmDetector* detector,
+                                \par CvMemStorage* storage,
+                                \par float overlap\_threshold CV\_DEFAULT(0.5f),
+                                \par int numThreads CV\_DEFAULT(-1));
+}
+
+\begin{description}
+\cvarg{image}{Image to detect objects in} \cvarg{detector}{LatentSVM
+detector in internal representation} \cvarg{storage}{Memory storage
+to store the resultant sequence of the object candidate rectangles}
+\cvarg{overlap\_threshold}{Threshold for the non-maximum suppression
+algorithm} \cvarg{numThreads}{Number of threads used in parallel
+version of the algorithm}
+\end{description}
+
+% \begin{description}
+% \cvarg{directory}{Name of the directory containing the description
+% of a trained cascade classifier} \cvarg{orig\_window\_size}{Original
+% size of the objects the cascade has been trained on. Note that it is
+% not stored in the cascade and therefore must be specified
+% separately}
+% \end{description}
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@@ -82,6 +82,18 @@
    year = {2004}
 }

+@article{Felzenszwalb10,
+    author = {Felzenszwalb, P.F. and Girshick, R.B. and McAllester, D. and Ramanan, D.},
+    title = {Object Detection with Discriminatively Trained Part Based Models},
+    journal = {PAMI},
+    volume = {32},
+    year = {2010},
+    number = {9},
+    month = {September},
+    pages = {1627-1645},
+    bibsource = {http://www.visionbib.com/bibliography/bib/457.html#BB45794}
+}
+
@article{Hartley99,
    author = {Hartley, R.I.},
    title = {Theory and Practice of Projective Rectification},
@@ -311,4 +323,3 @@
 # '''[Zhang96]''' Z. Zhang. Parameter Estimation Techniques: A Tutorial with Application to Conic Fitting, Image and Vision Computing Journal, 1996.
 # '''[Zhang99]''' Z. Zhang. Flexible Camera Calibration By Viewing a Plane From Unknown Orientations. International Conference on Computer Vision (ICCV'99), Corfu, Greece, pages 666-673, September 1999.
 # '''[Zhang00]''' Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.
-