added documentation on LatentSVM algorithm

This commit is contained in:
Alexey Polovinkin 2011-04-03 10:16:28 +00:00
parent b1e0f2a45e
commit da0cb51916
3 changed files with 224 additions and 68 deletions

View File

@ -661,7 +661,7 @@ drawChessboardCorners(img, patternsize, Mat(centers), patternfound);
\end{lstlisting}
You can find a printable asymmetric pattern of circles in the OpenCV
documentation folder (doc/acircles_pattern.png).
documentation folder (doc/acircles\_pattern.png).
\textbf{Note:} the function requires some white space (like a circle-thick
border, the wider the better) around the board to make the detection more robust in various environment.

View File

@ -637,3 +637,148 @@ Groups the object candidate rectangles
The function is a wrapper for a generic function \cvCppCross{partition}. It clusters all the input rectangles using the rectangle equivalence criteria, that combines rectangles that have similar sizes and similar locations (the similarity is defined by \texttt{eps}). When \texttt{eps=0}, no clustering is done at all. If $\texttt{eps}\rightarrow +\infty$, all the rectangles will be put in one cluster. Then, the small clusters, containing less than or equal to \texttt{groupThreshold} rectangles, will be rejected. In each other cluster the average rectangle will be computed and put into the output rectangle list.
\fi
\ifC
\section{Discriminatively Trained Part Based Models for Object Detection}
\subsection{Discriminatively Trained Part Based Models for Object Detection}
The object detector described below has been initially proposed by
P.F. Felzenszwalb in \cite{Felzenszwalb10}. It is based on a
Dalal-Triggs detector that uses a single filter on histogram of
oriented gradients (HOG) features to represent an object category.
This detector uses a sliding window approach, where a filter is
applied at all positions and scales of an image. The first
innovation is enriching the Dalal-Triggs model using a
star-structured part-based model defined by a "root" filter
(analogous to the Dalal-Triggs filter) plus a set of part filters
and associated deformation models. The score of one of the star models
at a particular position and scale within an image is the score of
the root filter at the given location plus the sum over parts of the
maximum, over placements of that part, of the part filter score on
its location minus a deformation cost measuring the deviation of the
part from its ideal location relative to the root. Both root and
part filter scores are defined by the dot product between a filter
(a set of weights) and a subwindow of a feature pyramid computed
from the input image. Another improvement is a representation of the
class of models by a mixture of star models. The score of a mixture
model at a particular position and scale is the maximum over
components, of the score of that component model at the given
location.
\fi
\ifC
\cvclass{CvLSVMFilterPosition, CvLSVMFilterObject,
CvLatentSvmDetector, CvObjectDetection}
\begin{lstlisting}
/* DataType: STRUCT position
Describes the position of a filter in the feature pyramid.
l - level in the feature pyramid
(x, y) - coordinates within level l */
typedef struct {
unsigned int x;
unsigned int y;
unsigned int l;
} CvLSVMFilterPosition;
/* DataType: STRUCT filterObject
Description of a filter that corresponds to a part of the object.
V - ideal (penalty = 0) position of the part filter
relative to the root filter position (V_i in the paper)
fineFunction - vector of penalty function coefficients (d_i in the paper);
with pf denoting this vector, the penalty is
pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
FILTER DESCRIPTION
Rectangular map (sizeX x sizeY),
every cell stores a feature vector (dimension = p)
H - matrix of feature vectors;
to set and get feature vectors (i,j)
use the formula H[(j * sizeX + i) * p + k], where
k - component of the feature vector in cell (i, j)
END OF FILTER DESCRIPTION
xp - auxiliary parameter for internal use:
size of a row in the feature vectors
(yp = (int) (p / xp); p = xp * yp) */
typedef struct{
CvLSVMFilterPosition V;
float fineFunction[4];
unsigned int sizeX;
unsigned int sizeY;
unsigned int p;
unsigned int xp;
float *H;
} CvLSVMFilterObject;
/* DataType: STRUCT CvLatentSvmDetector
Internal representation of a trained Latent SVM detector.
num_filters - total number of filters (root plus part) in the model
num_components - number of components in the model
num_part_filters - array containing the number of part filters
for each component
filters - root and part filters for all model components
b - biases for all model components
score_threshold - confidence level threshold */
typedef struct CvLatentSvmDetector {
int num_filters;
int num_components;
int* num_part_filters;
CvLSVMFilterObject** filters;
float* b;
float score_threshold;
} CvLatentSvmDetector;
/* DataType: STRUCT CvObjectDetection
Bounding box and confidence level for a detected object.
rect - bounding box of the detected object
score - confidence level of the detection */
typedef struct CvObjectDetection {
CvRect rect;
float score;
} CvObjectDetection;
\end{lstlisting}
\fi
\ifC
\cvCPyFunc{LoadLatentSvmDetector} Loads trained detector from a file
\cvdefC{ CvLatentSvmDetector* cvLoadLatentSvmDetector( \par const
char* filename); }
\begin{description}
\cvarg{filename}{Name of the file containing the description of a
trained detector}
\end{description}
\fi
\cvCPyFunc{LatentSvmDetectObjects} Detects objects in the image.
\cvdefC{ void cvLatentSvmDetectObjects( \par IplImage* image,
\par CvLatentSvmDetector* detector,
\par CvMemStorage* storage,
\par float overlap\_threshold CV\_DEFAULT(0.5f),
\par int numThreads CV\_DEFAULT(-1));
}
\begin{description}
\cvarg{image}{Image to detect objects in} \cvarg{detector}{LatentSVM
detector in internal representation} \cvarg{storage}{Memory storage
to store the resultant sequence of the object candidate rectangles}
\cvarg{overlap\_threshold}{Threshold for the non-maximum suppression
algorithm} \cvarg{numThreads}{Number of threads used in parallel
version of the algorithm}
\end{description}
% \begin{description}
% \cvarg{directory}{Name of the directory containing the description
% of a trained cascade classifier} \cvarg{orig\_window\_size}{Original
% size of the objects the cascade has been trained on. Note that it is
% not stored in the cascade and therefore must be specified
% separately}
% \end{description}

View File

@ -82,6 +82,18 @@
year = {2004}
}
@article{Felzenszwalb10,
author = {Felzenszwalb, P.F. and Girshick, R.B. and McAllester, D. and Ramanan, D.},
title = {Object Detection with Discriminatively Trained Part Based Models},
journal = {PAMI},
volume = {32},
year = {2010},
number = {9},
month = {September},
pages = {1627-1645},
bibsource = {http://www.visionbib.com/bibliography/bib/457.html#BB45794}
}
@article{Hartley99,
author = {Hartley, R.I.},
title = {Theory and Practice of Projective Rectification},
@ -311,4 +323,3 @@
# '''[Zhang96]''' Z. Zhang. Parameter Estimation Techniques: A Tutorial with Application to Conic Fitting, Image and Vision Computing Journal, 1996.
# '''[Zhang99]''' Z. Zhang. Flexible Camera Calibration By Viewing a Plane From Unknown Orientations. International Conference on Computer Vision (ICCV'99), Corfu, Greece, pages 666-673, September 1999.
# '''[Zhang00]''' Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.