added documentation on LatentSVM algorithm
This commit is contained in:
parent
b1e0f2a45e
commit
da0cb51916
@ -661,7 +661,7 @@ drawChessboardCorners(img, patternsize, Mat(centers), patternfound);
|
||||
\end{lstlisting}
|
||||
|
||||
You can find a printable asymmetric pattern of circles in the OpenCV
|
||||
documentation folder (doc/acircles_pattern.png).
|
||||
documentation folder (doc/acircles\_pattern.png).
|
||||
|
||||
\textbf{Note:} the function requires some white space (like a circle-thick
|
||||
border, the wider the better) around the board to make the detection more robust in various environments.
|
||||
|
@ -637,3 +637,148 @@ Groups the object candidate rectangles
|
||||
|
||||
The function is a wrapper for a generic function \cvCppCross{partition}. It clusters all the input rectangles using the rectangle equivalence criterion that combines rectangles that have similar sizes and similar locations (the similarity is defined by \texttt{eps}). When \texttt{eps=0}, no clustering is done at all. If $\texttt{eps}\rightarrow +\infty$, all the rectangles will be put in one cluster. Then, the small clusters, containing less than or equal to \texttt{groupThreshold} rectangles, will be rejected. In each other cluster the average rectangle will be computed and put into the output rectangle list.
|
||||
\fi
|
||||
|
||||
\ifC
|
||||
|
||||
\section{Discriminatively Trained Part Based Models for Object Detection}
|
||||
|
||||
\subsection{Discriminatively Trained Part Based Models for Object Detection}
|
||||
|
||||
The object detector described below was initially proposed by
|
||||
P.F. Felzenszwalb in \cvCPyCross{Felzenszwalb10}. It is based on a
|
||||
Dalal-Triggs detector that uses a single filter on histogram of
|
||||
oriented gradients (HOG) features to represent an object category.
|
||||
This detector uses a sliding window approach, where a filter is
|
||||
applied at all positions and scales of an image. The first
|
||||
innovation is enriching the Dalal-Triggs model using a
|
||||
star-structured part-based model defined by a ``root'' filter
|
||||
(analogous to the Dalal-Triggs filter) plus a set of part filters
|
||||
and associated deformation models. The score of one of the star models
|
||||
at a particular position and scale within an image is the score of
|
||||
the root filter at the given location plus the sum over parts of the
|
||||
maximum, over placements of that part, of the part filter score on
|
||||
its location minus a deformation cost measuring the deviation of the
|
||||
part from its ideal location relative to the root. Both root and
|
||||
part filter scores are defined by the dot product between a filter
|
||||
(a set of weights) and a subwindow of a feature pyramid computed
|
||||
from the input image. Another improvement is a representation of the
|
||||
class of models by a mixture of star models. The score of a mixture
|
||||
model at a particular position and scale is the maximum over
|
||||
components, of the score of that component model at the given
|
||||
location.
|
||||
|
||||
\fi
|
||||
|
||||
\ifC
|
||||
|
||||
\cvclass{CvLSVMFilterPosition, CvLSVMFilterObject,
|
||||
CvLatentSvmDetector, CvObjectDetection}
|
||||
|
||||
\begin{lstlisting}
|
||||
/* DataType: STRUCT position
|
||||
Structure describes the position of the filter in the feature pyramid
|
||||
l - level in the feature pyramid
|
||||
(x, y) - coordinate in level l */
|
||||
typedef struct {
|
||||
unsigned int x;
|
||||
unsigned int y;
|
||||
unsigned int l;
|
||||
} CvLSVMFilterPosition;
|
||||
|
||||
/* DataType: STRUCT filterObject
|
||||
Description of the filter, which corresponds to the part of the object
|
||||
V - ideal (penalty = 0) position of the partial filter
|
||||
from the root filter position (V_i in the paper)
|
||||
fineFunction - vector describing the penalty function (d_i in the paper)
|
||||
pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
|
||||
FILTER DESCRIPTION
|
||||
Rectangular map (sizeX x sizeY),
|
||||
every cell stores feature vector (dimension = p)
|
||||
H - matrix of feature vectors
|
||||
to set and get feature vectors (i,j)
|
||||
used formula H[(j * sizeX + i) * p + k], where
|
||||
k - component of feature vector in cell (i, j)
|
||||
END OF FILTER DESCRIPTION
|
||||
xp - auxiliary parameter for internal use
|
||||
size of row in feature vectors
|
||||
(yp = (int) (p / xp); p = xp * yp) */
|
||||
typedef struct{
|
||||
CvLSVMFilterPosition V;
|
||||
float fineFunction[4];
|
||||
unsigned int sizeX;
|
||||
unsigned int sizeY;
|
||||
unsigned int p;
|
||||
unsigned int xp;
|
||||
float *H;
|
||||
} CvLSVMFilterObject;
|
||||
|
||||
/* data type: STRUCT CvLatentSvmDetector
|
||||
structure contains internal representation of trained Latent SVM detector
|
||||
num_filters - total number of filters (root plus part) in model
|
||||
num_components - number of components in model
|
||||
num_part_filters - array containing number of part filters for each component
|
||||
filters - root and part filters for all model components
|
||||
b - biases for all model components
|
||||
score_threshold - confidence level threshold */
|
||||
typedef struct CvLatentSvmDetector {
|
||||
int num_filters;
|
||||
int num_components;
|
||||
int* num_part_filters;
|
||||
CvLSVMFilterObject** filters;
|
||||
float* b;
|
||||
float score_threshold;
|
||||
} CvLatentSvmDetector;
|
||||
|
||||
/* data type: STRUCT CvObjectDetection
|
||||
structure contains the bounding box and confidence level for detected object
|
||||
rect - bounding box for a detected object
|
||||
score - confidence level */
|
||||
typedef struct CvObjectDetection {
|
||||
CvRect rect;
|
||||
float score;
|
||||
} CvObjectDetection;
|
||||
|
||||
\end{lstlisting}
|
||||
|
||||
\fi
|
||||
|
||||
|
||||
\ifC
|
||||
|
||||
\cvCPyFunc{LoadLatentSvmDetector} Loads a trained detector from a file.
|
||||
|
||||
\cvdefC{ CvLatentSvmDetector* cvLoadLatentSvmDetector( \par const
|
||||
char* filename); }
|
||||
|
||||
\begin{description}
|
||||
\cvarg{filename}{Name of the file containing the description of a
|
||||
trained detector}
|
||||
\end{description}
|
||||
|
||||
\fi
|
||||
|
||||
\cvCPyFunc{LatentSvmDetectObjects} Detects objects in the image.
|
||||
|
||||
\cvdefC{ void cvLatentSvmDetectObjects( \par IplImage* image,
|
||||
\par CvLatentSvmDetector* detector,
|
||||
\par CvMemStorage* storage,
|
||||
\par float overlap\_threshold CV\_DEFAULT(0.5f),
|
||||
\par int numThreads CV\_DEFAULT(-1));
|
||||
}
|
||||
|
||||
\begin{description}
|
||||
\cvarg{image}{Image to detect objects in} \cvarg{detector}{LatentSVM
|
||||
detector in internal representation} \cvarg{storage}{Memory storage
|
||||
to store the resultant sequence of the object candidate rectangles}
|
||||
\cvarg{overlap\_threshold}{Threshold for the non-maximum suppression
|
||||
algorithm} \cvarg{numThreads}{Number of threads used in parallel
|
||||
version of the algorithm}
|
||||
\end{description}
|
||||
|
||||
% \begin{description}
|
||||
% \cvarg{directory}{Name of the directory containing the description
|
||||
% of a trained cascade classifier} \cvarg{orig\_window\_size}{Original
|
||||
% size of the objects the cascade has been trained on. Note that it is
|
||||
% not stored in the cascade and therefore must be specified
|
||||
% separately}
|
||||
% \end{description}
|
||||
|
@ -82,6 +82,18 @@
|
||||
year = {2004}
|
||||
}
|
||||
|
||||
@article{Felzenszwalb10,
|
||||
author = {Felzenszwalb, P.F. and Girshick, R.B. and McAllester, D. and Ramanan, D.},
|
||||
title = {Object Detection with Discriminatively Trained Part Based Models},
|
||||
journal = {PAMI},
|
||||
volume = {32},
|
||||
year = {2010},
|
||||
number = {9},
|
||||
month = {September},
|
||||
pages = {1627-1645},
|
||||
bibsource = {http://www.visionbib.com/bibliography/bib/457.html#BB45794}
|
||||
}
|
||||
|
||||
@article{Hartley99,
|
||||
author = {Hartley, R.I.},
|
||||
title = {Theory and Practice of Projective Rectification},
|
||||
@ -311,4 +323,3 @@
|
||||
# '''[Zhang96]''' Z. Zhang. Parameter Estimation Techniques: A Tutorial with Application to Conic Fitting, Image and Vision Computing Journal, 1996.
|
||||
# '''[Zhang99]''' Z. Zhang. Flexible Camera Calibration By Viewing a Plane From Unknown Orientations. International Conference on Computer Vision (ICCV'99), Corfu, Greece, pages 666-673, September 1999.
|
||||
# '''[Zhang00]''' Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user