added documentation on LatentSVM algorithm

This commit is contained in:
Alexey Polovinkin 2011-04-03 10:16:28 +00:00
parent b1e0f2a45e
commit da0cb51916
3 changed files with 224 additions and 68 deletions

View File

@ -421,12 +421,12 @@ typedef struct CvStereoBMState
int speckleWindowSize; // the maximum area of speckles to remove
// (set to 0 to disable speckle filtering)
int speckleRange; // acceptable range of disparity variation in each connected component
int trySmallerWindows; // not used
CvRect roi1, roi2; // clipping ROIs
int disp12MaxDiff; // maximum allowed disparity difference in the left-right check
// internal data
...
}
@ -606,16 +606,16 @@ Size patternsize(8,6); //interior number of corners
Mat gray = ....; //source image
vector<Point2f> corners; //this will be filled by the detected corners
//CALIB_CB_FAST_CHECK saves a lot of time on images
//that don't contain any chessboard corners
bool patternfound = findChessboardCorners(gray, patternsize, corners,
CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE
+ CALIB_CB_FAST_CHECK);
if(patternfound)
cornerSubPix(gray, corners, Size(11, 11), Size(-1, -1),
TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 30, 0.1));
drawChessboardCorners(img, patternsize, Mat(corners), patternfound);
\end{lstlisting}
@ -656,12 +656,12 @@ Mat gray = ....; //source image
vector<Point2f> centers; //this will be filled by the detected centers
bool patternfound = findCirclesGrid(gray, patternsize, centers);
drawChessboardCorners(img, patternsize, Mat(centers), patternfound);
\end{lstlisting}
You can find a printable asymmetric pattern of circles in the OpenCV
documentation folder (doc/acircles\_pattern.png).
\textbf{Note:} the function requires some white space (like a circle-thick
border, the wider the better) around the board to make the detection more robust in various environments.
@ -920,7 +920,7 @@ void cvFindStereoCorrespondenceGC( \par const CvArr* left, \par const CvArr* rig
\cvarg{useDisparityGuess}{If the parameter is not zero, the algorithm will start with pre-defined disparity maps. Both dispLeft and dispRight should be valid disparity maps. Otherwise, the function starts with blank disparity maps (all pixels are marked as occlusions).}
\end{description}
The function computes disparity maps for the input rectified stereo pair. Note that the left disparity image will contain values in the following range:
\[
-\texttt{state->numberOfDisparities}-\texttt{state->minDisparity}
@ -932,10 +932,10 @@ or
dispLeft(x,y) == \texttt{CV\_STEREO\_GC\_OCCLUSION}
\]
and for the right disparity image the following will be true:
\[
\texttt{state->minDisparity} \le dispRight(x,y)
< \texttt{state->minDisparity} + \texttt{state->numberOfDisparities}
\]
@ -1039,7 +1039,7 @@ Returns the new camera matrix based on the free scaling parameter
\par const Mat\& cameraMatrix, const Mat\& distCoeffs,
\par Size imageSize, double alpha, Size newImageSize=Size(),
\par Rect* validPixROI=0);}
\begin{description}
\cvarg{cameraMatrix}{The input camera matrix}
\cvarg{distCoeffs}{The input vector of distortion coefficients $(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])$ of 4, 5 or 8 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed.}
@ -1064,7 +1064,7 @@ Finds the initial camera matrix from the 3D-2D point correspondences
\par const CvMat* imagePoints,
\par const CvMat* npoints, CvSize imageSize,
\par CvMat* cameraMatrix,
\par double aspectRatio=1.);}
\cvdefPy{InitIntrinsicParams2D(objectPoints, imagePoints, npoints, imageSize, cameraMatrix, aspectRatio=1.) -> None}
\cvdefCpp{Mat initCameraMatrix2D( const vector<vector<Point3f> >\& objectPoints,\par
const vector<vector<Point2f> >\& imagePoints,\par
@ -1074,7 +1074,7 @@ Finds the initial camera matrix from the 3D-2D point correspondences
\cvarg{objectPoints}{The joint array of object points; see \cvCross{CalibrateCamera2}{calibrateCamera}}
\cvarg{imagePoints}{The joint array of object point projections; see \cvCross{CalibrateCamera2}{calibrateCamera}}
\cvarg{npoints}{The array of point counts; see \cvCross{CalibrateCamera2}{calibrateCamera}}
\fi
\ifCpp
\cvarg{objectPoints}{The vector of vectors of the object points. See \cvCppCross{calibrateCamera}}
\cvarg{imagePoints}{The vector of vectors of the corresponding image points. See \cvCppCross{calibrateCamera}}
@ -1153,9 +1153,9 @@ map_x(u,v) \leftarrow x" f_x + c_x \\
map_y(u,v) \leftarrow y" f_y + c_y
\end{array}
\]
where $(k_1, k_2, p_1, p_2[, k_3])$ are the distortion coefficients.
In the case of a stereo camera this function is called twice, once for each camera head, after \cvCross{StereoRectify}{stereoRectify}, which in its turn is called after \cvCross{StereoCalibrate}{stereoCalibrate}. But if the stereo camera was not calibrated, it is still possible to compute the rectification transformations directly from the fundamental matrix using \cvCross{StereoRectifyUncalibrated}{stereoRectifyUncalibrated}. For each camera the function computes homography \texttt{H} as the rectification transformation in pixel domain, not a rotation matrix \texttt{R} in 3D space. The \texttt{R} can be computed from \texttt{H} as
\[ \texttt{R} = \texttt{cameraMatrix}^{-1} \cdot \texttt{H} \cdot \texttt{cameraMatrix} \]
@ -1280,8 +1280,8 @@ Reprojects disparity image to 3D space.
\cvarg{Q}{The $4 \times 4$ perspective transformation matrix that can be obtained with \cvCross{StereoRectify}{stereoRectify}}
\cvarg{handleMissingValues}{If true, the pixels with the minimal disparity (which corresponds to the outliers; see \cvCross{FindStereoCorrespondenceBM}{StereoBM}) will be transformed to 3D points with some very large Z value (currently set to 10000)}
\end{description}
The function transforms a 1-channel disparity map to a 3-channel image representing a 3D surface. That is, for each pixel \texttt{(x,y)} and the corresponding disparity \texttt{d=disparity(x,y)} it computes:
\[\begin{array}{l}
[X\; Y\; Z\; W]^T = \texttt{Q}*[x\; y\; \texttt{disparity}(x,y)\; 1]^T \\
@ -1348,7 +1348,7 @@ Releases block matching stereo correspondence structure.
\cvarg{state}{Double pointer to the released structure.}
\end{description}
The function releases the stereo correspondence structure and all the associated internal buffers.
\cvCPyFunc{ReleaseStereoGCState}
Releases the state structure of the graph cut-based stereo correspondence algorithm.
@ -1360,7 +1360,7 @@ Releases the state structure of the graph cut-based stereo correspondence algori
\cvarg{state}{Double pointer to the released structure.}
\end{description}
The function releases the stereo correspondence structure and all the associated internal buffers.
\fi
@ -1460,9 +1460,9 @@ class StereoSGBM
int speckleWindowSize=0, int speckleRange=0,
bool fullDP=false);
virtual ~StereoSGBM();
virtual void operator()(const Mat& left, const Mat& right, Mat& disp);
int minDisparity;
int numberOfDisparities;
int SADWindowSize;
@ -1473,7 +1473,7 @@ class StereoSGBM
int speckleRange;
int disp12MaxDiff;
bool fullDP;
...
};
\end{lstlisting}
@ -1576,7 +1576,7 @@ Calibrates stereo camera.
\cvarg{distCoeffs}{The input/output vector of distortion coefficients $(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])$ of 4, 5 or 8 elements. \cvCpp{On output vector length depends on the flags.}}
\cvarg{cameraMatrix2}{The input/output second camera matrix, as cameraMatrix1.}
\cvarg{distCoeffs2}{The input/output lens distortion coefficients for the second camera, as \texttt{distCoeffs1}.}
\cvarg{imageSize}{Size of the image, used only to initialize intrinsic camera matrix.}
\cvarg{R}{The output rotation matrix between the 1st and the 2nd cameras' coordinate systems.}
\cvarg{T}{The output translation vector between the cameras' coordinate systems.}
\cvarg{E}{The \cvCPy{optional} output essential matrix.}
@ -1668,10 +1668,10 @@ void stereoRectify( const Mat\& cameraMatrix1, const Mat\& distCoeffs1,\par
\cvarg{roi1, roi2}{The optional output rectangles inside the rectified images where all the pixels are valid. If \texttt{alpha=0}, the ROIs will cover the whole images, otherwise they are likely to be smaller; see the picture below}
\end{description}
The function computes the rotation matrices for each camera that (virtually) make both camera image planes the same plane. Consequently, that makes all the epipolar lines parallel and thus simplifies the dense stereo correspondence problem. On input the function takes the matrices computed by \cvCppCross{stereoCalibrate} and on output it gives 2 rotation matrices and also 2 projection matrices in the new coordinates. The 2 cases distinguished by the function are:
\begin{enumerate}
\item Horizontal stereo, when the 1st and 2nd camera views are shifted relative to each other mainly along the x axis (with a possible small vertical shift). Then in the rectified images the corresponding epipolar lines in the left and right cameras will be horizontal and have the same y-coordinate. P1 and P2 will look like:
\[\texttt{P1}=
\begin{bmatrix}
@ -1711,9 +1711,9 @@ f & 0 & cx & 0\\
\]
where $T_y$ is vertical shift between the cameras and $cy_1=cy_2$ if \texttt{CALIB\_ZERO\_DISPARITY} is set.
\end{enumerate}
As you can see, the first 3 columns of \texttt{P1} and \texttt{P2} will effectively be the new "rectified" camera matrices.
The matrices, together with \texttt{R1} and \texttt{R2}, can then be passed to \cvCross{InitUndistortRectifyMap}{initUndistortRectifyMap} to initialize the rectification map for each camera.
Below is the screenshot from the \texttt{stereo\_calib.cpp} sample. Some red horizontal lines, as you can see, pass through the corresponding image regions, i.e. the images are well rectified (which is what most stereo correspondence algorithms rely on). The green rectangles are \texttt{roi1} and \texttt{roi2} - indeed, their interiors contain only valid pixels.
@ -1746,7 +1746,7 @@ Computes rectification transform for uncalibrated stereo camera.
Otherwise all the points are considered inliers.}
\end{description}
The function computes the rectification transformations without knowing intrinsic parameters of the cameras and their relative position in space, hence the suffix "Uncalibrated". Another related difference from \cvCross{StereoRectify}{stereoRectify} is that the function outputs not the rectification transformations in the object (3D) space, but the planar perspective transformations, encoded by the homography matrices \texttt{H1} and \texttt{H2}. The function implements the algorithm \cite{Hartley99}.
Note that while the algorithm does not need to know the intrinsic parameters of the cameras, it heavily depends on the epipolar geometry. Therefore, if the camera lenses have significant distortion, it is better to correct it before computing the fundamental matrix and calling this function. For example, distortion coefficients can be estimated for each head of a stereo camera separately by using \cvCross{CalibrateCamera2}{calibrateCamera} and then the images can be corrected using \cvCross{Undistort2}{undistort}, or just the point coordinates can be corrected with \cvCross{UndistortPoints}{undistortPoints}.
@ -1807,7 +1807,7 @@ void undistortPoints( const Mat\& src, Mat\& dst,\par
\begin{description}
\cvarg{src}{The observed point coordinates, 1xN or Nx1 2-channel (CV\_32FC2 or CV\_64FC2).}
\cvarg{dst}{The output ideal point coordinates, after undistortion and reverse perspective transformation\cvCPy{, same format as \texttt{src}}.}
\cvarg{cameraMatrix}{The camera matrix $\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}$}
\cvarg{distCoeffs}{The input vector of distortion coefficients $(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6]])$ of 4, 5 or 8 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed.}

View File

@ -79,7 +79,7 @@ typedef struct CvHaarFeature
int tilted; /* 0 means up-right feature, 1 means 45--rotated feature */
/* 2-3 rectangles with weights of opposite signs and
with absolute values inversely proportional to the areas of the
rectangles. If rect[2].weight !=0, then
the feature consists of 3 rectangles, otherwise it consists of 2 */
struct
@ -90,9 +90,9 @@ typedef struct CvHaarFeature
}
CvHaarFeature;
/* a single tree classifier (stump in the simplest case) that returns the
response for the feature at the particular image location (i.e. pixel
sum over subrectangles of the window) and gives out a value depending
on the response */
typedef struct CvHaarClassifier
{
@ -101,11 +101,11 @@ typedef struct CvHaarClassifier
/* these are "parallel" arrays. Every index \texttt{i}
corresponds to a node of the decision tree (root has 0-th index).
left[i] - index of the left child (or negated index if the
left child is a leaf)
right[i] - index of the right child (or negated index if the
right child is a leaf)
threshold[i] - branch threshold. if feature response is <= threshold,
left branch is chosen, otherwise right branch is chosen.
alpha[i] - output value corresponding to the leaf. */
CvHaarFeature* haar_feature;
@ -141,16 +141,16 @@ typedef struct CvHaarClassifierCascade
{
int flags; /* signature */
int count; /* number of stages */
CvSize orig_window_size; /* original object size (the cascade is
trained for) */
/* these two parameters are set by cvSetImagesForHaarClassifierCascade */
CvSize real_window_size; /* current object size */
double scale; /* current scale */
CvHaarStageClassifier* stage_classifier; /* array of stage classifiers */
CvHidHaarClassifierCascade* hid_cascade; /* hidden optimized
representation of the
cascade, created by
cvSetImagesForHaarClassifierCascade */
}
CvHaarClassifierCascade;
@ -370,13 +370,13 @@ Base class for computing feature values in cascade classifiers.
\begin{lstlisting}
class CV_EXPORTS FeatureEvaluator
{
public:
enum { HAAR = 0, LBP = 1 }; // supported feature types
virtual ~FeatureEvaluator(); // destructor
virtual bool read(const FileNode& node);
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
virtual bool setImage(const Mat& img, Size origWinSize);
virtual bool setWindow(Point p);
@ -428,7 +428,7 @@ bool FeatureEvaluator::setImage(const Mat\& img, Size origWinSize);
Sets window in the current image in which the features will be computed (called by \cvCppCross{CascadeClassifier::runAt}).
\cvdefCpp{
bool FeatureEvaluator::setWindow(Point p);
}
\begin{description}
@ -477,21 +477,21 @@ The cascade classifier class for object detection.
class CascadeClassifier
{
public:
// structure for storing tree node
struct CV_EXPORTS DTreeNode
{
int featureIdx; // feature index on which is a split
float threshold; // split threshold of ordered features only
int left; // left child index in the tree nodes array
int right; // right child index in the tree nodes array
};
// structure for storing decision tree
struct CV_EXPORTS DTree
{
int nodeCount; // nodes count
};
// structure for storing cascade stage (BOOST only for now)
struct CV_EXPORTS Stage
{
@ -499,27 +499,27 @@ public:
int ntrees; // number of trees
float threshold; // threshold of stage sum
};
enum { BOOST = 0 }; // supported stage types
// mode of detection (see parameter flags in function HaarDetectObjects)
enum { DO_CANNY_PRUNING = CV_HAAR_DO_CANNY_PRUNING,
SCALE_IMAGE = CV_HAAR_SCALE_IMAGE,
FIND_BIGGEST_OBJECT = CV_HAAR_FIND_BIGGEST_OBJECT,
DO_ROUGH_SEARCH = CV_HAAR_DO_ROUGH_SEARCH };
CascadeClassifier(); // default constructor
CascadeClassifier(const string& filename);
~CascadeClassifier(); // destructor
bool empty() const;
bool load(const string& filename);
bool read(const FileNode& node);
void detectMultiScale( const Mat& image, vector<Rect>& objects,
double scaleFactor=1.1, int minNeighbors=3,
int flags=0, Size minSize=Size());
bool setImage( Ptr<FeatureEvaluator>&, const Mat& );
int runAt( Ptr<FeatureEvaluator>&, Point );
@ -527,9 +527,9 @@ public:
int stageType; // stage type (BOOST only for now)
int featureType; // feature type (HAAR or LBP for now)
int ncategories; // number of categories (for categorical features only)
Size origWinSize; // size of training images
vector<Stage> stages; // vector of stages (BOOST for now)
vector<DTree> classifiers; // vector of decision trees
vector<DTreeNode> nodes; // vector of tree nodes
@ -635,5 +635,150 @@ Groups the object candidate rectangles
\cvarg{eps}{The relative difference between sides of the rectangles to merge them into a group}
\end{description}
The function is a wrapper for a generic function \cvCppCross{partition}. It clusters all the input rectangles using the rectangle equivalence criteria that combines rectangles with similar sizes and similar locations (the similarity is defined by \texttt{eps}). When \texttt{eps=0}, no clustering is done at all. If $\texttt{eps}\rightarrow +\infty$, all the rectangles will be put in one cluster. Then, the small clusters, containing less than or equal to \texttt{groupThreshold} rectangles, will be rejected. For each remaining cluster the average rectangle will be computed and put into the output rectangle list.
\fi
\ifC
\section{Discriminatively Trained Part Based Models for Object Detection}
The object detector described below was originally proposed by
P.F. Felzenszwalb et al. in \cite{Felzenszwalb10}. It builds on the
Dalal-Triggs detector, which uses a single filter on histogram of
oriented gradients (HOG) features to represent an object category.
The detector uses a sliding window approach, where a filter is
applied at all positions and scales of an image. The first
innovation is enriching the Dalal-Triggs model with a
star-structured part-based model defined by a "root" filter
(analogous to the Dalal-Triggs filter) plus a set of part filters
and associated deformation models. The score of one of the star
models at a particular position and scale within an image is the
score of the root filter at the given location, plus the sum, over
parts, of the maximum, over placements of that part, of the part
filter score at its location minus a deformation cost measuring the
deviation of the part from its ideal location relative to the root.
Both root and part filter scores are defined by the dot product
between a filter (a set of weights) and a subwindow of a feature
pyramid computed from the input image. Another improvement is the
representation of the class of models by a mixture of star models.
The score of a mixture model at a particular position and scale is
the maximum, over components, of the score of that component model
at the given location.
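Written out (this is our summary of the description above, using the notation
of \cite{Felzenszwalb10}; the symbols below are illustrative and are not part
of the OpenCV API), the score of a single star model with root filter $F_0$,
part filters $F_1, \dots, F_n$, deformation parameters $d_i$ and bias $b$,
evaluated on a feature pyramid $H$ with the root placed at $p_0$, is
\[
\mathrm{score}(p_0) = F_0 \cdot \phi(H, p_0)
+ \sum_{i=1}^{n} \max_{p_i} \left( F_i \cdot \phi(H, p_i)
- d_i \cdot (dx_i,\; dy_i,\; dx_i^2,\; dy_i^2) \right) + b,
\]
where $\phi(H, p_i)$ is the subwindow of the feature pyramid at placement
$p_i$ and $(dx_i, dy_i)$ is the displacement of part $i$ from its ideal
location relative to the root. The score of a mixture model is the maximum of
the component scores at the given position and scale.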
\fi
\ifC
\cvclass{CvLSVMFilterPosition, CvLSVMFilterObject,
CvLatentSvmDetector, CvObjectDetection}
\begin{lstlisting}
/* DataType: STRUCT position
Structure describes the position of the filter in the feature pyramid
l - level in the feature pyramid
(x, y) - coordinate in level l */
typedef struct {
unsigned int x;
unsigned int y;
unsigned int l;
} CvLSVMFilterPosition;
/* DataType: STRUCT filterObject
Description of the filter, which corresponds to the part of the object
V - ideal (penalty = 0) position of the partial filter
from the root filter position (V_i in the paper)
penaltyFunction - vector describes penalty function (d_i in the paper)
pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
FILTER DESCRIPTION
Rectangular map (sizeX x sizeY),
every cell stores feature vector (dimension = p)
H - matrix of feature vectors
to set and get feature vectors (i,j)
used formula H[(j * sizeX + i) * p + k], where
k - component of feature vector in cell (i, j)
END OF FILTER DESCRIPTION
xp - auxiliary parameter for internal use
size of row in feature vectors
(yp = (int) (p / xp); p = xp * yp) */
typedef struct{
CvLSVMFilterPosition V;
float fineFunction[4];
unsigned int sizeX;
unsigned int sizeY;
unsigned int p;
unsigned int xp;
float *H;
} CvLSVMFilterObject;
/* data type: STRUCT CvLatentSvmDetector
structure contains internal representation of trained Latent SVM detector
num_filters - total number of filters (root plus part) in model
num_components - number of components in model
num_part_filters - array containing number of part filters for each component
filters - root and part filters for all model components
b - biases for all model components
score_threshold - confidence level threshold */
typedef struct CvLatentSvmDetector {
int num_filters;
int num_components;
int* num_part_filters;
CvLSVMFilterObject** filters;
float* b;
float score_threshold;
} CvLatentSvmDetector;
/* data type: STRUCT CvObjectDetection
structure contains the bounding box and confidence level for detected object
rect - bounding box for a detected object
score - confidence level */
typedef struct CvObjectDetection {
CvRect rect;
float score;
} CvObjectDetection;
\end{lstlisting}
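As an illustration of the indexing formula quoted in the comment above, the
following sketch shows how one could read component \texttt{k} of the feature
vector stored at cell \texttt{(i, j)} of a filter (the helper function is
hypothetical and not part of the OpenCV API):
\begin{lstlisting}
/* Illustrative helper, not part of OpenCV: reads component k of the
   feature vector stored at cell (i, j) of a filter, assuming the
   layout described in the comment above. */
float get_filter_component( const CvLSVMFilterObject* filter,
                            int i, int j, int k )
{
    /* H is a flat array of sizeX x sizeY feature vectors of dimension p */
    return filter->H[(j * filter->sizeX + i) * filter->p + k];
}
\end{lstlisting}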
\fi
\ifC
\cvCPyFunc{LoadLatentSvmDetector}
Loads a trained detector from a file.
\cvdefC{ CvLatentSvmDetector* cvLoadLatentSvmDetector( \par const
char* filename); }
\begin{description}
\cvarg{filename}{Name of the file containing the description of a
trained detector}
\end{description}
\fi
\cvCPyFunc{LatentSvmDetectObjects}
Detects objects in the image.
\cvdefC{ CvSeq* cvLatentSvmDetectObjects( \par IplImage* image,
\par CvLatentSvmDetector* detector,
\par CvMemStorage* storage,
\par float overlap\_threshold CV\_DEFAULT(0.5f),
\par int numThreads CV\_DEFAULT(-1));
}
\begin{description}
\cvarg{image}{Image to detect objects in}
\cvarg{detector}{LatentSVM detector in internal representation}
\cvarg{storage}{Memory storage to store the resultant sequence of the object candidate rectangles}
\cvarg{overlap\_threshold}{Threshold for the non-maximum suppression algorithm}
\cvarg{numThreads}{Number of threads used in the parallel version of the algorithm}
\end{description}
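Below is a minimal usage sketch in the style of the OpenCV C samples (the file
names are placeholders; we assume here that the function returns a
\texttt{CvSeq*} of \texttt{CvObjectDetection} structures allocated in
\texttt{storage}, and that the detector is freed with
\texttt{cvReleaseLatentSvmDetector}):
\begin{lstlisting}
#include <stdio.h>
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"

int main()
{
    /* "cat.jpg" and "cat.xml" are placeholder names for the input
       image and a trained model file */
    IplImage* image = cvLoadImage("cat.jpg");
    CvLatentSvmDetector* detector = cvLoadLatentSvmDetector("cat.xml");
    CvMemStorage* storage = cvCreateMemStorage(0);
    CvSeq* detections;
    int i;

    /* overlap_threshold = 0.5, numThreads = -1 (default) */
    detections = cvLatentSvmDetectObjects(image, detector, storage,
                                          0.5f, -1);

    for (i = 0; i < detections->total; i++)
    {
        CvObjectDetection* det =
            (CvObjectDetection*)cvGetSeqElem(detections, i);
        printf("object at (%d, %d, %d, %d), score = %f\n",
               det->rect.x, det->rect.y, det->rect.width,
               det->rect.height, det->score);
    }

    cvReleaseMemStorage(&storage);
    cvReleaseLatentSvmDetector(&detector);
    cvReleaseImage(&image);
    return 0;
}
\end{lstlisting}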

View File

@ -82,6 +82,18 @@
year = {2004}
}
@article{Felzenszwalb10,
author = {Felzenszwalb, P.F. and Girshick, R.B. and McAllester, D. and Ramanan, D.},
title = {Object Detection with Discriminatively Trained Part Based Models},
journal = {PAMI},
volume = {32},
year = {2010},
number = {9},
month = {September},
pages = {1627--1645},
bibsource = {http://www.visionbib.com/bibliography/bib/457.html#BB45794}
}
@article{Hartley99,
author = {Hartley, R.I.},
title = {Theory and Practice of Projective Rectification},
@ -311,4 +323,3 @@
# '''[Zhang96]''' Z. Zhang. Parameter Estimation Techniques: A Tutorial with Application to Conic Fitting, Image and Vision Computing Journal, 1996.
# '''[Zhang99]''' Z. Zhang. Flexible Camera Calibration By Viewing a Plane From Unknown Orientations. International Conference on Computer Vision (ICCV'99), Corfu, Greece, pages 666-673, September 1999.
# '''[Zhang00]''' Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.