404 lines
13 KiB
TeX
404 lines
13 KiB
TeX
|
\section{Object detection and descriptors}
|
||
|
\ifCpp
|
||
|
|
||
|
\cvclass{RandomizedTree}
|
||
|
The class contains the base structure for \texttt{RTreeClassifier}.
|
||
|
|
||
|
\begin{lstlisting}
|
||
|
class CV_EXPORTS RandomizedTree
|
||
|
{
|
||
|
public:
|
||
|
friend class RTreeClassifier;
|
||
|
|
||
|
RandomizedTree();
|
||
|
~RandomizedTree();
|
||
|
|
||
|
void train(std::vector<BaseKeypoint> const& base_set,
|
||
|
cv::RNG &rng, int depth, int views,
|
||
|
size_t reduced_num_dim, int num_quant_bits);
|
||
|
void train(std::vector<BaseKeypoint> const& base_set,
|
||
|
cv::RNG &rng, PatchGenerator &make_patch, int depth,
|
||
|
int views, size_t reduced_num_dim, int num_quant_bits);
|
||
|
|
||
|
// following two funcs are EXPERIMENTAL
|
||
|
//(do not use unless you know exactly what you do)
|
||
|
static void quantizeVector(float *vec, int dim, int N, float bnds[2],
|
||
|
int clamp_mode=0);
|
||
|
static void quantizeVector(float *src, int dim, int N, float bnds[2],
|
||
|
uchar *dst);
|
||
|
|
||
|
// patch_data must be a 32x32 array (no row padding)
|
||
|
float* getPosterior(uchar* patch_data);
|
||
|
const float* getPosterior(uchar* patch_data) const;
|
||
|
uchar* getPosterior2(uchar* patch_data);
|
||
|
|
||
|
void read(const char* file_name, int num_quant_bits);
|
||
|
void read(std::istream &is, int num_quant_bits);
|
||
|
void write(const char* file_name) const;
|
||
|
void write(std::ostream &os) const;
|
||
|
|
||
|
int classes() { return classes_; }
|
||
|
int depth() { return depth_; }
|
||
|
|
||
|
void discardFloatPosteriors() { freePosteriors(1); }
|
||
|
|
||
|
inline void applyQuantization(int num_quant_bits)
|
||
|
{ makePosteriors2(num_quant_bits); }
|
||
|
|
||
|
private:
|
||
|
int classes_;
|
||
|
int depth_;
|
||
|
int num_leaves_;
|
||
|
std::vector<RTreeNode> nodes_;
|
||
|
float **posteriors_; // 16-bytes aligned posteriors
|
||
|
uchar **posteriors2_; // 16-bytes aligned posteriors
|
||
|
std::vector<int> leaf_counts_;
|
||
|
|
||
|
void createNodes(int num_nodes, cv::RNG &rng);
|
||
|
void allocPosteriorsAligned(int num_leaves, int num_classes);
|
||
|
void freePosteriors(int which);
|
||
|
// which: 1=posteriors_, 2=posteriors2_, 3=both
|
||
|
void init(int classes, int depth, cv::RNG &rng);
|
||
|
void addExample(int class_id, uchar* patch_data);
|
||
|
void finalize(size_t reduced_num_dim, int num_quant_bits);
|
||
|
int getIndex(uchar* patch_data) const;
|
||
|
inline float* getPosteriorByIndex(int index);
|
||
|
inline uchar* getPosteriorByIndex2(int index);
|
||
|
inline const float* getPosteriorByIndex(int index) const;
|
||
|
void convertPosteriorsToChar();
|
||
|
void makePosteriors2(int num_quant_bits);
|
||
|
void compressLeaves(size_t reduced_num_dim);
|
||
|
void estimateQuantPercForPosteriors(float perc[2]);
|
||
|
};
|
||
|
\end{lstlisting}
|
||
|
|
||
|
\cvCppFunc{RandomizedTree::train}
|
||
|
Trains a randomized tree using an input set of keypoints
|
||
|
|
||
|
\cvdefCpp{
|
||
|
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
|
||
|
int depth, int views, size\_t reduced\_num\_dim,
|
||
|
int num\_quant\_bits);
|
||
|
}
|
||
|
\cvdefCpp{
|
||
|
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
|
||
|
PatchGenerator \&make\_patch, int depth, int views, size\_t reduced\_num\_dim,
|
||
|
int num\_quant\_bits);
|
||
|
}
|
||
|
\begin{description}
|
||
|
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
|
||
|
\cvarg{rng} {Random number generator used for training}
|
||
|
\cvarg{make\_patch} {Patch generator used for training}
|
||
|
\cvarg{depth} {Maximum tree depth}
|
||
|
%\cvarg{views} {}
|
||
|
\cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
|
||
|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
|
||
|
\end{description}
|
||
|
|
||
|
\cvCppFunc {RandomizedTree::read}
|
||
|
Reads pre-saved randomized tree from file or stream
|
||
|
\cvdefCpp{void read(const char* file\_name, int num\_quant\_bits)}
|
||
|
\cvdefCpp{void read(std::istream \&is, int num\_quant\_bits)}
|
||
|
\begin{description}
|
||
|
\cvarg{file\_name}{Name of the file that contains randomized tree data}
|
||
|
\cvarg{is}{Input stream associated with the file that contains randomized tree data}
|
||
|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
|
||
|
\end{description}
|
||
|
|
||
|
\cvCppFunc {RandomizedTree::write}
|
||
|
Writes current randomized tree to a file or stream
|
||
|
\cvdefCpp{void write(const char* file\_name) const;}
|
||
|
\cvdefCpp{void write(std::ostream \&os) const;}
|
||
|
\begin{description}
|
||
|
\cvarg{file\_name}{Filename of file where randomized tree data will be stored}
|
||
|
\cvarg{os}{Output stream associated with file where randomized tree data will be stored}
|
||
|
\end{description}
|
||
|
|
||
|
|
||
|
\cvCppFunc {RandomizedTree::applyQuantization}
|
||
|
Applies quantization to the current randomized tree
|
||
|
\cvdefCpp{void applyQuantization(int num\_quant\_bits)}
|
||
|
\begin{description}
|
||
|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
|
||
|
\end{description}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
\cvstruct{RTreeNode}
|
||
|
The class contains the base structure for \texttt{RandomizedTree}.
|
||
|
|
||
|
\begin{lstlisting}
|
||
|
struct RTreeNode
|
||
|
{
|
||
|
short offset1, offset2;
|
||
|
|
||
|
RTreeNode() {}
|
||
|
|
||
|
RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2)
|
||
|
: offset1(y1*PATCH_SIZE + x1),
|
||
|
offset2(y2*PATCH_SIZE + x2)
|
||
|
{}
|
||
|
|
||
|
//! Left child on 0, right child on 1
|
||
|
inline bool operator() (uchar* patch_data) const
|
||
|
{
|
||
|
return patch_data[offset1] > patch_data[offset2];
|
||
|
}
|
||
|
};
|
||
|
\end{lstlisting}
|
||
|
|
||
|
|
||
|
\cvclass{RTreeClassifier}
|
||
|
The class \texttt{RTreeClassifier} represents the Calonder descriptor, which was originally introduced by Michael Calonder.
|
||
|
|
||
|
\begin{lstlisting}
|
||
|
class CV_EXPORTS RTreeClassifier
|
||
|
{
|
||
|
public:
|
||
|
static const int DEFAULT_TREES = 48;
|
||
|
static const size_t DEFAULT_NUM_QUANT_BITS = 4;
|
||
|
|
||
|
RTreeClassifier();
|
||
|
|
||
|
void train(std::vector<BaseKeypoint> const& base_set,
|
||
|
cv::RNG &rng,
|
||
|
int num_trees = RTreeClassifier::DEFAULT_TREES,
|
||
|
int depth = DEFAULT_DEPTH,
|
||
|
int views = DEFAULT_VIEWS,
|
||
|
size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
|
||
|
int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
|
||
|
bool print_status = true);
|
||
|
void train(std::vector<BaseKeypoint> const& base_set,
|
||
|
cv::RNG &rng,
|
||
|
PatchGenerator &make_patch,
|
||
|
int num_trees = RTreeClassifier::DEFAULT_TREES,
|
||
|
int depth = DEFAULT_DEPTH,
|
||
|
int views = DEFAULT_VIEWS,
|
||
|
size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
|
||
|
int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
|
||
|
bool print_status = true);
|
||
|
|
||
|
// sig must point to a memory block of at least
|
||
|
//classes()*sizeof(float|uchar) bytes
|
||
|
void getSignature(IplImage *patch, uchar *sig);
|
||
|
void getSignature(IplImage *patch, float *sig);
|
||
|
void getSparseSignature(IplImage *patch, float *sig,
|
||
|
float thresh);
|
||
|
|
||
|
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
|
||
|
static inline void safeSignatureAlloc(uchar **sig, int num_sig=1,
|
||
|
int sig_len=176);
|
||
|
static inline uchar* safeSignatureAlloc(int num_sig=1,
|
||
|
int sig_len=176);
|
||
|
|
||
|
inline int classes() { return classes_; }
|
||
|
inline int original_num_classes()
|
||
|
{ return original_num_classes_; }
|
||
|
|
||
|
void setQuantization(int num_quant_bits);
|
||
|
void discardFloatPosteriors();
|
||
|
|
||
|
void read(const char* file_name);
|
||
|
void read(std::istream &is);
|
||
|
void write(const char* file_name) const;
|
||
|
void write(std::ostream &os) const;
|
||
|
|
||
|
std::vector<RandomizedTree> trees_;
|
||
|
|
||
|
private:
|
||
|
int classes_;
|
||
|
int num_quant_bits_;
|
||
|
uchar **posteriors_;
|
||
|
ushort *ptemp_;
|
||
|
int original_num_classes_;
|
||
|
bool keep_floats_;
|
||
|
};
|
||
|
\end{lstlisting}
|
||
|
|
||
|
\cvCppFunc{RTreeClassifier::train}
|
||
|
Trains a randomized tree classifier using an input set of keypoints
|
||
|
\cvdefCpp{
|
||
|
void train(std::vector<BaseKeypoint> const\& base\_set,
|
||
|
cv::RNG \&rng,
|
||
|
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
|
||
|
int depth = DEFAULT\_DEPTH,
|
||
|
int views = DEFAULT\_VIEWS,
|
||
|
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
|
||
|
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
|
||
|
}
|
||
|
\cvdefCpp{
|
||
|
void train(std::vector<BaseKeypoint> const\& base\_set,
|
||
|
cv::RNG \&rng,
|
||
|
PatchGenerator \&make\_patch,
|
||
|
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
|
||
|
int depth = DEFAULT\_DEPTH,
|
||
|
int views = DEFAULT\_VIEWS,
|
||
|
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
|
||
|
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
|
||
|
}
|
||
|
\begin{description}
|
||
|
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
|
||
|
\cvarg{rng} {Random number generator used for training}
|
||
|
\cvarg{make\_patch} {Patch generator used for training}
|
||
|
\cvarg{num\_trees} {Number of randomized trees used in \texttt{RTreeClassifier}}
|
||
|
\cvarg{depth} {Maximum tree depth}
|
||
|
%\cvarg{views} {}
|
||
|
\cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
|
||
|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
|
||
|
\cvarg{print\_status} {Whether to print the current status of training on the console}
|
||
|
\end{description}
|
||
|
|
||
|
\cvCppFunc{RTreeClassifier::getSignature}
|
||
|
Returns the signature for an image patch
|
||
|
\cvdefCpp{
|
||
|
void getSignature(IplImage *patch, uchar *sig)
|
||
|
}
|
||
|
\cvdefCpp{
|
||
|
void getSignature(IplImage *patch, float *sig)
|
||
|
}
|
||
|
\begin{description}
|
||
|
\cvarg{patch} {Image patch to calculate signature for}
|
||
|
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
|
||
|
\end{description}
|
||
|
|
||
|
\cvCppFunc{RTreeClassifier::getSparseSignature}
|
||
|
The function is similar to \texttt{getSignature} but uses the threshold to remove all signature elements that are less than the threshold, so that the signature is compressed.
|
||
|
\cvdefCpp{
|
||
|
void getSparseSignature(IplImage *patch, float *sig,
|
||
|
float thresh);
|
||
|
}
|
||
|
\begin{description}
|
||
|
\cvarg{patch} {Image patch to calculate signature for}
|
||
|
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
|
||
|
\cvarg{thresh} {The threshold that is used for compressing the signature}
|
||
|
\end{description}
|
||
|
|
||
|
\cvCppFunc{RTreeClassifier::countNonZeroElements}
|
||
|
The function returns the number of non-zero elements in the input array.
|
||
|
\cvdefCpp{
|
||
|
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
|
||
|
}
|
||
|
\begin{description}
|
||
|
\cvarg{vec}{Input vector containing float elements}
|
||
|
\cvarg{n}{Input vector size}
|
||
|
\cvarg{tol} {The threshold used for counting elements. All elements that are less than \texttt{tol} are treated as zero elements}
|
||
|
\end{description}
|
||
|
|
||
|
\cvCppFunc {RTreeClassifier::read}
|
||
|
Reads pre-saved RTreeClassifier from file or stream
|
||
|
\cvdefCpp{void read(const char* file\_name)}
|
||
|
\cvdefCpp{void read(std::istream \&is)}
|
||
|
\begin{description}
|
||
|
\cvarg{file\_name}{Name of the file that contains randomized tree data}
|
||
|
\cvarg{is}{Input stream associated with the file that contains randomized tree data}
|
||
|
\end{description}
|
||
|
|
||
|
\cvCppFunc {RTreeClassifier::write}
|
||
|
Writes current RTreeClassifier to a file or stream
|
||
|
\cvdefCpp{void write(const char* file\_name) const;}
|
||
|
\cvdefCpp{void write(std::ostream \&os) const;}
|
||
|
\begin{description}
|
||
|
\cvarg{file\_name}{Filename of file where randomized tree data will be stored}
|
||
|
\cvarg{os}{Output stream associated with file where randomized tree data will be stored}
|
||
|
\end{description}
|
||
|
|
||
|
|
||
|
\cvCppFunc {RTreeClassifier::setQuantization}
|
||
|
Applies quantization to the current randomized tree
|
||
|
\cvdefCpp{void setQuantization(int num\_quant\_bits)}
|
||
|
\begin{description}
|
||
|
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
|
||
|
\end{description}
|
||
|
|
||
|
Below is an example of \texttt{RTreeClassifier} usage for feature matching. There are test and train images, and we extract features from both with SURF. The output is the \texttt{best\_corr} and \texttt{best\_corr\_idx} arrays, which keep, for every test feature, the best probability and the index of the corresponding train feature.
|
||
|
% ===== Example. Using RTreeClassifier for features matching =====
|
||
|
\begin{lstlisting}
|
||
|
CvMemStorage* storage = cvCreateMemStorage(0);
|
||
|
CvSeq *objectKeypoints = 0, *objectDescriptors = 0;
|
||
|
CvSeq *imageKeypoints = 0, *imageDescriptors = 0;
|
||
|
CvSURFParams params = cvSURFParams(500, 1);
|
||
|
cvExtractSURF( test_image, 0, &imageKeypoints, &imageDescriptors,
|
||
|
storage, params );
|
||
|
cvExtractSURF( train_image, 0, &objectKeypoints, &objectDescriptors,
|
||
|
storage, params );
|
||
|
|
||
|
cv::RTreeClassifier detector;
|
||
|
int patch_width = cv::PATCH_SIZE;
|
||
|
int patch_height = cv::PATCH_SIZE;
|
||
|
vector<cv::BaseKeypoint> base_set;
|
||
|
int i=0;
|
||
|
CvSURFPoint* point;
|
||
|
for (i=0;i<(n_points > 0 ? n_points : objectKeypoints->total);i++)
|
||
|
{
|
||
|
point=(CvSURFPoint*)cvGetSeqElem(objectKeypoints,i);
|
||
|
base_set.push_back(
|
||
|
cv::BaseKeypoint(point->pt.x,point->pt.y,train_image));
|
||
|
}
|
||
|
|
||
|
//Detector training
|
||
|
cv::RNG rng( cvGetTickCount() );
|
||
|
cv::PatchGenerator gen(0,255,2,false,0.7,1.3,-CV_PI/3,CV_PI/3,
|
||
|
-CV_PI/3,CV_PI/3);
|
||
|
|
||
|
printf("RTree Classifier training...\n");
|
||
|
detector.train(base_set,rng,gen,24,cv::DEFAULT_DEPTH,2000,
|
||
|
(int)base_set.size(), detector.DEFAULT_NUM_QUANT_BITS);
|
||
|
printf("Done\n");
|
||
|
|
||
|
float* signature = new float[detector.original_num_classes()];
|
||
|
float* best_corr;
|
||
|
int* best_corr_idx;
|
||
|
if (imageKeypoints->total > 0)
|
||
|
{
|
||
|
best_corr = new float[imageKeypoints->total];
|
||
|
best_corr_idx = new int[imageKeypoints->total];
|
||
|
}
|
||
|
|
||
|
for(i=0; i < imageKeypoints->total; i++)
|
||
|
{
|
||
|
point=(CvSURFPoint*)cvGetSeqElem(imageKeypoints,i);
|
||
|
int part_idx = -1;
|
||
|
float prob = 0.0f;
|
||
|
|
||
|
CvRect roi = cvRect((int)(point->pt.x) - patch_width/2,
|
||
|
(int)(point->pt.y) - patch_height/2,
|
||
|
patch_width, patch_height);
|
||
|
cvSetImageROI(test_image, roi);
|
||
|
roi = cvGetImageROI(test_image);
|
||
|
if(roi.width != patch_width || roi.height != patch_height)
|
||
|
{
|
||
|
best_corr_idx[i] = part_idx;
|
||
|
best_corr[i] = prob;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cvSetImageROI(test_image, roi);
|
||
|
IplImage* roi_image =
|
||
|
cvCreateImage(cvSize(roi.width, roi.height),
|
||
|
test_image->depth, test_image->nChannels);
|
||
|
cvCopy(test_image,roi_image);
|
||
|
|
||
|
detector.getSignature(roi_image, signature);
|
||
|
for (int j = 0; j< detector.original_num_classes();j++)
|
||
|
{
|
||
|
if (prob < signature[j])
|
||
|
{
|
||
|
part_idx = j;
|
||
|
prob = signature[j];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
best_corr_idx[i] = part_idx;
|
||
|
best_corr[i] = prob;
|
||
|
|
||
|
|
||
|
if (roi_image)
|
||
|
cvReleaseImage(&roi_image);
|
||
|
}
|
||
|
cvResetImageROI(test_image);
|
||
|
}
|
||
|
|
||
|
\end{lstlisting}
|
||
|
|
||
|
\fi
|