a lot of small corrections to bring down the number of undocumented functions, reported by the script; added em.cpp sample

This commit is contained in:
Vadim Pisarevsky
2011-06-09 01:16:45 +00:00
parent 3b9e752be7
commit 20aca7440f
30 changed files with 474 additions and 746 deletions

View File

@@ -127,7 +127,7 @@ Weak tree classifier ::
virtual ~CvBoostTree();
virtual bool train( CvDTreeTrainData* _train_data,
const CvMat* subsample_idx, CvBoost* ensemble );
const Mat& subsample_idx, CvBoost* ensemble );
virtual void scale( double s );
virtual void read( CvFileStorage* fs, CvFileNode* node,
CvBoost* ensemble, CvDTreeTrainData* _data );
@@ -173,21 +173,21 @@ Boosted tree classifier ::
CvBoost();
virtual ~CvBoost();
CvBoost( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
CvBoost( const Mat& _train_data, int _tflag,
const Mat& _responses, const Mat& _var_idx=Mat(),
const Mat& _sample_idx=Mat(), const Mat& _var_type=Mat(),
const Mat& _missing_mask=Mat(),
CvBoostParams params=CvBoostParams() );
virtual bool train( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
virtual bool train( const Mat& _train_data, int _tflag,
const Mat& _responses, const Mat& _var_idx=Mat(),
const Mat& _sample_idx=Mat(), const Mat& _var_type=Mat(),
const Mat& _missing_mask=Mat(),
CvBoostParams params=CvBoostParams(),
bool update=false );
virtual float predict( const CvMat* _sample, const CvMat* _missing=0,
CvMat* weak_responses=0, CvSlice slice=CV_WHOLE_SEQ,
virtual float predict( const Mat& _sample, const Mat& _missing=Mat(),
Mat& weak_responses=0, CvSlice slice=CV_WHOLE_SEQ,
bool raw_mode=false ) const;
virtual void prune( CvSlice slice );
@@ -221,7 +221,7 @@ Boosted tree classifier ::
CvBoost::train
--------------
.. cpp:function:: bool CvBoost::train( const CvMat* _train_data, int _tflag, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, const CvMat* _var_type=0, const CvMat* _missing_mask=0, CvBoostParams params=CvBoostParams(), bool update=false )
.. cpp:function:: bool CvBoost::train( const Mat& _train_data, int _tflag, const Mat& _responses, const Mat& _var_idx=Mat(), const Mat& _sample_idx=Mat(), const Mat& _var_type=Mat(), const Mat& _missing_mask=Mat(), CvBoostParams params=CvBoostParams(), bool update=false )
Trains a boosted tree classifier.
@@ -233,7 +233,7 @@ The train method follows the common template. The last parameter ``update`` spec
CvBoost::predict
----------------
.. cpp:function:: float CvBoost::predict( const CvMat* sample, const CvMat* missing=0, CvMat* weak_responses=0, CvSlice slice=CV_WHOLE_SEQ, bool raw_mode=false ) const
.. cpp:function:: float CvBoost::predict( const Mat& sample, const Mat& missing=Mat(), const Range& slice=Range::all(), bool rawMode=false, bool returnSum=false ) const
Predicts a response for an input sample.

View File

@@ -166,27 +166,27 @@ Decision tree training data and shared data for tree ensembles ::
struct CvDTreeTrainData
{
CvDTreeTrainData();
CvDTreeTrainData( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
CvDTreeTrainData( const Mat& _train_data, int _tflag,
const Mat& _responses, const Mat& _var_idx=Mat(),
const Mat& _sample_idx=Mat(), const Mat& _var_type=Mat(),
const Mat& _missing_mask=Mat(),
const CvDTreeParams& _params=CvDTreeParams(),
bool _shared=false, bool _add_labels=false );
virtual ~CvDTreeTrainData();
virtual void set_data( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
virtual void set_data( const Mat& _train_data, int _tflag,
const Mat& _responses, const Mat& _var_idx=Mat(),
const Mat& _sample_idx=Mat(), const Mat& _var_type=Mat(),
const Mat& _missing_mask=Mat(),
const CvDTreeParams& _params=CvDTreeParams(),
bool _shared=false, bool _add_labels=false,
bool _update_data=false );
virtual void get_vectors( const CvMat* _subsample_idx,
virtual void get_vectors( const Mat& _subsample_idx,
float* values, uchar* missing, float* responses,
bool get_class_idx=false );
virtual CvDTreeNode* subsample_data( const CvMat* _subsample_idx );
virtual CvDTreeNode* subsample_data( const Mat& _subsample_idx );
virtual void write_params( CvFileStorage* fs );
virtual void read_params( CvFileStorage* fs, CvFileNode* node );
@@ -226,20 +226,20 @@ Decision tree training data and shared data for tree ensembles ::
int buf_count, buf_size;
bool shared;
CvMat* cat_count;
CvMat* cat_ofs;
CvMat* cat_map;
Mat& cat_count;
Mat& cat_ofs;
Mat& cat_map;
CvMat* counts;
CvMat* buf;
CvMat* direction;
CvMat* split_buf;
Mat& counts;
Mat& buf;
Mat& direction;
Mat& split_buf;
CvMat* var_idx;
CvMat* var_type; // i-th element =
Mat& var_idx;
Mat& var_type; // i-th element =
// k<0 - ordered
// k>=0 - categorical, see k-th element of cat_* arrays
CvMat* priors;
Mat& priors;
CvDTreeParams params;
@@ -294,19 +294,19 @@ Decision tree ::
CvDTree();
virtual ~CvDTree();
virtual bool train( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
virtual bool train( const Mat& _train_data, int _tflag,
const Mat& _responses, const Mat& _var_idx=Mat(),
const Mat& _sample_idx=Mat(), const Mat& _var_type=Mat(),
const Mat& _missing_mask=Mat(),
CvDTreeParams params=CvDTreeParams() );
virtual bool train( CvDTreeTrainData* _train_data,
const CvMat* _subsample_idx );
const Mat& _subsample_idx );
virtual CvDTreeNode* predict( const CvMat* _sample,
const CvMat* _missing_data_mask=0,
virtual CvDTreeNode* predict( const Mat& _sample,
const Mat& _missing_data_mask=Mat(),
bool raw_mode=false ) const;
virtual const CvMat* get_var_importance();
virtual const Mat& get_var_importance();
virtual void clear();
virtual void read( CvFileStorage* fs, CvFileNode* node );
@@ -323,7 +323,7 @@ Decision tree ::
protected:
virtual bool do_train( const CvMat* _subsample_idx );
virtual bool do_train( const Mat& _subsample_idx );
virtual void try_split_node( CvDTreeNode* n );
virtual void split_node_data( CvDTreeNode* n );
@@ -359,7 +359,7 @@ Decision tree ::
CvDTreeNode* root;
int pruned_tree_idx;
CvMat* var_importance;
Mat& var_importance;
CvDTreeTrainData* data;
};
@@ -371,9 +371,9 @@ Decision tree ::
CvDTree::train
--------------
.. cpp:function:: bool CvDTree::train( const CvMat* _train_data, int _tflag, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, const CvMat* _var_type=0, const CvMat* _missing_mask=0, CvDTreeParams params=CvDTreeParams() )
.. cpp:function:: bool CvDTree::train( const Mat& _train_data, int _tflag, const Mat& _responses, const Mat& _var_idx=Mat(), const Mat& _sample_idx=Mat(), const Mat& _var_type=Mat(), const Mat& _missing_mask=Mat(), CvDTreeParams params=CvDTreeParams() )
.. cpp:function:: bool CvDTree::train( CvDTreeTrainData* _train_data, const CvMat* _subsample_idx )
.. cpp:function:: bool CvDTree::train( CvDTreeTrainData* _train_data, const Mat& _subsample_idx )
Trains a decision tree.
@@ -391,7 +391,7 @@ There are two ``train`` methods in ``CvDTree`` :
CvDTree::predict
----------------
.. cpp:function:: CvDTreeNode* CvDTree::predict( const CvMat* _sample, const CvMat* _missing_data_mask=0, bool raw_mode=false ) const
.. cpp:function:: CvDTreeNode* CvDTree::predict( const Mat& _sample, const Mat& _missing_data_mask=Mat(), bool raw_mode=false ) const
Returns the leaf node of a decision tree corresponding to the input vector.

View File

@@ -108,8 +108,8 @@ Parameters of the EM algorithm ::
CvTermCriteria _term_crit=cvTermCriteria(
CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,
100, FLT_EPSILON),
CvMat* _probs=0, CvMat* _weights=0,
CvMat* _means=0, CvMat** _covs=0 ) :
const CvMat* _probs=0, const CvMat* _weights=0,
const CvMat* _means=0, const CvMat** _covs=0 ) :
nclusters(_nclusters), cov_mat_type(_cov_mat_type),
start_step(_start_step),
probs(_probs), weights(_weights), means(_means), covs(_covs),
@@ -149,21 +149,21 @@ EM model ::
enum { START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0 };
CvEM();
CvEM( const CvMat* samples, const CvMat* sample_idx=0,
CvEMParams params=CvEMParams(), CvMat* labels=0 );
CvEM( const Mat& samples, const Mat& sample_idx=Mat(),
CvEMParams params=CvEMParams(), Mat* labels=0 );
virtual ~CvEM();
virtual bool train( const CvMat* samples, const CvMat* sample_idx=0,
CvEMParams params=CvEMParams(), CvMat* labels=0 );
virtual bool train( const Mat& samples, const Mat& sample_idx=Mat(),
CvEMParams params=CvEMParams(), Mat* labels=0 );
virtual float predict( const CvMat* sample, CvMat* probs ) const;
virtual float predict( const Mat& sample, Mat& probs ) const;
virtual void clear();
int get_nclusters() const { return params.nclusters; }
const CvMat* get_means() const { return means; }
const CvMat** get_covs() const { return covs; }
const CvMat* get_weights() const { return weights; }
const CvMat* get_probs() const { return probs; }
const Mat& get_means() const { return means; }
const Mat* get_covs() const { return covs; }
const Mat& get_weights() const { return weights; }
const Mat& get_probs() const { return probs; }
protected:
@@ -173,19 +173,19 @@ EM model ::
virtual double run_em( const CvVectors& train_data );
virtual void init_auto( const CvVectors& samples );
virtual void kmeans( const CvVectors& train_data, int nclusters,
CvMat* labels, CvTermCriteria criteria,
const CvMat* means );
Mat& labels, CvTermCriteria criteria,
const Mat& means );
CvEMParams params;
double log_likelihood;
CvMat* means;
CvMat** covs;
CvMat* weights;
CvMat* probs;
Mat& means;
Mat* covs;
Mat& weights;
Mat& probs;
CvMat* log_weight_div_det;
CvMat* inv_eigen_values;
CvMat** cov_rotate_mats;
Mat& log_weight_div_det;
Mat& inv_eigen_values;
Mat* cov_rotate_mats;
};
@@ -195,7 +195,7 @@ EM model ::
CvEM::train
-----------
.. cpp:function:: void CvEM::train( const CvMat* samples, const CvMat* sample_idx=0, CvEMParams params=CvEMParams(), CvMat* labels=0 )
.. cpp:function:: bool CvEM::train( const Mat& samples, const Mat& sample_idx=Mat(), CvEMParams params=CvEMParams(), Mat* labels=0 )
Estimates the Gaussian mixture parameters from a sample set.
@@ -210,110 +210,7 @@ Unlike many of the ML models, EM is an unsupervised learning algorithm and it do
The trained model can be used further for prediction, just like any other classifier. The trained model is similar to the
:ref:`Bayes classifier`.
Example: Clustering random samples of multi-Gaussian distribution using EM ::
#include "ml.h"
#include "highgui.h"
int main( int argc, char** argv )
{
const int N = 4;
const int N1 = (int)sqrt((double)N);
const CvScalar colors[] = {{{0,0,255}},{{0,255,0}},
{{0,255,255}},{{255,255,0}}};
int i, j;
int nsamples = 100;
CvRNG rng_state = cvRNG(-1);
CvMat* samples = cvCreateMat( nsamples, 2, CV_32FC1 );
CvMat* labels = cvCreateMat( nsamples, 1, CV_32SC1 );
IplImage* img = cvCreateImage( cvSize( 500, 500 ), 8, 3 );
float _sample[2];
CvMat sample = cvMat( 1, 2, CV_32FC1, _sample );
CvEM em_model;
CvEMParams params;
CvMat samples_part;
cvReshape( samples, samples, 2, 0 );
for( i = 0; i < N; i++ )
{
CvScalar mean, sigma;
// form the training samples
cvGetRows( samples, &samples_part, i*nsamples/N,
(i+1)*nsamples/N );
mean = cvScalar(((i%N1)+1.)*img->width/(N1+1),
((i/N1)+1.)*img->height/(N1+1));
sigma = cvScalar(30,30);
cvRandArr( &rng_state, &samples_part, CV_RAND_NORMAL,
mean, sigma );
}
cvReshape( samples, samples, 1, 0 );
// initialize model parameters
params.covs = NULL;
params.means = NULL;
params.weights = NULL;
params.probs = NULL;
params.nclusters = N;
params.cov_mat_type = CvEM::COV_MAT_SPHERICAL;
params.start_step = CvEM::START_AUTO_STEP;
params.term_crit.max_iter = 10;
params.term_crit.epsilon = 0.1;
params.term_crit.type = CV_TERMCRIT_ITER|CV_TERMCRIT_EPS;
// cluster the data
em_model.train( samples, 0, params, labels );
#if 0
// the piece of code shows how to repeatedly optimize the model
// with less-constrained parameters
//(COV_MAT_DIAGONAL instead of COV_MAT_SPHERICAL)
// when the output of the first stage is used as input for the second one.
CvEM em_model2;
params.cov_mat_type = CvEM::COV_MAT_DIAGONAL;
params.start_step = CvEM::START_E_STEP;
params.means = em_model.get_means();
params.covs = (const CvMat**)em_model.get_covs();
params.weights = em_model.get_weights();
em_model2.train( samples, 0, params, labels );
// to use em_model2, replace em_model.predict()
// with em_model2.predict() below
#endif
// classify every image pixel
cvZero( img );
for( i = 0; i < img->height; i++ )
{
for( j = 0; j < img->width; j++ )
{
CvPoint pt = cvPoint(j, i);
sample.data.fl[0] = (float)j;
sample.data.fl[1] = (float)i;
int response = cvRound(em_model.predict( &sample, NULL ));
CvScalar c = colors[response];
cvCircle( img, pt, 1, cvScalar(c.val[0]*0.75,
c.val[1]*0.75,c.val[2]*0.75), CV_FILLED );
}
}
//draw the clustered samples
for( i = 0; i < nsamples; i++ )
{
CvPoint pt;
pt.x = cvRound(samples->data.fl[i*2]);
pt.y = cvRound(samples->data.fl[i*2+1]);
cvCircle( img, pt, 1, colors[labels->data.i[i]], CV_FILLED );
}
cvNamedWindow( "EM-clustering result", 1 );
cvShowImage( "EM-clustering result", img );
cvWaitKey(0);
cvReleaseMat( &samples );
cvReleaseMat( &labels );
return 0;
}
For an example of clustering random samples of a multi-Gaussian distribution using EM, see the em.cpp sample in the OpenCV distribution.

View File

@@ -22,15 +22,15 @@ K-Nearest Neighbors model ::
CvKNearest();
virtual ~CvKNearest();
CvKNearest( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _sample_idx=0, bool _is_regression=false, int max_k=32 );
CvKNearest( const Mat& _train_data, const Mat& _responses,
const Mat& _sample_idx=Mat(), bool _is_regression=false, int max_k=32 );
virtual bool train( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _sample_idx=0, bool is_regression=false,
virtual bool train( const Mat& _train_data, const Mat& _responses,
const Mat& _sample_idx=Mat(), bool is_regression=false,
int _max_k=32, bool _update_base=false );
virtual float find_nearest( const CvMat* _samples, int k, CvMat* results,
const float** neighbors=0, CvMat* neighbor_responses=0, CvMat* dist=0 ) const;
virtual float find_nearest( const Mat& _samples, int k, Mat* results=0,
const float** neighbors=0, Mat* neighbor_responses=0, Mat* dist=0 ) const;
virtual void clear();
int get_max_k() const;
@@ -49,7 +49,7 @@ K-Nearest Neighbors model ::
CvKNearest::train
-----------------
.. cpp:function:: bool CvKNearest::train( const CvMat* _train_data, const CvMat* _responses, const CvMat* _sample_idx=0, bool is_regression=false, int _max_k=32, bool _update_base=false )
.. cpp:function:: bool CvKNearest::train( const Mat& _train_data, const Mat& _responses, const Mat& _sample_idx=Mat(), bool is_regression=false, int _max_k=32, bool _update_base=false )
Trains the model.
@@ -70,7 +70,7 @@ The parameter ``_update_base`` specifies whether the model is trained from scrat
CvKNearest::find_nearest
------------------------
.. cpp:function:: float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* results=0, const float** neighbors=0, CvMat* neighbor_responses=0, CvMat* dist=0 ) const
.. cpp:function:: float CvKNearest::find_nearest( const Mat& _samples, int k, Mat* results=0, const float** neighbors=0, Mat* neighbor_responses=0, Mat* dist=0 ) const
Finds the neighbors for input vectors.
@@ -85,7 +85,9 @@ For a custom classification/regression prediction, the method can optionally ret
For each input vector, the neighbors are sorted by their distances to the vector.
If only a single input vector is passed, all output matrices are optional and the predicted value is returned by the method. ::
If only a single input vector is passed, all output matrices are optional and the predicted value is returned by the method.
The sample below (currently using the obsolete ``CvMat`` structures) demonstrates the use of the k-nearest classifier for 2D point classification ::
#include "ml.h"
#include "highgui.h"

View File

@@ -142,23 +142,23 @@ MLP model ::
{
public:
CvANN_MLP();
CvANN_MLP( const CvMat* _layer_sizes,
CvANN_MLP( const Mat& _layer_sizes,
int _activ_func=SIGMOID_SYM,
double _f_param1=0, double _f_param2=0 );
virtual ~CvANN_MLP();
virtual void create( const CvMat* _layer_sizes,
virtual void create( const Mat& _layer_sizes,
int _activ_func=SIGMOID_SYM,
double _f_param1=0, double _f_param2=0 );
virtual int train( const CvMat* _inputs, const CvMat* _outputs,
const CvMat* _sample_weights,
const CvMat* _sample_idx=0,
virtual int train( const Mat& _inputs, const Mat& _outputs,
const Mat& _sample_weights,
const Mat& _sample_idx=Mat(),
CvANN_MLP_TrainParams _params = CvANN_MLP_TrainParams(),
int flags=0 );
virtual float predict( const CvMat* _inputs,
CvMat* _outputs ) const;
virtual float predict( const Mat& _inputs,
Mat& _outputs ) const;
virtual void clear();
@@ -172,12 +172,12 @@ MLP model ::
virtual void write( CvFileStorage* storage, const char* name );
int get_layer_count() { return layer_sizes ? layer_sizes->cols : 0; }
const CvMat* get_layer_sizes() { return layer_sizes; }
const Mat& get_layer_sizes() { return layer_sizes; }
protected:
virtual bool prepare_to_train( const CvMat* _inputs, const CvMat* _outputs,
const CvMat* _sample_weights, const CvMat* _sample_idx,
virtual bool prepare_to_train( const Mat& _inputs, const Mat& _outputs,
const Mat& _sample_weights, const Mat& _sample_idx,
CvANN_MLP_TrainParams _params,
CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags );
@@ -189,23 +189,23 @@ MLP model ::
virtual int train_rprop( CvVectors _ivecs, CvVectors _ovecs,
const double* _sw );
virtual void calc_activ_func( CvMat* xf, const double* bias ) const;
virtual void calc_activ_func_deriv( CvMat* xf, CvMat* deriv,
virtual void calc_activ_func( Mat& xf, const double* bias ) const;
virtual void calc_activ_func_deriv( Mat& xf, Mat& deriv,
const double* bias ) const;
virtual void set_activ_func( int _activ_func=SIGMOID_SYM,
double _f_param1=0, double _f_param2=0 );
virtual void init_weights();
virtual void scale_input( const CvMat* _src, CvMat* _dst ) const;
virtual void scale_output( const CvMat* _src, CvMat* _dst ) const;
virtual void scale_input( const Mat& _src, Mat& _dst ) const;
virtual void scale_output( const Mat& _src, Mat& _dst ) const;
virtual void calc_input_scale( const CvVectors* vecs, int flags );
virtual void calc_output_scale( const CvVectors* vecs, int flags );
virtual void write_params( CvFileStorage* fs );
virtual void read_params( CvFileStorage* fs, CvFileNode* node );
CvMat* layer_sizes;
CvMat* wbuf;
CvMat* sample_weights;
Mat& layer_sizes;
Mat& wbuf;
Mat& sample_weights;
double** weights;
double f_param1, f_param2;
double min_val, max_val, min_val1, max_val1;
@@ -225,7 +225,7 @@ Unlike many other models in ML that are constructed and trained at once, in the
CvANN_MLP::create
-----------------
.. cpp:function:: void CvANN_MLP::create( const CvMat* _layer_sizes, int _activ_func=SIGMOID_SYM, double _f_param1=0, double _f_param2=0 )
.. cpp:function:: void CvANN_MLP::create( const Mat& _layer_sizes, int _activ_func=SIGMOID_SYM, double _f_param1=0, double _f_param2=0 )
Constructs MLP with the specified topology.
@@ -243,7 +243,7 @@ The method creates an MLP network with the specified topology and assigns the sa
CvANN_MLP::train
----------------
.. cpp:function:: int CvANN_MLP::train( const CvMat* _inputs, const CvMat* _outputs, const CvMat* _sample_weights, const CvMat* _sample_idx=0, CvANN_MLP_TrainParams _params = CvANN_MLP_TrainParams(), int flags=0 )
.. cpp:function:: int CvANN_MLP::train( const Mat& _inputs, const Mat& _outputs, const Mat& _sample_weights, const Mat& _sample_idx=Mat(), CvANN_MLP_TrainParams _params = CvANN_MLP_TrainParams(), int flags=0 )
Trains/updates MLP.

View File

@@ -21,13 +21,13 @@ Bayes classifier for normally distributed data ::
CvNormalBayesClassifier();
virtual ~CvNormalBayesClassifier();
CvNormalBayesClassifier( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _var_idx=0, const CvMat* _sample_idx=0 );
CvNormalBayesClassifier( const Mat& _train_data, const Mat& _responses,
const Mat& _var_idx=Mat(), const Mat& _sample_idx=Mat() );
virtual bool train( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _var_idx = 0, const CvMat* _sample_idx=0, bool update=false );
virtual bool train( const Mat& _train_data, const Mat& _responses,
const Mat& _var_idx=Mat(), const Mat& _sample_idx=Mat(), bool update=false );
virtual float predict( const CvMat* _samples, CvMat* results=0 ) const;
virtual float predict( const Mat& _samples, Mat* results=0 ) const;
virtual void clear();
virtual void save( const char* filename, const char* name=0 );
@@ -46,7 +46,7 @@ Bayes classifier for normally distributed data ::
CvNormalBayesClassifier::train
------------------------------
.. cpp:function:: bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx =0, const CvMat* _sample_idx=0, bool update=false )
.. cpp:function:: bool CvNormalBayesClassifier::train( const Mat& _train_data, const Mat& _responses, const Mat& _var_idx =Mat(), const Mat& _sample_idx=Mat(), bool update=false )
Trains the model.
@@ -65,7 +65,7 @@ In addition, there is an ``update`` flag that identifies whether the model shoul
CvNormalBayesClassifier::predict
--------------------------------
.. cpp:function:: float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results=0 ) const
.. cpp:function:: float CvNormalBayesClassifier::predict( const Mat& samples, Mat* results=0 ) const
Predicts the response for sample(s).

View File

@@ -95,23 +95,23 @@ Random trees ::
public:
CvRTrees();
virtual ~CvRTrees();
virtual bool train( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
virtual bool train( const Mat& _train_data, int _tflag,
const Mat& _responses, const Mat& _var_idx=Mat(),
const Mat& _sample_idx=Mat(), const Mat& _var_type=Mat(),
const Mat& _missing_mask=Mat(),
CvRTParams params=CvRTParams() );
virtual float predict( const CvMat* sample, const CvMat* missing = 0 )
virtual float predict( const Mat& sample, const Mat& missing = Mat() )
const;
virtual void clear();
virtual const CvMat* get_var_importance();
virtual float get_proximity( const CvMat* sample_1, const CvMat* sample_2 )
virtual const Mat& get_var_importance();
virtual float get_proximity( const Mat& sample_1, const Mat& sample_2 )
const;
virtual void read( CvFileStorage* fs, CvFileNode* node );
virtual void write( CvFileStorage* fs, const char* name );
CvMat* get_active_var_mask();
Mat& get_active_var_mask();
CvRNG* get_rng();
int get_tree_count() const;
@@ -136,7 +136,7 @@ Random trees ::
CvRTrees::train
---------------
.. cpp:function:: bool CvRTrees::train( const CvMat* train_data, int tflag, const CvMat* responses, const CvMat* comp_idx=0, const CvMat* sample_idx=0, const CvMat* var_type=0, const CvMat* missing_mask=0, CvRTParams params=CvRTParams() )
.. cpp:function:: bool CvRTrees::train( const Mat& train_data, int tflag, const Mat& responses, const Mat& comp_idx=Mat(), const Mat& sample_idx=Mat(), const Mat& var_type=Mat(), const Mat& missing_mask=Mat(), CvRTParams params=CvRTParams() )
Trains the Random Tree model.
@@ -149,7 +149,7 @@ The method ``CvRTrees::train`` is very similar to the first form of ``CvDTree::t
CvRTrees::predict
-----------------
.. cpp:function:: double CvRTrees::predict( const CvMat* sample, const CvMat* missing=0 ) const
.. cpp:function:: float CvRTrees::predict( const Mat& sample, const Mat& missing=Mat() ) const
Predicts the output for an input sample.
@@ -161,7 +161,7 @@ The input parameters of the prediction method are the same as in ``CvDTree::pred
CvRTrees::get_var_importance
----------------------------
.. cpp:function:: const CvMat* CvRTrees::get_var_importance() const
.. cpp:function:: const Mat& CvRTrees::get_var_importance() const
Retrieves the variable importance array.
@@ -173,127 +173,10 @@ The method returns the variable importance vector, computed at the training stag
CvRTrees::get_proximity
-----------------------
.. cpp:function:: float CvRTrees::get_proximity( const CvMat* sample_1, const CvMat* sample_2 ) const
.. cpp:function:: float CvRTrees::get_proximity( const Mat& sample_1, const Mat& sample_2 ) const
Retrieves the proximity measure between two training samples.
The method returns proximity measure between any two samples, which is the ratio of those trees in the ensemble, in which the samples fall into the same leaf node, to the total number of the trees.
Example: Prediction of mushroom goodness using the random-tree classifier ::
#include <float.h>
#include <stdio.h>
#include <ctype.h>
#include "ml.h"
int main( void )
{
CvStatModel* cls = NULL;
CvFileStorage* storage = cvOpenFileStorage( "Mushroom.xml",
NULL,CV_STORAGE_READ );
CvMat* data = (CvMat*)cvReadByName(storage, NULL, "sample", 0 );
CvMat train_data, test_data;
CvMat response;
CvMat* missed = NULL;
CvMat* comp_idx = NULL;
CvMat* sample_idx = NULL;
CvMat* type_mask = NULL;
int resp_col = 0;
int i,j;
CvRTreesParams params;
CvTreeClassifierTrainParams cart_params;
const int ntrain_samples = 1000;
const int ntest_samples = 1000;
const int nvars = 23;
if(data == NULL || data->cols != nvars)
{
puts("Error in source data");
return -1;
}
cvGetSubRect( data, &train_data, cvRect(0, 0, nvars, ntrain_samples) );
cvGetSubRect( data, &test_data, cvRect(0, ntrain_samples, nvars,
ntrain_samples + ntest_samples) );
resp_col = 0;
cvGetCol( &train_data, &response, resp_col);
/* create missed variable matrix */
missed = cvCreateMat(train_data.rows, train_data.cols, CV_8UC1);
for( i = 0; i < train_data.rows; i++ )
for( j = 0; j < train_data.cols; j++ )
CV_MAT_ELEM(*missed,uchar,i,j)
= (uchar)(CV_MAT_ELEM(train_data,float,i,j) < 0);
/* create comp_idx vector */
comp_idx = cvCreateMat(1, train_data.cols-1, CV_32SC1);
for( i = 0; i < train_data.cols; i++ )
{
if(i<resp_col)CV_MAT_ELEM(*comp_idx,int,0,i) = i;
if(i>resp_col)CV_MAT_ELEM(*comp_idx,int,0,i-1) = i;
}
/* create sample_idx vector */
sample_idx = cvCreateMat(1, train_data.rows, CV_32SC1);
for( j = i = 0; i < train_data.rows; i++ )
{
if(CV_MAT_ELEM(response,float,i,0) < 0) continue;
CV_MAT_ELEM(*sample_idx,int,0,j) = i;
j++;
}
sample_idx->cols = j;
/* create type mask */
type_mask = cvCreateMat(1, train_data.cols+1, CV_8UC1);
cvSet( type_mask, cvRealScalar(CV_VAR_CATEGORICAL), 0);
// initialize training parameters
cvSetDefaultParamTreeClassifier((CvStatModelParams*)&cart_params);
cart_params.wrong_feature_as_unknown = 1;
params.tree_params = &cart_params;
params.term_crit.max_iter = 50;
params.term_crit.epsilon = 0.1;
params.term_crit.type = CV_TERMCRIT_ITER|CV_TERMCRIT_EPS;
puts("Random forest results");
cls = cvCreateRTreesClassifier( &train_data,
CV_ROW_SAMPLE,
&response,
(CvStatModelParams*)&
params,
comp_idx,
sample_idx,
type_mask,
missed );
if( cls )
{
CvMat sample = cvMat( 1, nvars, CV_32FC1, test_data.data.fl );
CvMat test_resp;
int wrong = 0, total = 0;
cvGetCol( &test_data, &test_resp, resp_col);
for( i = 0; i < ntest_samples; i++, sample.data.fl += nvars )
{
if( CV_MAT_ELEM(test_resp,float,i,0) >= 0 )
{
float resp = cls->predict( cls, &sample, NULL );
wrong += (fabs(resp-response.data.fl[i]) > 1e-3 ) ? 1 : 0;
total++;
}
}
printf( "Test set error = %.2f\n", wrong*100.f/(float)total );
}
else
puts("Error forest creation");
cvReleaseMat(&missed);
cvReleaseMat(&sample_idx);
cvReleaseMat(&comp_idx);
cvReleaseMat(&type_mask);
cvReleaseMat(&data);
cvReleaseStatModel(&cls);
cvReleaseFileStorage(&storage);
return 0;
}
For a random trees usage example, see the letter_recog.cpp sample in the OpenCV distribution.

View File

@@ -15,20 +15,20 @@ Base class for statistical models in ML ::
{
public:
/* CvStatModel(); */
/* CvStatModel( const CvMat* train_data ... ); */
/* CvStatModel( const Mat& train_data ... ); */
virtual ~CvStatModel();
virtual void clear()=0;
/* virtual bool train( const CvMat* train_data, [int tflag,] ..., const
CvMat* responses, ...,
[const CvMat* var_idx,] ..., [const CvMat* sample_idx,] ...
[const CvMat* var_type,] ..., [const CvMat* missing_mask,]
/* virtual bool train( const Mat& train_data, [int tflag,] ..., const
Mat& responses, ...,
[const Mat& var_idx,] ..., [const Mat& sample_idx,] ...
[const Mat& var_type,] ..., [const Mat& missing_mask,]
<misc_training_alg_params> ... )=0;
*/
/* virtual float predict( const CvMat* sample ... ) const=0; */
/* virtual float predict( const Mat& sample ... ) const=0; */
virtual void save( const char* filename, const char* name=0 )=0;
virtual void load( const char* filename, const char* name=0 )=0;
@@ -58,7 +58,7 @@ Each statistical model class in ML has a default constructor without parameters.
CvStatModel::CvStatModel(...)
-----------------------------
.. cpp:function:: CvStatModel::CvStatModel( const CvMat* train_data ... )
.. cpp:function:: CvStatModel::CvStatModel( const Mat& train_data ... )
Serves as a training constructor.
@@ -161,7 +161,7 @@ The previous model state is cleared by ``clear()`` .
CvStatModel::train
------------------
.. cpp:function:: bool CvStatMode::train( const CvMat* train_data, [int tflag,] ..., const CvMat* responses, ..., [const CvMat* var_idx,] ..., [const CvMat* sample_idx,] ... [const CvMat* var_type,] ..., [const CvMat* missing_mask,] <misc_training_alg_params> ... )
.. cpp:function:: bool CvStatModel::train( const Mat& train_data, [int tflag,] ..., const Mat& responses, ..., [const Mat& var_idx,] ..., [const Mat& sample_idx,] ... [const Mat& var_type,] ..., [const Mat& missing_mask,] <misc_training_alg_params> ... )
Trains the model.
@@ -193,7 +193,7 @@ Usually, the previous model state is cleared by ``clear()`` before running the t
CvStatModel::predict
--------------------
.. cpp:function:: float CvStatMode::predict( const CvMat* sample[, <prediction_params>] ) const
.. cpp:function:: float CvStatModel::predict( const Mat& sample[, <prediction_params>] ) const
Predicts the response for a sample.

View File

@@ -46,16 +46,16 @@ Support Vector Machines ::
CvSVM();
virtual ~CvSVM();
CvSVM( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _var_idx=0, const CvMat* _sample_idx=0,
CvSVM( const Mat& _train_data, const Mat& _responses,
const Mat& _var_idx=Mat(), const Mat& _sample_idx=Mat(),
CvSVMParams _params=CvSVMParams() );
virtual bool train( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _var_idx=0, const CvMat* _sample_idx=0,
virtual bool train( const Mat& _train_data, const Mat& _responses,
const Mat& _var_idx=Mat(), const Mat& _sample_idx=Mat(),
CvSVMParams _params=CvSVMParams() );
virtual bool train_auto( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _var_idx, const CvMat* _sample_idx, CvSVMParams _params,
virtual bool train_auto( const Mat& _train_data, const Mat& _responses,
const Mat& _var_idx, const Mat& _sample_idx, CvSVMParams _params,
int k_fold = 10,
CvParamGrid C_grid = get_default_grid(CvSVM::C),
CvParamGrid gamma_grid = get_default_grid(CvSVM::GAMMA),
@@ -64,7 +64,7 @@ Support Vector Machines ::
CvParamGrid coef_grid = get_default_grid(CvSVM::COEF),
CvParamGrid degree_grid = get_default_grid(CvSVM::DEGREE) );
virtual float predict( const CvMat* _sample ) const;
virtual float predict( const Mat& _sample ) const;
virtual int get_support_vector_count() const;
virtual const float* get_support_vector(int i) const;
virtual CvSVMParams get_params() const { return params; };
@@ -100,7 +100,7 @@ SVM training parameters ::
CvSVMParams( int _svm_type, int _kernel_type,
double _degree, double _gamma, double _coef0,
double _C, double _nu, double _p,
CvMat* _class_weights, CvTermCriteria _term_crit );
const CvMat* _class_weights, CvTermCriteria _term_crit );
int svm_type;
int kernel_type;
@@ -125,7 +125,7 @@ The structure must be initialized and passed to the training method of
CvSVM::train
------------
.. cpp:function:: bool CvSVM::train( const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, CvSVMParams _params=CvSVMParams() )
.. cpp:function:: bool CvSVM::train( const Mat& _train_data, const Mat& _responses, const Mat& _var_idx=Mat(), const Mat& _sample_idx=Mat(), CvSVMParams _params=CvSVMParams() )
Trains SVM.
@@ -145,7 +145,7 @@ All the other parameters are gathered in the
CvSVM::train_auto
-----------------
.. cpp:function:: bool CvSVM::train_auto( const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx, const CvMat* _sample_idx, CvSVMParams params, int k_fold = 10, CvParamGrid C_grid = get_default_grid(CvSVM::C), CvParamGrid gamma_grid = get_default_grid(CvSVM::GAMMA), CvParamGrid p_grid = get_default_grid(CvSVM::P), CvParamGrid nu_grid = get_default_grid(CvSVM::NU), CvParamGrid coef_grid = get_default_grid(CvSVM::COEF), CvParamGrid degree_grid = get_default_grid(CvSVM::DEGREE) )
.. cpp:function:: bool CvSVM::train_auto( const Mat& _train_data, const Mat& _responses, const Mat& _var_idx, const Mat& _sample_idx, CvSVMParams params, int k_fold = 10, CvParamGrid C_grid = get_default_grid(CvSVM::C), CvParamGrid gamma_grid = get_default_grid(CvSVM::GAMMA), CvParamGrid p_grid = get_default_grid(CvSVM::P), CvParamGrid nu_grid = get_default_grid(CvSVM::NU), CvParamGrid coef_grid = get_default_grid(CvSVM::COEF), CvParamGrid degree_grid = get_default_grid(CvSVM::DEGREE) )
Trains SVM with optimal parameters.

View File

@@ -849,187 +849,13 @@ void CvEM::init_auto( const CvVectors& train_data )
// Groups the samples in train_data into nclusters clusters and writes each
// sample's cluster index into labels.
// NOTE(review): this span reads like two versions of the body rendered
// together -- a hand-written iterative k-means and a shorter variant that
// delegates to cvKMeans2 (i, nsamples and dims are declared twice). Confirm
// which statements are live before changing any logic here.
void CvEM::kmeans( const CvVectors& train_data, int nclusters, CvMat* labels,
CvTermCriteria termcrit, const CvMat* centers0 )
{
CvMat* centers = 0;
CvMat* old_centers = 0;
CvMat* counters = 0;
CV_FUNCNAME( "CvEM::kmeans" );
__BEGIN__;
// constant seed: the shuffle and the random re-seeding below start from the same RNG state on every call
cv::RNG rng(0xFFFFFFFF);
int i, j, k, nsamples, dims;
int iter = 0;
double max_dist = DBL_MAX;
// presumably normalizes termcrit using 1e-6 / 100 as fallback epsilon / max_iter -- confirm against cvCheckTermCriteria
termcrit = cvCheckTermCriteria( termcrit, 1e-6, 100 );
// epsilon is squared because the center shift it is compared with below is a squared distance
termcrit.epsilon *= termcrit.epsilon;
nsamples = train_data.count;
dims = train_data.dims;
// never ask for more clusters than there are samples
nclusters = MIN( nclusters, nsamples );
CV_CALL( centers = cvCreateMat( nclusters, dims, CV_64FC1 ));
CV_CALL( old_centers = cvCreateMat( nclusters, dims, CV_64FC1 ));
CV_CALL( counters = cvCreateMat( 1, nclusters, CV_32SC1 ));
cvZero( old_centers );
// initialization: either start from the caller-supplied centers, or spread
// the labels evenly over the clusters and shuffle them
if( centers0 )
{
CV_CALL( cvConvert( centers0, centers ));
}
else
{
for( i = 0; i < nsamples; i++ )
labels->data.i[i] = i*nclusters/nsamples;
cvRandShuffle( labels, &rng.state );
}
for( ;; )
{
CvMat* temp;
// assignment step; skipped on the first pass when labels came from the shuffle above
if( iter > 0 || centers0 )
{
for( i = 0; i < nsamples; i++ )
{
const float* s = train_data.data.fl[i];
int k_best = 0;
double min_dist = DBL_MAX;
for( k = 0; k < nclusters; k++ )
{
const double* c = (double*)(centers->data.ptr + k*centers->step);
double dist = 0;
// squared Euclidean distance, processed four components at a time
for( j = 0; j <= dims - 4; j += 4 )
{
double t0 = c[j] - s[j];
double t1 = c[j+1] - s[j+1];
dist += t0*t0 + t1*t1;
t0 = c[j+2] - s[j+2];
t1 = c[j+3] - s[j+3];
dist += t0*t0 + t1*t1;
}
// remaining components when dims is not a multiple of four
for( ; j < dims; j++ )
{
double t = c[j] - s[j];
dist += t*t;
}
if( min_dist > dist )
{
min_dist = dist;
k_best = k;
}
}
labels->data.i[i] = k_best;
}
}
// iteration budget exhausted
if( ++iter > termcrit.max_iter )
break;
// keep the previous centers in old_centers; centers is cleared to accumulate new sums
CV_SWAP( centers, old_centers, temp );
cvZero( centers );
cvZero( counters );
// update centers
for( i = 0; i < nsamples; i++ )
{
const float* s = train_data.data.fl[i];
k = labels->data.i[i];
double* c = (double*)(centers->data.ptr + k*centers->step);
for( j = 0; j <= dims - 4; j += 4 )
{
double t0 = c[j] + s[j];
double t1 = c[j+1] + s[j+1];
c[j] = t0;
c[j+1] = t1;
t0 = c[j+2] + s[j+2];
t1 = c[j+3] + s[j+3];
c[j+2] = t0;
c[j+3] = t1;
}
for( ; j < dims; j++ )
c[j] += s[j];
counters->data.i[k]++;
}
// on the first pass max_dist stays at DBL_MAX, so the convergence test below cannot fire
if( iter > 1 )
max_dist = 0;
for( k = 0; k < nclusters; k++ )
{
double* c = (double*)(centers->data.ptr + k*centers->step);
if( counters->data.i[k] != 0 )
{
// turn the accumulated sum into the mean of the cluster's samples
double scale = 1./counters->data.i[k];
for( j = 0; j < dims; j++ )
c[j] *= scale;
}
else
{
// empty cluster: draw up to 10 random samples, preferring one whose cluster
// has more than one member, and use that sample as the new center (if none
// qualifies, the last sample drawn is used)
const float* s;
for( j = 0; j < 10; j++ )
{
i = rng(nsamples);
if( counters->data.i[labels->data.i[i]] > 1 )
break;
}
s = train_data.data.fl[i];
for( j = 0; j < dims; j++ )
c[j] = s[j];
}
// track the largest squared shift of any center relative to the previous pass
if( iter > 1 )
{
double dist = 0;
const double* c_o = (double*)(old_centers->data.ptr + k*old_centers->step);
for( j = 0; j < dims; j++ )
{
double t = c[j] - c_o[j];
dist += t*t;
}
if( max_dist < dist )
max_dist = dist;
}
}
// converged: no center moved by more than epsilon (compared in squared form)
if( max_dist < termcrit.epsilon )
break;
}
// recount how many samples carry each label
cvZero( counters );
// NOTE(review): second declaration of i/nsamples/dims -- see the note at the top of the function
int i, nsamples = train_data.count, dims = train_data.dims;
cv::Ptr<CvMat> temp_mat = cvCreateMat(nsamples, dims, CV_32F);
for( i = 0; i < nsamples; i++ )
counters->data.i[labels->data.i[i]]++;
// ensure that we do not have empty clusters
// (a randomly drawn sample from a cluster with more than one member is relabelled to the empty cluster)
for( k = 0; k < nclusters; k++ )
if( counters->data.i[k] == 0 )
for(;;)
{
i = rng(nsamples);
j = labels->data.i[i];
if( counters->data.i[j] > 1 )
{
labels->data.i[i] = k;
counters->data.i[j]--;
counters->data.i[k]++;
break;
}
}
__END__;
cvReleaseMat( &centers );
cvReleaseMat( &old_centers );
cvReleaseMat( &counters );
// copies the dims floats of sample i into row i of temp_mat
memcpy( temp_mat->data.ptr + temp_mat->step*i, train_data.data.fl[i], dims*sizeof(float));
// clustering is delegated to cvKMeans2; the trailing 10 is presumably its attempts count -- confirm against the cvKMeans2 signature
cvKMeans2(temp_mat, nclusters, labels, termcrit, 10);
}