a lot of small corrections to bring down the number of undocumented functions, reported by the script; added em.cpp sample

This commit is contained in:
Vadim Pisarevsky
2011-06-09 01:16:45 +00:00
parent 3b9e752be7
commit 20aca7440f
30 changed files with 474 additions and 746 deletions

View File

@@ -95,23 +95,23 @@ Random trees ::
public:
CvRTrees();
virtual ~CvRTrees();
virtual bool train( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
virtual bool train( const Mat& _train_data, int _tflag,
const Mat& _responses, const Mat& _var_idx=Mat(),
const Mat& _sample_idx=Mat(), const Mat& _var_type=Mat(),
const Mat& _missing_mask=Mat(),
CvRTParams params=CvRTParams() );
virtual float predict( const CvMat* sample, const CvMat* missing = 0 )
virtual float predict( const Mat& sample, const Mat& missing = Mat() )
const;
virtual void clear();
virtual const CvMat* get_var_importance();
virtual float get_proximity( const CvMat* sample_1, const CvMat* sample_2 )
virtual const Mat& get_var_importance();
virtual float get_proximity( const Mat& sample_1, const Mat& sample_2 )
const;
virtual void read( CvFileStorage* fs, CvFileNode* node );
virtual void write( CvFileStorage* fs, const char* name );
CvMat* get_active_var_mask();
Mat& get_active_var_mask();
CvRNG* get_rng();
int get_tree_count() const;
@@ -136,7 +136,7 @@ Random trees ::
CvRTrees::train
---------------
.. cpp:function:: bool CvRTrees::train( const CvMat* train_data, int tflag, const CvMat* responses, const CvMat* comp_idx=0, const CvMat* sample_idx=0, const CvMat* var_type=0, const CvMat* missing_mask=0, CvRTParams params=CvRTParams() )
.. cpp:function:: bool CvRTrees::train( const Mat& train_data, int tflag, const Mat& responses, const Mat& comp_idx=Mat(), const Mat& sample_idx=Mat(), const Mat& var_type=Mat(), const Mat& missing_mask=Mat(), CvRTParams params=CvRTParams() )
Trains the Random Tree model.
@@ -149,7 +149,7 @@ The method ``CvRTrees::train`` is very similar to the first form of ``CvDTree::t
CvRTrees::predict
-----------------
.. cpp:function:: double CvRTrees::predict( const CvMat* sample, const CvMat* missing=0 ) const
.. cpp:function:: float CvRTrees::predict( const Mat& sample, const Mat& missing=Mat() ) const
Predicts the output for an input sample.
@@ -161,7 +161,7 @@ The input parameters of the prediction method are the same as in ``CvDTree::pred
CvRTrees::get_var_importance
----------------------------
.. cpp:function:: const CvMat* CvRTrees::get_var_importance() const
.. cpp:function:: const Mat& CvRTrees::get_var_importance()
Retrieves the variable importance array.
@@ -173,127 +173,10 @@ The method returns the variable importance vector, computed at the training stag
CvRTrees::get_proximity
-----------------------
.. cpp:function:: float CvRTrees::get_proximity( const CvMat* sample_1, const CvMat* sample_2 ) const
.. cpp:function:: float CvRTrees::get_proximity( const Mat& sample_1, const Mat& sample_2 ) const
Retrieves the proximity measure between two training samples.
The method returns the proximity measure between any two samples. The proximity is the ratio of the number of trees in the ensemble in which both samples fall into the same leaf node to the total number of trees.
Example: Prediction of mushroom goodness using the random-tree classifier ::
#include <float.h>
#include <stdio.h>
#include <ctype.h>
#include "ml.h"
int main( void )
{
CvStatModel* cls = NULL;
CvFileStorage* storage = cvOpenFileStorage( "Mushroom.xml",
NULL,CV_STORAGE_READ );
CvMat* data = (CvMat*)cvReadByName(storage, NULL, "sample", 0 );
CvMat train_data, test_data;
CvMat response;
CvMat* missed = NULL;
CvMat* comp_idx = NULL;
CvMat* sample_idx = NULL;
CvMat* type_mask = NULL;
int resp_col = 0;
int i,j;
CvRTreesParams params;
CvTreeClassifierTrainParams cart_params;
const int ntrain_samples = 1000;
const int ntest_samples = 1000;
const int nvars = 23;
if(data == NULL || data->cols != nvars)
{
puts("Error in source data");
return -1;
}
cvGetSubRect( data, &train_data, cvRect(0, 0, nvars, ntrain_samples) );
cvGetSubRect( data, &test_data, cvRect(0, ntrain_samples, nvars,
ntrain_samples + ntest_samples) );
resp_col = 0;
cvGetCol( &train_data, &response, resp_col);
/* create missed variable matrix */
missed = cvCreateMat(train_data.rows, train_data.cols, CV_8UC1);
for( i = 0; i < train_data.rows; i++ )
for( j = 0; j < train_data.cols; j++ )
CV_MAT_ELEM(*missed,uchar,i,j)
= (uchar)(CV_MAT_ELEM(train_data,float,i,j) < 0);
/* create comp_idx vector */
comp_idx = cvCreateMat(1, train_data.cols-1, CV_32SC1);
for( i = 0; i < train_data.cols; i++ )
{
if(i<resp_col)CV_MAT_ELEM(*comp_idx,int,0,i) = i;
if(i>resp_col)CV_MAT_ELEM(*comp_idx,int,0,i-1) = i;
}
/* create sample_idx vector */
sample_idx = cvCreateMat(1, train_data.rows, CV_32SC1);
for( j = i = 0; i < train_data.rows; i++ )
{
if(CV_MAT_ELEM(response,float,i,0) < 0) continue;
CV_MAT_ELEM(*sample_idx,int,0,j) = i;
j++;
}
sample_idx->cols = j;
/* create type mask */
type_mask = cvCreateMat(1, train_data.cols+1, CV_8UC1);
cvSet( type_mask, cvRealScalar(CV_VAR_CATEGORICAL), 0);
// initialize training parameters
cvSetDefaultParamTreeClassifier((CvStatModelParams*)&cart_params);
cart_params.wrong_feature_as_unknown = 1;
params.tree_params = &cart_params;
params.term_crit.max_iter = 50;
params.term_crit.epsilon = 0.1;
params.term_crit.type = CV_TERMCRIT_ITER|CV_TERMCRIT_EPS;
puts("Random forest results");
cls = cvCreateRTreesClassifier( &train_data,
CV_ROW_SAMPLE,
&response,
(CvStatModelParams*)&
params,
comp_idx,
sample_idx,
type_mask,
missed );
if( cls )
{
CvMat sample = cvMat( 1, nvars, CV_32FC1, test_data.data.fl );
CvMat test_resp;
int wrong = 0, total = 0;
cvGetCol( &test_data, &test_resp, resp_col);
for( i = 0; i < ntest_samples; i++, sample.data.fl += nvars )
{
if( CV_MAT_ELEM(test_resp,float,i,0) >= 0 )
{
float resp = cls->predict( cls, &sample, NULL );
wrong += (fabs(resp-response.data.fl[i]) > 1e-3 ) ? 1 : 0;
total++;
}
}
printf( "Test set error =
}
else
puts("Error forest creation");
cvReleaseMat(&missed);
cvReleaseMat(&sample_idx);
cvReleaseMat(&comp_idx);
cvReleaseMat(&type_mask);
cvReleaseMat(&data);
cvReleaseStatModel(&cls);
cvReleaseFileStorage(&storage);
return 0;
}
For a random trees usage example, please see the letter_recog.cpp sample in the OpenCV distribution.