/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
//
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
// If you do not agree to this license, do not download, install,
|
|
// copy or use the software.
|
|
//
|
|
//
|
|
// Intel License Agreement
|
|
//
|
|
// Copyright (C) 2000, Intel Corporation, all rights reserved.
|
|
// Third party copyrights are property of their respective owners.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
// are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
//
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
// and/or other materials provided with the distribution.
|
|
//
|
|
// * The name of Intel Corporation may not be used to endorse or promote products
|
|
// derived from this software without specific prior written permission.
|
|
//
|
|
// This software is provided by the copyright holders and contributors "as is" and
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
//
|
|
//M*/
|
|
|
|
#ifndef __OPENCV_ML_HPP__
|
|
#define __OPENCV_ML_HPP__
|
|
|
|
#ifdef __cplusplus
|
|
# include "opencv2/core.hpp"
|
|
#endif
|
|
|
|
#include "opencv2/core/core_c.h"
|
|
#include <limits.h>
|
|
|
|
#ifdef __cplusplus
|
|
|
|
#include <map>
|
|
#include <iostream>
|
|
|
|
// Apple defines a check() macro somewhere in the debug headers
// that interferes with a method definition in this header
|
|
#undef check
|
|
|
|
/****************************************************************************************\
|
|
* Main struct definitions *
|
|
\****************************************************************************************/
|
|
|
|
/* log(2*PI) */
#define CV_LOG2PI (1.8378770664093454835606594728112)

/* columns of <trainData> matrix are training samples */
#define CV_COL_SAMPLE 0

/* rows of <trainData> matrix are training samples */
#define CV_ROW_SAMPLE 1

/* Non-zero when the CV_ROW_SAMPLE bit is set in <flags>. */
#define CV_IS_ROW_SAMPLE(flags) ((flags) & CV_ROW_SAMPLE)
|
|
|
|
struct CvVectors
|
|
{
|
|
int type;
|
|
int dims, count;
|
|
CvVectors* next;
|
|
union
|
|
{
|
|
uchar** ptr;
|
|
float** fl;
|
|
double** db;
|
|
} data;
|
|
};
|
|
|
|
#if 0
/* Everything up to the matching #endif is compiled out. */

/* A structure, representing the lattice range of statmodel parameters.
   It is used for optimizing statmodel parameters by cross-validation method.
   The lattice is logarithmic, so <step> must be greater than 1. */
typedef struct CvParamLattice
{
    double min_val;
    double max_val;
    double step;
}
CvParamLattice;

/* Builds a lattice: the two bounds are ordered so that min_val <= max_val,
   and the step is clamped from below to 1. */
CV_INLINE CvParamLattice cvParamLattice( double min_val, double max_val,
                                         double log_step )
{
    CvParamLattice pl;
    pl.min_val = MIN( min_val, max_val );
    pl.max_val = MAX( min_val, max_val );
    pl.step = MAX( log_step, 1. );
    return pl;
}

/* Returns a lattice with all three fields set to zero. */
CV_INLINE CvParamLattice cvDefaultParamLattice( void )
{
    CvParamLattice pl = {0,0,0};
    return pl;
}
#endif
|
|
|
|
/* Variable type (NUMERICAL and ORDERED share the same value) */
#define CV_VAR_NUMERICAL    0
#define CV_VAR_ORDERED      0
#define CV_VAR_CATEGORICAL  1

/* Type-name strings, one per model kind */
#define CV_TYPE_NAME_ML_SVM         "opencv-ml-svm"
#define CV_TYPE_NAME_ML_KNN         "opencv-ml-knn"
#define CV_TYPE_NAME_ML_NBAYES      "opencv-ml-bayesian"
#define CV_TYPE_NAME_ML_EM          "opencv-ml-em"
#define CV_TYPE_NAME_ML_BOOSTING    "opencv-ml-boost-tree"
#define CV_TYPE_NAME_ML_TREE        "opencv-ml-tree"
#define CV_TYPE_NAME_ML_ANN_MLP     "opencv-ml-ann-mlp"
#define CV_TYPE_NAME_ML_CNN         "opencv-ml-cnn"
#define CV_TYPE_NAME_ML_RTREES      "opencv-ml-random-trees"
#define CV_TYPE_NAME_ML_ERTREES     "opencv-ml-extremely-randomized-trees"
#define CV_TYPE_NAME_ML_GBT         "opencv-ml-gradient-boosting-trees"

/* Error kind selectors (see the `type` argument of calc_error) */
#define CV_TRAIN_ERROR  0
#define CV_TEST_ERROR   1
|
|
|
|
class CvStatModel
|
|
{
|
|
public:
|
|
CvStatModel();
|
|
virtual ~CvStatModel();
|
|
|
|
virtual void clear();
|
|
|
|
CV_WRAP virtual void save( const char* filename, const char* name=0 ) const;
|
|
CV_WRAP virtual void load( const char* filename, const char* name=0 );
|
|
|
|
virtual void write( CvFileStorage* storage, const char* name ) const;
|
|
virtual void read( CvFileStorage* storage, CvFileNode* node );
|
|
|
|
protected:
|
|
const char* default_model_name;
|
|
};
|
|
|
|
/****************************************************************************************\
|
|
* Normal Bayes Classifier *
|
|
\****************************************************************************************/
|
|
|
|
/* The structure, representing the grid range of statmodel parameters.
   It is used for optimizing statmodel accuracy by varying model parameters,
   the accuracy estimate being computed by cross-validation.
   The grid is logarithmic, so <step> must be greater than 1. */
|
|
|
|
// Forward declaration: CvMLData is only used through pointers in the
// declarations that follow (e.g. CvDTree::train, CvDTree::calc_error).
class CvMLData;
|
|
|
|
struct CvParamGrid
|
|
{
|
|
// SVM params type
|
|
enum { SVM_C=0, SVM_GAMMA=1, SVM_P=2, SVM_NU=3, SVM_COEF=4, SVM_DEGREE=5 };
|
|
|
|
CvParamGrid()
|
|
{
|
|
min_val = max_val = step = 0;
|
|
}
|
|
|
|
CvParamGrid( double min_val, double max_val, double log_step );
|
|
//CvParamGrid( int param_id );
|
|
bool check() const;
|
|
|
|
CV_PROP_RW double min_val;
|
|
CV_PROP_RW double max_val;
|
|
CV_PROP_RW double step;
|
|
};
|
|
|
|
// Stores the three arguments verbatim in min_val, max_val and step;
// no ordering or range validation is done here.
inline CvParamGrid::CvParamGrid( double _min_val, double _max_val, double _log_step )
    : min_val(_min_val), max_val(_max_val), step(_log_step)
{
}
|
|
|
|
class CvNormalBayesClassifier : public CvStatModel
|
|
{
|
|
public:
|
|
CV_WRAP CvNormalBayesClassifier();
|
|
virtual ~CvNormalBayesClassifier();
|
|
|
|
CvNormalBayesClassifier( const CvMat* trainData, const CvMat* responses,
|
|
const CvMat* varIdx=0, const CvMat* sampleIdx=0 );
|
|
|
|
virtual bool train( const CvMat* trainData, const CvMat* responses,
|
|
const CvMat* varIdx = 0, const CvMat* sampleIdx=0, bool update=false );
|
|
|
|
virtual float predict( const CvMat* samples, CV_OUT CvMat* results=0, CV_OUT CvMat* results_prob=0 ) const;
|
|
CV_WRAP virtual void clear();
|
|
|
|
CV_WRAP CvNormalBayesClassifier( const cv::Mat& trainData, const cv::Mat& responses,
|
|
const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat() );
|
|
CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
|
|
const cv::Mat& varIdx = cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
|
|
bool update=false );
|
|
CV_WRAP virtual float predict( const cv::Mat& samples, CV_OUT cv::Mat* results=0, CV_OUT cv::Mat* results_prob=0 ) const;
|
|
|
|
virtual void write( CvFileStorage* storage, const char* name ) const;
|
|
virtual void read( CvFileStorage* storage, CvFileNode* node );
|
|
|
|
protected:
|
|
int var_count, var_all;
|
|
CvMat* var_idx;
|
|
CvMat* cls_labels;
|
|
CvMat** count;
|
|
CvMat** sum;
|
|
CvMat** productsum;
|
|
CvMat** avg;
|
|
CvMat** inv_eigen_values;
|
|
CvMat** cov_rotate_mats;
|
|
CvMat* c;
|
|
};
|
|
|
|
|
|
/****************************************************************************************\
|
|
* K-Nearest Neighbour Classifier *
|
|
\****************************************************************************************/
|
|
|
|
// k Nearest Neighbors
|
|
class CvKNearest : public CvStatModel
|
|
{
|
|
public:
|
|
|
|
CV_WRAP CvKNearest();
|
|
virtual ~CvKNearest();
|
|
|
|
CvKNearest( const CvMat* trainData, const CvMat* responses,
|
|
const CvMat* sampleIdx=0, bool isRegression=false, int max_k=32 );
|
|
|
|
virtual bool train( const CvMat* trainData, const CvMat* responses,
|
|
const CvMat* sampleIdx=0, bool is_regression=false,
|
|
int maxK=32, bool updateBase=false );
|
|
|
|
virtual float find_nearest( const CvMat* samples, int k, CV_OUT CvMat* results=0,
|
|
const float** neighbors=0, CV_OUT CvMat* neighborResponses=0, CV_OUT CvMat* dist=0 ) const;
|
|
|
|
CV_WRAP CvKNearest( const cv::Mat& trainData, const cv::Mat& responses,
|
|
const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false, int max_k=32 );
|
|
|
|
CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
|
|
const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false,
|
|
int maxK=32, bool updateBase=false );
|
|
|
|
virtual float find_nearest( const cv::Mat& samples, int k, cv::Mat* results=0,
|
|
const float** neighbors=0, cv::Mat* neighborResponses=0,
|
|
cv::Mat* dist=0 ) const;
|
|
CV_WRAP virtual float find_nearest( const cv::Mat& samples, int k, CV_OUT cv::Mat& results,
|
|
CV_OUT cv::Mat& neighborResponses, CV_OUT cv::Mat& dists) const;
|
|
|
|
virtual void clear();
|
|
int get_max_k() const;
|
|
int get_var_count() const;
|
|
int get_sample_count() const;
|
|
bool is_regression() const;
|
|
|
|
virtual float write_results( int k, int k1, int start, int end,
|
|
const float* neighbor_responses, const float* dist, CvMat* _results,
|
|
CvMat* _neighbor_responses, CvMat* _dist, Cv32suf* sort_buf ) const;
|
|
|
|
virtual void find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
|
|
float* neighbor_responses, const float** neighbors, float* dist ) const;
|
|
|
|
protected:
|
|
|
|
int max_k, var_count;
|
|
int total;
|
|
bool regression;
|
|
CvVectors* samples;
|
|
};
|
|
|
|
/****************************************************************************************\
|
|
* Support Vector Machines *
|
|
\****************************************************************************************/
|
|
|
|
// SVM training parameters
|
|
struct CvSVMParams
|
|
{
|
|
CvSVMParams();
|
|
CvSVMParams( int svm_type, int kernel_type,
|
|
double degree, double gamma, double coef0,
|
|
double Cvalue, double nu, double p,
|
|
CvMat* class_weights, CvTermCriteria term_crit );
|
|
|
|
CV_PROP_RW int svm_type;
|
|
CV_PROP_RW int kernel_type;
|
|
CV_PROP_RW double degree; // for poly
|
|
CV_PROP_RW double gamma; // for poly/rbf/sigmoid/chi2
|
|
CV_PROP_RW double coef0; // for poly/sigmoid
|
|
|
|
CV_PROP_RW double C; // for CV_SVM_C_SVC, CV_SVM_EPS_SVR and CV_SVM_NU_SVR
|
|
CV_PROP_RW double nu; // for CV_SVM_NU_SVC, CV_SVM_ONE_CLASS, and CV_SVM_NU_SVR
|
|
CV_PROP_RW double p; // for CV_SVM_EPS_SVR
|
|
CvMat* class_weights; // for CV_SVM_C_SVC
|
|
CV_PROP_RW CvTermCriteria term_crit; // termination criteria
|
|
};
|
|
|
|
|
|
struct CvSVMKernel
|
|
{
|
|
typedef void (CvSVMKernel::*Calc)( int vec_count, int vec_size, const float** vecs,
|
|
const float* another, float* results );
|
|
CvSVMKernel();
|
|
CvSVMKernel( const CvSVMParams* params, Calc _calc_func );
|
|
virtual bool create( const CvSVMParams* params, Calc _calc_func );
|
|
virtual ~CvSVMKernel();
|
|
|
|
virtual void clear();
|
|
virtual void calc( int vcount, int n, const float** vecs, const float* another, float* results );
|
|
|
|
const CvSVMParams* params;
|
|
Calc calc_func;
|
|
|
|
virtual void calc_non_rbf_base( int vec_count, int vec_size, const float** vecs,
|
|
const float* another, float* results,
|
|
double alpha, double beta );
|
|
virtual void calc_intersec( int vcount, int var_count, const float** vecs,
|
|
const float* another, float* results );
|
|
virtual void calc_chi2( int vec_count, int vec_size, const float** vecs,
|
|
const float* another, float* results );
|
|
virtual void calc_linear( int vec_count, int vec_size, const float** vecs,
|
|
const float* another, float* results );
|
|
virtual void calc_rbf( int vec_count, int vec_size, const float** vecs,
|
|
const float* another, float* results );
|
|
virtual void calc_poly( int vec_count, int vec_size, const float** vecs,
|
|
const float* another, float* results );
|
|
virtual void calc_sigmoid( int vec_count, int vec_size, const float** vecs,
|
|
const float* another, float* results );
|
|
};
|
|
|
|
|
|
/* Node with `prev`/`next` links and a float data pointer. CvSVMSolver keeps
   one such node by value (`lru_list`) and a pointer to others (`rows`). */
struct CvSVMKernelRow
{
    CvSVMKernelRow* prev;
    CvSVMKernelRow* next;
    float* data;
};
|
|
|
|
|
|
/* Plain result record taken by reference by the CvSVMSolver::solve_* methods. */
struct CvSVMSolutionInfo
{
    double obj;
    double rho;
    double upper_bound_p;
    double upper_bound_n;
    double r;   // for Solver_NU
};
|
|
|
|
class CvSVMSolver
|
|
{
|
|
public:
|
|
typedef bool (CvSVMSolver::*SelectWorkingSet)( int& i, int& j );
|
|
typedef float* (CvSVMSolver::*GetRow)( int i, float* row, float* dst, bool existed );
|
|
typedef void (CvSVMSolver::*CalcRho)( double& rho, double& r );
|
|
|
|
CvSVMSolver();
|
|
|
|
CvSVMSolver( int count, int var_count, const float** samples, schar* y,
|
|
int alpha_count, double* alpha, double Cp, double Cn,
|
|
CvMemStorage* storage, CvSVMKernel* kernel, GetRow get_row,
|
|
SelectWorkingSet select_working_set, CalcRho calc_rho );
|
|
virtual bool create( int count, int var_count, const float** samples, schar* y,
|
|
int alpha_count, double* alpha, double Cp, double Cn,
|
|
CvMemStorage* storage, CvSVMKernel* kernel, GetRow get_row,
|
|
SelectWorkingSet select_working_set, CalcRho calc_rho );
|
|
virtual ~CvSVMSolver();
|
|
|
|
virtual void clear();
|
|
virtual bool solve_generic( CvSVMSolutionInfo& si );
|
|
|
|
virtual bool solve_c_svc( int count, int var_count, const float** samples, schar* y,
|
|
double Cp, double Cn, CvMemStorage* storage,
|
|
CvSVMKernel* kernel, double* alpha, CvSVMSolutionInfo& si );
|
|
virtual bool solve_nu_svc( int count, int var_count, const float** samples, schar* y,
|
|
CvMemStorage* storage, CvSVMKernel* kernel,
|
|
double* alpha, CvSVMSolutionInfo& si );
|
|
virtual bool solve_one_class( int count, int var_count, const float** samples,
|
|
CvMemStorage* storage, CvSVMKernel* kernel,
|
|
double* alpha, CvSVMSolutionInfo& si );
|
|
|
|
virtual bool solve_eps_svr( int count, int var_count, const float** samples, const float* y,
|
|
CvMemStorage* storage, CvSVMKernel* kernel,
|
|
double* alpha, CvSVMSolutionInfo& si );
|
|
|
|
virtual bool solve_nu_svr( int count, int var_count, const float** samples, const float* y,
|
|
CvMemStorage* storage, CvSVMKernel* kernel,
|
|
double* alpha, CvSVMSolutionInfo& si );
|
|
|
|
virtual float* get_row_base( int i, bool* _existed );
|
|
virtual float* get_row( int i, float* dst );
|
|
|
|
int sample_count;
|
|
int var_count;
|
|
int cache_size;
|
|
int cache_line_size;
|
|
const float** samples;
|
|
const CvSVMParams* params;
|
|
CvMemStorage* storage;
|
|
CvSVMKernelRow lru_list;
|
|
CvSVMKernelRow* rows;
|
|
|
|
int alpha_count;
|
|
|
|
double* G;
|
|
double* alpha;
|
|
|
|
// -1 - lower bound, 0 - free, 1 - upper bound
|
|
schar* alpha_status;
|
|
|
|
schar* y;
|
|
double* b;
|
|
float* buf[2];
|
|
double eps;
|
|
int max_iter;
|
|
double C[2]; // C[0] == Cn, C[1] == Cp
|
|
CvSVMKernel* kernel;
|
|
|
|
SelectWorkingSet select_working_set_func;
|
|
CalcRho calc_rho_func;
|
|
GetRow get_row_func;
|
|
|
|
virtual bool select_working_set( int& i, int& j );
|
|
virtual bool select_working_set_nu_svm( int& i, int& j );
|
|
virtual void calc_rho( double& rho, double& r );
|
|
virtual void calc_rho_nu_svm( double& rho, double& r );
|
|
|
|
virtual float* get_row_svc( int i, float* row, float* dst, bool existed );
|
|
virtual float* get_row_one_class( int i, float* row, float* dst, bool existed );
|
|
virtual float* get_row_svr( int i, float* row, float* dst, bool existed );
|
|
};
|
|
|
|
|
|
/* Plain record describing one SVM decision function; CvSVM exposes a pointer
   to it through get_decision_function().
   NOTE(review): presumably `alpha` and `sv_index` each hold `sv_count`
   entries -- confirm against the code that allocates them. */
struct CvSVMDecisionFunc
{
    double rho;
    int sv_count;
    double* alpha;
    int* sv_index;
};
|
|
|
|
|
|
// SVM model
|
|
class CvSVM : public CvStatModel
|
|
{
|
|
public:
|
|
// SVM type
|
|
enum { C_SVC=100, NU_SVC=101, ONE_CLASS=102, EPS_SVR=103, NU_SVR=104 };
|
|
|
|
// SVM kernel type
|
|
enum { LINEAR=0, POLY=1, RBF=2, SIGMOID=3, CHI2=4, INTER=5 };
|
|
|
|
// SVM params type
|
|
enum { C=0, GAMMA=1, P=2, NU=3, COEF=4, DEGREE=5 };
|
|
|
|
CV_WRAP CvSVM();
|
|
virtual ~CvSVM();
|
|
|
|
CvSVM( const CvMat* trainData, const CvMat* responses,
|
|
const CvMat* varIdx=0, const CvMat* sampleIdx=0,
|
|
CvSVMParams params=CvSVMParams() );
|
|
|
|
virtual bool train( const CvMat* trainData, const CvMat* responses,
|
|
const CvMat* varIdx=0, const CvMat* sampleIdx=0,
|
|
CvSVMParams params=CvSVMParams() );
|
|
|
|
virtual bool train_auto( const CvMat* trainData, const CvMat* responses,
|
|
const CvMat* varIdx, const CvMat* sampleIdx, CvSVMParams params,
|
|
int kfold = 10,
|
|
CvParamGrid Cgrid = get_default_grid(CvSVM::C),
|
|
CvParamGrid gammaGrid = get_default_grid(CvSVM::GAMMA),
|
|
CvParamGrid pGrid = get_default_grid(CvSVM::P),
|
|
CvParamGrid nuGrid = get_default_grid(CvSVM::NU),
|
|
CvParamGrid coeffGrid = get_default_grid(CvSVM::COEF),
|
|
CvParamGrid degreeGrid = get_default_grid(CvSVM::DEGREE),
|
|
bool balanced=false );
|
|
|
|
virtual float predict( const CvMat* sample, bool returnDFVal=false ) const;
|
|
virtual float predict( const CvMat* samples, CV_OUT CvMat* results, bool returnDFVal=false ) const;
|
|
|
|
CV_WRAP CvSVM( const cv::Mat& trainData, const cv::Mat& responses,
|
|
const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
|
|
CvSVMParams params=CvSVMParams() );
|
|
|
|
CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
|
|
const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
|
|
CvSVMParams params=CvSVMParams() );
|
|
|
|
CV_WRAP virtual bool train_auto( const cv::Mat& trainData, const cv::Mat& responses,
|
|
const cv::Mat& varIdx, const cv::Mat& sampleIdx, CvSVMParams params,
|
|
int k_fold = 10,
|
|
CvParamGrid Cgrid = CvSVM::get_default_grid(CvSVM::C),
|
|
CvParamGrid gammaGrid = CvSVM::get_default_grid(CvSVM::GAMMA),
|
|
CvParamGrid pGrid = CvSVM::get_default_grid(CvSVM::P),
|
|
CvParamGrid nuGrid = CvSVM::get_default_grid(CvSVM::NU),
|
|
CvParamGrid coeffGrid = CvSVM::get_default_grid(CvSVM::COEF),
|
|
CvParamGrid degreeGrid = CvSVM::get_default_grid(CvSVM::DEGREE),
|
|
bool balanced=false);
|
|
CV_WRAP virtual float predict( const cv::Mat& sample, bool returnDFVal=false ) const;
|
|
CV_WRAP_AS(predict_all) virtual void predict( cv::InputArray samples, cv::OutputArray results ) const;
|
|
|
|
CV_WRAP virtual int get_support_vector_count() const;
|
|
virtual const float* get_support_vector(int i) const;
|
|
virtual CvSVMParams get_params() const { return params; }
|
|
CV_WRAP virtual void clear();
|
|
|
|
virtual const CvSVMDecisionFunc* get_decision_function() const { return decision_func; }
|
|
|
|
static CvParamGrid get_default_grid( int param_id );
|
|
|
|
virtual void write( CvFileStorage* storage, const char* name ) const;
|
|
virtual void read( CvFileStorage* storage, CvFileNode* node );
|
|
CV_WRAP int get_var_count() const { return var_idx ? var_idx->cols : var_all; }
|
|
|
|
protected:
|
|
|
|
virtual bool set_params( const CvSVMParams& params );
|
|
virtual bool train1( int sample_count, int var_count, const float** samples,
|
|
const void* responses, double Cp, double Cn,
|
|
CvMemStorage* _storage, double* alpha, double& rho );
|
|
virtual bool do_train( int svm_type, int sample_count, int var_count, const float** samples,
|
|
const CvMat* responses, CvMemStorage* _storage, double* alpha );
|
|
virtual void create_kernel();
|
|
virtual void create_solver();
|
|
|
|
virtual float predict( const float* row_sample, int row_len, bool returnDFVal=false ) const;
|
|
|
|
virtual void write_params( CvFileStorage* fs ) const;
|
|
virtual void read_params( CvFileStorage* fs, CvFileNode* node );
|
|
|
|
void optimize_linear_svm();
|
|
|
|
CvSVMParams params;
|
|
CvMat* class_labels;
|
|
int var_all;
|
|
float** sv;
|
|
int sv_total;
|
|
CvMat* var_idx;
|
|
CvMat* class_weights;
|
|
CvSVMDecisionFunc* decision_func;
|
|
CvMemStorage* storage;
|
|
|
|
CvSVMSolver* solver;
|
|
CvSVMKernel* kernel;
|
|
|
|
private:
|
|
CvSVM(const CvSVM&);
|
|
CvSVM& operator = (const CvSVM&);
|
|
};
|
|
|
|
/****************************************************************************************\
|
|
* Expectation - Maximization *
|
|
\****************************************************************************************/
|
|
namespace cv
|
|
{
|
|
class EM : public Algorithm
|
|
{
|
|
public:
|
|
// Type of covariation matrices
|
|
enum {COV_MAT_SPHERICAL=0, COV_MAT_DIAGONAL=1, COV_MAT_GENERIC=2, COV_MAT_DEFAULT=COV_MAT_DIAGONAL};
|
|
|
|
// Default parameters
|
|
enum {DEFAULT_NCLUSTERS=5, DEFAULT_MAX_ITERS=100};
|
|
|
|
// The initial step
|
|
enum {START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0};
|
|
|
|
CV_WRAP EM(int nclusters=EM::DEFAULT_NCLUSTERS, int covMatType=EM::COV_MAT_DIAGONAL,
|
|
const TermCriteria& termCrit=TermCriteria(TermCriteria::COUNT+TermCriteria::EPS,
|
|
EM::DEFAULT_MAX_ITERS, FLT_EPSILON));
|
|
|
|
virtual ~EM();
|
|
CV_WRAP virtual void clear();
|
|
|
|
CV_WRAP virtual bool train(InputArray samples,
|
|
OutputArray logLikelihoods=noArray(),
|
|
OutputArray labels=noArray(),
|
|
OutputArray probs=noArray());
|
|
|
|
CV_WRAP virtual bool trainE(InputArray samples,
|
|
InputArray means0,
|
|
InputArray covs0=noArray(),
|
|
InputArray weights0=noArray(),
|
|
OutputArray logLikelihoods=noArray(),
|
|
OutputArray labels=noArray(),
|
|
OutputArray probs=noArray());
|
|
|
|
CV_WRAP virtual bool trainM(InputArray samples,
|
|
InputArray probs0,
|
|
OutputArray logLikelihoods=noArray(),
|
|
OutputArray labels=noArray(),
|
|
OutputArray probs=noArray());
|
|
|
|
CV_WRAP Vec2d predict(InputArray sample,
|
|
OutputArray probs=noArray()) const;
|
|
|
|
CV_WRAP bool isTrained() const;
|
|
|
|
AlgorithmInfo* info() const;
|
|
virtual void read(const FileNode& fn);
|
|
|
|
protected:
|
|
|
|
virtual void setTrainData(int startStep, const Mat& samples,
|
|
const Mat* probs0,
|
|
const Mat* means0,
|
|
const std::vector<Mat>* covs0,
|
|
const Mat* weights0);
|
|
|
|
bool doTrain(int startStep,
|
|
OutputArray logLikelihoods,
|
|
OutputArray labels,
|
|
OutputArray probs);
|
|
virtual void eStep();
|
|
virtual void mStep();
|
|
|
|
void clusterTrainSamples();
|
|
void decomposeCovs();
|
|
void computeLogWeightDivDet();
|
|
|
|
Vec2d computeProbabilities(const Mat& sample, Mat* probs) const;
|
|
|
|
// all inner matrices have type CV_64FC1
|
|
CV_PROP_RW int nclusters;
|
|
CV_PROP_RW int covMatType;
|
|
CV_PROP_RW int maxIters;
|
|
CV_PROP_RW double epsilon;
|
|
|
|
Mat trainSamples;
|
|
Mat trainProbs;
|
|
Mat trainLogLikelihoods;
|
|
Mat trainLabels;
|
|
|
|
CV_PROP Mat weights;
|
|
CV_PROP Mat means;
|
|
CV_PROP std::vector<Mat> covs;
|
|
|
|
std::vector<Mat> covsEigenValues;
|
|
std::vector<Mat> covsRotateMats;
|
|
std::vector<Mat> invCovsEigenValues;
|
|
Mat logWeightDivDet;
|
|
};
|
|
} // namespace cv
|
|
|
|
/****************************************************************************************\
|
|
* Decision Tree *
|
|
\****************************************************************************************/
|
|
/* Pair of pointers: one to unsigned 16-bit data, one to int data. */
struct CvPair16u32s
{
    unsigned short* u;
    int* i;
};
|
|
|
|
|
|
/* Treats <subset> as a bit set stored in 32-bit words and tests bit <idx>:
   evaluates to -1 when the bit is set and to +1 when it is clear.
   Both arguments are parenthesized so that expression arguments
   (e.g. `base + 1`) expand correctly. */
#define CV_DTREE_CAT_DIR(idx,subset) \
    (2*(((subset)[(idx)>>5]&(1 << ((idx) & 31)))==0)-1)
|
|
|
|
/* One split record with a `next` link to another split. The anonymous union
   overlays two representations: a two-word `subset` array and the `ord`
   pair (threshold `c` plus `split_point`).
   NOTE(review): presumably `subset` is used for categorical variables (see
   CV_DTREE_CAT_DIR) and `ord` for ordered ones -- confirm with the
   implementation. */
struct CvDTreeSplit
{
    int var_idx;
    int condensed_idx;
    int inversed;
    float quality;
    CvDTreeSplit* next;
    union
    {
        int subset[2];
        struct
        {
            float c;
            int split_point;
        }
        ord;
    };
};
|
|
|
|
struct CvDTreeNode
|
|
{
|
|
int class_idx;
|
|
int Tn;
|
|
double value;
|
|
|
|
CvDTreeNode* parent;
|
|
CvDTreeNode* left;
|
|
CvDTreeNode* right;
|
|
|
|
CvDTreeSplit* split;
|
|
|
|
int sample_count;
|
|
int depth;
|
|
int* num_valid;
|
|
int offset;
|
|
int buf_idx;
|
|
double maxlr;
|
|
|
|
// global pruning data
|
|
int complexity;
|
|
double alpha;
|
|
double node_risk, tree_risk, tree_error;
|
|
|
|
// cross-validation pruning data
|
|
int* cv_Tn;
|
|
double* cv_node_risk;
|
|
double* cv_node_error;
|
|
|
|
int get_num_valid(int vi) { return num_valid ? num_valid[vi] : sample_count; }
|
|
void set_num_valid(int vi, int n) { if( num_valid ) num_valid[vi] = n; }
|
|
};
|
|
|
|
|
|
struct CvDTreeParams
|
|
{
|
|
CV_PROP_RW int max_categories;
|
|
CV_PROP_RW int max_depth;
|
|
CV_PROP_RW int min_sample_count;
|
|
CV_PROP_RW int cv_folds;
|
|
CV_PROP_RW bool use_surrogates;
|
|
CV_PROP_RW bool use_1se_rule;
|
|
CV_PROP_RW bool truncate_pruned_tree;
|
|
CV_PROP_RW float regression_accuracy;
|
|
const float* priors;
|
|
|
|
CvDTreeParams();
|
|
CvDTreeParams( int max_depth, int min_sample_count,
|
|
float regression_accuracy, bool use_surrogates,
|
|
int max_categories, int cv_folds,
|
|
bool use_1se_rule, bool truncate_pruned_tree,
|
|
const float* priors );
|
|
};
|
|
|
|
|
|
struct CvDTreeTrainData
|
|
{
|
|
CvDTreeTrainData();
|
|
CvDTreeTrainData( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
const CvDTreeParams& params=CvDTreeParams(),
|
|
bool _shared=false, bool _add_labels=false );
|
|
virtual ~CvDTreeTrainData();
|
|
|
|
virtual void set_data( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
const CvDTreeParams& params=CvDTreeParams(),
|
|
bool _shared=false, bool _add_labels=false,
|
|
bool _update_data=false );
|
|
virtual void do_responses_copy();
|
|
|
|
virtual void get_vectors( const CvMat* _subsample_idx,
|
|
float* values, uchar* missing, float* responses, bool get_class_idx=false );
|
|
|
|
virtual CvDTreeNode* subsample_data( const CvMat* _subsample_idx );
|
|
|
|
virtual void write_params( CvFileStorage* fs ) const;
|
|
virtual void read_params( CvFileStorage* fs, CvFileNode* node );
|
|
|
|
// release all the data
|
|
virtual void clear();
|
|
|
|
int get_num_classes() const;
|
|
int get_var_type(int vi) const;
|
|
int get_work_var_count() const {return work_var_count;}
|
|
|
|
virtual const float* get_ord_responses( CvDTreeNode* n, float* values_buf, int* sample_indices_buf );
|
|
virtual const int* get_class_labels( CvDTreeNode* n, int* labels_buf );
|
|
virtual const int* get_cv_labels( CvDTreeNode* n, int* labels_buf );
|
|
virtual const int* get_sample_indices( CvDTreeNode* n, int* indices_buf );
|
|
virtual const int* get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf );
|
|
virtual void get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* sorted_indices_buf,
|
|
const float** ord_values, const int** sorted_indices, int* sample_indices_buf );
|
|
virtual int get_child_buf_idx( CvDTreeNode* n );
|
|
|
|
////////////////////////////////////
|
|
|
|
virtual bool set_params( const CvDTreeParams& params );
|
|
virtual CvDTreeNode* new_node( CvDTreeNode* parent, int count,
|
|
int storage_idx, int offset );
|
|
|
|
virtual CvDTreeSplit* new_split_ord( int vi, float cmp_val,
|
|
int split_point, int inversed, float quality );
|
|
virtual CvDTreeSplit* new_split_cat( int vi, float quality );
|
|
virtual void free_node_data( CvDTreeNode* node );
|
|
virtual void free_train_data();
|
|
virtual void free_node( CvDTreeNode* node );
|
|
|
|
int sample_count, var_all, var_count, max_c_count;
|
|
int ord_var_count, cat_var_count, work_var_count;
|
|
bool have_labels, have_priors;
|
|
bool is_classifier;
|
|
int tflag;
|
|
|
|
const CvMat* train_data;
|
|
const CvMat* responses;
|
|
CvMat* responses_copy; // used in Boosting
|
|
|
|
int buf_count, buf_size; // buf_size is obsolete, please do not use it, use expression ((int64)buf->rows * (int64)buf->cols / buf_count) instead
|
|
bool shared;
|
|
int is_buf_16u;
|
|
|
|
CvMat* cat_count;
|
|
CvMat* cat_ofs;
|
|
CvMat* cat_map;
|
|
|
|
CvMat* counts;
|
|
CvMat* buf;
|
|
inline size_t get_length_subbuf() const
|
|
{
|
|
size_t res = (size_t)(work_var_count + 1) * (size_t)sample_count;
|
|
return res;
|
|
}
|
|
|
|
CvMat* direction;
|
|
CvMat* split_buf;
|
|
|
|
CvMat* var_idx;
|
|
CvMat* var_type; // i-th element =
|
|
// k<0 - ordered
|
|
// k>=0 - categorical, see k-th element of cat_* arrays
|
|
CvMat* priors;
|
|
CvMat* priors_mult;
|
|
|
|
CvDTreeParams params;
|
|
|
|
CvMemStorage* tree_storage;
|
|
CvMemStorage* temp_storage;
|
|
|
|
CvDTreeNode* data_root;
|
|
|
|
CvSet* node_heap;
|
|
CvSet* split_heap;
|
|
CvSet* cv_heap;
|
|
CvSet* nv_heap;
|
|
|
|
cv::RNG* rng;
|
|
};
|
|
|
|
// Forward declarations needed before the class definitions that follow.
class CvDTree;
class CvForestTree;

namespace cv
{
    // CvDTree names DTreeBestSplitFinder as a friend.
    struct DTreeBestSplitFinder;
    struct ForestTreeBestSplitFinder;
}
|
|
|
|
class CvDTree : public CvStatModel
|
|
{
|
|
public:
|
|
CV_WRAP CvDTree();
|
|
virtual ~CvDTree();
|
|
|
|
virtual bool train( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvDTreeParams params=CvDTreeParams() );
|
|
|
|
virtual bool train( CvMLData* trainData, CvDTreeParams params=CvDTreeParams() );
|
|
|
|
// type in {CV_TRAIN_ERROR, CV_TEST_ERROR}
|
|
virtual float calc_error( CvMLData* trainData, int type, std::vector<float> *resp = 0 );
|
|
|
|
virtual bool train( CvDTreeTrainData* trainData, const CvMat* subsampleIdx );
|
|
|
|
virtual CvDTreeNode* predict( const CvMat* sample, const CvMat* missingDataMask=0,
|
|
bool preprocessedInput=false ) const;
|
|
|
|
CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
|
|
const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
|
|
const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
|
|
const cv::Mat& missingDataMask=cv::Mat(),
|
|
CvDTreeParams params=CvDTreeParams() );
|
|
|
|
CV_WRAP virtual CvDTreeNode* predict( const cv::Mat& sample, const cv::Mat& missingDataMask=cv::Mat(),
|
|
bool preprocessedInput=false ) const;
|
|
CV_WRAP virtual cv::Mat getVarImportance();
|
|
|
|
virtual const CvMat* get_var_importance();
|
|
CV_WRAP virtual void clear();
|
|
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node );
|
|
virtual void write( CvFileStorage* fs, const char* name ) const;
|
|
|
|
// special read & write methods for trees in the tree ensembles
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node,
|
|
CvDTreeTrainData* data );
|
|
virtual void write( CvFileStorage* fs ) const;
|
|
|
|
const CvDTreeNode* get_root() const;
|
|
int get_pruned_tree_idx() const;
|
|
CvDTreeTrainData* get_data();
|
|
|
|
protected:
|
|
friend struct cv::DTreeBestSplitFinder;
|
|
|
|
virtual bool do_train( const CvMat* _subsample_idx );
|
|
|
|
virtual void try_split_node( CvDTreeNode* n );
|
|
virtual void split_node_data( CvDTreeNode* n );
|
|
virtual CvDTreeSplit* find_best_split( CvDTreeNode* n );
|
|
virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
|
|
virtual double calc_node_dir( CvDTreeNode* node );
|
|
virtual void complete_node_dir( CvDTreeNode* node );
|
|
virtual void cluster_categories( const int* vectors, int vector_count,
|
|
int var_count, int* sums, int k, int* cluster_labels );
|
|
|
|
virtual void calc_node_value( CvDTreeNode* node );
|
|
|
|
virtual void prune_cv();
|
|
virtual double update_tree_rnc( int T, int fold );
|
|
virtual int cut_tree( int T, int fold, double min_alpha );
|
|
virtual void free_prune_data(bool cut_tree);
|
|
virtual void free_tree();
|
|
|
|
virtual void write_node( CvFileStorage* fs, CvDTreeNode* node ) const;
|
|
virtual void write_split( CvFileStorage* fs, CvDTreeSplit* split ) const;
|
|
virtual CvDTreeNode* read_node( CvFileStorage* fs, CvFileNode* node, CvDTreeNode* parent );
|
|
virtual CvDTreeSplit* read_split( CvFileStorage* fs, CvFileNode* node );
|
|
virtual void write_tree_nodes( CvFileStorage* fs ) const;
|
|
virtual void read_tree_nodes( CvFileStorage* fs, CvFileNode* node );
|
|
|
|
CvDTreeNode* root;
|
|
CvMat* var_importance;
|
|
CvDTreeTrainData* data;
|
|
CvMat train_data_hdr, responses_hdr;
|
|
cv::Mat train_data_mat, responses_mat;
|
|
|
|
public:
|
|
int pruned_tree_idx;
|
|
};
|
|
|
|
|
|
/****************************************************************************************\
|
|
* Random Trees Classifier *
|
|
\****************************************************************************************/
|
|
|
|
// Forward declaration: CvForestTree (declared next) refers to CvRTrees only through pointers.
class CvRTrees;
|
|
|
|
class CvForestTree: public CvDTree
|
|
{
|
|
public:
|
|
CvForestTree();
|
|
virtual ~CvForestTree();
|
|
|
|
virtual bool train( CvDTreeTrainData* trainData, const CvMat* _subsample_idx, CvRTrees* forest );
|
|
|
|
virtual int get_var_count() const {return data ? data->var_count : 0;}
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node, CvRTrees* forest, CvDTreeTrainData* _data );
|
|
|
|
/* dummy methods to avoid warnings: BEGIN */
|
|
virtual bool train( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvDTreeParams params=CvDTreeParams() );
|
|
|
|
virtual bool train( CvDTreeTrainData* trainData, const CvMat* _subsample_idx );
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node );
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node,
|
|
CvDTreeTrainData* data );
|
|
/* dummy methods to avoid warnings: END */
|
|
|
|
protected:
|
|
friend struct cv::ForestTreeBestSplitFinder;
|
|
|
|
virtual CvDTreeSplit* find_best_split( CvDTreeNode* n );
|
|
CvRTrees* forest;
|
|
};
|
|
|
|
|
|
struct CvRTParams : public CvDTreeParams
|
|
{
|
|
//Parameters for the forest
|
|
CV_PROP_RW bool calc_var_importance; // true <=> RF processes variable importance
|
|
CV_PROP_RW int nactive_vars;
|
|
CV_PROP_RW CvTermCriteria term_crit;
|
|
|
|
CvRTParams();
|
|
CvRTParams( int max_depth, int min_sample_count,
|
|
float regression_accuracy, bool use_surrogates,
|
|
int max_categories, const float* priors, bool calc_var_importance,
|
|
int nactive_vars, int max_num_of_trees_in_the_forest,
|
|
float forest_accuracy, int termcrit_type );
|
|
};
|
|
|
|
|
|
class CvRTrees : public CvStatModel
|
|
{
|
|
public:
|
|
CV_WRAP CvRTrees();
|
|
virtual ~CvRTrees();
|
|
virtual bool train( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvRTParams params=CvRTParams() );
|
|
|
|
virtual bool train( CvMLData* data, CvRTParams params=CvRTParams() );
|
|
virtual float predict( const CvMat* sample, const CvMat* missing = 0 ) const;
|
|
virtual float predict_prob( const CvMat* sample, const CvMat* missing = 0 ) const;
|
|
|
|
CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
|
|
const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
|
|
const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
|
|
const cv::Mat& missingDataMask=cv::Mat(),
|
|
CvRTParams params=CvRTParams() );
|
|
CV_WRAP virtual float predict( const cv::Mat& sample, const cv::Mat& missing = cv::Mat() ) const;
|
|
CV_WRAP virtual float predict_prob( const cv::Mat& sample, const cv::Mat& missing = cv::Mat() ) const;
|
|
CV_WRAP virtual cv::Mat getVarImportance();
|
|
|
|
CV_WRAP virtual void clear();
|
|
|
|
virtual const CvMat* get_var_importance();
|
|
virtual float get_proximity( const CvMat* sample1, const CvMat* sample2,
|
|
const CvMat* missing1 = 0, const CvMat* missing2 = 0 ) const;
|
|
|
|
virtual float calc_error( CvMLData* data, int type , std::vector<float>* resp = 0 ); // type in {CV_TRAIN_ERROR, CV_TEST_ERROR}
|
|
|
|
virtual float get_train_error();
|
|
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node );
|
|
virtual void write( CvFileStorage* fs, const char* name ) const;
|
|
|
|
CvMat* get_active_var_mask();
|
|
CvRNG* get_rng();
|
|
|
|
int get_tree_count() const;
|
|
CvForestTree* get_tree(int i) const;
|
|
|
|
protected:
|
|
virtual cv::String getName() const;
|
|
|
|
virtual bool grow_forest( const CvTermCriteria term_crit );
|
|
|
|
// array of the trees of the forest
|
|
CvForestTree** trees;
|
|
CvDTreeTrainData* data;
|
|
CvMat train_data_hdr, responses_hdr;
|
|
cv::Mat train_data_mat, responses_mat;
|
|
int ntrees;
|
|
int nclasses;
|
|
double oob_error;
|
|
CvMat* var_importance;
|
|
int nsamples;
|
|
|
|
cv::RNG* rng;
|
|
CvMat* active_var_mask;
|
|
};
|
|
|
|
/****************************************************************************************\
|
|
* Extremely randomized trees Classifier *
|
|
\****************************************************************************************/
|
|
struct CvERTreeTrainData : public CvDTreeTrainData
|
|
{
|
|
virtual void set_data( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
const CvDTreeParams& params=CvDTreeParams(),
|
|
bool _shared=false, bool _add_labels=false,
|
|
bool _update_data=false );
|
|
virtual void get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* missing_buf,
|
|
const float** ord_values, const int** missing, int* sample_buf = 0 );
|
|
virtual const int* get_sample_indices( CvDTreeNode* n, int* indices_buf );
|
|
virtual const int* get_cv_labels( CvDTreeNode* n, int* labels_buf );
|
|
virtual const int* get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf );
|
|
virtual void get_vectors( const CvMat* _subsample_idx, float* values, uchar* missing,
|
|
float* responses, bool get_class_idx=false );
|
|
virtual CvDTreeNode* subsample_data( const CvMat* _subsample_idx );
|
|
const CvMat* missing_mask;
|
|
};
|
|
|
|
class CvForestERTree : public CvForestTree
|
|
{
|
|
protected:
|
|
virtual double calc_node_dir( CvDTreeNode* node );
|
|
virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual void split_node_data( CvDTreeNode* n );
|
|
};
|
|
|
|
class CvERTrees : public CvRTrees
|
|
{
|
|
public:
|
|
CV_WRAP CvERTrees();
|
|
virtual ~CvERTrees();
|
|
virtual bool train( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvRTParams params=CvRTParams());
|
|
CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
|
|
const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
|
|
const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
|
|
const cv::Mat& missingDataMask=cv::Mat(),
|
|
CvRTParams params=CvRTParams());
|
|
virtual bool train( CvMLData* data, CvRTParams params=CvRTParams() );
|
|
protected:
|
|
virtual cv::String getName() const;
|
|
virtual bool grow_forest( const CvTermCriteria term_crit );
|
|
};
|
|
|
|
|
|
/****************************************************************************************\
|
|
* Boosted tree classifier *
|
|
\****************************************************************************************/
|
|
|
|
struct CvBoostParams : public CvDTreeParams
|
|
{
|
|
CV_PROP_RW int boost_type;
|
|
CV_PROP_RW int weak_count;
|
|
CV_PROP_RW int split_criteria;
|
|
CV_PROP_RW double weight_trim_rate;
|
|
|
|
CvBoostParams();
|
|
CvBoostParams( int boost_type, int weak_count, double weight_trim_rate,
|
|
int max_depth, bool use_surrogates, const float* priors );
|
|
};
|
|
|
|
|
|
// Forward declaration: CvBoostTree (declared next) refers to CvBoost only through pointers.
class CvBoost;
|
|
|
|
class CvBoostTree: public CvDTree
|
|
{
|
|
public:
|
|
CvBoostTree();
|
|
virtual ~CvBoostTree();
|
|
|
|
virtual bool train( CvDTreeTrainData* trainData,
|
|
const CvMat* subsample_idx, CvBoost* ensemble );
|
|
|
|
virtual void scale( double s );
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node,
|
|
CvBoost* ensemble, CvDTreeTrainData* _data );
|
|
virtual void clear();
|
|
|
|
/* dummy methods to avoid warnings: BEGIN */
|
|
virtual bool train( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvDTreeParams params=CvDTreeParams() );
|
|
virtual bool train( CvDTreeTrainData* trainData, const CvMat* _subsample_idx );
|
|
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node );
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node,
|
|
CvDTreeTrainData* data );
|
|
/* dummy methods to avoid warnings: END */
|
|
|
|
protected:
|
|
|
|
virtual void try_split_node( CvDTreeNode* n );
|
|
virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
|
|
float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
|
|
virtual void calc_node_value( CvDTreeNode* n );
|
|
virtual double calc_node_dir( CvDTreeNode* n );
|
|
|
|
CvBoost* ensemble;
|
|
};
|
|
|
|
|
|
class CvBoost : public CvStatModel
|
|
{
|
|
public:
|
|
// Boosting type
|
|
enum { DISCRETE=0, REAL=1, LOGIT=2, GENTLE=3 };
|
|
|
|
// Splitting criteria
|
|
enum { DEFAULT=0, GINI=1, MISCLASS=3, SQERR=4 };
|
|
|
|
CV_WRAP CvBoost();
|
|
virtual ~CvBoost();
|
|
|
|
CvBoost( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvBoostParams params=CvBoostParams() );
|
|
|
|
virtual bool train( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvBoostParams params=CvBoostParams(),
|
|
bool update=false );
|
|
|
|
virtual bool train( CvMLData* data,
|
|
CvBoostParams params=CvBoostParams(),
|
|
bool update=false );
|
|
|
|
virtual float predict( const CvMat* sample, const CvMat* missing=0,
|
|
CvMat* weak_responses=0, CvSlice slice=CV_WHOLE_SEQ,
|
|
bool raw_mode=false, bool return_sum=false ) const;
|
|
|
|
CV_WRAP CvBoost( const cv::Mat& trainData, int tflag,
|
|
const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
|
|
const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
|
|
const cv::Mat& missingDataMask=cv::Mat(),
|
|
CvBoostParams params=CvBoostParams() );
|
|
|
|
CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
|
|
const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
|
|
const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
|
|
const cv::Mat& missingDataMask=cv::Mat(),
|
|
CvBoostParams params=CvBoostParams(),
|
|
bool update=false );
|
|
|
|
CV_WRAP virtual float predict( const cv::Mat& sample, const cv::Mat& missing=cv::Mat(),
|
|
const cv::Range& slice=cv::Range::all(), bool rawMode=false,
|
|
bool returnSum=false ) const;
|
|
|
|
virtual float calc_error( CvMLData* _data, int type , std::vector<float> *resp = 0 ); // type in {CV_TRAIN_ERROR, CV_TEST_ERROR}
|
|
|
|
CV_WRAP virtual void prune( CvSlice slice );
|
|
|
|
CV_WRAP virtual void clear();
|
|
|
|
virtual void write( CvFileStorage* storage, const char* name ) const;
|
|
virtual void read( CvFileStorage* storage, CvFileNode* node );
|
|
virtual const CvMat* get_active_vars(bool absolute_idx=true);
|
|
|
|
CvSeq* get_weak_predictors();
|
|
|
|
CvMat* get_weights();
|
|
CvMat* get_subtree_weights();
|
|
CvMat* get_weak_response();
|
|
const CvBoostParams& get_params() const;
|
|
const CvDTreeTrainData* get_data() const;
|
|
|
|
protected:
|
|
|
|
virtual bool set_params( const CvBoostParams& params );
|
|
virtual void update_weights( CvBoostTree* tree );
|
|
virtual void trim_weights();
|
|
virtual void write_params( CvFileStorage* fs ) const;
|
|
virtual void read_params( CvFileStorage* fs, CvFileNode* node );
|
|
|
|
virtual void initialize_weights(double (&p)[2]);
|
|
|
|
CvDTreeTrainData* data;
|
|
CvMat train_data_hdr, responses_hdr;
|
|
cv::Mat train_data_mat, responses_mat;
|
|
CvBoostParams params;
|
|
CvSeq* weak;
|
|
|
|
CvMat* active_vars;
|
|
CvMat* active_vars_abs;
|
|
bool have_active_cat_vars;
|
|
|
|
CvMat* orig_response;
|
|
CvMat* sum_response;
|
|
CvMat* weak_eval;
|
|
CvMat* subsample_mask;
|
|
CvMat* weights;
|
|
CvMat* subtree_weights;
|
|
bool have_subsample;
|
|
};
|
|
|
|
|
|
/****************************************************************************************\
|
|
* Gradient Boosted Trees *
|
|
\****************************************************************************************/
|
|
|
|
// DataType: STRUCT CvGBTreesParams
|
|
// Parameters of GBT (Gradient Boosted trees model), including single
|
|
// tree settings and ensemble parameters.
|
|
//
|
|
// weak_count - count of trees in the ensemble
|
|
// loss_function_type - loss function used for ensemble training
|
|
// subsample_portion - portion of whole training set used for
|
|
// every single tree training.
|
|
// subsample_portion value is in (0.0, 1.0].
|
|
// subsample_portion == 1.0 when whole dataset is
|
|
// used on each step. Count of sample used on each
|
|
// step is computed as
|
|
// int(total_samples_count * subsample_portion).
|
|
// shrinkage - regularization parameter.
|
|
// Each tree prediction is multiplied on shrinkage value.
|
|
|
|
|
|
struct CvGBTreesParams : public CvDTreeParams
|
|
{
|
|
CV_PROP_RW int weak_count;
|
|
CV_PROP_RW int loss_function_type;
|
|
CV_PROP_RW float subsample_portion;
|
|
CV_PROP_RW float shrinkage;
|
|
|
|
CvGBTreesParams();
|
|
CvGBTreesParams( int loss_function_type, int weak_count, float shrinkage,
|
|
float subsample_portion, int max_depth, bool use_surrogates );
|
|
};
|
|
|
|
// DataType: CLASS CvGBTrees
|
|
// Gradient Boosting Trees (GBT) algorithm implementation.
|
|
//
|
|
// data - training dataset
|
|
// params - parameters of the CvGBTrees
|
|
// weak - array[0..(class_count-1)] of CvSeq
|
|
// for storing tree ensembles
|
|
// orig_response - original responses of the training set samples
|
|
// sum_response - predicitons of the current model on the training dataset.
|
|
// this matrix is updated on every iteration.
|
|
// sum_response_tmp - predicitons of the model on the training set on the next
|
|
// step. On every iteration values of sum_responses_tmp are
|
|
// computed via sum_responses values. When the current
|
|
// step is complete sum_response values become equal to
|
|
// sum_responses_tmp.
|
|
// sampleIdx - indices of samples used for training the ensemble.
|
|
// CvGBTrees training procedure takes a set of samples
|
|
// (train_data) and a set of responses (responses).
|
|
// Only pairs (train_data[i], responses[i]), where i is
|
|
// in sample_idx are used for training the ensemble.
|
|
// subsample_train - indices of samples used for training a single decision
|
|
// tree on the current step. This indices are countered
|
|
// relatively to the sample_idx, so that pairs
|
|
// (train_data[sample_idx[i]], responses[sample_idx[i]])
|
|
// are used for training a decision tree.
|
|
// Training set is randomly splited
|
|
// in two parts (subsample_train and subsample_test)
|
|
// on every iteration accordingly to the portion parameter.
|
|
// subsample_test - relative indices of samples from the training set,
|
|
// which are not used for training a tree on the current
|
|
// step.
|
|
// missing - mask of the missing values in the training set. This
|
|
// matrix has the same size as train_data. 1 - missing
|
|
// value, 0 - not a missing value.
|
|
// class_labels - output class labels map.
|
|
// rng - random number generator. Used for spliting the
|
|
// training set.
|
|
// class_count - count of output classes.
|
|
// class_count == 1 in the case of regression,
|
|
// and > 1 in the case of classification.
|
|
// delta - Huber loss function parameter.
|
|
// base_value - start point of the gradient descent procedure.
|
|
// model prediction is
|
|
// f(x) = f_0 + sum_{i=1..weak_count-1}(f_i(x)), where
|
|
// f_0 is the base value.
|
|
|
|
|
|
|
|
class CvGBTrees : public CvStatModel
|
|
{
|
|
public:
|
|
|
|
/*
|
|
// DataType: ENUM
|
|
// Loss functions implemented in CvGBTrees.
|
|
//
|
|
// SQUARED_LOSS
|
|
// problem: regression
|
|
// loss = (x - x')^2
|
|
//
|
|
// ABSOLUTE_LOSS
|
|
// problem: regression
|
|
// loss = abs(x - x')
|
|
//
|
|
// HUBER_LOSS
|
|
// problem: regression
|
|
// loss = delta*( abs(x - x') - delta/2), if abs(x - x') > delta
|
|
// 1/2*(x - x')^2, if abs(x - x') <= delta,
|
|
// where delta is the alpha-quantile of pseudo responses from
|
|
// the training set.
|
|
//
|
|
// DEVIANCE_LOSS
|
|
// problem: classification
|
|
//
|
|
*/
|
|
enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};
|
|
|
|
|
|
/*
|
|
// Default constructor. Creates a model only (without training).
|
|
// Should be followed by one form of the train(...) function.
|
|
//
|
|
// API
|
|
// CvGBTrees();
|
|
|
|
// INPUT
|
|
// OUTPUT
|
|
// RESULT
|
|
*/
|
|
CV_WRAP CvGBTrees();
|
|
|
|
|
|
/*
|
|
// Full form constructor. Creates a gradient boosting model and does the
|
|
// train.
|
|
//
|
|
// API
|
|
// CvGBTrees( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvGBTreesParams params=CvGBTreesParams() );
|
|
|
|
// INPUT
|
|
// trainData - a set of input feature vectors.
|
|
// size of matrix is
|
|
// <count of samples> x <variables count>
|
|
// or <variables count> x <count of samples>
|
|
// depending on the tflag parameter.
|
|
// matrix values are float.
|
|
// tflag - a flag showing how do samples stored in the
|
|
// trainData matrix row by row (tflag=CV_ROW_SAMPLE)
|
|
// or column by column (tflag=CV_COL_SAMPLE).
|
|
// responses - a vector of responses corresponding to the samples
|
|
// in trainData.
|
|
// varIdx - indices of used variables. zero value means that all
|
|
// variables are active.
|
|
// sampleIdx - indices of used samples. zero value means that all
|
|
// samples from trainData are in the training set.
|
|
// varType - vector of <variables count> length. gives every
|
|
// variable type CV_VAR_CATEGORICAL or CV_VAR_ORDERED.
|
|
// varType = 0 means all variables are numerical.
|
|
// missingDataMask - a mask of misiing values in trainData.
|
|
// missingDataMask = 0 means that there are no missing
|
|
// values.
|
|
// params - parameters of GTB algorithm.
|
|
// OUTPUT
|
|
// RESULT
|
|
*/
|
|
CvGBTrees( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvGBTreesParams params=CvGBTreesParams() );
|
|
|
|
|
|
/*
|
|
// Destructor.
|
|
*/
|
|
virtual ~CvGBTrees();
|
|
|
|
|
|
/*
|
|
// Gradient tree boosting model training
|
|
//
|
|
// API
|
|
// virtual bool train( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvGBTreesParams params=CvGBTreesParams(),
|
|
bool update=false );
|
|
|
|
// INPUT
|
|
// trainData - a set of input feature vectors.
|
|
// size of matrix is
|
|
// <count of samples> x <variables count>
|
|
// or <variables count> x <count of samples>
|
|
// depending on the tflag parameter.
|
|
// matrix values are float.
|
|
// tflag - a flag showing how do samples stored in the
|
|
// trainData matrix row by row (tflag=CV_ROW_SAMPLE)
|
|
// or column by column (tflag=CV_COL_SAMPLE).
|
|
// responses - a vector of responses corresponding to the samples
|
|
// in trainData.
|
|
// varIdx - indices of used variables. zero value means that all
|
|
// variables are active.
|
|
// sampleIdx - indices of used samples. zero value means that all
|
|
// samples from trainData are in the training set.
|
|
// varType - vector of <variables count> length. gives every
|
|
// variable type CV_VAR_CATEGORICAL or CV_VAR_ORDERED.
|
|
// varType = 0 means all variables are numerical.
|
|
// missingDataMask - a mask of misiing values in trainData.
|
|
// missingDataMask = 0 means that there are no missing
|
|
// values.
|
|
// params - parameters of GTB algorithm.
|
|
// update - is not supported now. (!)
|
|
// OUTPUT
|
|
// RESULT
|
|
// Error state.
|
|
*/
|
|
virtual bool train( const CvMat* trainData, int tflag,
|
|
const CvMat* responses, const CvMat* varIdx=0,
|
|
const CvMat* sampleIdx=0, const CvMat* varType=0,
|
|
const CvMat* missingDataMask=0,
|
|
CvGBTreesParams params=CvGBTreesParams(),
|
|
bool update=false );
|
|
|
|
|
|
/*
|
|
// Gradient tree boosting model training
|
|
//
|
|
// API
|
|
// virtual bool train( CvMLData* data,
|
|
CvGBTreesParams params=CvGBTreesParams(),
|
|
bool update=false ) {return false;}
|
|
|
|
// INPUT
|
|
// data - training set.
|
|
// params - parameters of GTB algorithm.
|
|
// update - is not supported now. (!)
|
|
// OUTPUT
|
|
// RESULT
|
|
// Error state.
|
|
*/
|
|
virtual bool train( CvMLData* data,
|
|
CvGBTreesParams params=CvGBTreesParams(),
|
|
bool update=false );
|
|
|
|
|
|
/*
|
|
// Response value prediction
|
|
//
|
|
// API
|
|
// virtual float predict_serial( const CvMat* sample, const CvMat* missing=0,
|
|
CvMat* weak_responses=0, CvSlice slice = CV_WHOLE_SEQ,
|
|
int k=-1 ) const;
|
|
|
|
// INPUT
|
|
// sample - input sample of the same type as in the training set.
|
|
// missing - missing values mask. missing=0 if there are no
|
|
// missing values in sample vector.
|
|
// weak_responses - predictions of all of the trees.
|
|
// not implemented (!)
|
|
// slice - part of the ensemble used for prediction.
|
|
// slice = CV_WHOLE_SEQ when all trees are used.
|
|
// k - number of ensemble used.
|
|
// k is in {-1,0,1,..,<count of output classes-1>}.
|
|
// in the case of classification problem
|
|
// <count of output classes-1> ensembles are built.
|
|
// If k = -1 ordinary prediction is the result,
|
|
// otherwise function gives the prediction of the
|
|
// k-th ensemble only.
|
|
// OUTPUT
|
|
// RESULT
|
|
// Predicted value.
|
|
*/
|
|
virtual float predict_serial( const CvMat* sample, const CvMat* missing=0,
|
|
CvMat* weakResponses=0, CvSlice slice = CV_WHOLE_SEQ,
|
|
int k=-1 ) const;
|
|
|
|
/*
|
|
// Response value prediction.
|
|
// Parallel version (in the case of TBB existence)
|
|
//
|
|
// API
|
|
// virtual float predict( const CvMat* sample, const CvMat* missing=0,
|
|
CvMat* weak_responses=0, CvSlice slice = CV_WHOLE_SEQ,
|
|
int k=-1 ) const;
|
|
|
|
// INPUT
|
|
// sample - input sample of the same type as in the training set.
|
|
// missing - missing values mask. missing=0 if there are no
|
|
// missing values in sample vector.
|
|
// weak_responses - predictions of all of the trees.
|
|
// not implemented (!)
|
|
// slice - part of the ensemble used for prediction.
|
|
// slice = CV_WHOLE_SEQ when all trees are used.
|
|
// k - number of ensemble used.
|
|
// k is in {-1,0,1,..,<count of output classes-1>}.
|
|
// in the case of classification problem
|
|
// <count of output classes-1> ensembles are built.
|
|
// If k = -1 ordinary prediction is the result,
|
|
// otherwise function gives the prediction of the
|
|
// k-th ensemble only.
|
|
// OUTPUT
|
|
// RESULT
|
|
// Predicted value.
|
|
*/
|
|
virtual float predict( const CvMat* sample, const CvMat* missing=0,
|
|
CvMat* weakResponses=0, CvSlice slice = CV_WHOLE_SEQ,
|
|
int k=-1 ) const;
|
|
|
|
/*
|
|
// Deletes all the data.
|
|
//
|
|
// API
|
|
// virtual void clear();
|
|
|
|
// INPUT
|
|
// OUTPUT
|
|
// delete data, weak, orig_response, sum_response,
|
|
// weak_eval, subsample_train, subsample_test,
|
|
// sample_idx, missing, lass_labels
|
|
// delta = 0.0
|
|
// RESULT
|
|
*/
|
|
CV_WRAP virtual void clear();
|
|
|
|
/*
|
|
// Compute error on the train/test set.
|
|
//
|
|
// API
|
|
// virtual float calc_error( CvMLData* _data, int type,
|
|
// std::vector<float> *resp = 0 );
|
|
//
|
|
// INPUT
|
|
// data - dataset
|
|
// type - defines which error is to compute: train (CV_TRAIN_ERROR) or
|
|
// test (CV_TEST_ERROR).
|
|
// OUTPUT
|
|
// resp - vector of predicitons
|
|
// RESULT
|
|
// Error value.
|
|
*/
|
|
virtual float calc_error( CvMLData* _data, int type,
|
|
std::vector<float> *resp = 0 );
|
|
|
|
/*
|
|
//
|
|
// Write parameters of the gtb model and data. Write learned model.
|
|
//
|
|
// API
|
|
// virtual void write( CvFileStorage* fs, const char* name ) const;
|
|
//
|
|
// INPUT
|
|
// fs - file storage to read parameters from.
|
|
// name - model name.
|
|
// OUTPUT
|
|
// RESULT
|
|
*/
|
|
virtual void write( CvFileStorage* fs, const char* name ) const;
|
|
|
|
|
|
/*
|
|
//
|
|
// Read parameters of the gtb model and data. Read learned model.
|
|
//
|
|
// API
|
|
// virtual void read( CvFileStorage* fs, CvFileNode* node );
|
|
//
|
|
// INPUT
|
|
// fs - file storage to read parameters from.
|
|
// node - file node.
|
|
// OUTPUT
|
|
// RESULT
|
|
*/
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node );
|
|
|
|
|
|
// new-style C++ interface
|
|
CV_WRAP CvGBTrees( const cv::Mat& trainData, int tflag,
|
|
const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
|
|
const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
|
|
const cv::Mat& missingDataMask=cv::Mat(),
|
|
CvGBTreesParams params=CvGBTreesParams() );
|
|
|
|
CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
|
|
const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
|
|
const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
|
|
const cv::Mat& missingDataMask=cv::Mat(),
|
|
CvGBTreesParams params=CvGBTreesParams(),
|
|
bool update=false );
|
|
|
|
CV_WRAP virtual float predict( const cv::Mat& sample, const cv::Mat& missing=cv::Mat(),
|
|
const cv::Range& slice = cv::Range::all(),
|
|
int k=-1 ) const;
|
|
|
|
protected:
|
|
|
|
/*
|
|
// Compute the gradient vector components.
|
|
//
|
|
// API
|
|
// virtual void find_gradient( const int k = 0);
|
|
|
|
// INPUT
|
|
// k - used for classification problem, determining current
|
|
// tree ensemble.
|
|
// OUTPUT
|
|
// changes components of data->responses
|
|
// which correspond to samples used for training
|
|
// on the current step.
|
|
// RESULT
|
|
*/
|
|
virtual void find_gradient( const int k = 0);
|
|
|
|
|
|
/*
|
|
//
|
|
// Change values in tree leaves according to the used loss function.
|
|
//
|
|
// API
|
|
// virtual void change_values(CvDTree* tree, const int k = 0);
|
|
//
|
|
// INPUT
|
|
// tree - decision tree to change.
|
|
// k - used for classification problem, determining current
|
|
// tree ensemble.
|
|
// OUTPUT
|
|
// changes 'value' fields of the trees' leaves.
|
|
// changes sum_response_tmp.
|
|
// RESULT
|
|
*/
|
|
virtual void change_values(CvDTree* tree, const int k = 0);
|
|
|
|
|
|
/*
|
|
//
|
|
// Find optimal constant prediction value according to the used loss
|
|
// function.
|
|
// The goal is to find a constant which gives the minimal summary loss
|
|
// on the _Idx samples.
|
|
//
|
|
// API
|
|
// virtual float find_optimal_value( const CvMat* _Idx );
|
|
//
|
|
// INPUT
|
|
// _Idx - indices of the samples from the training set.
|
|
// OUTPUT
|
|
// RESULT
|
|
// optimal constant value.
|
|
*/
|
|
virtual float find_optimal_value( const CvMat* _Idx );
|
|
|
|
|
|
/*
|
|
//
|
|
// Randomly split the whole training set in two parts according
|
|
// to params.portion.
|
|
//
|
|
// API
|
|
// virtual void do_subsample();
|
|
//
|
|
// INPUT
|
|
// OUTPUT
|
|
// subsample_train - indices of samples used for training
|
|
// subsample_test - indices of samples used for test
|
|
// RESULT
|
|
*/
|
|
virtual void do_subsample();
|
|
|
|
|
|
/*
|
|
//
|
|
// Internal recursive function giving an array of subtree tree leaves.
|
|
//
|
|
// API
|
|
// void leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node );
|
|
//
|
|
// INPUT
|
|
// node - current leaf.
|
|
// OUTPUT
|
|
// count - count of leaves in the subtree.
|
|
// leaves - array of pointers to leaves.
|
|
// RESULT
|
|
*/
|
|
void leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node );
|
|
|
|
|
|
/*
|
|
//
|
|
// Get leaves of the tree.
|
|
//
|
|
// API
|
|
// CvDTreeNode** GetLeaves( const CvDTree* dtree, int& len );
|
|
//
|
|
// INPUT
|
|
// dtree - decision tree.
|
|
// OUTPUT
|
|
// len - count of the leaves.
|
|
// RESULT
|
|
// CvDTreeNode** - array of pointers to leaves.
|
|
*/
|
|
CvDTreeNode** GetLeaves( const CvDTree* dtree, int& len );
|
|
|
|
|
|
/*
|
|
//
|
|
// Is it a regression or a classification.
|
|
//
|
|
// API
|
|
// bool problem_type();
|
|
//
|
|
// INPUT
|
|
// OUTPUT
|
|
// RESULT
|
|
// false if it is a classification problem,
|
|
// true - if regression.
|
|
*/
|
|
virtual bool problem_type() const;
|
|
|
|
|
|
/*
|
|
//
|
|
// Write parameters of the gtb model.
|
|
//
|
|
// API
|
|
// virtual void write_params( CvFileStorage* fs ) const;
|
|
//
|
|
// INPUT
|
|
// fs - file storage to write parameters to.
|
|
// OUTPUT
|
|
// RESULT
|
|
*/
|
|
virtual void write_params( CvFileStorage* fs ) const;
|
|
|
|
|
|
/*
|
|
//
|
|
// Read parameters of the gtb model and data.
|
|
//
|
|
// API
|
|
// virtual void read_params( CvFileStorage* fs );
|
|
//
|
|
// INPUT
|
|
// fs - file storage to read parameters from.
|
|
// OUTPUT
|
|
// params - parameters of the gtb model.
|
|
// data - contains information about the structure
|
|
// of the data set (count of variables,
|
|
// their types, etc.).
|
|
// class_labels - output class labels map.
|
|
// RESULT
|
|
*/
|
|
virtual void read_params( CvFileStorage* fs, CvFileNode* fnode );
|
|
// Helper returning a length derived from <mat>.
// NOTE(review): presumably the element count of a vector-shaped matrix;
// the declaration alone does not show it - confirm in the implementation.
int get_len(const CvMat* mat) const;
|
|
|
|
|
|
// information about the structure of the data set
// (count of variables, their types, etc.)
CvDTreeTrainData* data;
|
|
// parameters of the gtb model
CvGBTreesParams params;
|
|
|
|
// NOTE(review): presumably the sequences of weak learners (trees) built
// during boosting - confirm against the implementation.
CvSeq** weak;
|
|
// NOTE(review): the matrices below are working buffers whose exact
// contents are not visible from the declarations; the names suggest
// response values, sample index sets and a missing-value mask.
CvMat* orig_response;
|
|
CvMat* sum_response;
|
|
CvMat* sum_response_tmp;
|
|
CvMat* sample_idx;
|
|
CvMat* subsample_train;
|
|
CvMat* subsample_test;
|
|
CvMat* missing;
|
|
// class labels map
CvMat* class_labels;
|
|
|
|
// random number generator used by the model
cv::RNG* rng;
|
|
|
|
// NOTE(review): presumably the number of classes for classification
// problems - confirm.
int class_count;
|
|
// NOTE(review): meaning of delta and base_value is defined by the
// implementation and is not visible here.
float delta;
|
|
float base_value;
|
|
|
|
};
|
|
|
|
|
|
|
|
/****************************************************************************************\
*                            Artificial Neural Networks (ANN)                            *
\****************************************************************************************/
|
|
|
|
/////////////////////////////////// Multi-Layer Perceptrons //////////////////////////////
|
|
|
|
struct CvANN_MLP_TrainParams
|
|
{
|
|
CvANN_MLP_TrainParams();
|
|
CvANN_MLP_TrainParams( CvTermCriteria term_crit, int train_method,
|
|
double param1, double param2=0 );
|
|
~CvANN_MLP_TrainParams();
|
|
|
|
enum { BACKPROP=0, RPROP=1 };
|
|
|
|
CV_PROP_RW CvTermCriteria term_crit;
|
|
CV_PROP_RW int train_method;
|
|
|
|
// backpropagation parameters
|
|
CV_PROP_RW double bp_dw_scale, bp_moment_scale;
|
|
|
|
// rprop parameters
|
|
CV_PROP_RW double rp_dw0, rp_dw_plus, rp_dw_minus, rp_dw_min, rp_dw_max;
|
|
};
|
|
|
|
|
|
class CvANN_MLP : public CvStatModel
|
|
{
|
|
public:
|
|
CV_WRAP CvANN_MLP();
|
|
CvANN_MLP( const CvMat* layerSizes,
|
|
int activateFunc=CvANN_MLP::SIGMOID_SYM,
|
|
double fparam1=0, double fparam2=0 );
|
|
|
|
virtual ~CvANN_MLP();
|
|
|
|
virtual void create( const CvMat* layerSizes,
|
|
int activateFunc=CvANN_MLP::SIGMOID_SYM,
|
|
double fparam1=0, double fparam2=0 );
|
|
|
|
virtual int train( const CvMat* inputs, const CvMat* outputs,
|
|
const CvMat* sampleWeights, const CvMat* sampleIdx=0,
|
|
CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams(),
|
|
int flags=0 );
|
|
virtual float predict( const CvMat* inputs, CV_OUT CvMat* outputs ) const;
|
|
|
|
CV_WRAP CvANN_MLP( const cv::Mat& layerSizes,
|
|
int activateFunc=CvANN_MLP::SIGMOID_SYM,
|
|
double fparam1=0, double fparam2=0 );
|
|
|
|
CV_WRAP virtual void create( const cv::Mat& layerSizes,
|
|
int activateFunc=CvANN_MLP::SIGMOID_SYM,
|
|
double fparam1=0, double fparam2=0 );
|
|
|
|
CV_WRAP virtual int train( const cv::Mat& inputs, const cv::Mat& outputs,
|
|
const cv::Mat& sampleWeights, const cv::Mat& sampleIdx=cv::Mat(),
|
|
CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams(),
|
|
int flags=0 );
|
|
|
|
CV_WRAP virtual float predict( const cv::Mat& inputs, CV_OUT cv::Mat& outputs ) const;
|
|
|
|
CV_WRAP virtual void clear();
|
|
|
|
// possible activation functions
|
|
enum { IDENTITY = 0, SIGMOID_SYM = 1, GAUSSIAN = 2 };
|
|
|
|
// available training flags
|
|
enum { UPDATE_WEIGHTS = 1, NO_INPUT_SCALE = 2, NO_OUTPUT_SCALE = 4 };
|
|
|
|
virtual void read( CvFileStorage* fs, CvFileNode* node );
|
|
virtual void write( CvFileStorage* storage, const char* name ) const;
|
|
|
|
int get_layer_count() { return layer_sizes ? layer_sizes->cols : 0; }
|
|
const CvMat* get_layer_sizes() { return layer_sizes; }
|
|
double* get_weights(int layer)
|
|
{
|
|
return layer_sizes && weights &&
|
|
(unsigned)layer <= (unsigned)layer_sizes->cols ? weights[layer] : 0;
|
|
}
|
|
|
|
virtual void calc_activ_func_deriv( CvMat* xf, CvMat* deriv, const double* bias ) const;
|
|
|
|
protected:
|
|
|
|
virtual bool prepare_to_train( const CvMat* _inputs, const CvMat* _outputs,
|
|
const CvMat* _sample_weights, const CvMat* sampleIdx,
|
|
CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags );
|
|
|
|
// sequential random backpropagation
|
|
virtual int train_backprop( CvVectors _ivecs, CvVectors _ovecs, const double* _sw );
|
|
|
|
// RPROP algorithm
|
|
virtual int train_rprop( CvVectors _ivecs, CvVectors _ovecs, const double* _sw );
|
|
|
|
virtual void calc_activ_func( CvMat* xf, const double* bias ) const;
|
|
virtual void set_activ_func( int _activ_func=SIGMOID_SYM,
|
|
double _f_param1=0, double _f_param2=0 );
|
|
virtual void init_weights();
|
|
virtual void scale_input( const CvMat* _src, CvMat* _dst ) const;
|
|
virtual void scale_output( const CvMat* _src, CvMat* _dst ) const;
|
|
virtual void calc_input_scale( const CvVectors* vecs, int flags );
|
|
virtual void calc_output_scale( const CvVectors* vecs, int flags );
|
|
|
|
virtual void write_params( CvFileStorage* fs ) const;
|
|
virtual void read_params( CvFileStorage* fs, CvFileNode* node );
|
|
|
|
CvMat* layer_sizes;
|
|
CvMat* wbuf;
|
|
CvMat* sample_weights;
|
|
double** weights;
|
|
double f_param1, f_param2;
|
|
double min_val, max_val, min_val1, max_val1;
|
|
int activ_func;
|
|
int max_count, max_buf_sz;
|
|
CvANN_MLP_TrainParams params;
|
|
cv::RNG* rng;
|
|
};
|
|
|
|
/****************************************************************************************\
*                           Auxiliary functions declarations                             *
\****************************************************************************************/
|
|
|
|
/* Generates <sample> from multivariate normal distribution, where <mean> - is an
|
|
average row vector, <cov> - symmetric covariation matrix */
|
|
CVAPI(void) cvRandMVNormal( CvMat* mean, CvMat* cov, CvMat* sample,
|
|
CvRNG* rng CV_DEFAULT(0) );
|
|
|
|
/* Generates sample from gaussian mixture distribution */
|
|
CVAPI(void) cvRandGaussMixture( CvMat* means[],
|
|
CvMat* covs[],
|
|
float weights[],
|
|
int clsnum,
|
|
CvMat* sample,
|
|
CvMat* sampClasses CV_DEFAULT(0) );
|
|
|
|
#define CV_TS_CONCENTRIC_SPHERES 0
|
|
|
|
/* creates test set */
|
|
CVAPI(void) cvCreateTestSet( int type, CvMat** samples,
|
|
int num_samples,
|
|
int num_features,
|
|
CvMat** responses,
|
|
int num_classes, ... );
|
|
|
|
/****************************************************************************************\
*                                          Data                                          *
\****************************************************************************************/
|
|
|
|
// presumably the possible values of CvTrainTestSplit::train_sample_part_mode
#define CV_COUNT     0
#define CV_PORTION   1

// Description of how a data set is divided into a train and a test part.
// The size of the train part is given either as a sample count or as a portion.
struct CvTrainTestSplit
{
    CvTrainTestSplit();

    // Note: the two overloads below differ only in int vs. float, so a
    // double argument (e.g. 0.5) is ambiguous; pass an int or a float (0.5f).
    CvTrainTestSplit( int train_sample_count, bool mix = true);
    CvTrainTestSplit( float train_sample_portion, bool mix = true);

    // size of the train part; which member is meaningful is presumably
    // selected by train_sample_part_mode
    union
    {
        int count;
        float portion;
    } train_sample_part;
    int train_sample_part_mode;

    bool mix;
};
|
|
|
|
class CvMLData
|
|
{
|
|
public:
|
|
CvMLData();
|
|
virtual ~CvMLData();
|
|
|
|
// returns:
|
|
// 0 - OK
|
|
// -1 - file can not be opened or is not correct
|
|
int read_csv( const char* filename );
|
|
|
|
const CvMat* get_values() const;
|
|
const CvMat* get_responses();
|
|
const CvMat* get_missing() const;
|
|
|
|
void set_header_lines_number( int n );
|
|
int get_header_lines_number() const;
|
|
|
|
void set_response_idx( int idx ); // old response become predictors, new response_idx = idx
|
|
// if idx < 0 there will be no response
|
|
int get_response_idx() const;
|
|
|
|
void set_train_test_split( const CvTrainTestSplit * spl );
|
|
const CvMat* get_train_sample_idx() const;
|
|
const CvMat* get_test_sample_idx() const;
|
|
void mix_train_and_test_idx();
|
|
|
|
const CvMat* get_var_idx();
|
|
void chahge_var_idx( int vi, bool state ); // misspelled (saved for back compitability),
|
|
// use change_var_idx
|
|
void change_var_idx( int vi, bool state ); // state == true to set vi-variable as predictor
|
|
|
|
const CvMat* get_var_types();
|
|
int get_var_type( int var_idx ) const;
|
|
// following 2 methods enable to change vars type
|
|
// use these methods to assign CV_VAR_CATEGORICAL type for categorical variable
|
|
// with numerical labels; in the other cases var types are correctly determined automatically
|
|
void set_var_types( const char* str ); // str examples:
|
|
// "ord[0-17],cat[18]", "ord[0,2,4,10-12], cat[1,3,5-9,13,14]",
|
|
// "cat", "ord" (all vars are categorical/ordered)
|
|
void change_var_type( int var_idx, int type); // type in { CV_VAR_ORDERED, CV_VAR_CATEGORICAL }
|
|
|
|
void set_delimiter( char ch );
|
|
char get_delimiter() const;
|
|
|
|
void set_miss_ch( char ch );
|
|
char get_miss_ch() const;
|
|
|
|
const std::map<cv::String, int>& get_class_labels_map() const;
|
|
|
|
protected:
|
|
virtual void clear();
|
|
|
|
void str_to_flt_elem( const char* token, float& flt_elem, int& type);
|
|
void free_train_test_idx();
|
|
|
|
char delimiter;
|
|
char miss_ch;
|
|
//char flt_separator;
|
|
|
|
CvMat* values;
|
|
CvMat* missing;
|
|
CvMat* var_types;
|
|
CvMat* var_idx_mask;
|
|
|
|
CvMat* response_out; // header
|
|
CvMat* var_idx_out; // mat
|
|
CvMat* var_types_out; // mat
|
|
|
|
int header_lines_number;
|
|
|
|
int response_idx;
|
|
|
|
int train_sample_count;
|
|
bool mix;
|
|
|
|
int total_class_count;
|
|
std::map<cv::String, int> class_map;
|
|
|
|
CvMat* train_sample_idx;
|
|
CvMat* test_sample_idx;
|
|
int* sample_idx; // data of train_sample_idx and test_sample_idx
|
|
|
|
cv::RNG* rng;
|
|
};
|
|
|
|
|
|
namespace cv
|
|
{
|
|
|
|
typedef CvStatModel StatModel;
|
|
typedef CvParamGrid ParamGrid;
|
|
typedef CvNormalBayesClassifier NormalBayesClassifier;
|
|
typedef CvKNearest KNearest;
|
|
typedef CvSVMParams SVMParams;
|
|
typedef CvSVMKernel SVMKernel;
|
|
typedef CvSVMSolver SVMSolver;
|
|
typedef CvSVM SVM;
|
|
typedef CvDTreeParams DTreeParams;
|
|
typedef CvMLData TrainData;
|
|
typedef CvDTree DecisionTree;
|
|
typedef CvForestTree ForestTree;
|
|
typedef CvRTParams RandomTreeParams;
|
|
typedef CvRTrees RandomTrees;
|
|
typedef CvERTreeTrainData ERTreeTRainData;
|
|
typedef CvForestERTree ERTree;
|
|
typedef CvERTrees ERTrees;
|
|
typedef CvBoostParams BoostParams;
|
|
typedef CvBoostTree BoostTree;
|
|
typedef CvBoost Boost;
|
|
typedef CvANN_MLP_TrainParams ANN_MLP_TrainParams;
|
|
typedef CvANN_MLP NeuralNet_MLP;
|
|
typedef CvGBTreesParams GradientBoostingTreeParams;
|
|
typedef CvGBTrees GradientBoostingTrees;
|
|
|
|
template<> void DefaultDeleter<CvDTreeSplit>::operator ()(CvDTreeSplit* obj) const;
|
|
|
|
bool initModule_ml(void);
|
|
}
|
|
|
|
#endif // __cplusplus
|
|
#endif // __OPENCV_ML_HPP__
|
|
|
|
/* End of file. */
|