removed many extra whitespaces; fixed 1 warning

Vadim Pisarevsky 2014-08-03 03:08:25 +04:00
parent 28ca6ac05e
commit 2520e335d1
17 changed files with 79 additions and 81 deletions

View File

@ -97,7 +97,7 @@ DTrees::Node
The class represents a decision tree node. It has public members:
.. ocv:member:: double value
Value at the node: a class label in case of classification or estimated function value in case of regression.
.. ocv:member:: int classIdx

View File

@ -128,7 +128,7 @@ Creates empty EM model
:param params: EM parameters
The model should then be trained using the ``StatModel::train(traindata, flags)`` method. Alternatively, you can use one of the ``EM::train*`` methods or load the model from a file using ``StatModel::load<EM>(filename)``.
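For illustration, training via the static ``EM::train`` method documented below might look like the following sketch. The ``#include``/``using`` lines apply to the other sketches in this document as well; ``samples`` is an assumed ``CV_64FC1`` matrix with one sample per row, and the ``EM::Params`` constructor taking the number of clusters is an assumption::

    #include <opencv2/ml.hpp>
    using namespace cv;
    using namespace cv::ml;

    void trainEMExample(const Mat& samples)
    {
        EM::Params params(3);   // assumed: 3 mixture components
        Mat labels, probs;
        // estimates the mixture parameters, starting with the E-step
        Ptr<EM> em = EM::train(samples, noArray(), labels, probs, params);
    }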
EM::train
---------
@ -155,7 +155,7 @@ Static methods that estimate the Gaussian mixture parameters from a samples set
:param labels: The optional output "class label" for each sample: :math:`\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N` (indices of the most probable mixture component for each sample). It has :math:`nsamples \times 1` size and ``CV_32SC1`` type.
:param probs: The optional output matrix that contains posterior probabilities of each Gaussian mixture component given each sample. It has :math:`nsamples \times nclusters` size and ``CV_64FC1`` type.
:param params: The Gaussian mixture params, see ``EM::Params`` description above.
The three versions of the training method differ in the initialization of the Gaussian mixture model parameters and the start step:

View File

@ -36,18 +36,18 @@ Reads the dataset from a .csv file and returns the ready-to-use training data.
:param filename: The input file name
:param headerLineCount: The number of lines at the beginning to skip; besides the header, the function also skips empty lines and lines starting with '#'
:param responseStartIdx: Index of the first output variable. If -1, the function considers the last variable as the response
:param responseEndIdx: Index of the last output variable + 1. If -1, then there is a single response variable at ``responseStartIdx``.
:param varTypeSpec: The optional text string that specifies the variables' types. It has the format ``ord[n1-n2,n3,n4-n5,...]cat[n6,n7-n8,...]``. That is, variables from n1 to n2 (inclusive range), n3, n4 to n5 ... are considered ordered, and n6, n7 to n8 ... are considered categorical. The range [n1..n2] + [n3] + [n4..n5] + ... + [n6] + [n7..n8] should cover all the variables. If varTypeSpec is not specified, then the algorithm uses the following rules:
1. all input variables are considered ordered by default. If some column contains non-numerical values, e.g. 'apple', 'pear', 'apple', 'apple', 'mango', the corresponding variable is considered categorical.
2. if there are several output variables, they are all considered ordered. An error is reported when non-numerical values are used.
3. if there is a single output variable, then if its values are non-numerical or are all integers, it is considered categorical. Otherwise, it is considered ordered.
:param delimiter: The character used to separate values in each line.
:param missch: The character used to specify missing measurements. It should not be a digit. Although it is a non-numerical value, it does not affect the decision of whether the variable is ordered or categorical. A usage sketch follows below.
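A minimal call, under the parameter descriptions above (the file name and column layout here are hypothetical; the delimiter and missing-value character are passed explicitly)::

    // skip 1 header line; the last column is the response;
    // columns 0-12 are ordered, column 13 is categorical
    Ptr<TrainData> data = TrainData::loadFromCSV("wine.csv", 1, -1, -1,
                                                 "ord[0-12]cat[13]", ',', '?');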
TrainData::create
@ -57,17 +57,17 @@ Creates training data from in-memory arrays.
.. ocv:function:: Ptr<TrainData> create(InputArray samples, int layout, InputArray responses, InputArray varIdx=noArray(), InputArray sampleIdx=noArray(), InputArray sampleWeights=noArray(), InputArray varType=noArray())
:param samples: matrix of samples. It should have ``CV_32F`` type.
:param layout: it's either ``ROW_SAMPLE``, which means that each training sample is a row of ``samples``, or ``COL_SAMPLE``, which means that each training sample occupies a column of ``samples``.
:param responses: matrix of responses. If the responses are scalar, they should be stored as a single row or as a single column. The matrix should have type ``CV_32F`` or ``CV_32S`` (in the former case the responses are considered ordered by default; in the latter case, categorical)
:param varIdx: vector specifying which variables to use for training. It can be an integer vector (``CV_32S``) containing 0-based variable indices or a byte vector (``CV_8U``) containing a mask of active variables.
:param sampleIdx: vector specifying which samples to use for training. It can be an integer vector (``CV_32S``) containing 0-based sample indices or a byte vector (``CV_8U``) containing a mask of training samples.
:param sampleWeights: optional vector with weights for each sample. It should have ``CV_32F`` type.
:param varType: optional vector of type ``CV_8U`` and size <number_of_variables_in_samples> + <number_of_variables_in_responses>, containing types of each input and output variable. The ordered variables are denoted by the value ``VAR_ORDERED``, and the categorical ones by ``VAR_CATEGORICAL``.
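For example, in-memory training data might be assembled like this (a sketch; the matrix sizes are hypothetical)::

    Mat samples(100, 5, CV_32F);   // 100 samples, 5 input variables each
    Mat responses(100, 1, CV_32S); // CV_32S => responses treated as categorical
    // ... fill samples and responses ...
    Ptr<TrainData> data = TrainData::create(samples, ROW_SAMPLE, responses);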
@ -78,11 +78,11 @@ Returns matrix of train samples
.. ocv:function:: Mat TrainData::getTrainSamples(int layout=ROW_SAMPLE, bool compressSamples=true, bool compressVars=true) const
:param layout: The requested layout. If it's different from the initial one, the matrix is transposed.
:param compressSamples: if true, the function returns only the training samples (specified by sampleIdx)
:param compressVars: if true, the function returns the shorter training samples, containing only the active variables.
In the current implementation the function tries to avoid physical data copying and returns the matrix stored inside ``TrainData`` (unless transposition or compression is needed).
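For instance (a sketch continuing the ``data`` object from the previous example)::

    // row-per-sample view, restricted to the active samples and variables
    Mat trainSamples = data->getTrainSamples();
    // column-per-sample view of the same data; forces a transposed copy
    Mat colSamples = data->getTrainSamples(COL_SAMPLE, true, true);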

View File

@ -102,26 +102,26 @@ ANN_MLP::Params
Parameters of the MLP and of the training algorithm. You can initialize the structure by a constructor, or adjust the individual parameters after the structure is created.
The network structure:
.. ocv:member:: Mat layerSizes
The number of elements in each layer of the network. The very first element specifies the number of elements in the input layer. The last element specifies the number of elements in the output layer.
.. ocv:member:: int activateFunc
The activation function. Currently the only fully supported activation function is ``ANN_MLP::SIGMOID_SYM``.
.. ocv:member:: double fparam1
The first parameter of the activation function, 0 by default.
.. ocv:member:: double fparam2
The second parameter of the activation function, 0 by default.
.. note::
If you are using the default ``ANN_MLP::SIGMOID_SYM`` activation function with the default parameter values fparam1=0 and fparam2=0, then the function used is y = 1.7159*tanh(2/3 * x), so the output will range from [-1.7159, 1.7159] instead of [0,1].
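A network with the structure described above might be configured as follows; this is a sketch that assumes a default-constructible ``Params`` with the members listed above, and that ``ANN_MLP::create`` accepts a ``Params`` instance, as the other models in this module do::

    ANN_MLP::Params params;
    // 2 inputs -> one hidden layer of 5 neurons -> 1 output
    params.layerSizes = (Mat_<int>(3, 1) << 2, 5, 1);
    params.activateFunc = ANN_MLP::SIGMOID_SYM; // the fully supported choice
    params.fparam1 = 0; // default slope
    params.fparam2 = 0; // default amplitude
    Ptr<ANN_MLP> ann = ANN_MLP::create(params);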
The back-propagation algorithm parameters:

View File

@ -101,4 +101,3 @@ Returns the variable importance array.
.. ocv:function:: Mat RTrees::getVarImportance() const
The method returns the variable importance vector, computed at the training stage when ``RTParams::calcVarImportance`` is set to true. If the flag was set to false, an empty matrix is returned.
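For example (a sketch; ``rtrees`` is an assumed trained ``Ptr<RTrees>`` whose training parameters had ``calcVarImportance`` set to true, and the element type is assumed to be ``float``)::

    Mat importance = rtrees->getVarImportance();
    for( int i = 0; i < (int)importance.total(); i++ )
        printf("var #%d importance: %.3f\n", i, importance.at<float>(i));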

View File

@ -25,15 +25,15 @@ Trains the statistical model
.. ocv:function:: Ptr<_Tp> StatModel::train(InputArray samples, int layout, InputArray responses, const _Tp::Params& p, int flags=0 )
:param trainData: training data that can be loaded from a file using ``TrainData::loadFromCSV`` or created with ``TrainData::create``.
:param samples: training samples
:param layout: ``ROW_SAMPLE`` (training samples are the matrix rows) or ``COL_SAMPLE`` (training samples are the matrix columns)
:param responses: vector of responses associated with the training samples.
:param p: the stat model parameters.
:param flags: optional flags, depending on the model. Some of the models can be updated with new training samples rather than completely overwritten (such as ``NormalBayesClassifier`` or ``ANN_MLP``).
There are two instance methods and two static (class) template methods. The first two train an already created model (the very first method must be overridden in the derived classes). The latter two are convenience methods that construct an empty model and then call its train method.
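For example, the last template variant documented above might be used like this (a sketch; ``samples`` and ``responses`` are assumed to be filled ``CV_32F``/``CV_32S`` matrices)::

    Ptr<SVM> svm = StatModel::train<SVM>(samples, ROW_SAMPLE, responses,
                                         SVM::Params());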
@ -70,11 +70,11 @@ Predicts response(s) for the provided sample(s)
.. ocv:function:: float StatModel::predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const
:param samples: The input samples, floating-point matrix
:param results: The optional output matrix of results.
:param flags: The optional flags, model-dependent. Some models, such as ``Boost`` and ``SVM``, recognize the ``StatModel::RAW_OUTPUT`` flag, which makes the method return the raw results (the sum), not the class label.
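For instance (a sketch; ``model`` is an assumed trained ``Ptr<StatModel>`` and ``querySamples`` an assumed ``CV_32F`` matrix with one sample per row)::

    Mat results;
    model->predict(querySamples, results); // per-sample outputs in results
    // raw sums instead of class labels, where the model supports it
    model->predict(querySamples, results, StatModel::RAW_OUTPUT);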
StatModel::calcError
-------------------------
@ -83,11 +83,11 @@ Computes error on the training or test dataset
.. ocv:function:: float StatModel::calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const
:param data: the training data
:param test: if true, the error is computed over the test subset of the data; otherwise it is computed over the training subset. Note that if you load a completely different dataset to evaluate an already trained classifier, you will probably want to skip ``TrainData::setTrainTestSplitRatio`` (so that no test subset is defined) and specify ``test=false``, so that the error is computed for the whole new set.
:param resp: the optional output responses.
The method uses ``StatModel::predict`` to compute the error. For regression models the error is computed as RMS, for classifiers as the percentage of misclassified samples (0%-100%).
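For example (a sketch; ``model`` and ``data`` are assumed to be a trained model and its ``Ptr<TrainData>``)::

    Mat resp;
    float trainError = model->calcError(data, false, resp);     // training subset
    float testError  = model->calcError(data, true, noArray()); // test subset, if set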

View File

@ -243,13 +243,13 @@ Retrieves the decision function
.. ocv:function:: double SVM::getDecisionFunction(int i, OutputArray alpha, OutputArray svidx) const
:param i: the index of the decision function. If the problem solved is regression, 1-class or 2-class classification, then there will be just one decision function and the index should always be 0. Otherwise, in the case of N-class classification, there will be N*(N-1)/2 decision functions.
:param alpha: the optional output vector for weights, corresponding to different support vectors. In the case of linear SVM all the alphas will be 1.
:param svidx: the optional output vector of indices of support vectors within the matrix of support vectors (which can be retrieved by ``SVM::getSupportVectors``). In the case of linear SVM each decision function consists of a single "compressed" support vector.
The method returns the ``rho`` parameter of the decision function, a scalar subtracted from the weighted sum of kernel responses.
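For a linear 2-class SVM the decision value of a sample can be evaluated directly (a sketch; ``svm`` is an assumed trained linear SVM and ``x`` an assumed ``CV_32F`` row vector of the same length as a support vector)::

    Mat alpha, svidx;
    double rho = svm->getDecisionFunction(0, alpha, svidx);
    Mat sv = svm->getSupportVectors();  // a single "compressed" vector here
    // for the linear kernel the kernel response is a dot product
    double f = sv.row(0).dot(x) - rho;  // the sign of f gives the predicted class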
Prediction with SVM
--------------------

View File

@ -133,7 +133,7 @@ public:
virtual Mat getCatOfs() const = 0;
virtual Mat getCatMap() const = 0;
virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;
virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
virtual void shuffleTrainTest() = 0;
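// The split methods above might be combined like this (a sketch; `data` is an
// assumed Ptr<TrainData>):
//   data->setTrainTestSplitRatio(0.8, true); // 80% train / 20% test, shuffled
//   ... train on the training subset, evaluate on the test subset ...
//   data->shuffleTrainTest();                // re-draw the split for another run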

View File

@ -1137,7 +1137,7 @@ public:
fs << "iterations" << params.termCrit.maxCount;
fs << "}" << "}";
}
void write( FileStorage& fs ) const
{
if( layer_sizes.empty() )
@ -1145,7 +1145,7 @@ public:
int i, l_count = layer_count();
fs << "layer_sizes" << layer_sizes;
write_params( fs );
size_t esz = weights[0].elemSize();
@ -1168,7 +1168,7 @@ public:
}
fs << "]";
}
void read_params( const FileNode& fn )
{
String activ_func_name = (String)fn["activation_function"];
@ -1186,7 +1186,7 @@ public:
f_param2 = (double)fn["f_param2"];
set_activ_func( activ_func, f_param1, f_param2 );
min_val = (double)fn["min_val"];
max_val = (double)fn["max_val"];
min_val1 = (double)fn["min_val1"];
@ -1194,11 +1194,11 @@ public:
FileNode tpn = fn["training_params"];
params = Params();
if( !tpn.empty() )
{
String tmethod_name = (String)tpn["train_method"];
if( tmethod_name == "BACKPROP" )
{
params.trainMethod = Params::BACKPROP;
@ -1216,7 +1216,7 @@ public:
}
else
CV_Error(CV_StsParseError, "Unknown training method (should be BACKPROP or RPROP)");
FileNode tcn = tpn["term_criteria"];
if( !tcn.empty() )
{
@ -1236,7 +1236,7 @@ public:
}
}
}
void read( const FileNode& fn )
{
clear();

View File

@ -174,10 +174,10 @@ public:
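// climb up while the current node is its parent's right child; stop at the
// first ancestor whose right subtree has not been visited yet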
for( pidx = node->parent; pidx >= 0 && nodes[pidx].right == nidx;
nidx = pidx, pidx = nodes[pidx].parent )
;
if( pidx < 0 )
break;
nidx = nodes[pidx].right;
}
}
@ -340,7 +340,7 @@ public:
}
printf("%d trees. C=%.2f, training error=%.1f%%, working set size=%d (out of %d)\n", (int)roots.size(), C, err*100./n, (int)sidx.size(), n);
}*/
// renormalize weights
if( sumw > FLT_EPSILON )
normalizeWeights();
@ -453,14 +453,14 @@ public:
FileNode trees_node = fn["trees"];
FileNodeIterator it = trees_node.begin();
CV_Assert( ntrees == (int)trees_node.size() );
for( int treeidx = 0; treeidx < ntrees; treeidx++, ++it )
{
FileNode nfn = (*it)["nodes"];
readTree(nfn);
}
}
Boost::Params bparams;
vector<double> sumResult;
};

View File

@ -750,7 +750,7 @@ public:
void setTrainTestSplit(int count, bool shuffle)
{
int i, nsamples = getNSamples();
-    CV_Assert( 0 <= count < nsamples );
+    CV_Assert( 0 <= count && count < nsamples );
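// the old chained comparison parsed as ((0 <= count) < nsamples), i.e. a bool
// compared with an int; likely the one warning mentioned in the commit message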
trainSampleIdx.release();
testSampleIdx.release();

View File

@ -1363,4 +1363,3 @@ float CvGBTrees::predict( const cv::Mat& sample, const cv::Mat& _missing,
}
#endif

View File

@ -338,7 +338,7 @@ public:
cv::parallel_for_(cv::Range(0, nsamples),
NBPredictBody(c, cov_rotate_mats, inv_eigen_values, avg, samples,
var_idx, cls_labels, results, resultsProb, rawOutput));
return (float)value;
}

View File

@ -248,9 +248,9 @@ namespace ml
virtual const std::vector<Node>& getNodes() const { return nodes; }
virtual const std::vector<Split>& getSplits() const { return splits; }
virtual const std::vector<int>& getSubsets() const { return subsets; }
Params params0, params;
vector<int> varIdx;
vector<int> compVarIdx;
vector<uchar> varType;
@ -263,7 +263,7 @@ namespace ml
vector<int> classLabels;
vector<float> missingSubst;
bool _isClassifier;
Ptr<WorkData> w;
};

View File

@ -393,7 +393,7 @@ public:
{
impl.write(fs);
}
void read( const FileNode& fn )
{
impl.read(fn);

View File

@ -292,7 +292,7 @@ public:
if( vcount > 0 )
exp( R, R );
}
void calc( int vcount, int var_count, const float* vecs,
const float* another, Qfloat* results )
{
@ -353,7 +353,7 @@ static void sortSamplesByClasses( const Mat& _samples, const Mat& _responses,
class_ranges.push_back(i+1);
}
}
//////////////////////// SVM implementation //////////////////////////////
ParamGrid SVM::getDefaultGrid( int param_id )
@ -1205,7 +1205,7 @@ public:
int max_iter;
double C[2]; // C[0] == Cn, C[1] == Cp
Ptr<SVM::Kernel> kernel;
SelectWorkingSet select_working_set_func;
CalcRho calc_rho_func;
GetRow get_row_func;

View File

@ -372,7 +372,7 @@ void DTreesImpl::setDParams(const Params& _params)
if( params.CVFolds == 1 )
params.CVFolds = 0;
if( params.regressionAccuracy < 0 )
CV_Error( CV_StsOutOfRange, "params.regression_accuracy should be >= 0" );
}
@ -637,7 +637,7 @@ void DTreesImpl::calcValue( int nidx, const vector<int>& _sidx )
cv_sum2[j] += t*t*wval;
cv_count[j] += wval;
}
for( j = 0; j < cv_n; j++ )
{
sum += cv_sum[j];
@ -656,7 +656,7 @@ void DTreesImpl::calcValue( int nidx, const vector<int>& _sidx )
w->cv_Tn[nidx*cv_n + j] = INT_MAX;
}
}
node->node_risk = sum2 - (sum/sumw)*sum;
node->value = sum/sumw;
}
@ -822,7 +822,7 @@ void DTreesImpl::clusterCategories( const double* vectors, int n, int m, double*
min_idx = idx;
}
}
if( min_idx != labels[i] )
modified = true;
labels[i] = min_idx;
@ -1116,18 +1116,18 @@ DTreesImpl::WSplit DTreesImpl::findSplitCatReg( int vi, const vector<int>& _sidx
// (there should be very little loss in accuracy)
for( i = 0; i < mi; i++ )
sum[i] *= counts[i];
for( subset_i = 0; subset_i < mi-1; subset_i++ )
{
int idx = (int)(sum_ptr[subset_i] - sum);
double ni = counts[idx];
if( ni > FLT_EPSILON )
{
double s = sum[idx];
lsum += s; L += ni;
rsum -= s; R -= ni;
if( L > FLT_EPSILON && R > FLT_EPSILON )
{
double val = (lsum*lsum*R + rsum*rsum*L)/(L*R);
@ -1139,7 +1139,7 @@ DTreesImpl::WSplit DTreesImpl::findSplitCatReg( int vi, const vector<int>& _sidx
}
}
}
WSplit split;
if( best_subset >= 0 )
{
@ -1372,17 +1372,17 @@ bool DTreesImpl::cutTree( int root, double T, int fold, double min_alpha )
}
nidx = node->left;
}
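// climb up while the current node is its parent's right child; the loop
// stops at the first ancestor whose right subtree is still unvisited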
for( pidx = node->parent; pidx >= 0 && w->wnodes[pidx].right == nidx;
nidx = pidx, pidx = w->wnodes[pidx].parent )
;
if( pidx < 0 )
break;
nidx = w->wnodes[pidx].right;
}
return false;
}
@ -1807,7 +1807,7 @@ int DTreesImpl::readSplit( const FileNode& fn )
}
split.c = (float)cmpNode;
}
split.quality = (float)fn["quality"];
splits.push_back(split);
@ -1894,7 +1894,7 @@ Ptr<DTrees> DTrees::create(const DTrees::Params& params)
p->setDParams(params);
return p;
}
}
}