diff --git a/modules/ml/doc/mldata.rst b/modules/ml/doc/mldata.rst index 5a3896c62..9b5e805e8 100644 --- a/modules/ml/doc/mldata.rst +++ b/modules/ml/doc/mldata.rst @@ -3,7 +3,7 @@ MLData .. highlight:: cpp -For the machine learning algorithms, the data set is often stored in a file of the ``.csv``-like format. The file contains a table of predictor and response values where each row of the table corresponds to a sample. Missing values are supported. The UC Irvine Machine Learning Repository (http://archive.ics.uci.edu/ml/) provides many data sets stored in such a format to the machine learning community. The class ``MLData`` is implemented to easily load the data for training one of the OpenCV machine learning algorithms. For float values, only the ``'.'`` separator is supported. The table can have a header and in such case the user have to set the number of the header lines to skip them duaring the file reading. +For the machine learning algorithms, the data set is often stored in a file of the ``.csv``-like format. The file contains a table of predictor and response values where each row of the table corresponds to a sample. Missing values are supported. The UC Irvine Machine Learning Repository (http://archive.ics.uci.edu/ml/) provides many data sets stored in such a format to the machine learning community. The class ``MLData`` is implemented to easily load the data for training one of the OpenCV machine learning algorithms. For float values, only the ``'.'`` separator is supported. CvMLData -------- @@ -182,20 +182,6 @@ Sets the variables types in the loaded data. In the string, a variable type is followed by a list of variables indices. For example: ``"ord[0-17],cat[18]"``, ``"ord[0,2,4,10-12], cat[1,3,5-9,13,14]"``, ``"cat"`` (all variables are categorical), ``"ord"`` (all variables are ordered). -CvMLData::get_header_lines_number ---------------------------------- -Returns a number of the table header lines. - -.. 
ocv:function:: int CvMLData::get_header_lines_number() const - -CvMLData::set_header_lines_number ---------------------------------- -Sets a number of the table header lines. - -.. ocv:function:: void CvMLData::set_header_lines_number( int n ) - -By default it is supposed that the table does not have a header, i.e. it contains only the data. - CvMLData::get_var_type ---------------------- Returns type of the specified variable diff --git a/modules/ml/include/opencv2/ml/ml.hpp b/modules/ml/include/opencv2/ml/ml.hpp index 31ddaaa02..5179b937b 100644 --- a/modules/ml/include/opencv2/ml/ml.hpp +++ b/modules/ml/include/opencv2/ml/ml.hpp @@ -2026,9 +2026,6 @@ public: const CvMat* get_responses(); const CvMat* get_missing() const; - void set_header_lines_number( int n ); - int get_header_lines_number() const; - void set_response_idx( int idx ); // old response become predictors, new response_idx = idx // if idx < 0 there will be no response int get_response_idx() const; @@ -2069,7 +2066,6 @@ protected: char delimiter; char miss_ch; - short header_lines_number; //char flt_separator; CvMat* values; @@ -2081,8 +2077,6 @@ protected: CvMat* var_idx_out; // mat CvMat* var_types_out; // mat - int header_lines_number; - int response_idx; int train_sample_count; diff --git a/modules/ml/src/data.cpp b/modules/ml/src/data.cpp index 9894591e1..f143fdfe7 100644 --- a/modules/ml/src/data.cpp +++ b/modules/ml/src/data.cpp @@ -71,7 +71,6 @@ CvMLData::CvMLData() { values = missing = var_types = var_idx_mask = response_out = var_idx_out = var_types_out = 0; train_sample_idx = test_sample_idx = 0; - header_lines_number = 0; sample_idx = 0; response_idx = -1; @@ -118,17 +117,6 @@ void CvMLData::clear() train_sample_count = -1; } - -void CvMLData::set_header_lines_number( int idx ) -{ - header_lines_number = (short)std::max(0, idx); -} - -int CvMLData::get_header_lines_number() const -{ - return header_lines_number; -} - static char *fgets_chomp(char *str, int n, FILE *stream) { char *head = 
fgets(str, n, stream); @@ -165,15 +153,9 @@ int CvMLData::read_csv(const char* filename) if( !file ) return -1; - std::vector<char> _buf(M); + // read the first line and determine the number of variables + std::vector<char> _buf(M); char* buf = &_buf[0]; - - // skip header lines - for( int i = 0; i < header_lines_number; i++ ) - if( fgets( buf, M, file ) == 0 ) - return -1; - - // read the first data line and determine the number of variables if( !fgets_chomp( buf, M, file )) { fclose(file);