From 6de422701aa6da643a4abe58e91d901408b0b93c Mon Sep 17 00:00:00 2001 From: LeonidBeynenson Date: Tue, 29 Jan 2013 21:11:52 +0400 Subject: [PATCH] Made changes to allow ml module to work with big data. --- apps/traincascade/boost.cpp | 73 +++++++++++++++-------- modules/ml/include/opencv2/ml/ml.hpp | 8 ++- modules/ml/src/boost.cpp | 14 +++-- modules/ml/src/ertrees.cpp | 56 +++++++++++------ modules/ml/src/tree.cpp | 89 +++++++++++++++++----------- 5 files changed, 156 insertions(+), 84 deletions(-) diff --git a/apps/traincascade/boost.cpp b/apps/traincascade/boost.cpp index 3e17b5d42..0ba11a936 100644 --- a/apps/traincascade/boost.cpp +++ b/apps/traincascade/boost.cpp @@ -360,7 +360,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id if (is_buf_16u) { - unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols + + unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() + vi*sample_count + data_root->offset); for( int i = 0; i < num_valid; i++ ) { @@ -373,7 +373,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id } else { - int* idst_idx = buf->data.i + root->buf_idx*buf->cols + + int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() + vi*sample_count + root->offset; for( int i = 0; i < num_valid; i++ ) { @@ -390,14 +390,14 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf); if (is_buf_16u) { - unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols + + unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() + (workVarCount-1)*sample_count + root->offset); for( int i = 0; i < count; i++ ) udst[i] = (unsigned short)src_lbls[sidx[i]]; } else { - int* idst = buf->data.i + root->buf_idx*buf->cols + + int* idst = buf->data.i + root->buf_idx*get_length_subbuf() + (workVarCount-1)*sample_count + root->offset; for( int i = 0; i < count; i++ ) idst[i] = src_lbls[sidx[i]]; @@ -407,14 +407,14 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf); if (is_buf_16u) { - unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols + + unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() + workVarCount*sample_count + root->offset); for( int i = 0; i < count; i++ ) sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]]; } else { - int* sample_idx_dst = buf->data.i + root->buf_idx*buf->cols + + int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() + workVarCount*sample_count + root->offset; for( int i = 0; i < count; i++ ) sample_idx_dst[i] = sample_idx_src[sidx[i]]; @@ -489,6 +489,10 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat int* idst = 0; unsigned short* udst = 0; + uint64 effective_buf_size = -1; + int effective_buf_height = -1, effective_buf_width = -1; + + clear(); shared = true; have_labels = true; @@ -548,13 +552,28 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat var_type->data.i[var_count] = cat_var_count; var_type->data.i[var_count+1] = cat_var_count+1; work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/; - buf_size = (work_var_count + 1) * sample_count/*sample_indices*/; buf_count = 2; - if ( is_buf_16u ) - buf = cvCreateMat( buf_count, buf_size, CV_16UC1 ); + buf_size = -1; // the member buf_size is obsolete + + effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated + effective_buf_width = sample_count; + effective_buf_height = work_var_count+1; + + if (effective_buf_width >= effective_buf_height) + effective_buf_height *= buf_count; else - buf = cvCreateMat( buf_count, buf_size, CV_32SC1 ); + effective_buf_width *= buf_count; + + if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size) + { + CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit"); + } + + if ( is_buf_16u ) + buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 ); + else + buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 ); cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 ); @@ -609,7 +628,7 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat priors_mult = cvCloneMat( priors ); counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 ); direction = cvCreateMat( 1, sample_count, CV_8UC1 ); - split_buf = cvCreateMat( 1, sample_count, CV_32SC1 ); + split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer } void CvCascadeBoostTrainData::free_train_data() @@ -652,10 +671,10 @@ void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* o if ( vi < numPrecalcIdx ) { if( !is_buf_16u ) - *sortedIndices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset; + *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset; else { - const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols + + const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset ); for( int i = 0; i < nodeSampleCount; i++ ) sortedIndicesBuf[i] = shortIndices[i]; @@ -1027,6 +1046,7 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node ) int newBufIdx = data->get_child_buf_idx( node ); int workVarCount = data->get_work_var_count(); CvMat* buf = data->buf; + size_t length_buf_row = data->get_length_subbuf(); cv::AutoBuffer inn_buf(n*(3*sizeof(int)+sizeof(float))); int* tempBuf = (int*)(uchar*)inn_buf; bool splitInputData; @@ -1070,7 +1090,7 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node ) if (data->is_buf_16u) { ushort *ldst, *rdst; - ldst = (ushort*)(buf->data.s + left->buf_idx*buf->cols + + ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row + vi*scount + left->offset); rdst = (ushort*)(ldst + nl); @@ -1096,9 +1116,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node ) else { int *ldst, *rdst; - ldst = buf->data.i + left->buf_idx*buf->cols + + ldst = buf->data.i + left->buf_idx*length_buf_row + vi*scount + left->offset; - rdst = buf->data.i + right->buf_idx*buf->cols + + rdst = buf->data.i + right->buf_idx*length_buf_row + vi*scount + right->offset; // split sorted @@ -1131,9 +1151,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node ) if (data->is_buf_16u) { - unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols + + unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row + (workVarCount-1)*scount + left->offset); - unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols + + unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row + (workVarCount-1)*scount + right->offset); for( int i = 0; i < n; i++ ) @@ -1154,9 +1174,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node ) } else { - int *ldst = buf->data.i + left->buf_idx*buf->cols + + int *ldst = buf->data.i + left->buf_idx*length_buf_row + (workVarCount-1)*scount + left->offset; - int *rdst = buf->data.i + right->buf_idx*buf->cols + + int *rdst = buf->data.i + right->buf_idx*length_buf_row + (workVarCount-1)*scount + right->offset; for( int i = 0; i < n; i++ ) @@ -1184,9 +1204,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node ) if (data->is_buf_16u) { - unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols + + unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row + workVarCount*scount + left->offset); - unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols + + unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row + workVarCount*scount + right->offset); for (int i = 0; i < n; i++) { @@ -1205,9 +1225,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node ) } else { - int* ldst = buf->data.i + left->buf_idx*buf->cols + + int* ldst = buf->data.i + left->buf_idx*length_buf_row + workVarCount*scount + left->offset; - int* rdst = buf->data.i + right->buf_idx*buf->cols + + int* rdst = buf->data.i + right->buf_idx*length_buf_row + workVarCount*scount + right->offset; for (int i = 0; i < n; i++) { @@ -1352,6 +1372,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree ) sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf ); } CvMat* buf = data->buf; + size_t length_buf_row = data->get_length_subbuf(); if( !tree ) // before training the first tree, initialize weights and other parameters { int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n); @@ -1375,7 +1396,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree ) if (data->is_buf_16u) { - unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*buf->cols + + unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row + data->data_root->offset + (data->work_var_count-1)*data->sample_count); for( int i = 0; i < n; i++ ) { @@ -1393,7 +1414,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree ) } else { - int* labels = buf->data.i + data->data_root->buf_idx*buf->cols + + int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row + data->data_root->offset + (data->work_var_count-1)*data->sample_count; for( int i = 0; i < n; i++ ) diff --git a/modules/ml/include/opencv2/ml/ml.hpp b/modules/ml/include/opencv2/ml/ml.hpp index 32047608e..dc7a4048a 100644 --- a/modules/ml/include/opencv2/ml/ml.hpp +++ b/modules/ml/include/opencv2/ml/ml.hpp @@ -796,7 +796,7 @@ struct CV_EXPORTS CvDTreeTrainData const CvMat* responses; CvMat* responses_copy; // used in Boosting - int buf_count, buf_size; + int buf_count, buf_size; // buf_size is obsolete, please do not use it, use expression ((int64)buf->rows * (int64)buf->cols / buf_count) instead bool shared; int is_buf_16u; @@ -806,6 +806,12 @@ struct CV_EXPORTS CvDTreeTrainData CvMat* counts; CvMat* buf; + inline size_t get_length_subbuf() const + { + size_t res = (size_t)(work_var_count + 1) * (size_t)sample_count; + return res; + } + CvMat* direction; CvMat* split_buf; diff --git a/modules/ml/src/boost.cpp b/modules/ml/src/boost.cpp index 3525a1173..8db94bd71 100644 --- a/modules/ml/src/boost.cpp +++ b/modules/ml/src/boost.cpp @@ -1130,13 +1130,13 @@ CvBoost::update_weights( CvBoostTree* tree ) int *sample_idx_buf; const int* sample_idx = 0; cv::AutoBuffer inn_buf; - size_t _buf_size = (params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? data->sample_count*sizeof(int) : 0; + size_t _buf_size = (params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? (size_t)(data->sample_count)*sizeof(int) : 0; if( !tree ) _buf_size += n*sizeof(int); else { if( have_subsample ) - _buf_size += data->buf->cols*(sizeof(float)+sizeof(uchar)); + _buf_size += data->get_length_subbuf()*(sizeof(float)+sizeof(uchar)); } inn_buf.allocate(_buf_size); uchar* cur_buf_pos = (uchar*)inn_buf; @@ -1151,6 +1151,7 @@ CvBoost::update_weights( CvBoostTree* tree ) sample_idx = data->get_sample_indices( data->data_root, sample_idx_buf ); } CvMat* dtree_data_buf = data->buf; + size_t length_buf_row = data->get_length_subbuf(); if( !tree ) // before training the first tree, initialize weights and other parameters { int* class_labels_buf = (int*)cur_buf_pos; @@ -1189,7 +1190,7 @@ CvBoost::update_weights( CvBoostTree* tree ) if (data->is_buf_16u) { - unsigned short* labels = (unsigned short*)(dtree_data_buf->data.s + data->data_root->buf_idx*dtree_data_buf->cols + + unsigned short* labels = (unsigned short*)(dtree_data_buf->data.s + data->data_root->buf_idx*length_buf_row + data->data_root->offset + (data->work_var_count-1)*data->sample_count); for( i = 0; i < n; i++ ) { @@ -1207,7 +1208,7 @@ CvBoost::update_weights( CvBoostTree* tree ) } else { - int* labels = dtree_data_buf->data.i + data->data_root->buf_idx*dtree_data_buf->cols + + int* labels = dtree_data_buf->data.i + data->data_root->buf_idx*length_buf_row + data->data_root->offset + (data->work_var_count-1)*data->sample_count; for( i = 0; i < n; i++ ) @@ -1254,9 +1255,10 @@ CvBoost::update_weights( CvBoostTree* tree ) if( have_subsample ) { float* values = (float*)cur_buf_pos; - cur_buf_pos = (uchar*)(values + data->buf->cols); + cur_buf_pos = (uchar*)(values + data->get_length_subbuf()); uchar* missing = cur_buf_pos; - cur_buf_pos = missing + data->buf->step; + cur_buf_pos = missing + data->get_length_subbuf() * (size_t)CV_ELEM_SIZE(data->buf->type); + CvMat _sample, _mask; // invert the subsample mask diff --git a/modules/ml/src/ertrees.cpp b/modules/ml/src/ertrees.cpp index 8a3828d99..380d87a79 100644 --- a/modules/ml/src/ertrees.cpp +++ b/modules/ml/src/ertrees.cpp @@ -75,11 +75,14 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag, int sample_all = 0, r_type, cv_n; int total_c_count = 0; int tree_block_size, temp_block_size, max_split_size, nv_size, cv_size = 0; - int ds_step, dv_step, ms_step = 0, mv_step = 0; // {data|mask}{sample|var}_step - int vi, i, size; + int64 ds_step, dv_step, ms_step = 0, mv_step = 0; // {data|mask}{sample|var}_step + int64 vi, i, size; char err[100]; const int *sidx = 0, *vidx = 0; + uint64 effective_buf_size = -1; + int effective_buf_height = -1, effective_buf_width = -1; + if ( _params.use_surrogates ) CV_ERROR(CV_StsBadArg, "CvERTrees do not support surrogate splits"); @@ -179,18 +182,34 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag, have_labels = cv_n > 0 || (ord_var_count == 1 && cat_var_count == 0) || _add_labels; work_var_count = cat_var_count + (is_classifier ? 1 : 0) + (have_labels ? 1 : 0); - buf_size = (work_var_count + 1)*sample_count; + shared = _shared; buf_count = shared ? 2 : 1; + buf_size = -1; // the member buf_size is obsolete + + effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated + effective_buf_width = sample_count; + effective_buf_height = work_var_count+1; + + if (effective_buf_width >= effective_buf_height) + effective_buf_height *= buf_count; + else + effective_buf_width *= buf_count; + + if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size) + { + CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit"); + } + if ( is_buf_16u ) { - CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_16UC1 )); + CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 )); CV_CALL( pair16u32s_ptr = (CvPair16u32s*)cvAlloc( sample_count*sizeof(pair16u32s_ptr[0]) )); } else { - CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_32SC1 )); + CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 )); CV_CALL( int_ptr = (int**)cvAlloc( sample_count*sizeof(int_ptr[0]) )); } @@ -303,7 +322,7 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag, val = cvRound(t); if( val != t ) { - sprintf( err, "%d-th value of %d-th (categorical) " + sprintf( err, "%ld-th value of %ld-th (categorical) " "variable is not an integer", i, vi ); CV_ERROR( CV_StsBadArg, err ); } @@ -311,7 +330,7 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag, if( val == INT_MAX ) { - sprintf( err, "%d-th value of %d-th (categorical) " + sprintf( err, "%ld-th value of %ld-th (categorical) " "variable is too large", i, vi ); CV_ERROR( CV_StsBadArg, err ); } @@ -414,7 +433,7 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag, if( fabs(val) >= ord_nan ) { - sprintf( err, "%d-th value of %d-th (ordered) " + sprintf( err, "%ld-th value of %ld-th (ordered) " "variable (=%g) is too large", i, vi, val ); CV_ERROR( CV_StsBadArg, err ); } @@ -578,9 +597,9 @@ const int* CvERTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat int ci = get_var_type( vi); const int* cat_values = 0; if( !is_buf_16u ) - cat_values = buf->data.i + n->buf_idx*buf->cols + ci*sample_count + n->offset; + cat_values = buf->data.i + n->buf_idx*get_length_subbuf() + ci*sample_count + n->offset; else { - const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols + + const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() + ci*sample_count + n->offset); for( int i = 0; i < n->sample_count; i++ ) cat_values_buf[i] = short_values[i]; @@ -1333,6 +1352,7 @@ void CvForestERTree::split_node_data( CvDTreeNode* node ) CvDTreeNode *left = 0, *right = 0; int new_buf_idx = data->get_child_buf_idx( node ); CvMat* buf = data->buf; + size_t length_buf_row = data->get_length_subbuf(); cv::AutoBuffer temp_buf(n); complete_node_dir(node); @@ -1385,9 +1405,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node ) if (data->is_buf_16u) { - unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols + + unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row + ci*scount + left->offset); - unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols + + unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row + ci*scount + right->offset); for( i = 0; i < n; i++ ) @@ -1415,9 +1435,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node ) } else { - int *ldst = buf->data.i + left->buf_idx*buf->cols + + int *ldst = buf->data.i + left->buf_idx*length_buf_row + ci*scount + left->offset; - int *rdst = buf->data.i + right->buf_idx*buf->cols + + int *rdst = buf->data.i + right->buf_idx*length_buf_row + ci*scount + right->offset; for( i = 0; i < n; i++ ) @@ -1460,9 +1480,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node ) if (data->is_buf_16u) { - unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols + + unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row + pos*scount + left->offset); - unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols + + unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row + pos*scount + right->offset); for (i = 0; i < n; i++) @@ -1483,9 +1503,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node ) } else { - int* ldst = buf->data.i + left->buf_idx*buf->cols + + int* ldst = buf->data.i + left->buf_idx*length_buf_row + pos*scount + left->offset; - int* rdst = buf->data.i + right->buf_idx*buf->cols + + int* rdst = buf->data.i + right->buf_idx*length_buf_row + pos*scount + right->offset; for (i = 0; i < n; i++) { diff --git a/modules/ml/src/tree.cpp b/modules/ml/src/tree.cpp index 0da24d66a..aef4a3523 100644 --- a/modules/ml/src/tree.cpp +++ b/modules/ml/src/tree.cpp @@ -50,7 +50,8 @@ static const int block_size_delta = 1 << 10; CvDTreeTrainData::CvDTreeTrainData() { var_idx = var_type = cat_count = cat_ofs = cat_map = - priors = priors_mult = counts = buf = direction = split_buf = responses_copy = 0; + priors = priors_mult = counts = direction = split_buf = responses_copy = 0; + buf = 0; tree_storage = temp_storage = 0; clear(); @@ -64,7 +65,8 @@ CvDTreeTrainData::CvDTreeTrainData( const CvMat* _train_data, int _tflag, bool _shared, bool _add_labels ) { var_idx = var_type = cat_count = cat_ofs = cat_map = - priors = priors_mult = counts = buf = direction = split_buf = responses_copy = 0; + priors = priors_mult = counts = direction = split_buf = responses_copy = 0; + buf = 0; tree_storage = temp_storage = 0; @@ -152,11 +154,14 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag, int sample_all = 0, r_type, cv_n; int total_c_count = 0; int tree_block_size, temp_block_size, max_split_size, nv_size, cv_size = 0; - int ds_step, dv_step, ms_step = 0, mv_step = 0; // {data|mask}{sample|var}_step - int vi, i, size; + int64 ds_step, dv_step, ms_step = 0, mv_step = 0; // {data|mask}{sample|var}_step + int64 vi, i, size; char err[100]; const int *sidx = 0, *vidx = 0; + uint64 effective_buf_size = -1; + int effective_buf_height = -1, effective_buf_width = -1; + if( _update_data && data_root ) { data = new CvDTreeTrainData( _train_data, _tflag, _responses, _var_idx, @@ -285,18 +290,35 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag, work_var_count = var_count + (is_classifier ? 1 : 0) // for responses class_labels + (have_labels ? 1 : 0); // for cv_labels - buf_size = (work_var_count + 1 /*for sample_indices*/) * sample_count; shared = _shared; buf_count = shared ? 2 : 1; + buf_size = -1; // the member buf_size is obsolete + + effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated + effective_buf_width = sample_count; + effective_buf_height = work_var_count+1; + + if (effective_buf_width >= effective_buf_height) + effective_buf_height *= buf_count; + else + effective_buf_width *= buf_count; + + if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size) + { + CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit"); + } + + + if ( is_buf_16u ) { - CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_16UC1 )); + CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 )); CV_CALL( pair16u32s_ptr = (CvPair16u32s*)cvAlloc( sample_count*sizeof(pair16u32s_ptr[0]) )); } else { - CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_32SC1 )); + CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 )); CV_CALL( int_ptr = (int**)cvAlloc( sample_count*sizeof(int_ptr[0]) )); } @@ -356,7 +378,7 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag, { int ci; const uchar* mask = 0; - int m_step = 0, step; + int64 m_step = 0, step; const int* idata = 0; const float* fdata = 0; int num_valid = 0; @@ -409,7 +431,7 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag, val = cvRound(t); if( fabs(t - val) > FLT_EPSILON ) { - sprintf( err, "%d-th value of %d-th (categorical) " + sprintf( err, "%ld-th value of %ld-th (categorical) " "variable is not an integer", i, vi ); CV_ERROR( CV_StsBadArg, err ); } @@ -417,7 +439,7 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag, if( val == INT_MAX ) { - sprintf( err, "%d-th value of %d-th (categorical) " + sprintf( err, "%ld-th value of %ld-th (categorical) " "variable is too large", i, vi ); CV_ERROR( CV_StsBadArg, err ); } @@ -524,7 +546,7 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag, if( fabs(val) >= ord_nan ) { - sprintf( err, "%d-th value of %d-th (ordered) " + sprintf( err, "%ld-th value of %ld-th (ordered) " "variable (=%g) is too large", i, vi, val ); CV_ERROR( CV_StsBadArg, err ); } @@ -532,7 +554,7 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag, } if (is_buf_16u) - udst[i] = (unsigned short)i; + udst[i] = (unsigned short)i; // TODO: memory corruption may be here else idst[i] = i; _fdst[i] = val; @@ -751,7 +773,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx ) if (is_buf_16u) { - unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols + + unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() + vi*sample_count + root->offset); for( i = 0; i < count; i++ ) { @@ -762,7 +784,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx ) } else { - int* idst = buf->data.i + root->buf_idx*buf->cols + + int* idst = buf->data.i + root->buf_idx*get_length_subbuf() + vi*sample_count + root->offset; for( i = 0; i < count; i++ ) { @@ -788,7 +810,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx ) if (is_buf_16u) { - unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols + + unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() + vi*sample_count + data_root->offset); for( i = 0; i < num_valid; i++ ) { @@ -812,7 +834,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx ) } else { - int* idst_idx = buf->data.i + root->buf_idx*buf->cols + + int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() + vi*sample_count + root->offset; for( i = 0; i < num_valid; i++ ) { @@ -840,14 +862,14 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx ) const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf); if (is_buf_16u) { - unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols + + unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() + workVarCount*sample_count + root->offset); for (i = 0; i < count; i++) sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]]; } else { - int* sample_idx_dst = buf->data.i + root->buf_idx*buf->cols + + int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() + workVarCount*sample_count + root->offset; for (i = 0; i < count; i++) sample_idx_dst[i] = sample_idx_src[sidx[i]]; @@ -1158,10 +1180,10 @@ void CvDTreeTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ord_valu const int* sample_indices = get_sample_indices(n, sample_indices_buf); if( !is_buf_16u ) - *sorted_indices = buf->data.i + n->buf_idx*buf->cols + + *sorted_indices = buf->data.i + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset; else { - const unsigned short* short_indices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols + + const unsigned short* short_indices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset ); for( int i = 0; i < node_sample_count; i++ ) sorted_indices_buf[i] = short_indices[i]; @@ -1232,10 +1254,10 @@ const int* CvDTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat_ { const int* cat_values = 0; if( !is_buf_16u ) - cat_values = buf->data.i + n->buf_idx*buf->cols + + cat_values = buf->data.i + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset; else { - const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols + + const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset); for( int i = 0; i < n->sample_count; i++ ) cat_values_buf[i] = short_values[i]; @@ -3004,6 +3026,7 @@ void CvDTree::split_node_data( CvDTreeNode* node ) int new_buf_idx = data->get_child_buf_idx( node ); int work_var_count = data->get_work_var_count(); CvMat* buf = data->buf; + size_t length_buf_row = data->get_length_subbuf(); cv::AutoBuffer inn_buf(n*(3*sizeof(int) + sizeof(float))); int* temp_buf = (int*)(uchar*)inn_buf; @@ -3049,7 +3072,7 @@ void CvDTree::split_node_data( CvDTreeNode* node ) { unsigned short *ldst, *rdst, *ldst0, *rdst0; //unsigned short tl, tr; - ldst0 = ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols + + ldst0 = ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row + vi*scount + left->offset); rdst0 = rdst = (unsigned short*)(ldst + nl); @@ -3095,9 +3118,9 @@ void CvDTree::split_node_data( CvDTreeNode* node ) else { int *ldst0, *ldst, *rdst0, *rdst; - ldst0 = ldst = buf->data.i + left->buf_idx*buf->cols + + ldst0 = ldst = buf->data.i + left->buf_idx*length_buf_row + vi*scount + left->offset; - rdst0 = rdst = buf->data.i + right->buf_idx*buf->cols + + rdst0 = rdst = buf->data.i + right->buf_idx*length_buf_row + vi*scount + right->offset; // split sorted @@ -3158,9 +3181,9 @@ void CvDTree::split_node_data( CvDTreeNode* node ) if (data->is_buf_16u) { - unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols + + unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row + vi*scount + left->offset); - unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols + + unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row + vi*scount + right->offset); for( i = 0; i < n; i++ ) @@ -3188,9 +3211,9 @@ void CvDTree::split_node_data( CvDTreeNode* node ) } else { - int *ldst = buf->data.i + left->buf_idx*buf->cols + + int *ldst = buf->data.i + left->buf_idx*length_buf_row + vi*scount + left->offset; - int *rdst = buf->data.i + right->buf_idx*buf->cols + + int *rdst = buf->data.i + right->buf_idx*length_buf_row + vi*scount + right->offset; for( i = 0; i < n; i++ ) @@ -3230,9 +3253,9 @@ void CvDTree::split_node_data( CvDTreeNode* node ) int pos = data->get_work_var_count(); if (data->is_buf_16u) { - unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols + + unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row + pos*scount + left->offset); - unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols + + unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row + pos*scount + right->offset); for (i = 0; i < n; i++) { @@ -3252,9 +3275,9 @@ void CvDTree::split_node_data( CvDTreeNode* node ) } else { - int* ldst = buf->data.i + left->buf_idx*buf->cols + + int* ldst = buf->data.i + left->buf_idx*length_buf_row + pos*scount + left->offset; - int* rdst = buf->data.i + right->buf_idx*buf->cols + + int* rdst = buf->data.i + right->buf_idx*length_buf_row + pos*scount + right->offset; for (i = 0; i < n; i++) {