Made changes to allow the ml module to work with big data.
This commit is contained in:
@@ -75,11 +75,14 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
|
||||
int sample_all = 0, r_type, cv_n;
|
||||
int total_c_count = 0;
|
||||
int tree_block_size, temp_block_size, max_split_size, nv_size, cv_size = 0;
|
||||
int ds_step, dv_step, ms_step = 0, mv_step = 0; // {data|mask}{sample|var}_step
|
||||
int vi, i, size;
|
||||
int64 ds_step, dv_step, ms_step = 0, mv_step = 0; // {data|mask}{sample|var}_step
|
||||
int64 vi, i, size;
|
||||
char err[100];
|
||||
const int *sidx = 0, *vidx = 0;
|
||||
|
||||
uint64 effective_buf_size = -1;
|
||||
int effective_buf_height = -1, effective_buf_width = -1;
|
||||
|
||||
if ( _params.use_surrogates )
|
||||
CV_ERROR(CV_StsBadArg, "CvERTrees do not support surrogate splits");
|
||||
|
||||
@@ -179,18 +182,34 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
|
||||
have_labels = cv_n > 0 || (ord_var_count == 1 && cat_var_count == 0) || _add_labels;
|
||||
|
||||
work_var_count = cat_var_count + (is_classifier ? 1 : 0) + (have_labels ? 1 : 0);
|
||||
buf_size = (work_var_count + 1)*sample_count;
|
||||
|
||||
shared = _shared;
|
||||
buf_count = shared ? 2 : 1;
|
||||
|
||||
buf_size = -1; // the member buf_size is obsolete
|
||||
|
||||
effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
|
||||
effective_buf_width = sample_count;
|
||||
effective_buf_height = work_var_count+1;
|
||||
|
||||
if (effective_buf_width >= effective_buf_height)
|
||||
effective_buf_height *= buf_count;
|
||||
else
|
||||
effective_buf_width *= buf_count;
|
||||
|
||||
if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
|
||||
{
|
||||
CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
|
||||
}
|
||||
|
||||
if ( is_buf_16u )
|
||||
{
|
||||
CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_16UC1 ));
|
||||
CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 ));
|
||||
CV_CALL( pair16u32s_ptr = (CvPair16u32s*)cvAlloc( sample_count*sizeof(pair16u32s_ptr[0]) ));
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_32SC1 ));
|
||||
CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 ));
|
||||
CV_CALL( int_ptr = (int**)cvAlloc( sample_count*sizeof(int_ptr[0]) ));
|
||||
}
|
||||
|
||||
@@ -303,7 +322,7 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
|
||||
val = cvRound(t);
|
||||
if( val != t )
|
||||
{
|
||||
sprintf( err, "%d-th value of %d-th (categorical) "
|
||||
sprintf( err, "%ld-th value of %ld-th (categorical) "
|
||||
"variable is not an integer", i, vi );
|
||||
CV_ERROR( CV_StsBadArg, err );
|
||||
}
|
||||
@@ -311,7 +330,7 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
|
||||
|
||||
if( val == INT_MAX )
|
||||
{
|
||||
sprintf( err, "%d-th value of %d-th (categorical) "
|
||||
sprintf( err, "%ld-th value of %ld-th (categorical) "
|
||||
"variable is too large", i, vi );
|
||||
CV_ERROR( CV_StsBadArg, err );
|
||||
}
|
||||
@@ -414,7 +433,7 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
|
||||
|
||||
if( fabs(val) >= ord_nan )
|
||||
{
|
||||
sprintf( err, "%d-th value of %d-th (ordered) "
|
||||
sprintf( err, "%ld-th value of %ld-th (ordered) "
|
||||
"variable (=%g) is too large", i, vi, val );
|
||||
CV_ERROR( CV_StsBadArg, err );
|
||||
}
|
||||
@@ -578,9 +597,9 @@ const int* CvERTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat
|
||||
int ci = get_var_type( vi);
|
||||
const int* cat_values = 0;
|
||||
if( !is_buf_16u )
|
||||
cat_values = buf->data.i + n->buf_idx*buf->cols + ci*sample_count + n->offset;
|
||||
cat_values = buf->data.i + n->buf_idx*get_length_subbuf() + ci*sample_count + n->offset;
|
||||
else {
|
||||
const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
|
||||
const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
|
||||
ci*sample_count + n->offset);
|
||||
for( int i = 0; i < n->sample_count; i++ )
|
||||
cat_values_buf[i] = short_values[i];
|
||||
@@ -1333,6 +1352,7 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
|
||||
CvDTreeNode *left = 0, *right = 0;
|
||||
int new_buf_idx = data->get_child_buf_idx( node );
|
||||
CvMat* buf = data->buf;
|
||||
size_t length_buf_row = data->get_length_subbuf();
|
||||
cv::AutoBuffer<int> temp_buf(n);
|
||||
|
||||
complete_node_dir(node);
|
||||
@@ -1385,9 +1405,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
|
||||
|
||||
if (data->is_buf_16u)
|
||||
{
|
||||
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
|
||||
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
|
||||
ci*scount + left->offset);
|
||||
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
|
||||
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
|
||||
ci*scount + right->offset);
|
||||
|
||||
for( i = 0; i < n; i++ )
|
||||
@@ -1415,9 +1435,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
|
||||
}
|
||||
else
|
||||
{
|
||||
int *ldst = buf->data.i + left->buf_idx*buf->cols +
|
||||
int *ldst = buf->data.i + left->buf_idx*length_buf_row +
|
||||
ci*scount + left->offset;
|
||||
int *rdst = buf->data.i + right->buf_idx*buf->cols +
|
||||
int *rdst = buf->data.i + right->buf_idx*length_buf_row +
|
||||
ci*scount + right->offset;
|
||||
|
||||
for( i = 0; i < n; i++ )
|
||||
@@ -1460,9 +1480,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
|
||||
|
||||
if (data->is_buf_16u)
|
||||
{
|
||||
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
|
||||
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
|
||||
pos*scount + left->offset);
|
||||
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
|
||||
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
|
||||
pos*scount + right->offset);
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
@@ -1483,9 +1503,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
|
||||
}
|
||||
else
|
||||
{
|
||||
int* ldst = buf->data.i + left->buf_idx*buf->cols +
|
||||
int* ldst = buf->data.i + left->buf_idx*length_buf_row +
|
||||
pos*scount + left->offset;
|
||||
int* rdst = buf->data.i + right->buf_idx*buf->cols +
|
||||
int* rdst = buf->data.i + right->buf_idx*length_buf_row +
|
||||
pos*scount + right->offset;
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
|
Reference in New Issue
Block a user