Made changes to allow ml module to work with big data.

2013-01-29 21:11:52 +04:00
parent 8521ac5d21
commit 6de422701a
5 changed files with 156 additions and 84 deletions
--- a/apps/traincascade/boost.cpp
+++ b/apps/traincascade/boost.cpp
@@ -360,7 +360,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id

            if (is_buf_16u)
            {
-                unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+                unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                    vi*sample_count + data_root->offset);
                for( int i = 0; i < num_valid; i++ )
                {
@@ -373,7 +373,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
            }
            else
            {
-                int* idst_idx = buf->data.i + root->buf_idx*buf->cols +
+                int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
                    vi*sample_count + root->offset;
                for( int i = 0; i < num_valid; i++ )
                {
@@ -390,14 +390,14 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
        const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf);
        if (is_buf_16u)
        {
-            unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+            unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                (workVarCount-1)*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                udst[i] = (unsigned short)src_lbls[sidx[i]];
        }
        else
        {
-            int* idst = buf->data.i + root->buf_idx*buf->cols +
+            int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
                (workVarCount-1)*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                idst[i] = src_lbls[sidx[i]];
@@ -407,14 +407,14 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
        const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
        if (is_buf_16u)
        {
-            unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+            unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                workVarCount*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
        }
        else
        {
-            int* sample_idx_dst = buf->data.i + root->buf_idx*buf->cols +
+            int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
                workVarCount*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = sample_idx_src[sidx[i]];
@@ -489,6 +489,10 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat
    int* idst = 0;
    unsigned short* udst = 0;

+    uint64 effective_buf_size = -1;
+    int effective_buf_height = -1, effective_buf_width = -1;
+
+
    clear();
    shared = true;
    have_labels = true;
@@ -548,13 +552,28 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;
    work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
-    buf_size = (work_var_count + 1) * sample_count/*sample_indices*/;
    buf_count = 2;

-    if ( is_buf_16u )
-        buf = cvCreateMat( buf_count, buf_size, CV_16UC1 );
+    buf_size = -1; // the member buf_size is obsolete
+
+    effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
+    effective_buf_width = sample_count;
+    effective_buf_height = work_var_count+1;
+
+    if (effective_buf_width >= effective_buf_height)
+        effective_buf_height *= buf_count;
    else
-        buf = cvCreateMat( buf_count, buf_size, CV_32SC1 );
+        effective_buf_width *= buf_count;
+
+    if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
+    {
+        CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
+    }
+
+    if ( is_buf_16u )
+        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
+    else
+        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );

    cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );

@@ -609,7 +628,7 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat
    priors_mult = cvCloneMat( priors );
    counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
    direction = cvCreateMat( 1, sample_count, CV_8UC1 );
-    split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );
+    split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
 }

 void CvCascadeBoostTrainData::free_train_data()
@@ -652,10 +671,10 @@ void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* o
    if ( vi < numPrecalcIdx )
    {
        if( !is_buf_16u )
-            *sortedIndices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset;
+            *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset;
        else
        {
-            const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
+            const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
                                                    vi*sample_count + n->offset );
            for( int i = 0; i < nodeSampleCount; i++ )
                sortedIndicesBuf[i] = shortIndices[i];
@@ -1027,6 +1046,7 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
    int newBufIdx = data->get_child_buf_idx( node );
    int workVarCount = data->get_work_var_count();
    CvMat* buf = data->buf;
+    size_t length_buf_row = data->get_length_subbuf();
    cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
    int* tempBuf = (int*)(uchar*)inn_buf;
    bool splitInputData;
@@ -1070,7 +1090,7 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
        if (data->is_buf_16u)
        {
            ushort *ldst, *rdst;
-            ldst = (ushort*)(buf->data.s + left->buf_idx*buf->cols +
+            ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
                vi*scount + left->offset);
            rdst = (ushort*)(ldst + nl);

@@ -1096,9 +1116,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
        else
        {
            int *ldst, *rdst;
-            ldst = buf->data.i + left->buf_idx*buf->cols +
+            ldst = buf->data.i + left->buf_idx*length_buf_row +
                vi*scount + left->offset;
-            rdst = buf->data.i + right->buf_idx*buf->cols +
+            rdst = buf->data.i + right->buf_idx*length_buf_row +
                vi*scount + right->offset;

            // split sorted
@@ -1131,9 +1151,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )

    if (data->is_buf_16u)
    {
-        unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
+        unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
            (workVarCount-1)*scount + left->offset);
-        unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
+        unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
            (workVarCount-1)*scount + right->offset);

        for( int i = 0; i < n; i++ )
@@ -1154,9 +1174,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
    }
    else
    {
-        int *ldst = buf->data.i + left->buf_idx*buf->cols +
+        int *ldst = buf->data.i + left->buf_idx*length_buf_row +
            (workVarCount-1)*scount + left->offset;
-        int *rdst = buf->data.i + right->buf_idx*buf->cols +
+        int *rdst = buf->data.i + right->buf_idx*length_buf_row +
            (workVarCount-1)*scount + right->offset;

        for( int i = 0; i < n; i++ )
@@ -1184,9 +1204,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )

    if (data->is_buf_16u)
    {
-        unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
+        unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
            workVarCount*scount + left->offset);
-        unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
+        unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
            workVarCount*scount + right->offset);
        for (int i = 0; i < n; i++)
        {
@@ -1205,9 +1225,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
    }
    else
    {
-        int* ldst = buf->data.i + left->buf_idx*buf->cols +
+        int* ldst = buf->data.i + left->buf_idx*length_buf_row +
            workVarCount*scount + left->offset;
-        int* rdst = buf->data.i + right->buf_idx*buf->cols +
+        int* rdst = buf->data.i + right->buf_idx*length_buf_row +
            workVarCount*scount + right->offset;
        for (int i = 0; i < n; i++)
        {
@@ -1352,6 +1372,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )
        sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
    }
    CvMat* buf = data->buf;
+    size_t length_buf_row = data->get_length_subbuf();
    if( !tree ) // before training the first tree, initialize weights and other parameters
    {
        int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
@@ -1375,7 +1396,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )

        if (data->is_buf_16u)
        {
-            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*buf->cols +
+            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (data->work_var_count-1)*data->sample_count);
            for( int i = 0; i < n; i++ )
            {
@@ -1393,7 +1414,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )
        }
        else
        {
-            int* labels = buf->data.i + data->data_root->buf_idx*buf->cols +
+            int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (data->work_var_count-1)*data->sample_count;

            for( int i = 0; i < n; i++ )