From a06af5ca250e31840a3e8b86f8d1de3500585137 Mon Sep 17 00:00:00 2001
From: Vadim Pisarevsky
Date: Tue, 19 Mar 2013 18:28:17 +0400
Subject: [PATCH 1/2] dramatic speedup of SVM::predict in the case of linear SVM

---
 modules/ml/include/opencv2/ml/ml.hpp |  2 +
 modules/ml/src/svm.cpp               | 55 ++++++++++++++++++++++++++++
 samples/cpp/letter_recog.cpp         |  3 ++
 3 files changed, 60 insertions(+)

diff --git a/modules/ml/include/opencv2/ml/ml.hpp b/modules/ml/include/opencv2/ml/ml.hpp
index dc7a4048a..6612d2ea1 100644
--- a/modules/ml/include/opencv2/ml/ml.hpp
+++ b/modules/ml/include/opencv2/ml/ml.hpp
@@ -534,6 +534,8 @@ protected:
     virtual void write_params( CvFileStorage* fs ) const;
     virtual void read_params( CvFileStorage* fs, CvFileNode* node );
 
+    void optimize_linear_svm();
+
     CvSVMParams params;
     CvMat* class_labels;
     int var_all;
diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp
index 9cbc46be8..3c970f201 100644
--- a/modules/ml/src/svm.cpp
+++ b/modules/ml/src/svm.cpp
@@ -1517,6 +1517,7 @@ bool CvSVM::do_train( int svm_type, int sample_count, int var_count, const float
         }
     }
 
+    optimize_linear_svm();
     ok = true;
 
     __END__;
@@ -1524,6 +1525,59 @@ bool CvSVM::do_train( int svm_type, int sample_count, int var_count, const float
     return ok;
 }
+
+void CvSVM::optimize_linear_svm()
+{
+    // we optimize only linear SVM: compress all the support vectors into one.
+    if( params.kernel_type != LINEAR )
+        return;
+
+    int class_count = class_labels ? class_labels->cols :
+                      params.svm_type == CvSVM::ONE_CLASS ? 1 : 0;
+
+    int i, df_count = class_count > 1 ? class_count*(class_count-1)/2 : 1;
+    CvSVMDecisionFunc* df = decision_func;
+
+    for( i = 0; i < df_count; i++ )
+    {
+        int sv_count = df[i].sv_count;
+        if( sv_count != 1 )
+            break;
+    }
+
+    // if every decision function already uses a single support vector,
+    // the model is already compressed; skip it then.
+    if( i == df_count )
+        return;
+
+    int var_count = get_var_count();
+    int sample_size = (int)(var_count*sizeof(sv[0][0]));
+    float** new_sv = (float**)cvMemStorageAlloc(storage, df_count*sizeof(new_sv[0]));
+
+    for( i = 0; i < df_count; i++ )
+    {
+        new_sv[i] = (float*)cvMemStorageAlloc(storage, sample_size);
+        float* dst = new_sv[i];
+        memset(dst, 0, sample_size);
+        int j, k, sv_count = df[i].sv_count;
+        for( j = 0; j < sv_count; j++ )
+        {
+            const float* src = class_count > 1 ? sv[df[i].sv_index[j]] : sv[j];
+            double a = df[i].alpha[j];
+            for( k = 0; k < var_count; k++ )
+                dst[k] = (float)(dst[k] + src[k]*a);
+        }
+        df[i].sv_count = 1;
+        df[i].alpha[0] = 1.;
+        if( class_count > 1 )
+            df[i].sv_index[0] = i;
+    }
+
+    sv = new_sv;
+    sv_total = df_count;
+}
+
+
 bool CvSVM::train( const CvMat* _train_data, const CvMat* _responses,
     const CvMat* _var_idx, const CvMat* _sample_idx, CvSVMParams _params )
 {
@@ -2516,6 +2570,7 @@ void CvSVM::read( CvFileStorage* fs, CvFileNode* svm_node )
         CV_NEXT_SEQ_ELEM( df_node->data.seq->elem_size, reader );
     }
 
+    optimize_linear_svm();
     create_kernel();
 
     __END__;
diff --git a/samples/cpp/letter_recog.cpp b/samples/cpp/letter_recog.cpp
index 49a55fc62..144dbe836 100644
--- a/samples/cpp/letter_recog.cpp
+++ b/samples/cpp/letter_recog.cpp
@@ -691,7 +691,10 @@ int build_svm_classifier( char* data_filename )
     CvMat *result = cvCreateMat(1, nsamples_all - ntrain_samples, CV_32FC1);
 
     printf("Classification (may take a few minutes)...\n");
+    double t = (double)cvGetTickCount();
     svm.predict(&sample, result);
+    t = (double)cvGetTickCount() - t;
+    printf("Prediction time: %gms\n", t/(cvGetTickFrequency()*1000.));
 
     int true_resp = 0;
     for (int i = 0; i < nsamples_all - ntrain_samples; i++)

From 24c614f686f1281ca21d796fb3f28b1e1d6acfe9 Mon Sep 17 00:00:00 2001
From: Vadim Pisarevsky
Date: Tue, 19 Mar 2013 18:41:38 +0400
Subject: [PATCH 2/2] added load/save feature for SVM classifier in letter_recog sample

---
 samples/cpp/letter_recog.cpp | 44 ++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/samples/cpp/letter_recog.cpp b/samples/cpp/letter_recog.cpp
index 144dbe836..74d5971ca 100644
--- a/samples/cpp/letter_recog.cpp
+++ b/samples/cpp/letter_recog.cpp
@@ -131,7 +131,7 @@ int build_rtrees_classifier( char* data_filename,
             printf( "Could not read the classifier %s\n", filename_to_load );
             return -1;
         }
-        printf( "The classifier %s is loaded.\n", data_filename );
+        printf( "The classifier %s is loaded.\n", filename_to_load );
     }
     else
     {
@@ -262,7 +262,7 @@ int build_boost_classifier( char* data_filename,
             printf( "Could not read the classifier %s\n", filename_to_load );
             return -1;
         }
-        printf( "The classifier %s is loaded.\n", data_filename );
+        printf( "The classifier %s is loaded.\n", filename_to_load );
     }
     else
     {
@@ -403,7 +403,7 @@ int build_mlp_classifier( char* data_filename,
             printf( "Could not read the classifier %s\n", filename_to_load );
             return -1;
         }
-        printf( "The classifier %s is loaded.\n", data_filename );
+        printf( "The classifier %s is loaded.\n", filename_to_load );
     }
     else
     {
@@ -639,10 +639,11 @@ int build_nbayes_classifier( char* data_filename )
 }
 
 static
-int build_svm_classifier( char* data_filename )
+int build_svm_classifier( char* data_filename, const char* filename_to_save, const char* filename_to_load )
 {
     CvMat* data = 0;
     CvMat* responses = 0;
+    CvMat* train_resp = 0;
     CvMat train_data;
     int nsamples_all = 0, ntrain_samples = 0;
     int var_count;
@@ -666,13 +667,29 @@
     ntrain_samples = (int)(nsamples_all*0.1);
     var_count = data->cols;
 
-    // train classifier
-    printf( "Training the classifier (may take a few minutes)...\n");
-    cvGetRows( data, &train_data, 0, ntrain_samples );
-    CvMat* train_resp = cvCreateMat( ntrain_samples, 1, CV_32FC1);
-    for (int i = 0; i < ntrain_samples; i++)
-        train_resp->data.fl[i] = responses->data.fl[i];
-    svm.train(&train_data, train_resp, 0, 0, param);
+    // Create or load SVM classifier
+    if( filename_to_load )
+    {
+        // load classifier from the specified file
+        svm.load( filename_to_load );
+        ntrain_samples = 0;
+        if( svm.get_var_count() == 0 )
+        {
+            printf( "Could not read the classifier %s\n", filename_to_load );
+            return -1;
+        }
+        printf( "The classifier %s is loaded.\n", filename_to_load );
+    }
+    else
+    {
+        // train classifier
+        printf( "Training the classifier (may take a few minutes)...\n");
+        cvGetRows( data, &train_data, 0, ntrain_samples );
+        train_resp = cvCreateMat( ntrain_samples, 1, CV_32FC1);
+        for (int i = 0; i < ntrain_samples; i++)
+            train_resp->data.fl[i] = responses->data.fl[i];
+        svm.train(&train_data, train_resp, 0, 0, param);
+    }
 
     // classification
     std::vector<float> _sample(var_count * (nsamples_all - ntrain_samples));
@@ -705,6 +722,9 @@
 
     printf("true_resp = %f%%\n", (float)true_resp / (nsamples_all - ntrain_samples) * 100);
 
+    if( filename_to_save )
+        svm.save( filename_to_save );
+
     cvReleaseMat( &train_resp );
     cvReleaseMat( &result );
     cvReleaseMat( &data );
@@ -775,7 +795,7 @@ int main( int argc, char *argv[] )
         method == 4 ?
             build_nbayes_classifier( data_filename) :
         method == 5 ?
-            build_svm_classifier( data_filename ):
+            build_svm_classifier( data_filename, filename_to_save, filename_to_load ):
         -1) < 0)
     {
         help();
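
Note (not part of the patches above): the speedup in PATCH 1/2 relies on the fact that, for a linear kernel, a decision function f(x) = sum_j alpha_j*<sv_j, x> - rho equals <w, x> - rho with w = sum_j alpha_j*sv_j. optimize_linear_svm() therefore folds all support vectors of each decision function into that single vector w once, after training or loading, so predict() costs one dot product per decision function instead of one per support vector. The standalone C++ sketch below only illustrates this identity; the values and names (dot, w, alpha, x) are illustrative and it does not touch the OpenCV API.

#include <cstdio>
#include <vector>

// dot product of two equally sized float vectors
static double dot(const std::vector<float>& a, const std::vector<float>& b)
{
    double s = 0;
    for (size_t k = 0; k < a.size(); k++)
        s += a[k] * b[k];
    return s;
}

int main()
{
    // toy decision function: three support vectors with weights alpha_j (illustrative values)
    std::vector<std::vector<float> > sv = { {1.f, 2.f}, {0.5f, -1.f}, {-2.f, 0.f} };
    std::vector<double> alpha = { 0.7, -0.3, 0.1 };
    std::vector<float> x = { 3.f, 4.f };

    // original formulation: one dot product per support vector
    double per_sv = 0;
    for (size_t j = 0; j < sv.size(); j++)
        per_sv += alpha[j] * dot(sv[j], x);

    // compressed formulation: fold the alphas into a single vector w, then one dot product
    std::vector<float> w(x.size(), 0.f);
    for (size_t j = 0; j < sv.size(); j++)
        for (size_t k = 0; k < w.size(); k++)
            w[k] = (float)(w[k] + sv[j][k] * alpha[j]);
    double compressed = dot(w, x);

    printf("per-SV sum = %g, compressed = %g\n", per_sv, compressed);
    return 0;
}

Both formulations print the same value up to floating-point rounding, which is why the compressed support vectors leave the classifier's predictions unchanged.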