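// Train a HOG + linear SVM detector (here, a people detector) from lists of
// positive and negative sample images (INRIA-dataset style), then compare it
// against OpenCV's built-in default people detector on a live camera feed.
//
// Typical build/run (file name, binary name and compiler flags are only illustrative):
//   g++ train_hog.cpp -o train_hog `pkg-config --cflags --libs opencv`
//   ./train_hog --pd=/INRIA_dataset/ -p=Train/pos.lst --nd=/INRIA_dataset/ -n=Train/neg.lst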
#include <opencv2/opencv.hpp>

#include <string>
#include <iostream>
#include <fstream>
#include <vector>

#include <time.h>

using namespace cv;
using namespace cv::ml;
using namespace std;

void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector );
void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData );
void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst );
void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );
Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size );
void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size );
void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels );
void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color );
void test_it( const Size & size );

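// With a linear kernel, OpenCV compresses all support vectors into a single
// vector equal to the SVM weight vector w; the detector expected by
// HOGDescriptor::setSVMDetector() is then simply [ w, -rho ] (weights followed
// by the bias term from the decision function).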
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector )
{
    // get the support vectors
    Mat sv = svm->getSupportVectors();
    const int sv_total = sv.rows;
    // get the decision function
    Mat alpha, svidx;
    double rho = svm->getDecisionFunction(0, alpha, svidx);

    CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
    CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
               (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
    CV_Assert( sv.type() == CV_32F );
    hog_detector.clear();

    hog_detector.resize(sv.cols + 1);
    memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0]));
    hog_detector[sv.cols] = (float)-rho;
}

/*
* Convert training/testing set to be used by OpenCV Machine Learning algorithms.
* TrainData is a matrix of size (#samples x max(#cols,#rows) per sample), in 32FC1.
* Transposition of samples is made if needed.
*/
void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData )
{
    //--Convert data
    const int rows = (int)train_samples.size();
    const int cols = (int)std::max( train_samples[0].cols, train_samples[0].rows );
    cv::Mat tmp(1, cols, CV_32FC1); //< used for transposition if needed
    trainData = cv::Mat(rows, cols, CV_32FC1 );
    vector< Mat >::const_iterator itr = train_samples.begin();
    vector< Mat >::const_iterator end = train_samples.end();
    for( int i = 0 ; itr != end ; ++itr, ++i )
    {
        CV_Assert( itr->cols == 1 ||
            itr->rows == 1 );
        if( itr->cols == 1 )
        {
            transpose( *(itr), tmp );
            tmp.copyTo( trainData.row( i ) );
        }
        else if( itr->rows == 1 )
        {
            itr->copyTo( trainData.row( i ) );
        }
    }
}

void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst )
{
    string line;
    ifstream file;

    file.open( (prefix+filename).c_str() );
    if( !file.is_open() )
    {
        cerr << "Unable to open the list of images from " << prefix+filename << endl;
        exit( -1 );
    }

    bool end_of_parsing = false;
    while( !end_of_parsing )
    {
        getline( file, line );
        if( line.empty() ) // no more lines to read
        {
            end_of_parsing = true;
            break;
        }
        Mat img = imread( (prefix+line).c_str() ); // load the image
        if( img.empty() ) // invalid image, just skip it.
            continue;
#ifdef _DEBUG
        imshow( "image", img );
        waitKey( 10 );
#endif
        img_lst.push_back( img.clone() );
    }
}
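// Crop one window of the requested detection size at a random position from
// each (larger) negative image, so negatives match the positive window size.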

void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size )
{
    Rect box;
    box.width = size.width;
    box.height = size.height;

    const int size_x = box.width;
    const int size_y = box.height;

    srand( (unsigned int)time( NULL ) );

    vector< Mat >::const_iterator img = full_neg_lst.begin();
    vector< Mat >::const_iterator end = full_neg_lst.end();
    for( ; img != end ; ++img )
    {
        if( img->cols <= size_x || img->rows <= size_y ) // skip negatives smaller than the sampling window
            continue;
        box.x = rand() % (img->cols - size_x);
        box.y = rand() % (img->rows - size_y);
        Mat roi = (*img)(box);
        neg_lst.push_back( roi.clone() );
#ifdef _DEBUG
        imshow( "img", roi.clone() );
        waitKey( 10 );
#endif
    }
}

// From http://www.juergenwiki.de/work/wiki/doku.php?id=public:hog_descriptor_computation_and_visualization
Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size )
{
    const int DIMX = size.width;
    const int DIMY = size.height;
    float zoomFac = 3;
    Mat visu;
    resize(color_origImg, visu, Size( (int)(color_origImg.cols*zoomFac), (int)(color_origImg.rows*zoomFac) ) );

    int cellSize        = 8;
    int gradientBinSize = 9;
    float radRangeForOneBin = (float)(CV_PI/(float)gradientBinSize); // dividing 180 degrees into 9 bins, how large (in rad) is one bin?

    // prepare data structure: 9 orientation / gradient strengths for each cell
    int cells_in_x_dir = DIMX / cellSize;
    int cells_in_y_dir = DIMY / cellSize;
    float*** gradientStrengths = new float**[cells_in_y_dir];
    int** cellUpdateCounter   = new int*[cells_in_y_dir];
    for (int y=0; y<cells_in_y_dir; y++)
    {
        gradientStrengths[y] = new float*[cells_in_x_dir];
        cellUpdateCounter[y] = new int[cells_in_x_dir];
        for (int x=0; x<cells_in_x_dir; x++)
        {
            gradientStrengths[y][x] = new float[gradientBinSize];
            cellUpdateCounter[y][x] = 0;

            for (int bin=0; bin<gradientBinSize; bin++)
                gradientStrengths[y][x][bin] = 0.0;
        }
    }

    // nr of blocks = nr of cells - 1
    // since blocks overlap by one cell, a new block starts at every cell except the last one
    int blocks_in_x_dir = cells_in_x_dir - 1;
    int blocks_in_y_dir = cells_in_y_dir - 1;

    // compute gradient strengths per cell
    int descriptorDataIdx = 0;
    int cellx = 0;
    int celly = 0;

    for (int blockx=0; blockx<blocks_in_x_dir; blockx++)
    {
        for (int blocky=0; blocky<blocks_in_y_dir; blocky++)
        {
            // 4 cells per block ...
            for (int cellNr=0; cellNr<4; cellNr++)
            {
                // compute corresponding cell nr
                cellx = blockx;
                celly = blocky;
                if (cellNr==1) celly++;
                if (cellNr==2) cellx++;
                if (cellNr==3)
                {
                    cellx++;
                    celly++;
                }

                for (int bin=0; bin<gradientBinSize; bin++)
                {
                    float gradientStrength = descriptorValues[ descriptorDataIdx ];
                    descriptorDataIdx++;

                    gradientStrengths[celly][cellx][bin] += gradientStrength;

                } // for (all bins)

                // note: overlapping blocks lead to multiple updates of this sum!
                // we therefore keep track of how often a cell was updated,
                // to compute average gradient strengths
                cellUpdateCounter[celly][cellx]++;

            } // for (all cells)

        } // for (all block y pos)
    } // for (all block x pos)

    // compute average gradient strengths
    for (celly=0; celly<cells_in_y_dir; celly++)
    {
        for (cellx=0; cellx<cells_in_x_dir; cellx++)
        {
            float NrUpdatesForThisCell = (float)cellUpdateCounter[celly][cellx];

            // compute average gradient strengths for each gradient bin direction
            for (int bin=0; bin<gradientBinSize; bin++)
            {
                gradientStrengths[celly][cellx][bin] /= NrUpdatesForThisCell;
            }
        }
    }

    // draw cells
    for (celly=0; celly<cells_in_y_dir; celly++)
    {
        for (cellx=0; cellx<cells_in_x_dir; cellx++)
        {
            int drawX = cellx * cellSize;
            int drawY = celly * cellSize;

            int mx = drawX + cellSize/2;
            int my = drawY + cellSize/2;

            rectangle(visu, Point((int)(drawX*zoomFac), (int)(drawY*zoomFac)), Point((int)((drawX+cellSize)*zoomFac), (int)((drawY+cellSize)*zoomFac)), Scalar(100,100,100), 1);

            // draw in each cell all 9 gradient strengths
            for (int bin=0; bin<gradientBinSize; bin++)
            {
                float currentGradStrength = gradientStrengths[celly][cellx][bin];

                // no line to draw?
                if (currentGradStrength==0)
                    continue;

                float currRad = bin * radRangeForOneBin + radRangeForOneBin/2;

                float dirVecX = cos( currRad );
                float dirVecY = sin( currRad );
                float maxVecLen = (float)(cellSize/2.f);
                float scale = 2.5; // just a visualization scale, to see the lines better

                // compute line coordinates
                float x1 = mx - dirVecX * currentGradStrength * maxVecLen * scale;
                float y1 = my - dirVecY * currentGradStrength * maxVecLen * scale;
                float x2 = mx + dirVecX * currentGradStrength * maxVecLen * scale;
                float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale;

                // draw gradient visualization
                line(visu, Point((int)(x1*zoomFac),(int)(y1*zoomFac)), Point((int)(x2*zoomFac),(int)(y2*zoomFac)), Scalar(0,255,0), 1);

            } // for (all bins)

        } // for (cellx)
    } // for (celly)

    // don't forget to free memory allocated by helper data structures!
    for (int y=0; y<cells_in_y_dir; y++)
    {
        for (int x=0; x<cells_in_x_dir; x++)
        {
            delete[] gradientStrengths[y][x];
        }
        delete[] gradientStrengths[y];
        delete[] cellUpdateCounter[y];
    }
    delete[] gradientStrengths;
    delete[] cellUpdateCounter;

    return visu;

} // get_hogdescriptor_visu
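// Compute one HOG descriptor per sample. With the default HOGDescriptor
// parameters (16x16 blocks, 8x8 block stride, 8x8 cells, 9 bins) and the
// 96x160 window used in main(), each descriptor holds 11 * 19 blocks
// * 36 floats per block = 7524 values.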

void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size )
{
    HOGDescriptor hog;
    hog.winSize = size;
    Mat gray;
    vector< Point > location;
    vector< float > descriptors;

    vector< Mat >::const_iterator img = img_lst.begin();
    vector< Mat >::const_iterator end = img_lst.end();
    for( ; img != end ; ++img )
    {
        cvtColor( *img, gray, COLOR_BGR2GRAY );
        hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ), location );
        gradient_lst.push_back( Mat( descriptors ).clone() );
#ifdef _DEBUG
        imshow( "gradient", get_hogdescriptor_visu( img->clone(), descriptors, size ) );
        waitKey( 10 );
#endif
    }
}
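// Train a linear SVM on the HOG descriptors. EPS_SVR regression against the
// +1/-1 labels with a small C ("soft classifier", as in the paper) yields a
// weight vector that get_svm_detector() later turns into a HOG detector.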

void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels )
{
    Mat train_data;
    convert_to_ml( gradient_lst, train_data );

    clog << "Start training...";
    Ptr<SVM> svm = SVM::create();
    /* Default values to train SVM */
    svm->setCoef0(0.0);
    svm->setDegree(3);
    svm->setTermCriteria(TermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 1e-3 ));
    svm->setGamma(0);
    svm->setKernel(SVM::LINEAR);
    svm->setNu(0.5);
    svm->setP(0.1); // for EPSILON_SVR, epsilon in loss function?
    svm->setC(0.01); // From paper, soft classifier
    svm->setType(SVM::EPS_SVR); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
    svm->train(train_data, ROW_SAMPLE, Mat(labels));
    clog << "...[done]" << endl;

    svm->save( "my_people_detector.yml" );
}

void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color )
{
    if( !locations.empty() )
    {
        vector< Rect >::const_iterator loc = locations.begin();
        vector< Rect >::const_iterator end = locations.end();
        for( ; loc != end ; ++loc )
        {
            rectangle( img, *loc, color, 2 );
        }
    }
}
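// Run both detectors on a live camera feed: the default OpenCV people detector
// is drawn in green (reference) and the freshly trained detector in red.
// Press ESC to stop.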

void test_it( const Size & size )
{
    char key = 27;
    Scalar reference( 0, 255, 0 );
    Scalar trained( 0, 0, 255 );
    Mat img, draw;
    Ptr<SVM> svm;
    HOGDescriptor hog;
    HOGDescriptor my_hog;
    my_hog.winSize = size;
    VideoCapture video;
    vector< Rect > locations;

    // Load the trained SVM.
    svm = StatModel::load<SVM>( "my_people_detector.yml" );
    // Set the trained SVM to my_hog.
    vector< float > hog_detector;
    get_svm_detector( svm, hog_detector );
    my_hog.setSVMDetector( hog_detector );
    // Set the default people detector.
    hog.setSVMDetector( hog.getDefaultPeopleDetector() );
    // Open the camera.
    video.open(0);
    if( !video.isOpened() )
    {
        cerr << "Unable to open video device 0" << endl;
        exit( -1 );
    }

    bool end_of_process = false;
    while( !end_of_process )
    {
        video >> img;
        if( img.empty() )
            break;

        draw = img.clone();

        locations.clear();
        hog.detectMultiScale( img, locations );
        draw_locations( draw, locations, reference );

        locations.clear();
        my_hog.detectMultiScale( img, locations );
        draw_locations( draw, locations, trained );

        imshow( "Video", draw );
        key = (char)waitKey( 10 );
        if( 27 == key )
            end_of_process = true;
    }
}
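// Main pipeline: load the positive and negative image lists, crop negatives to
// the 96x160 detection window, compute HOG descriptors for all samples, train
// the linear SVM and finally test the resulting detector on a camera feed.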

int main( int argc, char** argv )
{
    cv::CommandLineParser parser(argc, argv, "{help h|| show help message}"
            "{pd||pos_dir}{p||pos.lst}{nd||neg_dir}{n||neg.lst}");
    if (parser.has("help"))
    {
        parser.printMessage();
        exit(0);
    }
    vector< Mat > pos_lst;
    vector< Mat > full_neg_lst;
    vector< Mat > neg_lst;
    vector< Mat > gradient_lst;
    vector< int > labels;
    string pos_dir = parser.get<string>("pd");
    string pos = parser.get<string>("p");
    string neg_dir = parser.get<string>("nd");
    string neg = parser.get<string>("n");
    if( pos_dir.empty() || pos.empty() || neg_dir.empty() || neg.empty() )
    {
        cout << "Wrong number of parameters." << endl
            << "Usage: " << argv[0] << " --pd=pos_dir -p=pos.lst --nd=neg_dir -n=neg.lst" << endl
            << "Example: " << argv[0] << " --pd=/INRIA_dataset/ -p=Train/pos.lst --nd=/INRIA_dataset/ -n=Train/neg.lst" << endl;
        exit( -1 );
    }
    load_images( pos_dir, pos, pos_lst );
    labels.assign( pos_lst.size(), +1 );
    const unsigned int old = (unsigned int)labels.size();
    load_images( neg_dir, neg, full_neg_lst );
    sample_neg( full_neg_lst, neg_lst, Size( 96, 160 ) );
    labels.insert( labels.end(), neg_lst.size(), -1 );
    CV_Assert( old < labels.size() );

    compute_hog( pos_lst, gradient_lst, Size( 96, 160 ) );
    compute_hog( neg_lst, gradient_lst, Size( 96, 160 ) );

    train_svm( gradient_lst, labels );

    test_it( Size( 96, 160 ) ); // change with your own window size/parameters

    return 0;
}