From 81aefed13ab61b081762ac4c41f2911dd2071e3a Mon Sep 17 00:00:00 2001 From: Grigory Serebryakov Date: Tue, 29 Jul 2014 14:12:17 +0400 Subject: [PATCH 1/8] Can create training set in PNG format The format of the training set can be changed with the `-pngoutput` key. Output image will be resized to a 640x480 size if greater. --- apps/haartraining/CMakeLists.txt | 3 + apps/haartraining/createsamples.cpp | 44 ++++- apps/haartraining/cvhaartraining.cpp | 177 +++++++++++------- apps/haartraining/cvhaartraining.h | 63 ++++++- apps/haartraining/cvsamplesoutput.cpp | 255 ++++++++++++++++++++++++++ apps/haartraining/cvsamplesoutput.h | 49 +++++ apps/haartraining/ioutput.h | 34 ++++ doc/user_guide/ug_traincascade.rst | 84 ++++++++- 8 files changed, 627 insertions(+), 82 deletions(-) create mode 100644 apps/haartraining/cvsamplesoutput.cpp create mode 100644 apps/haartraining/cvsamplesoutput.h create mode 100644 apps/haartraining/ioutput.h diff --git a/apps/haartraining/CMakeLists.txt b/apps/haartraining/CMakeLists.txt index d8a3c55c8..2ac332316 100644 --- a/apps/haartraining/CMakeLists.txt +++ b/apps/haartraining/CMakeLists.txt @@ -29,6 +29,9 @@ set(cvhaartraining_lib_src cvhaarclassifier.cpp cvhaartraining.cpp cvsamples.cpp + cvsamplesoutput.cpp + cvsamplesoutput.h + ioutput.h ) add_library(opencv_haartraining_engine STATIC ${cvhaartraining_lib_src}) diff --git a/apps/haartraining/createsamples.cpp b/apps/haartraining/createsamples.cpp index 2e86cca9a..136dcb006 100644 --- a/apps/haartraining/createsamples.cpp +++ b/apps/haartraining/createsamples.cpp @@ -54,6 +54,7 @@ using namespace std; #include "cvhaartraining.h" +#include "ioutput.h" int main( int argc, char* argv[] ) { @@ -76,6 +77,7 @@ int main( int argc, char* argv[] ) double scale = 4.0; int width = 24; int height = 24; + bool pngoutput = false; /* whether to make the samples in png or in jpg*/ srand((unsigned int)time(0)); @@ -92,7 +94,8 @@ int main( int argc, char* argv[] ) " [-maxyangle ]\n" " [-maxzangle ]\n" " 
[-show []]\n" - " [-w ]\n [-h ]\n", + " [-w ]\n [-h ]\n" + " [-pngoutput]", argv[0], num, bgcolor, bgthreshold, maxintensitydev, maxxangle, maxyangle, maxzangle, scale, width, height ); @@ -172,6 +175,10 @@ int main( int argc, char* argv[] ) { height = atoi( argv[++i] ); } + else if( !strcmp( argv[i], "-pngoutput" ) ) + { + pngoutput = true; + } } printf( "Info file name: %s\n", ((infoname == NULL) ? nullname : infoname ) ); @@ -190,10 +197,14 @@ int main( int argc, char* argv[] ) printf( "Show samples: %s\n", (showsamples) ? "TRUE" : "FALSE" ); if( showsamples ) { - printf( "Scale: %g\n", scale ); + printf( "Scale applied to display : %g\n", scale ); + } + if( !pngoutput) + { + printf( "Original image whill be scaled to:\n"); + printf( "\tWidth: $backgroundWidth / %d\n", width ); + printf( "\tHeight: $backgroundHeight / %d\n", height ); } - printf( "Width: %d\n", width ); - printf( "Height: %d\n", height ); /* determine action */ if( imagename && vecname ) @@ -207,13 +218,30 @@ int main( int argc, char* argv[] ) printf( "Done\n" ); } + else if( imagename && bgfilename && infoname && pngoutput) + { + printf( "Create training set from a single image and a collection of backgrounds.\n" + "Output format: %s\n" + "Annotations are in a separate directory\n", + (( pngoutput ) ? "JPG" : "PNG") ); + + PngTrainingSetGenerator creator( infoname ); + creator.create( imagename, bgcolor, bgthreshold, bgfilename, num, + invert, maxintensitydev, maxxangle, maxyangle, maxzangle, + showsamples, width, height ); + + printf( "Done\n" ); + } else if( imagename && bgfilename && infoname ) { - printf( "Create test samples from single image applying distortions...\n" ); + printf( "Create test samples from single image applying distortions...\n" + "Output format: %s\n", + (( pngoutput ) ? 
"JPG" : "PNG") ); - cvCreateTestSamples( infoname, imagename, bgcolor, bgthreshold, bgfilename, num, - invert, maxintensitydev, - maxxangle, maxyangle, maxzangle, showsamples, width, height ); + TestSamplesGenerator creator( infoname ); + creator.create( imagename, bgcolor, bgthreshold, bgfilename, num, + invert, maxintensitydev, maxxangle, maxyangle, maxzangle, + showsamples, width, height ); printf( "Done\n" ); } diff --git a/apps/haartraining/cvhaartraining.cpp b/apps/haartraining/cvhaartraining.cpp index 661bc959b..3d12eab2d 100644 --- a/apps/haartraining/cvhaartraining.cpp +++ b/apps/haartraining/cvhaartraining.cpp @@ -48,6 +48,8 @@ #include "cvhaartraining.h" #include "_cvhaartraining.h" +#include "ioutput.h" + #include #include #include @@ -2841,8 +2843,6 @@ void cvCreateTreeCascadeClassifier( const char* dirname, cvReleaseMat( &features_idx ); } - - void cvCreateTrainingSamples( const char* filename, const char* imgfilename, int bgcolor, int bgthreshold, const char* bgfilename, int count, @@ -2942,45 +2942,43 @@ void cvCreateTrainingSamples( const char* filename, } -#define CV_INFO_FILENAME "info.dat" +SamplesGenerator::SamplesGenerator( IOutput* _writer ) + :writer(_writer) +{ +} -void cvCreateTestSamples( const char* infoname, - const char* imgfilename, int bgcolor, int bgthreshold, - const char* bgfilename, int count, - int invert, int maxintensitydev, - double maxxangle, double maxyangle, double maxzangle, - int showsamples, - int winwidth, int winheight ) +void SamplesGenerator::showSamples(bool* show, CvMat *img) const +{ + if( *show ) + { + cvShowImage( "Image", img); + if( cvWaitKey( 0 ) == 27 ) + { + *show = false; + } + } +} + +void SamplesGenerator::create(const char* imgfilename, int bgcolor, int bgthreshold, + const char* bgfilename, int count, + int invert, int maxintensitydev, + double maxxangle, double maxyangle, double maxzangle, + bool showsamples, + int winwidth, int winheight ) { CvSampleDistortionData data; - assert( infoname != NULL ); 
assert( imgfilename != NULL ); assert( bgfilename != NULL ); - if( !icvMkDir( infoname ) ) - { - -#if CV_VERBOSE - fprintf( stderr, "Unable to create directory hierarchy: %s\n", infoname ); -#endif /* CV_VERBOSE */ - - return; - } if( icvStartSampleDistortion( imgfilename, bgcolor, bgthreshold, &data ) ) { - char fullname[PATH_MAX]; - char* filename; CvMat win; - FILE* info; if( icvInitBackgroundReaders( bgfilename, cvSize( 10, 10 ) ) ) { int i; - int x, y, width, height; - float scale; - float maxscale; int inverse; if( showsamples ) @@ -2988,73 +2986,112 @@ void cvCreateTestSamples( const char* infoname, cvNamedWindow( "Image", CV_WINDOW_AUTOSIZE ); } - info = fopen( infoname, "w" ); - strcpy( fullname, infoname ); - filename = strrchr( fullname, '\\' ); - if( filename == NULL ) - { - filename = strrchr( fullname, '/' ); - } - if( filename == NULL ) - { - filename = fullname; - } - else - { - filename++; - } - count = MIN( count, cvbgdata->count ); inverse = invert; + for( i = 0; i < count; i++ ) { icvGetNextFromBackgroundData( cvbgdata, cvbgreader ); - maxscale = MIN( 0.7F * cvbgreader->src.cols / winwidth, - 0.7F * cvbgreader->src.rows / winheight ); - if( maxscale < 1.0F ) continue; + CvRect boundingBox = getObjectPosition( cvSize( cvbgreader->src.cols, + cvbgreader->src.rows ), + cvGetSize(data.img), + cvSize( winwidth, winheight ) ); + if(boundingBox.width <= 0 || boundingBox.height <= 0) + { + continue; + } - scale = (maxscale - 1.0F) * rand() / RAND_MAX + 1.0F; - width = (int) (scale * winwidth); - height = (int) (scale * winheight); - x = (int) ((0.1+0.8 * rand()/RAND_MAX) * (cvbgreader->src.cols - width)); - y = (int) ((0.1+0.8 * rand()/RAND_MAX) * (cvbgreader->src.rows - height)); + cvGetSubArr( &cvbgreader->src, &win, boundingBox ); - cvGetSubArr( &cvbgreader->src, &win, cvRect( x, y ,width, height ) ); if( invert == CV_RANDOM_INVERT ) { inverse = (rand() > (RAND_MAX/2)); } + icvPlaceDistortedSample( &win, inverse, maxintensitydev, maxxangle, 
maxyangle, maxzangle, 1, 0.0, 0.0, &data ); + writer->write( cvbgreader->src, boundingBox ); - sprintf( filename, "%04d_%04d_%04d_%04d_%04d.jpg", - (i + 1), x, y, width, height ); - - if( info ) - { - fprintf( info, "%s %d %d %d %d %d\n", - filename, 1, x, y, width, height ); - } - - cvSaveImage( fullname, &cvbgreader->src ); - if( showsamples ) - { - cvShowImage( "Image", &cvbgreader->src ); - if( cvWaitKey( 0 ) == 27 ) - { - showsamples = 0; - } - } + showSamples(&showsamples, &cvbgreader->src); } - if( info ) fclose( info ); icvDestroyBackgroundReaders(); } icvEndSampleDistortion( &data ); } } +SamplesGenerator::~SamplesGenerator() +{ + delete writer; +} + + +TestSamplesGenerator::TestSamplesGenerator(const char* filename) + :SamplesGenerator(IOutput::createOutput(filename,IOutput::JPG_TEST_SET)) +{ +} + +CvSize TestSamplesGenerator::scaleObjectSize(const CvSize& bgImgSize, + const CvSize& , + const CvSize& sampleSize) const +{ + float scale; + float maxscale; + + maxscale = MIN( 0.7F * bgImgSize.width / sampleSize.width, + 0.7F * bgImgSize.height / sampleSize.height ); + if( maxscale < 1.0F ) + { + scale = -1.f; + } + else + { + scale = (maxscale - 1.0F) * rand() / RAND_MAX + 1.0F; + } + + int width = (int) (scale * sampleSize.width); + int height = (int) (scale * sampleSize.height); + + return cvSize( width, height ); +} + +CvRect SamplesGenerator::getObjectPosition(const CvSize& bgImgSize, + const CvSize& imgSize, + const CvSize& sampleSize) const +{ + CvSize size = scaleObjectSize( bgImgSize, imgSize, sampleSize ); + + int width = size.width; + int height = size.height; + int x = (int) ((0.1 + 0.8 * rand() / RAND_MAX) * (bgImgSize.width - width)); + int y = (int) ((0.1 + 0.8 * rand() / RAND_MAX) * (bgImgSize.height - height)); + + return cvRect( x, y, width, height ); +} + + +PngTrainingSetGenerator::PngTrainingSetGenerator(const char* filename) + :SamplesGenerator(IOutput::createOutput(filename,IOutput::PNG_TRAINING_SET)) +{ +} + +CvSize 
PngTrainingSetGenerator::scaleObjectSize( const CvSize& bgImgSize, + const CvSize& imgSize, + const CvSize& ) const +{ + float scale; + + scale = MIN( 0.3F * bgImgSize.width / imgSize.width, + 0.3F * bgImgSize.height / imgSize.height ); + + + int width = (int) (scale * imgSize.width); + int height = (int) (scale * imgSize.height); + + return cvSize( width, height ); +} /* End of file. */ diff --git a/apps/haartraining/cvhaartraining.h b/apps/haartraining/cvhaartraining.h index 5a57e1773..f1e931813 100644 --- a/apps/haartraining/cvhaartraining.h +++ b/apps/haartraining/cvhaartraining.h @@ -48,6 +48,11 @@ #ifndef _CVHAARTRAINING_H_ #define _CVHAARTRAINING_H_ +class IOutput; +struct CvRect; +struct CvSize; +struct CvMat; + /* * cvCreateTrainingSamples * @@ -84,13 +89,20 @@ void cvCreateTrainingSamples( const char* filename, int showsamples = 0, int winwidth = 24, int winheight = 24 ); -void cvCreateTestSamples( const char* infoname, - const char* imgfilename, int bgcolor, int bgthreshold, +void cvCreatePngTrainingSet(const char* imgfilename, int bgcolor, int bgthreshold, + const char* bgfilename, int count, + int invert, int maxintensitydev, + double maxxangle, double maxyangle, double maxzangle, + int winwidth, int winheight, + IOutput *writer ); + +void cvCreateTestSamples(const char* imgfilename, int bgcolor, int bgthreshold, const char* bgfilename, int count, int invert, int maxintensitydev, double maxxangle, double maxyangle, double maxzangle, int showsamples, - int winwidth, int winheight ); + int winwidth, int winheight, + IOutput* writer); /* * cvCreateTrainingSamplesFromInfo @@ -189,4 +201,49 @@ void cvCreateTreeCascadeClassifier( const char* dirname, int boosttype, int stumperror, int maxtreesplits, int minpos, bool bg_vecfile = false ); + +class SamplesGenerator +{ +public: + SamplesGenerator( IOutput* _writer ); + void create( const char* imgfilename, int bgcolor, int bgthreshold, + const char* bgfilename, int count, + int invert, int maxintensitydev, + 
double maxxangle, double maxyangle, double maxzangle, + bool showsamples, + int winwidth, int winheight); + virtual ~SamplesGenerator(); +private: + virtual void showSamples( bool* showSamples, CvMat* img ) const; + + CvRect getObjectPosition( const CvSize& bgImgSize, + const CvSize& imgSize, + const CvSize& sampleSize ) const; + virtual CvSize scaleObjectSize(const CvSize& bgImgSize, + const CvSize& imgSize , + const CvSize& sampleSize) const =0 ; +private: + IOutput* writer; +}; + +class TestSamplesGenerator: public SamplesGenerator +{ +public: + TestSamplesGenerator(const char* filename); +private: + CvSize scaleObjectSize(const CvSize& bgImgSize, + const CvSize& , + const CvSize& sampleSize) const; +}; + +class PngTrainingSetGenerator: public SamplesGenerator +{ +public: + PngTrainingSetGenerator(const char *filename); +private: + CvSize scaleObjectSize(const CvSize& bgImgSize, + const CvSize& imgSize , + const CvSize& ) const; +}; + #endif /* _CVHAARTRAINING_H_ */ diff --git a/apps/haartraining/cvsamplesoutput.cpp b/apps/haartraining/cvsamplesoutput.cpp new file mode 100644 index 000000000..ada852a53 --- /dev/null +++ b/apps/haartraining/cvsamplesoutput.cpp @@ -0,0 +1,255 @@ +#include "cvsamplesoutput.h" + +#include + +#include "_cvcommon.h" +#include "opencv2/opencv.hpp" + +/* print statistic info */ +#define CV_VERBOSE 1 + +IOutput::IOutput() + : currentIdx(0) +{} + +void IOutput::findFilePathPart(char **partOfPath, char *fullPath) +{ + *partOfPath = strrchr( fullPath, '\\' ); + if( *partOfPath == NULL ) + { + *partOfPath = strrchr( fullPath, '/' ); + } + if( *partOfPath == NULL ) + { + *partOfPath = fullPath; + } + else + { + *partOfPath += 1; + } +} + +IOutput* IOutput::createOutput(const char *filename, + IOutput::OutputType type) +{ + IOutput* output = 0; + switch (type) { + case IOutput::PNG_TRAINING_SET: + output = new PngTrainingSetOutput(); + break; + case IOutput::JPG_TEST_SET: + output = new TestSamplesOutput(); + break; + default: +#if CV_VERBOSE 
+ fprintf( stderr, "Invalid output type, valid types are: PNG_TRAINING_SET, JPG_TEST_SET"); +#endif /* CV_VERBOSE */ + return 0; + } + + if ( output->init( filename ) ) + return output; + else + return 0; +} + +bool PngTrainingSetOutput::init( const char* annotationsListFileName ) +{ + IOutput::init( annotationsListFileName ); + + if(imgFileName == imgFullPath) + { + #if CV_VERBOSE + fprintf( stderr, "Invalid path to annotations file: %s\n" + "It should contain a parent directory name\n", imgFullPath ); + #endif /* CV_VERBOSE */ + return false; + } + + + const char* annotationsdirname = "/annotations/"; + const char* positivesdirname = "/pos/"; + + imgFileName[-1] = '\0'; //erase slash at the end of the path + imgFileName -= 1; + + //copy path to dataset top-level dir + strcpy(annotationFullPath, imgFullPath); + //find the name of annotation starting from the top-level dataset dir + findFilePathPart(&annotationRelativePath, annotationFullPath); + if( !strcmp( annotationRelativePath, ".." ) || !strcmp( annotationRelativePath, "." 
) ) + { + #if CV_VERBOSE + fprintf( stderr, "Invalid path to annotations file: %s\n" + "It should contain a parent directory name\n", annotationsListFileName ); + #endif /* CV_VERBOSE */ + return false; + } + //find the name of output image starting from the top-level dataset dir + findFilePathPart(&imgRelativePath, imgFullPath); + annotationFileName = annotationFullPath + strlen(annotationFullPath); + + sprintf(annotationFileName, "%s", annotationsdirname); + annotationFileName += strlen(annotationFileName); + sprintf(imgFileName, "%s", positivesdirname); + imgFileName += strlen(imgFileName); + + if( !icvMkDir( annotationFullPath ) ) + { + #if CV_VERBOSE + fprintf( stderr, "Unable to create directory hierarchy: %s\n", annotationFullPath ); + #endif /* CV_VERBOSE */ + return false; + } + if( !icvMkDir( imgFullPath ) ) + { + #if CV_VERBOSE + fprintf( stderr, "Unable to create directory hierarchy: %s\n", imgFullPath ); + #endif /* CV_VERBOSE */ + return false; + } + + return true; +} + +bool PngTrainingSetOutput::write( const CvMat& img, + const CvRect& boundingBox ) +{ + CvRect bbox = scaleBoundingBox(cvGetSize(&img), boundingBox); + + sprintf( imgFileName, + "%04d_%04d_%04d_%04d_%04d", + ++currentIdx, + bbox.x, + bbox.y, + bbox.width, + bbox.height ); + + sprintf( annotationFileName, "%s.txt", imgFileName ); + fprintf( annotationsList, "%s\n", annotationRelativePath ); + + FILE* annotationFile = fopen( annotationFullPath, "w" ); + if(annotationFile == 0) + { + return false; + } + + sprintf( imgFileName + strlen(imgFileName), ".%s", extension ); + + + + fprintf( annotationFile, + "Image filename : \"%s\"\n" + "Bounding box for object 1 \"PASperson\" (Xmin, Ymin) - (Xmax, Ymax) : (%d, %d) - (%d, %d)", + imgRelativePath, + bbox.x, + bbox.y, + bbox.x + bbox.width, + bbox.y + bbox.height ); + fclose( annotationFile ); + + writeImage(img); + + return true; +} + +void PngTrainingSetOutput::writeImage(const CvMat &img) const +{ + CvSize origsize = cvGetSize(&img); + + if( 
origsize.height > destImgHeight || origsize.width > destImgWidth ) + { + CvMat result = cvMat( destImgHeight, destImgWidth, CV_8UC1, + cvAlloc( sizeof( uchar ) * destImgHeight * destImgWidth ) ); + cvResize(&img, &result); + cvSaveImage( imgFullPath, &result ); + cvFree( &(result.data.ptr) ); + } + else + { + cvSaveImage( imgFullPath, &img); + } + + return; +} + +CvRect PngTrainingSetOutput::scaleBoundingBox(const CvSize& imgSize, const CvRect& bbox) +{ + double scale = MAX( (float) destImgWidth / imgSize.width, + (float) destImgHeight / imgSize.height ); + CvRect boundingBox = bbox; + int border = 5; + if( scale < 1. ) + { + boundingBox.x = bbox.x * scale; + boundingBox.y = bbox.y * scale; + boundingBox.width = bbox.width * scale; + boundingBox.height = bbox.height * scale; + } + boundingBox.x -= border; + boundingBox.y -= border; + boundingBox.width += 2*border; + boundingBox.height += 2*border; + + return boundingBox; +} + +IOutput::~IOutput() +{ + if(annotationsList) + { + fclose(annotationsList); + } +} + +bool IOutput::init(const char *filename) +{ + assert( filename != NULL ); + + if( !icvMkDir( filename ) ) + { + +#if CV_VERBOSE + fprintf( stderr, "Unable to create directory hierarchy: %s\n", filename ); +#endif /* CV_VERBOSE */ + + return false; + } + + annotationsList = fopen( filename, "w" ); + if( annotationsList == NULL ) + { +#if CV_VERBOSE + fprintf( stderr, "Unable to create info file: %s\n", filename ); +#endif /* CV_VERBOSE */ + return false; + } + strcpy( imgFullPath, filename ); + + findFilePathPart( &imgFileName, imgFullPath ); + + return true; +} + +bool TestSamplesOutput::write( const CvMat& img, + const CvRect& boundingBox ) +{ + sprintf( imgFileName, "%04d_%04d_%04d_%04d_%04d.jpg", + ++currentIdx, + boundingBox.x, + boundingBox.y, + boundingBox.width, + boundingBox.height ); + + fprintf( annotationsList, "%s %d %d %d %d %d\n", + imgFullPath, + 1, + boundingBox.x, + boundingBox.y, + boundingBox.width, + boundingBox.height ); + + cvSaveImage( 
imgFullPath, &img); + + return true; +} diff --git a/apps/haartraining/cvsamplesoutput.h b/apps/haartraining/cvsamplesoutput.h new file mode 100644 index 000000000..9e8362f0c --- /dev/null +++ b/apps/haartraining/cvsamplesoutput.h @@ -0,0 +1,49 @@ +#ifndef CVSAMPLESOUTPUT_H +#define CVSAMPLESOUTPUT_H + +#include "ioutput.h" + +class PngTrainingSetOutput: public IOutput +{ + friend IOutput* IOutput::createOutput(const char *filename, OutputType type); +public: + virtual bool write( const CvMat& img, + const CvRect& boundingBox); + + virtual ~PngTrainingSetOutput(){} +private: + PngTrainingSetOutput() + : extension("png") + , destImgWidth(640) + , destImgHeight(480) + {} + + virtual bool init(const char* annotationsListFileName ); + + void writeImage( const CvMat& img ) const; + + CvRect scaleBoundingBox(const CvSize& imgSize, + const CvRect& bbox); +private: + + char annotationFullPath[PATH_MAX]; + char* annotationFileName; + char* annotationRelativePath; + char* imgRelativePath; + const char* extension; + + int destImgWidth; + int destImgHeight ; +}; + +class TestSamplesOutput: public IOutput +{ + friend IOutput* IOutput::createOutput(const char *filename, OutputType type); +public: + virtual bool write( const CvMat& img, + const CvRect& boundingBox ); + virtual ~TestSamplesOutput(){} +private: + TestSamplesOutput(){} +}; +#endif // CVSAMPLESOUTPUT_H diff --git a/apps/haartraining/ioutput.h b/apps/haartraining/ioutput.h new file mode 100644 index 000000000..eccebc116 --- /dev/null +++ b/apps/haartraining/ioutput.h @@ -0,0 +1,34 @@ +#ifndef IOUTPUT_H +#define IOUTPUT_H + +#include + +#include "_cvcommon.h" + +struct CvMat; +struct CvRect; + +class IOutput +{ +public: + enum OutputType {PNG_TRAINING_SET, JPG_TEST_SET}; +public: + virtual bool write( const CvMat& img, + const CvRect& boundingBox ) =0; + + virtual ~IOutput(); + + static IOutput* createOutput( const char *filename, OutputType type ); +protected: + IOutput(); + /* finds the beginning of the last token in 
the path */ + void findFilePathPart( char **partOfPath, char *fullPath ); + virtual bool init( const char* filename ); +protected: + int currentIdx; + char imgFullPath[PATH_MAX]; + char* imgFileName; + FILE* annotationsList; +}; + +#endif // IOUTPUT_H diff --git a/doc/user_guide/ug_traincascade.rst b/doc/user_guide/ug_traincascade.rst index 601f50438..b6d861e88 100644 --- a/doc/user_guide/ug_traincascade.rst +++ b/doc/user_guide/ug_traincascade.rst @@ -117,9 +117,27 @@ Command line arguments: Height (in pixels) of the output samples. -For following procedure is used to create a sample object instance: +* ``-pngoutput`` + + With this option switched on ``opencv_createsamples`` tool generates a collection of PNG samples and a number of associated annotation files, instead of a single ``vec`` file. + +The ``opencv_createsamples`` utility may work in a number of modes, namely: + +* Creating training set from a single image and a collection of backgrounds with a single ``vec`` file as an output; +* Converting the marked-up collection of samples into a ``vec`` format; +* Creating training set from a single image, as specified above, but with a collection of PNG images and associated annotation files as a result; +* Creating test set that consists of JPG samples collection and a signle file with annotations; +* Showing the content of the ``vec`` file. + +Creating training set from a single image and a collection of backgrounds with a single ``vec`` file as an output +----------------------------------------------------------------------------------------------------------------- + +The following procedure is used to create a sample object instance: The source image is rotated randomly around all three axes. The chosen angle is limited my ``-max?angle``. Then pixels having the intensity from [``bg_color-bg_color_threshold``; ``bg_color+bg_color_threshold``] range are interpreted as transparent. White noise is added to the intensities of the foreground. 
If the ``-inv`` key is specified then foreground pixel intensities are inverted. If ``-randinv`` key is specified then algorithm randomly selects whether inversion should be applied to this sample. Finally, the obtained image is placed onto an arbitrary background from the background description file, resized to the desired size specified by ``-w`` and ``-h`` and stored to the vec-file, specified by the ``-vec`` command line option. +Converting the marked-up collection of samples into a ``vec`` format +-------------------------------------------------------------------- + Positive samples also may be obtained from a collection of previously marked up images. This collection is described by a text file similar to background description file. Each line of this file corresponds to an image. The first element of the line is the filename. It is followed by the number of object instances. The following numbers are the coordinates of objects bounding rectangles (x, y, width, height). An example of description file: @@ -150,6 +168,70 @@ In order to create positive samples from such collection, ``-info`` argument sho The scheme of samples creation in this case is as follows. The object instances are taken from images. Then they are resized to target samples size and stored in output vec-file. No distortion is applied, so the only affecting arguments are ``-w``, ``-h``, ``-show`` and ``-num``. +Creating training set from a single image, but with a collection of PNG images and associated annotation files as a result +-------------------------------------------------------------------------------------------------------------------------- + +To obtain such behaviour the ``-img``, ``-bg`` and ``-info`` keys should be specified. The file name specified with ``-info`` key should include at least one level of directory hierarchy, that directory +will be used as the top-level dir for the training set. 
+For example, with the ``opencv_createsamples`` called as following: + + opencv_createsamples -img /home/user/logo.png -bg /home/user/bg.txt -info /home/user/annotations.lst -pngoutput -maxxangle 0.1 -maxyangle 0.1 -maxzangle 0.1 + +The output will have the following structure: + + .. code-block:: text + + /home/user/ + annotations/ + 0001_0107_0099_0195_0139.txt + 0002_0107_0115_0195_0139.txt + ... + neg/ + + pos/ + 0001_0107_0099_0195_0139.png + 0002_0107_0115_0195_0139.png + ... + annotations.lst + +With ``*.txt`` files in ``annotations`` directory containing information about object bounding box on the sample in a next format: + + .. code-block:: text + + Image filename : "createsamples/pos/0002_0107_0115_0195_0139.png" + Bounding box for object 1 "PASperson" (Xmin, Ymin) - (Xmax, Ymax) : (107, 115) - (302, 254) + +And ``annotations.lst`` file containing the list of all annotations file: + + .. code-block:: text + + createsamples/annotations/0001_0109_0209_0195_0139.txt + createsamples/annotations/0002_0241_0245_0139_0100.txt + + +Creating test set that consists of JPG samples collection and a signle file with annotations +-------------------------------------------------------------------------------------------- + +This variant of ``opencv_createsamples`` usage is very similar to the previous one, but generates the output in a different format; + +Directory structure: + + .. code-block:: text + + info.dat + img1.jpg + img2.jpg + +File info.dat: + + .. code-block:: text + + img1.jpg 1 140 100 45 45 + img2.jpg 2 100 200 50 50 50 30 25 25 + +Showing the content of the ``vec`` file +--------------------------------------- + ``opencv_createsamples`` utility may be used for examining samples stored in positive samples file. In order to do this only ``-vec``, ``-w`` and ``-h`` parameters should be specified. Note that for training, it does not matter how vec-files with positive samples are generated. 
But ``opencv_createsamples`` utility is the only one way to collect/create a vector file of positive samples, provided by OpenCV. From f81b3101e8a97293c34594a896b63e8194d343e4 Mon Sep 17 00:00:00 2001 From: Grigory Serebryakov Date: Mon, 25 Aug 2014 18:46:23 +0400 Subject: [PATCH 2/8] Typo in string fixed --- apps/haartraining/createsamples.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/haartraining/createsamples.cpp b/apps/haartraining/createsamples.cpp index 136dcb006..4cfb79040 100644 --- a/apps/haartraining/createsamples.cpp +++ b/apps/haartraining/createsamples.cpp @@ -201,7 +201,7 @@ int main( int argc, char* argv[] ) } if( !pngoutput) { - printf( "Original image whill be scaled to:\n"); + printf( "Original image will be scaled to:\n"); printf( "\tWidth: $backgroundWidth / %d\n", width ); printf( "\tHeight: $backgroundHeight / %d\n", height ); } From 57cf3d176673e3de9e2b874b5aab0cb7a7d9e03e Mon Sep 17 00:00:00 2001 From: Grigory Serebryakov Date: Tue, 26 Aug 2014 08:55:59 +0400 Subject: [PATCH 3/8] Class naming update Documentation improvement Bug in output format for JPG set fixed --- apps/haartraining/createsamples.cpp | 4 +- apps/haartraining/cvhaartraining.cpp | 22 ++-- apps/haartraining/cvhaartraining.h | 15 +-- apps/haartraining/cvsamplesoutput.cpp | 16 +-- apps/haartraining/cvsamplesoutput.h | 12 +-- doc/user_guide/ug_traincascade.rst | 148 ++++++++++++++------------ 6 files changed, 112 insertions(+), 105 deletions(-) diff --git a/apps/haartraining/createsamples.cpp b/apps/haartraining/createsamples.cpp index 4cfb79040..dd15a5ffd 100644 --- a/apps/haartraining/createsamples.cpp +++ b/apps/haartraining/createsamples.cpp @@ -225,7 +225,7 @@ int main( int argc, char* argv[] ) "Annotations are in a separate directory\n", (( pngoutput ) ? 
"JPG" : "PNG") ); - PngTrainingSetGenerator creator( infoname ); + PngDatasetGenerator creator( infoname ); creator.create( imagename, bgcolor, bgthreshold, bgfilename, num, invert, maxintensitydev, maxxangle, maxyangle, maxzangle, showsamples, width, height ); @@ -238,7 +238,7 @@ int main( int argc, char* argv[] ) "Output format: %s\n", (( pngoutput ) ? "JPG" : "PNG") ); - TestSamplesGenerator creator( infoname ); + JpgDatasetGrenerator creator( infoname ); creator.create( imagename, bgcolor, bgthreshold, bgfilename, num, invert, maxintensitydev, maxxangle, maxyangle, maxzangle, showsamples, width, height ); diff --git a/apps/haartraining/cvhaartraining.cpp b/apps/haartraining/cvhaartraining.cpp index 3d12eab2d..27483e6cd 100644 --- a/apps/haartraining/cvhaartraining.cpp +++ b/apps/haartraining/cvhaartraining.cpp @@ -2942,13 +2942,13 @@ void cvCreateTrainingSamples( const char* filename, } -SamplesGenerator::SamplesGenerator( IOutput* _writer ) +DatasetGenerator::DatasetGenerator( IOutput* _writer ) :writer(_writer) { } -void SamplesGenerator::showSamples(bool* show, CvMat *img) const +void DatasetGenerator::showSamples(bool* show, CvMat *img) const { if( *show ) { @@ -2960,7 +2960,7 @@ void SamplesGenerator::showSamples(bool* show, CvMat *img) const } } -void SamplesGenerator::create(const char* imgfilename, int bgcolor, int bgthreshold, +void DatasetGenerator::create(const char* imgfilename, int bgcolor, int bgthreshold, const char* bgfilename, int count, int invert, int maxintensitydev, double maxxangle, double maxyangle, double maxzangle, @@ -3023,18 +3023,18 @@ void SamplesGenerator::create(const char* imgfilename, int bgcolor, int bgthresh } } -SamplesGenerator::~SamplesGenerator() +DatasetGenerator::~DatasetGenerator() { delete writer; } -TestSamplesGenerator::TestSamplesGenerator(const char* filename) - :SamplesGenerator(IOutput::createOutput(filename,IOutput::JPG_TEST_SET)) +JpgDatasetGrenerator::JpgDatasetGrenerator(const char* filename) + 
:DatasetGenerator(IOutput::createOutput(filename,IOutput::JPG_TEST_SET)) { } -CvSize TestSamplesGenerator::scaleObjectSize(const CvSize& bgImgSize, +CvSize JpgDatasetGrenerator::scaleObjectSize(const CvSize& bgImgSize, const CvSize& , const CvSize& sampleSize) const { @@ -3058,7 +3058,7 @@ CvSize TestSamplesGenerator::scaleObjectSize(const CvSize& bgImgSize, return cvSize( width, height ); } -CvRect SamplesGenerator::getObjectPosition(const CvSize& bgImgSize, +CvRect DatasetGenerator::getObjectPosition(const CvSize& bgImgSize, const CvSize& imgSize, const CvSize& sampleSize) const { @@ -3073,12 +3073,12 @@ CvRect SamplesGenerator::getObjectPosition(const CvSize& bgImgSize, } -PngTrainingSetGenerator::PngTrainingSetGenerator(const char* filename) - :SamplesGenerator(IOutput::createOutput(filename,IOutput::PNG_TRAINING_SET)) +PngDatasetGenerator::PngDatasetGenerator(const char* filename) + :DatasetGenerator(IOutput::createOutput(filename,IOutput::PNG_TRAINING_SET)) { } -CvSize PngTrainingSetGenerator::scaleObjectSize( const CvSize& bgImgSize, +CvSize PngDatasetGenerator::scaleObjectSize( const CvSize& bgImgSize, const CvSize& imgSize, const CvSize& ) const { diff --git a/apps/haartraining/cvhaartraining.h b/apps/haartraining/cvhaartraining.h index f1e931813..77e492335 100644 --- a/apps/haartraining/cvhaartraining.h +++ b/apps/haartraining/cvhaartraining.h @@ -202,17 +202,17 @@ void cvCreateTreeCascadeClassifier( const char* dirname, int maxtreesplits, int minpos, bool bg_vecfile = false ); -class SamplesGenerator +class DatasetGenerator { public: - SamplesGenerator( IOutput* _writer ); + DatasetGenerator( IOutput* _writer ); void create( const char* imgfilename, int bgcolor, int bgthreshold, const char* bgfilename, int count, int invert, int maxintensitydev, double maxxangle, double maxyangle, double maxzangle, bool showsamples, int winwidth, int winheight); - virtual ~SamplesGenerator(); + virtual ~DatasetGenerator(); private: virtual void showSamples( bool* 
showSamples, CvMat* img ) const; @@ -226,20 +226,21 @@ private: IOutput* writer; }; -class TestSamplesGenerator: public SamplesGenerator +/* Provides the functionality of test set generating */ +class JpgDatasetGrenerator: public DatasetGenerator { public: - TestSamplesGenerator(const char* filename); + JpgDatasetGrenerator(const char* filename); private: CvSize scaleObjectSize(const CvSize& bgImgSize, const CvSize& , const CvSize& sampleSize) const; }; -class PngTrainingSetGenerator: public SamplesGenerator +class PngDatasetGenerator: public DatasetGenerator { public: - PngTrainingSetGenerator(const char *filename); + PngDatasetGenerator(const char *filename); private: CvSize scaleObjectSize(const CvSize& bgImgSize, const CvSize& imgSize , diff --git a/apps/haartraining/cvsamplesoutput.cpp b/apps/haartraining/cvsamplesoutput.cpp index ada852a53..83eae58d6 100644 --- a/apps/haartraining/cvsamplesoutput.cpp +++ b/apps/haartraining/cvsamplesoutput.cpp @@ -35,10 +35,10 @@ IOutput* IOutput::createOutput(const char *filename, IOutput* output = 0; switch (type) { case IOutput::PNG_TRAINING_SET: - output = new PngTrainingSetOutput(); + output = new PngDatasetOutput(); break; case IOutput::JPG_TEST_SET: - output = new TestSamplesOutput(); + output = new JpgDatasetOutput(); break; default: #if CV_VERBOSE @@ -53,7 +53,7 @@ IOutput* IOutput::createOutput(const char *filename, return 0; } -bool PngTrainingSetOutput::init( const char* annotationsListFileName ) +bool PngDatasetOutput::init( const char* annotationsListFileName ) { IOutput::init( annotationsListFileName ); @@ -112,7 +112,7 @@ bool PngTrainingSetOutput::init( const char* annotationsListFileName ) return true; } -bool PngTrainingSetOutput::write( const CvMat& img, +bool PngDatasetOutput::write( const CvMat& img, const CvRect& boundingBox ) { CvRect bbox = scaleBoundingBox(cvGetSize(&img), boundingBox); @@ -153,7 +153,7 @@ bool PngTrainingSetOutput::write( const CvMat& img, return true; } -void 
PngTrainingSetOutput::writeImage(const CvMat &img) const +void PngDatasetOutput::writeImage(const CvMat &img) const { CvSize origsize = cvGetSize(&img); @@ -173,7 +173,7 @@ void PngTrainingSetOutput::writeImage(const CvMat &img) const return; } -CvRect PngTrainingSetOutput::scaleBoundingBox(const CvSize& imgSize, const CvRect& bbox) +CvRect PngDatasetOutput::scaleBoundingBox(const CvSize& imgSize, const CvRect& bbox) { double scale = MAX( (float) destImgWidth / imgSize.width, (float) destImgHeight / imgSize.height ); @@ -231,7 +231,7 @@ bool IOutput::init(const char *filename) return true; } -bool TestSamplesOutput::write( const CvMat& img, +bool JpgDatasetOutput::write( const CvMat& img, const CvRect& boundingBox ) { sprintf( imgFileName, "%04d_%04d_%04d_%04d_%04d.jpg", @@ -242,7 +242,7 @@ bool TestSamplesOutput::write( const CvMat& img, boundingBox.height ); fprintf( annotationsList, "%s %d %d %d %d %d\n", - imgFullPath, + imgFileName, 1, boundingBox.x, boundingBox.y, diff --git a/apps/haartraining/cvsamplesoutput.h b/apps/haartraining/cvsamplesoutput.h index 9e8362f0c..90854bab9 100644 --- a/apps/haartraining/cvsamplesoutput.h +++ b/apps/haartraining/cvsamplesoutput.h @@ -3,16 +3,16 @@ #include "ioutput.h" -class PngTrainingSetOutput: public IOutput +class PngDatasetOutput: public IOutput { friend IOutput* IOutput::createOutput(const char *filename, OutputType type); public: virtual bool write( const CvMat& img, const CvRect& boundingBox); - virtual ~PngTrainingSetOutput(){} + virtual ~PngDatasetOutput(){} private: - PngTrainingSetOutput() + PngDatasetOutput() : extension("png") , destImgWidth(640) , destImgHeight(480) @@ -36,14 +36,14 @@ private: int destImgHeight ; }; -class TestSamplesOutput: public IOutput +class JpgDatasetOutput: public IOutput { friend IOutput* IOutput::createOutput(const char *filename, OutputType type); public: virtual bool write( const CvMat& img, const CvRect& boundingBox ); - virtual ~TestSamplesOutput(){} + virtual 
~JpgDatasetOutput(){} private: - TestSamplesOutput(){} + JpgDatasetOutput(){} }; #endif // CVSAMPLESOUTPUT_H diff --git a/doc/user_guide/ug_traincascade.rst b/doc/user_guide/ug_traincascade.rst index b6d861e88..46b758359 100644 --- a/doc/user_guide/ug_traincascade.rst +++ b/doc/user_guide/ug_traincascade.rst @@ -6,7 +6,7 @@ Cascade Classifier Training Introduction ============ -The work with a cascade classifier inlcudes two major stages: training and detection. +The work with a cascade classifier includes two major stages: training and detection. Detection stage is described in a documentation of ``objdetect`` module of general OpenCV documentation. Documentation gives some basic information about cascade classifier. Current guide is describing how to train a cascade classifier: preparation of a training data and running the training application. @@ -14,26 +14,30 @@ Important notes --------------- There are two applications in OpenCV to train cascade classifier: ``opencv_haartraining`` and ``opencv_traincascade``. ``opencv_traincascade`` is a newer version, written in C++ in accordance to OpenCV 2.x API. But the main difference between this two applications is that ``opencv_traincascade`` supports both Haar [Viola2001]_ and LBP [Liao2007]_ (Local Binary Patterns) features. LBP features are integer in contrast to Haar features, so both training and detection with LBP are several times faster then with Haar features. Regarding the LBP and Haar detection quality, it depends on training: the quality of training dataset first of all and training parameters too. It's possible to train a LBP-based classifier that will provide almost the same quality as Haar-based one. -``opencv_traincascade`` and ``opencv_haartraining`` store the trained classifier in different file formats. Note, the newer cascade detection interface (see ``CascadeClassifier`` class in ``objdetect`` module) support both formats. 
``opencv_traincascade`` can save (export) a trained cascade in the older format. But ``opencv_traincascade`` and ``opencv_haartraining`` can not load (import) a classifier in another format for the futher training after interruption. +``opencv_traincascade`` and ``opencv_haartraining`` store the trained classifier in different file formats. Note, the newer cascade detection interface (see ``CascadeClassifier`` class in ``objdetect`` module) support both formats. ``opencv_traincascade`` can save (export) a trained cascade in the older format. But ``opencv_traincascade`` and ``opencv_haartraining`` can not load (import) a classifier in another format for the further training after interruption. Note that ``opencv_traincascade`` application can use TBB for multi-threading. To use it in multicore mode OpenCV must be built with TBB. -Also there are some auxilary utilities related to the training. +Also there are some auxiliary utilities related to the training. * ``opencv_createsamples`` is used to prepare a training dataset of positive and test samples. ``opencv_createsamples`` produces dataset of positive samples in a format that is supported by both ``opencv_haartraining`` and ``opencv_traincascade`` applications. The output is a file with \*.vec extension, it is a binary format which contains images. * ``opencv_performance`` may be used to evaluate the quality of classifiers, but for trained by ``opencv_haartraining`` only. It takes a collection of marked up images, runs the classifier and reports the performance, i.e. number of found objects, number of missed objects, number of false alarms and other information. -Since ``opencv_haartraining`` is an obsolete application, only ``opencv_traincascade`` will be described futher. ``opencv_createsamples`` utility is needed to prepare a training data for ``opencv_traincascade``, so it will be described too. +Since ``opencv_haartraining`` is an obsolete application, only ``opencv_traincascade`` will be described further. 
``opencv_createsamples`` utility is needed to prepare a training data for ``opencv_traincascade``, so it will be described too. +``opencv_createsamples`` utility +================================ +An ``opencv_createsamples`` utility provides functionality for dataset generating, writing and viewing. The term *dataset* is used here for both training set and test set. + Training data preparation ========================= For training we need a set of samples. There are two types of samples: negative and positive. Negative samples correspond to non-object images. Positive samples correspond to images with detected objects. Set of negative samples must be prepared manually, whereas set of positive samples is created using ``opencv_createsamples`` utility. Negative Samples ---------------- -Negative samples are taken from arbitrary images. These images must not contain detected objects. Negative samples are enumerated in a special file. It is a text file in which each line contains an image filename (relative to the directory of the description file) of negative sample image. This file must be created manually. Note that negative samples and sample images are also called background samples or background samples images, and are used interchangeably in this document. Described images may be of different sizes. But each image should be (but not nessesarily) larger then a training window size, because these images are used to subsample negative image to the training size. +Negative samples are taken from arbitrary images. These images must not contain detected objects. Negative samples are enumerated in a special file. It is a text file in which each line contains an image filename (relative to the directory of the description file) of negative sample image. This file must be created manually. Note that negative samples and sample images are also called background samples or background samples images, and are used interchangeably in this document. 
Described images may be of different sizes. But each image should be (but not necessarily) larger then a training window size, because these images are used to subsample negative image to the training size. An example of description file: @@ -57,7 +61,7 @@ Positive Samples ---------------- Positive samples are created by ``opencv_createsamples`` utility. They may be created from a single image with object or from a collection of previously marked up images. -Please note that you need a large dataset of positive samples before you give it to the mentioned utility, because it only applies perspective transformation. For example you may need only one positive sample for absolutely rigid object like an OpenCV logo, but you definetely need hundreds and even thousands of positive samples for faces. In the case of faces you should consider all the race and age groups, emotions and perhaps beard styles. +Please note that you need a large dataset of positive samples before you give it to the mentioned utility, because it only applies perspective transformation. For example you may need only one positive sample for absolutely rigid object like an OpenCV logo, but you definitely need hundreds and even thousands of positive samples for faces. In the case of faces you should consider all the race and age groups, emotions and perhaps beard styles. So, a single object image may contain a company logo. Then a large set of positive samples is created from the given object image by random rotating, changing the logo intensity as well as placing the logo on arbitrary background. The amount and range of randomness can be controlled by command line arguments of ``opencv_createsamples`` utility. 
@@ -123,10 +127,11 @@ Command line arguments: The ``opencv_createsamples`` utility may work in a number of modes, namely: -* Creating training set from a single image and a collection of backgrounds with a single ``vec`` file as an output; +* Creating training set from a single image and a collection of backgrounds: + * with a single ``vec`` file as an output; + * with a collection of JPG images and a file with annotations list as an output; + * with a collection of PNG images and associated files with annotations as an output; * Converting the marked-up collection of samples into a ``vec`` format; -* Creating training set from a single image, as specified above, but with a collection of PNG images and associated annotation files as a result; -* Creating test set that consists of JPG samples collection and a signle file with annotations; * Showing the content of the ``vec`` file. Creating training set from a single image and a collection of backgrounds with a single ``vec`` file as an output @@ -135,6 +140,68 @@ Creating training set from a single image and a collection of backgrounds with a The following procedure is used to create a sample object instance: The source image is rotated randomly around all three axes. The chosen angle is limited my ``-max?angle``. Then pixels having the intensity from [``bg_color-bg_color_threshold``; ``bg_color+bg_color_threshold``] range are interpreted as transparent. White noise is added to the intensities of the foreground. If the ``-inv`` key is specified then foreground pixel intensities are inverted. If ``-randinv`` key is specified then algorithm randomly selects whether inversion should be applied to this sample. Finally, the obtained image is placed onto an arbitrary background from the background description file, resized to the desired size specified by ``-w`` and ``-h`` and stored to the vec-file, specified by the ``-vec`` command line option. 
+Creating training set as a collection of JPG images +--------------------------------------------------- + +To obtain such behaviour the ``-img``, ``-bg`` and ``-info`` keys should be specified. The file name specified with ``-info`` key should include at least one level of directory hierarchy, that directory +will be used as the top-level directory for the training set. +For example, with the ``opencv_createsamples`` called as following: + + .. code-block:: text + + opencv_createsamples -img /home/user/logo.png -bg /home/user/bg.txt -info /home/user/annotations.lst -pngoutput -maxxangle 0.1 -maxyangle 0.1 -maxzangle 0.1 + +The output will have the following structure: + + .. code-block:: text + + /home/user/ + annotations/ + 0001_0107_0099_0195_0139.txt + 0002_0107_0115_0195_0139.txt + ... + neg/ + + pos/ + 0001_0107_0099_0195_0139.png + 0002_0107_0115_0195_0139.png + ... + annotations.lst + +With ``*.txt`` files in ``annotations`` directory containing information about object bounding box on the sample in a next format: + + .. code-block:: text + + Image filename : "createsamples/pos/0002_0107_0115_0195_0139.png" + Bounding box for object 1 "PASperson" (Xmin, Ymin) - (Xmax, Ymax) : (107, 115) - (302, 254) + +And ``annotations.lst`` file containing the list of all annotations file: + + .. code-block:: text + + createsamples/annotations/0001_0109_0209_0195_0139.txt + createsamples/annotations/0002_0241_0245_0139_0100.txt + +Creating test set as a collection of JPG images +----------------------------------------------- + +This variant of ``opencv_createsamples`` usage is very similar to the previous one, but generates the output in a different format; + +Directory structure: + + .. code-block:: text + + info.dat + img1.jpg + img2.jpg + +File info.dat: + + .. 
code-block:: text + + img1.jpg 1 140 100 45 45 + img2.jpg 2 100 200 50 50 50 30 25 25 + Converting the marked-up collection of samples into a ``vec`` format -------------------------------------------------------------------- @@ -168,67 +235,6 @@ In order to create positive samples from such collection, ``-info`` argument sho The scheme of samples creation in this case is as follows. The object instances are taken from images. Then they are resized to target samples size and stored in output vec-file. No distortion is applied, so the only affecting arguments are ``-w``, ``-h``, ``-show`` and ``-num``. -Creating training set from a single image, but with a collection of PNG images and associated annotation files as a result --------------------------------------------------------------------------------------------------------------------------- - -To obtain such behaviour the ``-img``, ``-bg`` and ``-info`` keys should be specified. The file name specified with ``-info`` key should include at least one level of directory hierarchy, that directory -will be used as the top-level dir for the training set. -For example, with the ``opencv_createsamples`` called as following: - - opencv_createsamples -img /home/user/logo.png -bg /home/user/bg.txt -info /home/user/annotations.lst -pngoutput -maxxangle 0.1 -maxyangle 0.1 -maxzangle 0.1 - -The output will have the following structure: - - .. code-block:: text - - /home/user/ - annotations/ - 0001_0107_0099_0195_0139.txt - 0002_0107_0115_0195_0139.txt - ... - neg/ - - pos/ - 0001_0107_0099_0195_0139.png - 0002_0107_0115_0195_0139.png - ... - annotations.lst - -With ``*.txt`` files in ``annotations`` directory containing information about object bounding box on the sample in a next format: - - .. 
code-block:: text - - Image filename : "createsamples/pos/0002_0107_0115_0195_0139.png" - Bounding box for object 1 "PASperson" (Xmin, Ymin) - (Xmax, Ymax) : (107, 115) - (302, 254) - -And ``annotations.lst`` file containing the list of all annotations file: - - .. code-block:: text - - createsamples/annotations/0001_0109_0209_0195_0139.txt - createsamples/annotations/0002_0241_0245_0139_0100.txt - - -Creating test set that consists of JPG samples collection and a signle file with annotations --------------------------------------------------------------------------------------------- - -This variant of ``opencv_createsamples`` usage is very similar to the previous one, but generates the output in a different format; - -Directory structure: - - .. code-block:: text - - info.dat - img1.jpg - img2.jpg - -File info.dat: - - .. code-block:: text - - img1.jpg 1 140 100 45 45 - img2.jpg 2 100 200 50 50 50 30 25 25 - Showing the content of the ``vec`` file --------------------------------------- @@ -240,7 +246,7 @@ Example of vec-file is available here ``opencv/data/vec_files/trainingfaces_24-2 Cascade Training ================ -The next step is the training of classifier. As mentioned above ``opencv_traincascade`` or ``opencv_haartraining`` may be used to train a cascade classifier, but only the newer ``opencv_traincascade`` will be described futher. +The next step is the training of classifier. As mentioned above ``opencv_traincascade`` or ``opencv_haartraining`` may be used to train a cascade classifier, but only the newer ``opencv_traincascade`` will be described further. 
Command line arguments of ``opencv_traincascade`` application grouped by purposes: From 74d8527f8a85e8fc33287f15f886302a33d9d825 Mon Sep 17 00:00:00 2001 From: Grigory Serebryakov Date: Wed, 27 Aug 2014 12:41:18 +0400 Subject: [PATCH 4/8] Naming fixes and code beautification --- apps/haartraining/createsamples.cpp | 33 ++++++++++++--------------- apps/haartraining/cvhaartraining.cpp | 12 +++++----- apps/haartraining/cvhaartraining.h | 4 ++-- apps/haartraining/cvsamplesoutput.cpp | 4 ++-- apps/haartraining/ioutput.h | 2 +- 5 files changed, 25 insertions(+), 30 deletions(-) diff --git a/apps/haartraining/createsamples.cpp b/apps/haartraining/createsamples.cpp index dd15a5ffd..a1a11e0b5 100644 --- a/apps/haartraining/createsamples.cpp +++ b/apps/haartraining/createsamples.cpp @@ -50,6 +50,7 @@ #include #include #include +#include using namespace std; @@ -218,28 +219,22 @@ int main( int argc, char* argv[] ) printf( "Done\n" ); } - else if( imagename && bgfilename && infoname && pngoutput) + else if( imagename && bgfilename && infoname) { - printf( "Create training set from a single image and a collection of backgrounds.\n" - "Output format: %s\n" - "Annotations are in a separate directory\n", - (( pngoutput ) ? "JPG" : "PNG") ); - - PngDatasetGenerator creator( infoname ); - creator.create( imagename, bgcolor, bgthreshold, bgfilename, num, - invert, maxintensitydev, maxxangle, maxyangle, maxzangle, - showsamples, width, height ); - - printf( "Done\n" ); - } - else if( imagename && bgfilename && infoname ) - { - printf( "Create test samples from single image applying distortions...\n" + printf( "Create data set from single image applying distortions...\n" "Output format: %s\n", - (( pngoutput ) ? "JPG" : "PNG") ); + (( pngoutput ) ? 
"PNG" : "JPG") ); - JpgDatasetGrenerator creator( infoname ); - creator.create( imagename, bgcolor, bgthreshold, bgfilename, num, + std::auto_ptr creator; + if( pngoutput ) + { + creator = std::auto_ptr( new PngDatasetGenerator( infoname ) ); + } + else + { + creator = std::auto_ptr( new JpgDatasetGenerator( infoname ) ); + } + creator->create( imagename, bgcolor, bgthreshold, bgfilename, num, invert, maxintensitydev, maxxangle, maxyangle, maxzangle, showsamples, width, height ); diff --git a/apps/haartraining/cvhaartraining.cpp b/apps/haartraining/cvhaartraining.cpp index 27483e6cd..31abfff6d 100644 --- a/apps/haartraining/cvhaartraining.cpp +++ b/apps/haartraining/cvhaartraining.cpp @@ -3029,12 +3029,12 @@ DatasetGenerator::~DatasetGenerator() } -JpgDatasetGrenerator::JpgDatasetGrenerator(const char* filename) - :DatasetGenerator(IOutput::createOutput(filename,IOutput::JPG_TEST_SET)) +JpgDatasetGenerator::JpgDatasetGenerator( const char* filename ) + :DatasetGenerator( IOutput::createOutput( filename, IOutput::JPG_DATASET ) ) { } -CvSize JpgDatasetGrenerator::scaleObjectSize(const CvSize& bgImgSize, +CvSize JpgDatasetGenerator::scaleObjectSize( const CvSize& bgImgSize, const CvSize& , const CvSize& sampleSize) const { @@ -3074,13 +3074,13 @@ CvRect DatasetGenerator::getObjectPosition(const CvSize& bgImgSize, PngDatasetGenerator::PngDatasetGenerator(const char* filename) - :DatasetGenerator(IOutput::createOutput(filename,IOutput::PNG_TRAINING_SET)) + :DatasetGenerator( IOutput::createOutput( filename, IOutput::PNG_DATASET ) ) { } CvSize PngDatasetGenerator::scaleObjectSize( const CvSize& bgImgSize, - const CvSize& imgSize, - const CvSize& ) const + const CvSize& imgSize, + const CvSize& ) const { float scale; diff --git a/apps/haartraining/cvhaartraining.h b/apps/haartraining/cvhaartraining.h index 77e492335..84a5643e7 100644 --- a/apps/haartraining/cvhaartraining.h +++ b/apps/haartraining/cvhaartraining.h @@ -227,10 +227,10 @@ private: }; /* Provides the 
functionality of test set generating */ -class JpgDatasetGrenerator: public DatasetGenerator +class JpgDatasetGenerator: public DatasetGenerator { public: - JpgDatasetGrenerator(const char* filename); + JpgDatasetGenerator(const char* filename); private: CvSize scaleObjectSize(const CvSize& bgImgSize, const CvSize& , diff --git a/apps/haartraining/cvsamplesoutput.cpp b/apps/haartraining/cvsamplesoutput.cpp index 83eae58d6..a2b7e263c 100644 --- a/apps/haartraining/cvsamplesoutput.cpp +++ b/apps/haartraining/cvsamplesoutput.cpp @@ -34,10 +34,10 @@ IOutput* IOutput::createOutput(const char *filename, { IOutput* output = 0; switch (type) { - case IOutput::PNG_TRAINING_SET: + case IOutput::PNG_DATASET: output = new PngDatasetOutput(); break; - case IOutput::JPG_TEST_SET: + case IOutput::JPG_DATASET: output = new JpgDatasetOutput(); break; default: diff --git a/apps/haartraining/ioutput.h b/apps/haartraining/ioutput.h index eccebc116..aef35ba90 100644 --- a/apps/haartraining/ioutput.h +++ b/apps/haartraining/ioutput.h @@ -11,7 +11,7 @@ struct CvRect; class IOutput { public: - enum OutputType {PNG_TRAINING_SET, JPG_TEST_SET}; + enum OutputType {PNG_DATASET, JPG_DATASET}; public: virtual bool write( const CvMat& img, const CvRect& boundingBox ) =0; From d1229efeec08df70760c29787b8469b5ab0bdefb Mon Sep 17 00:00:00 2001 From: Grigory Serebryakov Date: Mon, 1 Sep 2014 13:02:27 +0400 Subject: [PATCH 5/8] No output image resize in case of PNG dataset --- apps/haartraining/cvsamplesoutput.cpp | 38 ++++----------------------- apps/haartraining/cvsamplesoutput.h | 5 +--- 2 files changed, 6 insertions(+), 37 deletions(-) diff --git a/apps/haartraining/cvsamplesoutput.cpp b/apps/haartraining/cvsamplesoutput.cpp index a2b7e263c..62729040a 100644 --- a/apps/haartraining/cvsamplesoutput.cpp +++ b/apps/haartraining/cvsamplesoutput.cpp @@ -113,9 +113,9 @@ bool PngDatasetOutput::init( const char* annotationsListFileName ) } bool PngDatasetOutput::write( const CvMat& img, - const CvRect& 
boundingBox ) + const CvRect& boundingBox ) { - CvRect bbox = scaleBoundingBox(cvGetSize(&img), boundingBox); + CvRect bbox = addBoundingboxBorder(boundingBox); sprintf( imgFileName, "%04d_%04d_%04d_%04d_%04d", @@ -148,44 +148,16 @@ bool PngDatasetOutput::write( const CvMat& img, bbox.y + bbox.height ); fclose( annotationFile ); - writeImage(img); + cvSaveImage( imgFullPath, &img); return true; } -void PngDatasetOutput::writeImage(const CvMat &img) const +CvRect PngDatasetOutput::addBoundingboxBorder(const CvRect& bbox) const { - CvSize origsize = cvGetSize(&img); - - if( origsize.height > destImgHeight || origsize.width > destImgWidth ) - { - CvMat result = cvMat( destImgHeight, destImgWidth, CV_8UC1, - cvAlloc( sizeof( uchar ) * destImgHeight * destImgWidth ) ); - cvResize(&img, &result); - cvSaveImage( imgFullPath, &result ); - cvFree( &(result.data.ptr) ); - } - else - { - cvSaveImage( imgFullPath, &img); - } - - return; -} - -CvRect PngDatasetOutput::scaleBoundingBox(const CvSize& imgSize, const CvRect& bbox) -{ - double scale = MAX( (float) destImgWidth / imgSize.width, - (float) destImgHeight / imgSize.height ); CvRect boundingBox = bbox; int border = 5; - if( scale < 1. 
) - { - boundingBox.x = bbox.x * scale; - boundingBox.y = bbox.y * scale; - boundingBox.width = bbox.width * scale; - boundingBox.height = bbox.height * scale; - } + boundingBox.x -= border; boundingBox.y -= border; boundingBox.width += 2*border; diff --git a/apps/haartraining/cvsamplesoutput.h b/apps/haartraining/cvsamplesoutput.h index 90854bab9..7941e73df 100644 --- a/apps/haartraining/cvsamplesoutput.h +++ b/apps/haartraining/cvsamplesoutput.h @@ -20,10 +20,7 @@ private: virtual bool init(const char* annotationsListFileName ); - void writeImage( const CvMat& img ) const; - - CvRect scaleBoundingBox(const CvSize& imgSize, - const CvRect& bbox); + CvRect addBoundingboxBorder(const CvRect& bbox) const; private: char annotationFullPath[PATH_MAX]; From 25d125fba1e4d3e358555d3b97ed31db0b6b4d75 Mon Sep 17 00:00:00 2001 From: Grigory Serebryakov Date: Tue, 2 Sep 2014 10:55:45 +0400 Subject: [PATCH 6/8] Documentation update: createsamples usage --- doc/user_guide/ug_traincascade.rst | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/doc/user_guide/ug_traincascade.rst b/doc/user_guide/ug_traincascade.rst index 46b758359..9bf4ae925 100644 --- a/doc/user_guide/ug_traincascade.rst +++ b/doc/user_guide/ug_traincascade.rst @@ -140,10 +140,10 @@ Creating training set from a single image and a collection of backgrounds with a The following procedure is used to create a sample object instance: The source image is rotated randomly around all three axes. The chosen angle is limited my ``-max?angle``. Then pixels having the intensity from [``bg_color-bg_color_threshold``; ``bg_color+bg_color_threshold``] range are interpreted as transparent. White noise is added to the intensities of the foreground. If the ``-inv`` key is specified then foreground pixel intensities are inverted. If ``-randinv`` key is specified then algorithm randomly selects whether inversion should be applied to this sample. 
Finally, the obtained image is placed onto an arbitrary background from the background description file, resized to the desired size specified by ``-w`` and ``-h`` and stored to the vec-file, specified by the ``-vec`` command line option. -Creating training set as a collection of JPG images +Creating training set as a collection of PNG images --------------------------------------------------- -To obtain such behaviour the ``-img``, ``-bg`` and ``-info`` keys should be specified. The file name specified with ``-info`` key should include at least one level of directory hierarchy, that directory +To obtain such behaviour the ``-img``, ``-bg``, ``-info`` and ``-pngoutput`` keys should be specified. The file name specified with ``-info`` key should include at least one level of directory hierarchy, that directory will be used as the top-level directory for the training set. For example, with the ``opencv_createsamples`` called as following: @@ -172,20 +172,26 @@ With ``*.txt`` files in ``annotations`` directory containing information about o .. code-block:: text - Image filename : "createsamples/pos/0002_0107_0115_0195_0139.png" + Image filename : "/home/user/pos/0002_0107_0115_0195_0139.png" Bounding box for object 1 "PASperson" (Xmin, Ymin) - (Xmax, Ymax) : (107, 115) - (302, 254) And ``annotations.lst`` file containing the list of all annotations file: .. code-block:: text - createsamples/annotations/0001_0109_0209_0195_0139.txt - createsamples/annotations/0002_0241_0245_0139_0100.txt + /home/user/annotations/0001_0109_0209_0195_0139.txt + /home/user/annotations/0002_0241_0245_0139_0100.txt Creating test set as a collection of JPG images ----------------------------------------------- This variant of ``opencv_createsamples`` usage is very similar to the previous one, but generates the output in a different format; +To obtain such behaviour the ``-img``, ``-bg`` and ``-info`` keys should be specified. 
+For example, with the ``opencv_createsamples`` called as following: + + .. code-block:: text + + opencv_createsamples -img /home/user/logo.png -bg /home/user/bg.txt -info annotations.lst -maxxangle 0.1 -maxyangle 0.1 -maxzangle 0.1 Directory structure: From 06a1c90679addcdb5be4be4bbf4257333b74a3b7 Mon Sep 17 00:00:00 2001 From: Grigory Serebryakov Date: Fri, 5 Sep 2014 10:45:54 +0400 Subject: [PATCH 7/8] Include for cvSaveImage corrected --- apps/haartraining/cvsamplesoutput.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/haartraining/cvsamplesoutput.cpp b/apps/haartraining/cvsamplesoutput.cpp index 62729040a..f54736dd2 100644 --- a/apps/haartraining/cvsamplesoutput.cpp +++ b/apps/haartraining/cvsamplesoutput.cpp @@ -3,7 +3,7 @@ #include #include "_cvcommon.h" -#include "opencv2/opencv.hpp" +#include "highgui.h" /* print statistic info */ #define CV_VERBOSE 1 From 18c0511d3c0874895dd9518ffe28d762dd066b55 Mon Sep 17 00:00:00 2001 From: Grigory Serebryakov Date: Fri, 5 Sep 2014 11:27:43 +0400 Subject: [PATCH 8/8] Warning on converting int to bool fixed --- apps/haartraining/createsamples.cpp | 4 ++-- apps/haartraining/cvhaartraining.cpp | 4 ++-- apps/haartraining/cvhaartraining.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/haartraining/createsamples.cpp b/apps/haartraining/createsamples.cpp index a1a11e0b5..4d01fbd10 100644 --- a/apps/haartraining/createsamples.cpp +++ b/apps/haartraining/createsamples.cpp @@ -73,7 +73,7 @@ int main( int argc, char* argv[] ) double maxxangle = 1.1; double maxyangle = 1.1; double maxzangle = 0.5; - int showsamples = 0; + bool showsamples = false; /* the samples are adjusted to this scale in the sample preview window */ double scale = 4.0; int width = 24; @@ -159,7 +159,7 @@ int main( int argc, char* argv[] ) } else if( !strcmp( argv[i], "-show" ) ) { - showsamples = 1; + showsamples = true; if( i+1 < argc && strlen( argv[i+1] ) > 0 && argv[i+1][0] != '-' ) { double d; diff
--git a/apps/haartraining/cvhaartraining.cpp b/apps/haartraining/cvhaartraining.cpp index 31abfff6d..ed2684813 100644 --- a/apps/haartraining/cvhaartraining.cpp +++ b/apps/haartraining/cvhaartraining.cpp @@ -2848,7 +2848,7 @@ void cvCreateTrainingSamples( const char* filename, const char* bgfilename, int count, int invert, int maxintensitydev, double maxxangle, double maxyangle, double maxzangle, - int showsamples, + bool showsamples, int winwidth, int winheight ) { CvSampleDistortionData data; @@ -2915,7 +2915,7 @@ void cvCreateTrainingSamples( const char* filename, cvShowImage( "Sample", &sample ); if( cvWaitKey( 0 ) == 27 ) { - showsamples = 0; + showsamples = false; } } diff --git a/apps/haartraining/cvhaartraining.h b/apps/haartraining/cvhaartraining.h index 84a5643e7..038e1cc35 100644 --- a/apps/haartraining/cvhaartraining.h +++ b/apps/haartraining/cvhaartraining.h @@ -79,14 +79,14 @@ struct CvMat; */ #define CV_RANDOM_INVERT 0x7FFFFFFF -void cvCreateTrainingSamples( const char* filename, +void cvCreateTrainingSamples(const char* filename, const char* imgfilename, int bgcolor, int bgthreshold, const char* bgfilename, int count, int invert = 0, int maxintensitydev = 40, double maxxangle = 1.1, double maxyangle = 1.1, double maxzangle = 0.5, - int showsamples = 0, + bool showsamples = false, int winwidth = 24, int winheight = 24 ); void cvCreatePngTrainingSet(const char* imgfilename, int bgcolor, int bgthreshold,