refactored hog, added camera support into hog_sample

This commit is contained in:
Alexey Spizhevoy 2010-12-29 15:45:01 +00:00
parent 16bcf9b645
commit 2f13e4ce58
2 changed files with 150 additions and 148 deletions

View File

@ -1178,21 +1178,25 @@ namespace cv
size_t getDescriptorSize() const; size_t getDescriptorSize() const;
size_t getBlockHistogramSize() const; size_t getBlockHistogramSize() const;
double getWinSigma() const;
void setSVMDetector(const vector<float>& detector);
bool checkDetectorSize() const;
static vector<float> getDefaultPeopleDetector(); static vector<float> getDefaultPeopleDetector();
static vector<float> getPeopleDetector_48x96(); static vector<float> getPeopleDetector_48x96();
static vector<float> getPeopleDetector_64x128(); static vector<float> getPeopleDetector_64x128();
void setSVMDetector(const vector<float>& detector);
bool checkDetectorSize() const;
void detect(const GpuMat& img, vector<Point>& found_locations, double hit_threshold=0, void detect(const GpuMat& img, vector<Point>& found_locations,
Size win_stride=Size(), Size padding=Size()); double hit_threshold=0, Size win_stride=Size(),
Size padding=Size());
void detectMultiScale(const GpuMat& img, vector<Rect>& found_locations, void detectMultiScale(const GpuMat& img, vector<Rect>& found_locations,
double hit_threshold=0, Size win_stride=Size(), Size padding=Size(), double hit_threshold=0, Size win_stride=Size(),
double scale0=1.05, int group_threshold=2); Size padding=Size(), double scale0=1.05,
int group_threshold=2);
void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, void getDescriptors(const GpuMat& img, Size win_stride,
GpuMat& descriptors,
int descr_format=DESCR_FORMAT_COL_BY_COL); int descr_format=DESCR_FORMAT_COL_BY_COL);
Size win_size; Size win_size;
@ -1208,6 +1212,7 @@ namespace cv
protected: protected:
void computeBlockHistograms(const GpuMat& img); void computeBlockHistograms(const GpuMat& img);
void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle); void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
double getWinSigma() const;
static int numPartsWithin(int size, int part_size, int stride); static int numPartsWithin(int size, int part_size, int stride);
static Size numPartsWithin(Size size, Size part_size, Size stride); static Size numPartsWithin(Size size, Size part_size, Size stride);

View File

@ -12,61 +12,56 @@ using namespace cv;
//#define WRITE_VIDEO //#define WRITE_VIDEO
class Args
/** Contains all properties of application (including those which can be
changed by user in runtime) */
class Settings
{ {
public: public:
/** Sets default values */ Args();
Settings(); static Args read(int argc, char** argv);
/** Reads settings from command args */
static Settings Read(int argc, char** argv);
string src; string src;
bool src_is_video; bool src_is_video;
bool src_is_camera;
int camera_id;
bool make_gray; bool make_gray;
bool resize_src; bool resize_src;
double resize_src_scale; int resized_width, resized_height;
double scale; double scale;
int nlevels; int nlevels;
int gr_threshold; int gr_threshold;
double hit_threshold; double hit_threshold;
int win_width; int win_width;
int win_stride_width; int win_stride_width, win_stride_height;
int win_stride_height;
bool gamma_corr; bool gamma_corr;
}; };
/** Describes aplication logic */
class App class App
{ {
public: public:
/** Initializes application */ App(const Args& s);
App(const Settings& s); void run();
/** Runs demo using OpenCV highgui module for GUI building */ void handleKey(char key);
void RunOpencvGui();
/** Processes user keybord input */ void hogWorkBegin();
void HandleKey(char key); void hogWorkEnd();
string hogWorkFps() const;
void HogWorkBegin(); void workBegin();
void HogWorkEnd(); void workEnd();
double HogWorkFps() const; string workFps() const;
void WorkBegin(); string message() const;
void WorkEnd();
double WorkFps() const;
const string GetPerformanceSummary() const;
private: private:
App operator=(App&); App operator=(App&);
Settings settings; Args args;
bool running; bool running;
bool use_gpu; bool use_gpu;
@ -89,26 +84,29 @@ int main(int argc, char** argv)
{ {
try try
{ {
cout << "Histogram of Oriented Gradients descriptor and detector sample.\n";
if (argc < 2) if (argc < 2)
{ {
cout << "Usage:\nhog_gpu\n" cout << "\nUsage: hog_gpu\n"
<< " -src <path_to_the_source>\n" << " --src <path> # it's image file by default\n"
<< " [-src_is_video <true/false>] # says to interp. src as img or as video\n" << " [--src-is-video <true/false>] # says to interpretate src as video\n"
<< " [-make_gray <true/false>] # convert image to gray one or not\n" << " [--src-is-camera <true/false>] # says to interpretate src as camera\n"
<< " [-resize_src <true/false>] # do resize of the source image or not\n" << " [--make-gray <true/false>] # convert image to gray one or not\n"
<< " [-resize_src_scale <double>] # preprocessing image scale factor\n" << " [--resize-src <true/false>] # do resize of the source image or not\n"
<< " [-hit_threshold <double>] # classifying plane dist. threshold (0.0 usually)\n" << " [--src-width <int>] # resized image width\n"
<< " [-scale <double>] # HOG window scale factor\n" << " [--src-height <int>] # resized image height\n"
<< " [-nlevels <int>] # max number of HOG window scales\n" << " [--hit-threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
<< " [-win_width <int>] # width of the window (48 or 64)\n" << " [--scale <double>] # HOG window scale factor\n"
<< " [-win_stride_width <int>] # distance by OX axis between neighbour wins\n" << " [--nlevels <int>] # max number of HOG window scales\n"
<< " [-win_stride_height <int>] # distance by OY axis between neighbour wins\n" << " [--win-width <int>] # width of the window (48 or 64)\n"
<< " [-gr_threshold <int>] # merging similar rects constant\n" << " [--win-stride-width <int>] # distance by OX axis between neighbour wins\n"
<< " [-gamma_corr <int>] # do gamma correction or not\n"; << " [--win-stride-height <int>] # distance by OY axis between neighbour wins\n"
<< " [--gr-threshold <int>] # merging similar rects constant\n"
<< " [--gamma-correct <int>] # do gamma correction or not\n";
return 1; return 1;
} }
App app(Settings::Read(argc, argv)); App app(Args::read(argc, argv));
app.RunOpencvGui(); app.run();
} }
catch (const Exception& e) { return cout << "Error: " << e.what() << endl, 1; } catch (const Exception& e) { return cout << "Error: " << e.what() << endl, 1; }
catch (const exception& e) { return cout << "Error: " << e.what() << endl, 1; } catch (const exception& e) { return cout << "Error: " << e.what() << endl, 1; }
@ -117,56 +115,63 @@ int main(int argc, char** argv)
} }
Settings::Settings() Args::Args()
{ {
src_is_video = false; src_is_video = false;
src_is_camera = false;
camera_id = 0;
make_gray = false; make_gray = false;
resize_src = true;
resize_src_scale = 1.5; resize_src = false;
resized_width = 640;
resized_height = 480;
scale = 1.05; scale = 1.05;
nlevels = 13; nlevels = 13;
gr_threshold = 8; gr_threshold = 8;
hit_threshold = 1.4; hit_threshold = 1.4;
win_width = 48; win_width = 48;
win_stride_width = 8; win_stride_width = 8;
win_stride_height = 8; win_stride_height = 8;
gamma_corr = true; gamma_corr = true;
} }
Settings Settings::Read(int argc, char** argv) Args Args::read(int argc, char** argv)
{ {
cout << "Parsing command args" << endl; Args args;
Settings settings;
for (int i = 1; i < argc - 1; i += 2) for (int i = 1; i < argc - 1; i += 2)
{ {
string key = argv[i]; string key = argv[i];
string val = argv[i + 1]; string val = argv[i + 1];
if (key == "-src") settings.src = val; if (key == "--src") args.src = val;
else if (key == "-src_is_video") settings.src_is_video = (val == "true"); else if (key == "--src-is-video") args.src_is_video = (val == "true");
else if (key == "-make_gray") settings.make_gray = (val == "true"); else if (key == "--src-is-camera") args.src_is_camera = (val == "true");
else if (key == "-resize_src") settings.resize_src = (val == "true"); else if (key == "--camera-id") args.camera_id = atoi(val.c_str());
else if (key == "-resize_src_scale") settings.resize_src_scale = atof(val.c_str()); else if (key == "--make-gray") args.make_gray = (val == "true");
else if (key == "-hit_threshold") settings.hit_threshold = atof(val.c_str()); else if (key == "--resize-src") args.resize_src = (val == "true");
else if (key == "-scale") settings.scale = atof(val.c_str()); else if (key == "--src-width") args.resized_width = atoi(val.c_str());
else if (key == "-nlevels") settings.nlevels = atoi(val.c_str()); else if (key == "--src-height") args.resized_height = atoi(val.c_str());
else if (key == "-win_width") settings.win_width = atoi(val.c_str()); else if (key == "--hit-threshold") args.hit_threshold = atof(val.c_str());
else if (key == "-win_stride_width") settings.win_stride_width = atoi(val.c_str()); else if (key == "--scale") args.scale = atof(val.c_str());
else if (key == "-win_stride_height") settings.win_stride_height = atoi(val.c_str()); else if (key == "--nlevels") args.nlevels = atoi(val.c_str());
else if (key == "-gr_threshold") settings.gr_threshold = atoi(val.c_str()); else if (key == "--win-width") args.win_width = atoi(val.c_str());
else if (key == "-gamma_corr") settings.gamma_corr = atoi(val.c_str()) != 0; else if (key == "--win-stride-width") args.win_stride_width = atoi(val.c_str());
else throw runtime_error((string("Unknown key: ") + key)); else if (key == "--win-stride-height") args.win_stride_height = atoi(val.c_str());
else if (key == "--gr-threshold") args.gr_threshold = atoi(val.c_str());
else if (key == "--gamma-correct") args.gamma_corr = atoi(val.c_str()) != 0;
else throw runtime_error((string("unknown key: ") + key));
} }
return args;
cout << "Command args are parsed\n";
return settings;
} }
App::App(const Settings &s) App::App(const Args& s)
{ {
settings = s; args = s;
cout << "\nControls:\n" cout << "\nControls:\n"
<< "\tESC - exit\n" << "\tESC - exit\n"
<< "\tm - change mode GPU <-> CPU\n" << "\tm - change mode GPU <-> CPU\n"
@ -178,79 +183,81 @@ App::App(const Settings &s)
<< endl; << endl;
use_gpu = true; use_gpu = true;
make_gray = settings.make_gray; make_gray = args.make_gray;
scale = settings.scale; scale = args.scale;
gr_threshold = settings.gr_threshold; gr_threshold = args.gr_threshold;
nlevels = settings.nlevels; nlevels = args.nlevels;
hit_threshold = settings.hit_threshold; hit_threshold = args.hit_threshold;
gamma_corr = settings.gamma_corr; gamma_corr = args.gamma_corr;
if (settings.win_width != 64 && settings.win_width != 48) if (args.win_width != 64 && args.win_width != 48)
settings.win_width = 64; args.win_width = 64;
cout << "Scale: " << scale << endl; cout << "Scale: " << scale << endl;
if (args.resize_src)
cout << "Source size: (" << args.resized_width << ", " << args.resized_height << ")\n";
cout << "Group threshold: " << gr_threshold << endl; cout << "Group threshold: " << gr_threshold << endl;
cout << "Levels number: " << nlevels << endl; cout << "Levels number: " << nlevels << endl;
cout << "Win width: " << settings.win_width << endl; cout << "Win width: " << args.win_width << endl;
cout << "Win stride: (" << settings.win_stride_width << ", " << settings.win_stride_height << ")\n"; cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n";
cout << "Hit threshold: " << hit_threshold << endl; cout << "Hit threshold: " << hit_threshold << endl;
cout << "Gamma correction: " << gamma_corr << endl; cout << "Gamma correction: " << gamma_corr << endl;
cout << endl; cout << endl;
} }
void App::RunOpencvGui() void App::run()
{ {
running = true; running = true;
Size win_size(settings.win_width, settings.win_width * 2); //(64, 128) or (48, 96) Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
Size win_stride(settings.win_stride_width, settings.win_stride_height); Size win_stride(args.win_stride_width, args.win_stride_height);
vector<float> detector; vector<float> detector;
if (win_size == Size(64, 128)) if (win_size == Size(64, 128))
detector = cv::gpu::HOGDescriptor::getPeopleDetector_64x128(); detector = cv::gpu::HOGDescriptor::getPeopleDetector_64x128();
else else
detector = cv::gpu::HOGDescriptor::getPeopleDetector_48x96(); detector = cv::gpu::HOGDescriptor::getPeopleDetector_48x96();
// GPU's HOG classifier
cv::gpu::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, cv::gpu::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
cv::gpu::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, cv::gpu::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
cv::gpu::HOGDescriptor::DEFAULT_NLEVELS); cv::gpu::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector);
// CPU's HOG classifier
cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector);
cpu_hog.setSVMDetector(detector); cpu_hog.setSVMDetector(detector);
#ifdef WRITE_VIDEO #ifdef WRITE_VIDEO
cv::VideoWriter video_writer; cv::VideoWriter video_writer;
video_writer.open("output.avi", CV_FOURCC('x','v','i','d'), 24., cv::Size(640, 480), true); video_writer.open("output.avi", CV_FOURCC('x','v','i','d'), 24., cv::Size(640, 480), true);
if (!video_writer.isOpened()) if (!video_writer.isOpened())
throw std::runtime_error("can't create video writer"); throw std::runtime_error("can't create video writer");
#endif #endif
// Make endless cycle from video (if src is video)
while (running) while (running)
{ {
VideoCapture vc; VideoCapture vc;
Mat frame; Mat frame;
if (settings.src_is_video) if (args.src_is_video)
{ {
vc.open(settings.src.c_str()); vc.open(args.src.c_str());
if (!vc.isOpened()) if (!vc.isOpened())
throw runtime_error(string("Can't open video file: " + settings.src)); throw runtime_error(string("can't open video file: " + args.src));
vc >> frame;
}
else if (args.src_is_camera)
{
vc.open(args.camera_id);
if (!vc.isOpened())
throw runtime_error(string("can't open video file: " + args.src));
vc >> frame; vc >> frame;
} }
else else
{ {
frame = imread(settings.src); frame = imread(args.src);
if (frame.empty()) if (frame.empty())
throw runtime_error(string("Can't open image file: " + settings.src)); throw runtime_error(string("can't open image file: " + args.src));
} }
Mat img_aux, img, img_to_show; Mat img_aux, img, img_to_show;
@ -259,38 +266,34 @@ void App::RunOpencvGui()
// Iterate over all frames // Iterate over all frames
while (running && !frame.empty()) while (running && !frame.empty())
{ {
WorkBegin(); workBegin();
vector<Rect> found; // Change format of the image
if (make_gray) cvtColor(frame, img_aux, CV_BGR2GRAY);
// Change format of the image (input must be 8UC3) else if (use_gpu) cvtColor(frame, img_aux, CV_BGR2BGRA);
if (make_gray) else img_aux = frame;
cvtColor(frame, img_aux, CV_BGR2GRAY);
else if (use_gpu)
cvtColor(frame, img_aux, CV_BGR2BGRA);
else
img_aux = frame;
// Resize image // Resize image
if (settings.resize_src) if (args.resize_src) resize(img_aux, img, Size(args.resized_width, args.resized_height));
resize(img_aux, img, Size(int(frame.cols * settings.resize_src_scale), int(frame.rows * settings.resize_src_scale))); else img = img_aux;
else
img = img_aux;
img_to_show = img; img_to_show = img;
gpu_hog.nlevels = nlevels; gpu_hog.nlevels = nlevels;
cpu_hog.nlevels = nlevels; cpu_hog.nlevels = nlevels;
vector<Rect> found;
// Perform HOG classification // Perform HOG classification
HogWorkBegin(); hogWorkBegin();
if (use_gpu) if (use_gpu)
{ {
gpu_img = img; gpu_img = img;
gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold); gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
} }
else else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold); Size(0, 0), scale, gr_threshold);
HogWorkEnd(); hogWorkEnd();
// Draw positive classified windows // Draw positive classified windows
for (size_t i = 0; i < found.size(); i++) for (size_t i = 0; i < found.size(); i++)
@ -299,17 +302,14 @@ void App::RunOpencvGui()
rectangle(img_to_show, r.tl(), r.br(), CV_RGB(0, 255, 0), 3); rectangle(img_to_show, r.tl(), r.br(), CV_RGB(0, 255, 0), 3);
} }
// Show results putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, GetPerformanceSummary(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2); putText(img_to_show, "FPS (total): " + workFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
imshow("opencv_gpu_hog", img_to_show); imshow("opencv_gpu_hog", img_to_show);
HandleKey((char)waitKey(3)); handleKey((char)waitKey(3));
if (settings.src_is_video) if (args.src_is_video || args.src_is_camera) vc >> frame;
{
vc >> frame;
}
WorkEnd(); workEnd();
#ifdef WRITE_VIDEO #ifdef WRITE_VIDEO
cvtColor(img_to_show, img, CV_BGRA2BGR); cvtColor(img_to_show, img, CV_BGRA2BGR);
@ -320,7 +320,7 @@ void App::RunOpencvGui()
} }
void App::HandleKey(char key) void App::handleKey(char key)
{ {
switch (key) switch (key)
{ {
@ -382,38 +382,35 @@ void App::HandleKey(char key)
} }
inline void App::HogWorkBegin() { hog_work_begin = getTickCount(); } inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); }
inline void App::hogWorkEnd()
inline void App::HogWorkEnd()
{ {
int64 delta = getTickCount() - hog_work_begin; int64 delta = getTickCount() - hog_work_begin;
double freq = getTickFrequency(); double freq = getTickFrequency();
hog_work_fps = freq / delta; hog_work_fps = freq / delta;
} }
inline string App::hogWorkFps() const
inline double App::HogWorkFps() const { return hog_work_fps; } {
stringstream ss;
ss << hog_work_fps;
return ss.str();
}
inline void App::WorkBegin() { work_begin = getTickCount(); } inline void App::workBegin() { work_begin = getTickCount(); }
inline void App::workEnd()
inline void App::WorkEnd()
{ {
int64 delta = getTickCount() - work_begin; int64 delta = getTickCount() - work_begin;
double freq = getTickFrequency(); double freq = getTickFrequency();
work_fps = freq / delta; work_fps = freq / delta;
} }
inline string App::workFps() const
inline double App::WorkFps() const { return work_fps; }
inline const string App::GetPerformanceSummary() const
{ {
stringstream ss; stringstream ss;
ss << (use_gpu ? "GPU" : "CPU") << " HOG FPS: " << setiosflags(ios::left) << setprecision(4) << ss << work_fps;
setw(7) << HogWorkFps() << " Total FPS: " << setprecision(4) << setw(7) << WorkFps();
return ss.str(); return ss.str();
} }