From 2f13e4ce5853368f4b00fc21606d20c44368c520 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Wed, 29 Dec 2010 15:45:01 +0000 Subject: [PATCH] refactored hog, added camera support into hog_sample --- modules/gpu/include/opencv2/gpu/gpu.hpp | 21 +- samples/gpu/hog.cpp | 277 ++++++++++++------------ 2 files changed, 150 insertions(+), 148 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 0ac16e2b1..b7e85a308 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1178,21 +1178,25 @@ namespace cv size_t getDescriptorSize() const; size_t getBlockHistogramSize() const; - double getWinSigma() const; + + void setSVMDetector(const vector& detector); + bool checkDetectorSize() const; static vector getDefaultPeopleDetector(); static vector getPeopleDetector_48x96(); static vector getPeopleDetector_64x128(); - void setSVMDetector(const vector& detector); - bool checkDetectorSize() const; - void detect(const GpuMat& img, vector& found_locations, double hit_threshold=0, - Size win_stride=Size(), Size padding=Size()); + void detect(const GpuMat& img, vector& found_locations, + double hit_threshold=0, Size win_stride=Size(), + Size padding=Size()); + void detectMultiScale(const GpuMat& img, vector& found_locations, - double hit_threshold=0, Size win_stride=Size(), Size padding=Size(), - double scale0=1.05, int group_threshold=2); + double hit_threshold=0, Size win_stride=Size(), + Size padding=Size(), double scale0=1.05, + int group_threshold=2); - void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, + void getDescriptors(const GpuMat& img, Size win_stride, + GpuMat& descriptors, int descr_format=DESCR_FORMAT_COL_BY_COL); Size win_size; @@ -1208,6 +1212,7 @@ namespace cv protected: void computeBlockHistograms(const GpuMat& img); void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle); + double getWinSigma() const; static int numPartsWithin(int size, int part_size, int stride); static Size numPartsWithin(Size size, Size part_size, Size stride); diff --git a/samples/gpu/hog.cpp b/samples/gpu/hog.cpp index 466f709b2..ebcdc64e0 100644 --- a/samples/gpu/hog.cpp +++ b/samples/gpu/hog.cpp @@ -12,61 +12,56 @@ using namespace cv; //#define WRITE_VIDEO - -/** Contains all properties of application (including those which can be -changed by user in runtime) */ -class Settings +class Args { public: - /** Sets default values */ - Settings(); - - /** Reads settings from command args */ - static Settings Read(int argc, char** argv); + Args(); + static Args read(int argc, char** argv); string src; bool src_is_video; + bool src_is_camera; + int camera_id; + bool make_gray; + bool resize_src; - double resize_src_scale; + int resized_width, resized_height; + double scale; int nlevels; int gr_threshold; double hit_threshold; + int win_width; - int win_stride_width; - int win_stride_height; + int win_stride_width, win_stride_height; + bool gamma_corr; }; -/** Describes aplication logic */ class App { public: - /** Initializes application */ - App(const Settings& s); + App(const Args& s); + void run(); - /** Runs demo using OpenCV highgui module for GUI building */ - void RunOpencvGui(); + void handleKey(char key); - /** Processes user keybord input */ - void HandleKey(char key); + void hogWorkBegin(); + void hogWorkEnd(); + string hogWorkFps() const; - void HogWorkBegin(); - void HogWorkEnd(); - double HogWorkFps() const; + void workBegin(); + void workEnd(); + string workFps() const; - void WorkBegin(); - void WorkEnd(); - double WorkFps() const; - - const string GetPerformanceSummary() const; + string message() const; private: App operator=(App&); - Settings settings; + Args args; bool running; bool use_gpu; @@ -89,26 +84,29 @@ int main(int argc, char** argv) { try { + cout << "Histogram of Oriented Gradients descriptor and detector sample.\n"; if (argc < 2) { - cout << "Usage:\nhog_gpu\n" - << " -src \n" - << " [-src_is_video ] # says to interp. src as img or as video\n" - << " [-make_gray ] # convert image to gray one or not\n" - << " [-resize_src ] # do resize of the source image or not\n" - << " [-resize_src_scale ] # preprocessing image scale factor\n" - << " [-hit_threshold ] # classifying plane dist. threshold (0.0 usually)\n" - << " [-scale ] # HOG window scale factor\n" - << " [-nlevels ] # max number of HOG window scales\n" - << " [-win_width ] # width of the window (48 or 64)\n" - << " [-win_stride_width ] # distance by OX axis between neighbour wins\n" - << " [-win_stride_height ] # distance by OY axis between neighbour wins\n" - << " [-gr_threshold ] # merging similar rects constant\n" - << " [-gamma_corr ] # do gamma correction or not\n"; + cout << "\nUsage: hog_gpu\n" + << " --src # it's image file by default\n" + << " [--src-is-video ] # says to interpretate src as video\n" + << " [--src-is-camera ] # says to interpretate src as camera\n" + << " [--make-gray ] # convert image to gray one or not\n" + << " [--resize-src ] # do resize of the source image or not\n" + << " [--src-width ] # resized image width\n" + << " [--src-height ] # resized image height\n" + << " [--hit-threshold ] # classifying plane distance threshold (0.0 usually)\n" + << " [--scale ] # HOG window scale factor\n" + << " [--nlevels ] # max number of HOG window scales\n" + << " [--win-width ] # width of the window (48 or 64)\n" + << " [--win-stride-width ] # distance by OX axis between neighbour wins\n" + << " [--win-stride-height ] # distance by OY axis between neighbour wins\n" + << " [--gr-threshold ] # merging similar rects constant\n" + << " [--gamma-correct ] # do gamma correction or not\n"; return 1; } - App app(Settings::Read(argc, argv)); - app.RunOpencvGui(); + App app(Args::read(argc, argv)); + app.run(); } catch (const Exception& e) { return cout << "Error: " << e.what() << endl, 1; } catch (const exception& e) { return cout << "Error: " << e.what() << endl, 1; } @@ -117,56 +115,63 @@ int main(int argc, char** argv) } -Settings::Settings() +Args::Args() { src_is_video = false; + src_is_camera = false; + camera_id = 0; + make_gray = false; - resize_src = true; - resize_src_scale = 1.5; + + resize_src = false; + resized_width = 640; + resized_height = 480; + scale = 1.05; nlevels = 13; gr_threshold = 8; hit_threshold = 1.4; + win_width = 48; win_stride_width = 8; win_stride_height = 8; + gamma_corr = true; } -Settings Settings::Read(int argc, char** argv) +Args Args::read(int argc, char** argv) { - cout << "Parsing command args" << endl; - - Settings settings; + Args args; for (int i = 1; i < argc - 1; i += 2) { string key = argv[i]; string val = argv[i + 1]; - if (key == "-src") settings.src = val; - else if (key == "-src_is_video") settings.src_is_video = (val == "true"); - else if (key == "-make_gray") settings.make_gray = (val == "true"); - else if (key == "-resize_src") settings.resize_src = (val == "true"); - else if (key == "-resize_src_scale") settings.resize_src_scale = atof(val.c_str()); - else if (key == "-hit_threshold") settings.hit_threshold = atof(val.c_str()); - else if (key == "-scale") settings.scale = atof(val.c_str()); - else if (key == "-nlevels") settings.nlevels = atoi(val.c_str()); - else if (key == "-win_width") settings.win_width = atoi(val.c_str()); - else if (key == "-win_stride_width") settings.win_stride_width = atoi(val.c_str()); - else if (key == "-win_stride_height") settings.win_stride_height = atoi(val.c_str()); - else if (key == "-gr_threshold") settings.gr_threshold = atoi(val.c_str()); - else if (key == "-gamma_corr") settings.gamma_corr = atoi(val.c_str()) != 0; - else throw runtime_error((string("Unknown key: ") + key)); + if (key == "--src") args.src = val; + else if (key == "--src-is-video") args.src_is_video = (val == "true"); + else if (key == "--src-is-camera") args.src_is_camera = (val == "true"); + else if (key == "--camera-id") args.camera_id = atoi(val.c_str()); + else if (key == "--make-gray") args.make_gray = (val == "true"); + else if (key == "--resize-src") args.resize_src = (val == "true"); + else if (key == "--src-width") args.resized_width = atoi(val.c_str()); + else if (key == "--src-height") args.resized_height = atoi(val.c_str()); + else if (key == "--hit-threshold") args.hit_threshold = atof(val.c_str()); + else if (key == "--scale") args.scale = atof(val.c_str()); + else if (key == "--nlevels") args.nlevels = atoi(val.c_str()); + else if (key == "--win-width") args.win_width = atoi(val.c_str()); + else if (key == "--win-stride-width") args.win_stride_width = atoi(val.c_str()); + else if (key == "--win-stride-height") args.win_stride_height = atoi(val.c_str()); + else if (key == "--gr-threshold") args.gr_threshold = atoi(val.c_str()); + else if (key == "--gamma-correct") args.gamma_corr = atoi(val.c_str()) != 0; + else throw runtime_error((string("unknown key: ") + key)); } - - cout << "Command args are parsed\n"; - return settings; + return args; } -App::App(const Settings &s) +App::App(const Args& s) { - settings = s; + args = s; cout << "\nControls:\n" << "\tESC - exit\n" << "\tm - change mode GPU <-> CPU\n" @@ -178,79 +183,81 @@ App::App(const Settings &s) << endl; use_gpu = true; - make_gray = settings.make_gray; - scale = settings.scale; - gr_threshold = settings.gr_threshold; - nlevels = settings.nlevels; - hit_threshold = settings.hit_threshold; - gamma_corr = settings.gamma_corr; + make_gray = args.make_gray; + scale = args.scale; + gr_threshold = args.gr_threshold; + nlevels = args.nlevels; + hit_threshold = args.hit_threshold; + gamma_corr = args.gamma_corr; - if (settings.win_width != 64 && settings.win_width != 48) - settings.win_width = 64; + if (args.win_width != 64 && args.win_width != 48) + args.win_width = 64; cout << "Scale: " << scale << endl; + if (args.resize_src) + cout << "Source size: (" << args.resized_width << ", " << args.resized_height << ")\n"; cout << "Group threshold: " << gr_threshold << endl; cout << "Levels number: " << nlevels << endl; - cout << "Win width: " << settings.win_width << endl; - cout << "Win stride: (" << settings.win_stride_width << ", " << settings.win_stride_height << ")\n"; + cout << "Win width: " << args.win_width << endl; + cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n"; cout << "Hit threshold: " << hit_threshold << endl; cout << "Gamma correction: " << gamma_corr << endl; cout << endl; } -void App::RunOpencvGui() +void App::run() { running = true; - Size win_size(settings.win_width, settings.win_width * 2); //(64, 128) or (48, 96) - Size win_stride(settings.win_stride_width, settings.win_stride_height); + Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96) + Size win_stride(args.win_stride_width, args.win_stride_height); vector detector; - - if (win_size == Size(64, 128)) + if (win_size == Size(64, 128)) detector = cv::gpu::HOGDescriptor::getPeopleDetector_64x128(); else detector = cv::gpu::HOGDescriptor::getPeopleDetector_48x96(); - // GPU's HOG classifier cv::gpu::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, cv::gpu::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, cv::gpu::HOGDescriptor::DEFAULT_NLEVELS); - gpu_hog.setSVMDetector(detector); - - // CPU's HOG classifier cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); + gpu_hog.setSVMDetector(detector); cpu_hog.setSVMDetector(detector); #ifdef WRITE_VIDEO cv::VideoWriter video_writer; - video_writer.open("output.avi", CV_FOURCC('x','v','i','d'), 24., cv::Size(640, 480), true); - if (!video_writer.isOpened()) throw std::runtime_error("can't create video writer"); #endif - // Make endless cycle from video (if src is video) while (running) { VideoCapture vc; Mat frame; - if (settings.src_is_video) + if (args.src_is_video) { - vc.open(settings.src.c_str()); + vc.open(args.src.c_str()); if (!vc.isOpened()) - throw runtime_error(string("Can't open video file: " + settings.src)); + throw runtime_error(string("can't open video file: " + args.src)); + vc >> frame; + } + else if (args.src_is_camera) + { + vc.open(args.camera_id); + if (!vc.isOpened()) + throw runtime_error(string("can't open video file: " + args.src)); vc >> frame; } else { - frame = imread(settings.src); + frame = imread(args.src); if (frame.empty()) - throw runtime_error(string("Can't open image file: " + settings.src)); + throw runtime_error(string("can't open image file: " + args.src)); } Mat img_aux, img, img_to_show; @@ -259,38 +266,34 @@ void App::RunOpencvGui() // Iterate over all frames while (running && !frame.empty()) { - WorkBegin(); + workBegin(); - vector found; - - // Change format of the image (input must be 8UC3) - if (make_gray) - cvtColor(frame, img_aux, CV_BGR2GRAY); - else if (use_gpu) - cvtColor(frame, img_aux, CV_BGR2BGRA); - else - img_aux = frame; + // Change format of the image + if (make_gray) cvtColor(frame, img_aux, CV_BGR2GRAY); + else if (use_gpu) cvtColor(frame, img_aux, CV_BGR2BGRA); + else img_aux = frame; // Resize image - if (settings.resize_src) - resize(img_aux, img, Size(int(frame.cols * settings.resize_src_scale), int(frame.rows * settings.resize_src_scale))); - else - img = img_aux; + if (args.resize_src) resize(img_aux, img, Size(args.resized_width, args.resized_height)); + else img = img_aux; img_to_show = img; gpu_hog.nlevels = nlevels; cpu_hog.nlevels = nlevels; + vector found; + // Perform HOG classification - HogWorkBegin(); + hogWorkBegin(); if (use_gpu) { gpu_img = img; - gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold); + gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride, + Size(0, 0), scale, gr_threshold); } - else - cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold); - HogWorkEnd(); + else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, + Size(0, 0), scale, gr_threshold); + hogWorkEnd(); // Draw positive classified windows for (size_t i = 0; i < found.size(); i++) @@ -299,17 +302,14 @@ void App::RunOpencvGui() rectangle(img_to_show, r.tl(), r.br(), CV_RGB(0, 255, 0), 3); } - // Show results - putText(img_to_show, GetPerformanceSummary(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2); + putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + putText(img_to_show, "FPS (total): " + workFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); imshow("opencv_gpu_hog", img_to_show); - HandleKey((char)waitKey(3)); + handleKey((char)waitKey(3)); - if (settings.src_is_video) - { - vc >> frame; - } + if (args.src_is_video || args.src_is_camera) vc >> frame; - WorkEnd(); + workEnd(); #ifdef WRITE_VIDEO cvtColor(img_to_show, img, CV_BGRA2BGR); @@ -320,7 +320,7 @@ void App::RunOpencvGui() } -void App::HandleKey(char key) +void App::handleKey(char key) { switch (key) { @@ -382,38 +382,35 @@ void App::HandleKey(char key) } -inline void App::HogWorkBegin() { hog_work_begin = getTickCount(); } +inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); } - -inline void App::HogWorkEnd() +inline void App::hogWorkEnd() { int64 delta = getTickCount() - hog_work_begin; double freq = getTickFrequency(); hog_work_fps = freq / delta; } - -inline double App::HogWorkFps() const { return hog_work_fps; } +inline string App::hogWorkFps() const +{ + stringstream ss; + ss << hog_work_fps; + return ss.str(); +} -inline void App::WorkBegin() { work_begin = getTickCount(); } +inline void App::workBegin() { work_begin = getTickCount(); } - -inline void App::WorkEnd() +inline void App::workEnd() { int64 delta = getTickCount() - work_begin; double freq = getTickFrequency(); work_fps = freq / delta; } - -inline double App::WorkFps() const { return work_fps; } - - -inline const string App::GetPerformanceSummary() const +inline string App::workFps() const { stringstream ss; - ss << (use_gpu ? "GPU" : "CPU") << " HOG FPS: " << setiosflags(ios::left) << setprecision(4) << - setw(7) << HogWorkFps() << " Total FPS: " << setprecision(4) << setw(7) << WorkFps(); + ss << work_fps; return ss.str(); }