refactor CUDA HOG algorithm: use abstract interface with hidden implementation
parent 0af7597d36
commit 8257dc3c1e
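
In practice the change replaces the concrete cv::cuda::HOGDescriptor struct with an abstract cv::cuda::HOG interface obtained from a factory function; parameters that used to be constructor arguments or public fields become virtual setters. A minimal usage sketch based on the perf-test and sample changes below (the include paths, image file name and main() wrapper are illustrative assumptions, not part of this commit):

#include <vector>

#include <opencv2/cudaobjdetect.hpp>   // assumed include path for cv::cuda::HOG
#include <opencv2/imgcodecs.hpp>       // assumed, for cv::imread

int main()
{
    // Old interface: cv::cuda::HOGDescriptor d_hog;
    //                d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
    // New interface: abstract cv::cuda::HOG created through a factory function.
    cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();       // default 64x128 window
    d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());     // people detector, now returned as cv::Mat

    // Former constructor arguments / public fields are now virtual setters.
    d_hog->setNumLevels(64);        // old DEFAULT_NLEVELS
    d_hog->setScaleFactor(1.05);    // old scale0 default
    d_hog->setGroupThreshold(2);    // old group_threshold default
    d_hog->setHitThreshold(0);

    cv::Mat img = cv::imread("people.jpg", cv::IMREAD_GRAYSCALE); // hypothetical input, CV_8UC1
    cv::cuda::GpuMat d_img(img);

    std::vector<cv::Rect> found_locations;
    d_hog->detectMultiScale(d_img, found_locations);              // stride, scale, etc. come from the setters

    return 0;
}

The values shown match the defaults of the removed interface (DEFAULT_NLEVELS = 64, scale0 = 1.05, group_threshold = 2).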
@@ -65,32 +65,24 @@ namespace cv { namespace cuda {
 // HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector
 //

-struct CV_EXPORTS HOGConfidence
-{
-    double scale;
-    std::vector<Point> locations;
-    std::vector<double> confidences;
-    std::vector<double> part_scores[4];
-};
-
 /** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.

-Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much
-as possible.
-
 @note
     -   An example applying the HOG descriptor for people detection can be found at
         opencv_source_code/samples/cpp/peopledetect.cpp
     -   A CUDA example applying the HOG descriptor for people detection can be found at
         opencv_source_code/samples/gpu/hog.cpp
     -   (Python) An example applying the HOG descriptor for people detection can be found at
         opencv_source_code/samples/python2/peopledetect.py
 */
-struct CV_EXPORTS HOGDescriptor
+class CV_EXPORTS HOG : public cv::Algorithm
 {
-    enum { DEFAULT_WIN_SIGMA = -1 };
-    enum { DEFAULT_NLEVELS = 64 };
-    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+public:
+    enum
+    {
+        DESCR_FORMAT_ROW_BY_ROW,
+        DESCR_FORMAT_COL_BY_COL
+    };

     /** @brief Creates the HOG descriptor and detector.

@@ -99,132 +91,105 @@ struct CV_EXPORTS HOGDescriptor
     @param block_stride Block stride. It must be a multiple of cell size.
     @param cell_size Cell size. Only (8, 8) is supported for now.
     @param nbins Number of bins. Only 9 bins per cell are supported for now.
-    @param win_sigma Gaussian smoothing window parameter.
-    @param threshold_L2hys L2-Hys normalization method shrinkage.
-    @param gamma_correction Flag to specify whether the gamma correction preprocessing is required or
-    not.
-    @param nlevels Maximum number of detection window increases.
     */
-    HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
-                  Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
-                  int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
-                  double threshold_L2hys=0.2, bool gamma_correction=true,
-                  int nlevels=DEFAULT_NLEVELS);
+    static Ptr<HOG> create(Size win_size = Size(64, 128),
+                           Size block_size = Size(16, 16),
+                           Size block_stride = Size(8, 8),
+                           Size cell_size = Size(8, 8),
+                           int nbins = 9);

+    //! Gaussian smoothing window parameter.
+    virtual void setWinSigma(double win_sigma) = 0;
+    virtual double getWinSigma() const = 0;
+
+    //! L2-Hys normalization method shrinkage.
+    virtual void setL2HysThreshold(double threshold_L2hys) = 0;
+    virtual double getL2HysThreshold() const = 0;
+
+    //! Flag to specify whether the gamma correction preprocessing is required or not.
+    virtual void setGammaCorrection(bool gamma_correction) = 0;
+    virtual bool getGammaCorrection() const = 0;
+
+    //! Maximum number of detection window increases.
+    virtual void setNumLevels(int nlevels) = 0;
+    virtual int getNumLevels() const = 0;
+
+    //! Threshold for the distance between features and SVM classifying plane.
+    //! Usually it is 0 and should be specfied in the detector coefficients (as the last free
+    //! coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
+    //! manually here.
+    virtual void setHitThreshold(double hit_threshold) = 0;
+    virtual double getHitThreshold() const = 0;
+
+    //! Window stride. It must be a multiple of block stride.
+    virtual void setWinStride(Size win_stride) = 0;
+    virtual Size getWinStride() const = 0;
+
+    //! Coefficient of the detection window increase.
+    virtual void setScaleFactor(double scale0) = 0;
+    virtual double getScaleFactor() const = 0;
+
+    //! Coefficient to regulate the similarity threshold. When detected, some
+    //! objects can be covered by many rectangles. 0 means not to perform grouping.
+    //! See groupRectangles.
+    virtual void setGroupThreshold(int group_threshold) = 0;
+    virtual int getGroupThreshold() const = 0;
+
+    //! Descriptor storage format:
+    //!   - **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
+    //!   - **DESCR_FORMAT_COL_BY_COL** - Column-major order.
+    virtual void setDescriptorFormat(int descr_format) = 0;
+    virtual int getDescriptorFormat() const = 0;
+
     /** @brief Returns the number of coefficients required for the classification.
     */
-    size_t getDescriptorSize() const;
+    virtual size_t getDescriptorSize() const = 0;

     /** @brief Returns the block histogram size.
     */
-    size_t getBlockHistogramSize() const;
+    virtual size_t getBlockHistogramSize() const = 0;

     /** @brief Sets coefficients for the linear SVM classifier.
     */
-    void setSVMDetector(const std::vector<float>& detector);
+    virtual void setSVMDetector(InputArray detector) = 0;

-    /** @brief Returns coefficients of the classifier trained for people detection (for default window size).
+    /** @brief Returns coefficients of the classifier trained for people detection.
     */
-    static std::vector<float> getDefaultPeopleDetector();
-    /** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
-    */
-    static std::vector<float> getPeopleDetector48x96();
-    /** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
-    */
-    static std::vector<float> getPeopleDetector64x128();
+    virtual Mat getDefaultPeopleDetector() const = 0;

     /** @brief Performs object detection without a multi-scale window.

     @param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
     @param found_locations Left-top corner points of detected objects boundaries.
-    @param hit_threshold Threshold for the distance between features and SVM classifying plane.
-    Usually it is 0 and should be specfied in the detector coefficients (as the last free
-    coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
-    manually here.
-    @param win_stride Window stride. It must be a multiple of block stride.
-    @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
+    @param confidences Optional output array for confidences.
     */
-    void detect(const GpuMat& img, std::vector<Point>& found_locations,
-                double hit_threshold=0, Size win_stride=Size(),
-                Size padding=Size());
+    virtual void detect(InputArray img,
+                        std::vector<Point>& found_locations,
+                        std::vector<double>* confidences = NULL) = 0;

     /** @brief Performs object detection with a multi-scale window.

     @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
     @param found_locations Detected objects boundaries.
+    @param confidences Optional output array for confidences.
     @param hit_threshold Threshold for the distance between features and SVM classifying plane. See
     cuda::HOGDescriptor::detect for details.
     @param win_stride Window stride. It must be a multiple of block stride.
     @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
-    @param scale0 Coefficient of the detection window increase.
-    @param group_threshold Coefficient to regulate the similarity threshold. When detected, some
-    objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles .
     */
-    void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                          double hit_threshold=0, Size win_stride=Size(),
-                          Size padding=Size(), double scale0=1.05,
-                          int group_threshold=2);
-
-    void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
-                           Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
-
-    void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                                     double hit_threshold, Size win_stride, Size padding,
-                                     std::vector<HOGConfidence> &conf_out, int group_threshold);
+    virtual void detectMultiScale(InputArray img,
+                                  std::vector<Rect>& found_locations,
+                                  std::vector<double>* confidences = NULL) = 0;

     /** @brief Returns block descriptors computed for the whole image.

     @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
-    @param win_stride Window stride. It must be a multiple of block stride.
     @param descriptors 2D array of descriptors.
-    @param descr_format Descriptor storage format:
-    -   **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
-    -   **DESCR_FORMAT_COL_BY_COL** - Column-major order.
-
-    The function is mainly used to learn the classifier.
+    @param stream CUDA stream.
     */
-    void getDescriptors(const GpuMat& img, Size win_stride,
-                        GpuMat& descriptors,
-                        int descr_format=DESCR_FORMAT_COL_BY_COL);
-
-    Size win_size;
-    Size block_size;
-    Size block_stride;
-    Size cell_size;
-    int nbins;
-    double win_sigma;
-    double threshold_L2hys;
-    bool gamma_correction;
-    int nlevels;
-
-protected:
-    void computeBlockHistograms(const GpuMat& img);
-    void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
-
-    double getWinSigma() const;
-    bool checkDetectorSize() const;
-
-    static int numPartsWithin(int size, int part_size, int stride);
-    static Size numPartsWithin(Size size, Size part_size, Size stride);
-
-    // Coefficients of the separating plane
-    float free_coef;
-    GpuMat detector;
-
-    // Results of the last classification step
-    GpuMat labels, labels_buf;
-    Mat labels_host;
-
-    // Results of the last histogram evaluation step
-    GpuMat block_hists, block_hists_buf;
-
-    // Gradients conputation results
-    GpuMat grad, qangle, grad_buf, qangle_buf;
-
-    // returns subbuffer with required size, reallocates buffer if nessesary.
-    static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
-    static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
-
-    std::vector<GpuMat> image_scales;
+    virtual void compute(InputArray img,
+                         OutputArray descriptors,
+                         Stream& stream = Stream::Null()) = 0;
 };

 //
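
For the descriptor-computation path declared above (compute together with setWinStride and setDescriptorFormat), a short sketch of extracting block descriptors under the new interface; the helper name, the include path, and the download to host memory are assumptions for illustration, not part of this commit:

#include <opencv2/cudaobjdetect.hpp>   // assumed include path for cv::cuda::HOG

// Sketch: extract HOG block descriptors for every detection window of a CV_8UC1 image.
cv::Mat computeHogDescriptors(const cv::Mat& img)             // hypothetical helper
{
    cv::Ptr<cv::cuda::HOG> hog = cv::cuda::HOG::create();     // 64x128 window, 16x16 block, 8x8 cell, 9 bins
    hog->setWinStride(cv::Size(64, 128));                     // one window per 64x128 step, as in the test below
    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_ROW_BY_ROW);

    cv::cuda::GpuMat d_img(img), d_descriptors;
    hog->compute(d_img, d_descriptors);                       // default stream (Stream::Null())

    cv::Mat descriptors;
    d_descriptors.download(descriptors);                      // one row per detection window
    return descriptors;
}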
@@ -71,10 +71,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
     const cv::cuda::GpuMat d_img(img);
     std::vector<cv::Rect> gpu_found_locations;

-    cv::cuda::HOGDescriptor d_hog;
-    d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+    cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+    d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());

-    TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
+    TEST_CYCLE() d_hog->detectMultiScale(d_img, gpu_found_locations);

     SANITY_CHECK(gpu_found_locations);
 }
@@ -82,8 +82,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
 {
     std::vector<cv::Rect> cpu_found_locations;

+    cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+
     cv::HOGDescriptor hog;
-    hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+    hog.setSVMDetector(d_hog->getDefaultPeopleDetector());

     TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);

(File diff suppressed because it is too large)
@@ -48,9 +48,10 @@ using namespace cvtest;

 //#define DUMP

-struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescriptor
+struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
     cv::cuda::DeviceInfo devInfo;
+    cv::Ptr<cv::cuda::HOG> hog;

 #ifdef DUMP
     std::ofstream f;
@@ -69,23 +70,13 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
         devInfo = GetParam();

         cv::cuda::setDevice(devInfo.deviceID());
+
+        hog = cv::cuda::HOG::create();
     }

 #ifdef DUMP
-    void dump(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
+    void dump(const std::vector<cv::Point>& locations)
     {
-        f.write((char*)&blockHists.rows, sizeof(blockHists.rows));
-        f.write((char*)&blockHists.cols, sizeof(blockHists.cols));
-
-        for (int i = 0; i < blockHists.rows; ++i)
-        {
-            for (int j = 0; j < blockHists.cols; ++j)
-            {
-                float val = blockHists.at<float>(i, j);
-                f.write((char*)&val, sizeof(val));
-            }
-        }
-
         int nlocations = locations.size();
         f.write((char*)&nlocations, sizeof(nlocations));

@@ -93,21 +84,18 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
             f.write((char*)&locations[i], sizeof(locations[i]));
     }
 #else
-    void compare(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
+    void compare(const std::vector<cv::Point>& locations)
     {
+        // skip block_hists check
         int rows, cols;
         f.read((char*)&rows, sizeof(rows));
         f.read((char*)&cols, sizeof(cols));
-        ASSERT_EQ(rows, blockHists.rows);
-        ASSERT_EQ(cols, blockHists.cols);
-
-        for (int i = 0; i < blockHists.rows; ++i)
+        for (int i = 0; i < rows; ++i)
         {
-            for (int j = 0; j < blockHists.cols; ++j)
+            for (int j = 0; j < cols; ++j)
             {
                 float val;
                 f.read((char*)&val, sizeof(val));
-                ASSERT_NEAR(val, blockHists.at<float>(i, j), 1e-3);
             }
         }

@@ -126,54 +114,41 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript

     void testDetect(const cv::Mat& img)
     {
-        gamma_correction = false;
-        setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+        hog->setGammaCorrection(false);
+        hog->setSVMDetector(hog->getDefaultPeopleDetector());

         std::vector<cv::Point> locations;

         // Test detect
-        detect(loadMat(img), locations, 0);
+        hog->detect(loadMat(img), locations);

 #ifdef DUMP
-        dump(cv::Mat(block_hists), locations);
+        dump(locations);
 #else
-        compare(cv::Mat(block_hists), locations);
+        compare(locations);
 #endif

         // Test detect on smaller image
         cv::Mat img2;
         cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
-        detect(loadMat(img2), locations, 0);
+        hog->detect(loadMat(img2), locations);

 #ifdef DUMP
-        dump(cv::Mat(block_hists), locations);
+        dump(locations);
 #else
-        compare(cv::Mat(block_hists), locations);
+        compare(locations);
 #endif

         // Test detect on greater image
         cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
-        detect(loadMat(img2), locations, 0);
+        hog->detect(loadMat(img2), locations);

 #ifdef DUMP
-        dump(cv::Mat(block_hists), locations);
+        dump(locations);
 #else
-        compare(cv::Mat(block_hists), locations);
+        compare(locations);
 #endif
     }
-
-    // Does not compare border value, as interpolation leads to delta
-    void compare_inner_parts(cv::Mat d1, cv::Mat d2)
-    {
-        for (int i = 1; i < blocks_per_win_y - 1; ++i)
-            for (int j = 1; j < blocks_per_win_x - 1; ++j)
-                for (int k = 0; k < block_hist_size; ++k)
-                {
-                    float a = d1.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
-                    float b = d2.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
-                    ASSERT_FLOAT_EQ(a, b);
-                }
-    }
 };

 // desabled while resize does not fixed
@@ -182,13 +157,8 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
     cv::Mat img_rgb = readImage("hog/road.png");
     ASSERT_FALSE(img_rgb.empty());

-#ifdef DUMP
     f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
     ASSERT_TRUE(f.is_open());
-#else
-    f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
-    ASSERT_TRUE(f.is_open());
-#endif

     // Test on color image
     cv::Mat img;
@@ -198,8 +168,6 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
     // Test on gray image
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
     testDetect(img);
-
-    f.close();
 }

 CUDA_TEST_P(HOG, GetDescriptors)
@@ -216,8 +184,14 @@ CUDA_TEST_P(HOG, GetDescriptors)

     // Convert train images into feature vectors (train table)
     cv::cuda::GpuMat descriptors, descriptors_by_cols;
-    getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
-    getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
+    hog->setWinStride(Size(64, 128));
+
+    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_ROW_BY_ROW);
+    hog->compute(d_img, descriptors);
+
+    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_COL_BY_COL);
+    hog->compute(d_img, descriptors_by_cols);

     // Check size of the result train table
     wins_per_img_x = 3;
@@ -242,48 +216,6 @@ CUDA_TEST_P(HOG, GetDescriptors)
                 ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
                           r[(x * blocks_per_win_y + y) * block_hist_size + k]);
             }
-
-    /* Now we want to extract the same feature vectors, but from single images. NOTE: results will
-    be defferent, due to border values interpolation. Using of many small images is slower, however we
-    wont't call getDescriptors and will use computeBlockHistograms instead of. computeBlockHistograms
-    works good, it can be checked in the gpu_hog sample */
-
-    img_rgb = readImage("hog/positive1.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    // Everything is fine with interpolation for left top subimage
-    ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
-
-    img_rgb = readImage("hog/positive2.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
-
-    img_rgb = readImage("hog/negative1.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
-
-    img_rgb = readImage("hog/negative2.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
-
-    img_rgb = readImage("hog/positive3.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
-
-    img_rgb = readImage("hog/negative3.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
 }

 INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, HOG, ALL_DEVICES);
@@ -310,12 +242,12 @@ CUDA_TEST_P(CalTech, HOG)
     cv::cuda::GpuMat d_img(img);
     cv::Mat markedImage(img.clone());

-    cv::cuda::HOGDescriptor d_hog;
-    d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
-    d_hog.nlevels = d_hog.nlevels + 32;
+    cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+    d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
+    d_hog->setNumLevels(d_hog->getNumLevels() + 32);

     std::vector<cv::Rect> found_locations;
-    d_hog.detectMultiScale(d_img, found_locations);
+    d_hog->detectMultiScale(d_img, found_locations);

 #if defined (LOG_CASCADE_STATISTIC)
     for (int i = 0; i < (int)found_locations.size(); i++)
@@ -326,7 +258,8 @@ CUDA_TEST_P(CalTech, HOG)
         cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
     }

-    cv::imshow("Res", markedImage); cv::waitKey();
+    cv::imshow("Res", markedImage);
+    cv::waitKey();
 #endif
 }

@@ -244,19 +244,13 @@ void App::run()
     Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
     Size win_stride(args.win_stride_width, args.win_stride_height);

-    // Create HOG descriptors and detectors here
-    vector<float> detector;
-    if (win_size == Size(64, 128))
-        detector = cv::cuda::HOGDescriptor::getPeopleDetector64x128();
-    else
-        detector = cv::cuda::HOGDescriptor::getPeopleDetector48x96();
-
-    cv::cuda::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
-                                    cv::cuda::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
-                                    cv::cuda::HOGDescriptor::DEFAULT_NLEVELS);
-    cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
-                              HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
-    gpu_hog.setSVMDetector(detector);
+    cv::Ptr<cv::cuda::HOG> gpu_hog = cv::cuda::HOG::create(win_size);
+    cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9);
+
+    // Create HOG descriptors and detectors here
+    Mat detector = gpu_hog->getDefaultPeopleDetector();
+    gpu_hog->setSVMDetector(detector);
     cpu_hog.setSVMDetector(detector);

     while (running)
@@ -307,9 +301,6 @@ void App::run()
         else img = img_aux;
         img_to_show = img;

-        gpu_hog.nlevels = nlevels;
-        cpu_hog.nlevels = nlevels;
-
         vector<Rect> found;

         // Perform HOG classification
@@ -317,11 +308,19 @@ void App::run()
         if (use_gpu)
         {
             gpu_img.upload(img);
-            gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
-                                     Size(0, 0), scale, gr_threshold);
+            gpu_hog->setNumLevels(nlevels);
+            gpu_hog->setHitThreshold(hit_threshold);
+            gpu_hog->setWinStride(win_stride);
+            gpu_hog->setScaleFactor(scale);
+            gpu_hog->setGroupThreshold(gr_threshold);
+            gpu_hog->detectMultiScale(gpu_img, found);
         }
-        else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
-                                      Size(0, 0), scale, gr_threshold);
+        else
+        {
+            cpu_hog.nlevels = nlevels;
+            cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
                                      Size(0, 0), scale, gr_threshold);
+        }
         hogWorkEnd();

         // Draw positive classified windows