Merge pull request #3600 from jet47:cuda-objdetect-module

Vadim Pisarevsky 2015-01-20 13:29:32 +00:00
commit 95ecdc3af9
23 changed files with 2809 additions and 2397 deletions


@ -6,4 +6,4 @@ set(the_description "CUDA-accelerated Computer Vision")
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 /wd4515 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
-ocv_define_module(cuda opencv_calib3d opencv_objdetect opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)
+ocv_define_module(cuda opencv_calib3d opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)


@ -53,274 +53,11 @@
@addtogroup cuda
@{
@defgroup cuda_calib3d Camera Calibration and 3D Reconstruction
@defgroup cuda_objdetect Object Detection
@}
*/
namespace cv { namespace cuda {
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
//! @addtogroup cuda_objdetect
//! @{
struct CV_EXPORTS HOGConfidence
{
double scale;
std::vector<Point> locations;
std::vector<double> confidences;
std::vector<double> part_scores[4];
};
/** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.
Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much
as possible.
@note
- An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/cpp/peopledetect.cpp
- A CUDA example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/gpu/hog.cpp
- (Python) An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/python2/peopledetect.py
*/
struct CV_EXPORTS HOGDescriptor
{
enum { DEFAULT_WIN_SIGMA = -1 };
enum { DEFAULT_NLEVELS = 64 };
enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
/** @brief Creates the HOG descriptor and detector.
@param win_size Detection window size. Align to block size and block stride.
@param block_size Block size in pixels. Align to cell size. Only (16,16) is supported for now.
@param block_stride Block stride. It must be a multiple of cell size.
@param cell_size Cell size. Only (8, 8) is supported for now.
@param nbins Number of bins. Only 9 bins per cell are supported for now.
@param win_sigma Gaussian smoothing window parameter.
@param threshold_L2hys L2-Hys normalization method shrinkage.
@param gamma_correction Flag to specify whether the gamma correction preprocessing is required or
not.
@param nlevels Maximum number of detection window increases.
*/
HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
double threshold_L2hys=0.2, bool gamma_correction=true,
int nlevels=DEFAULT_NLEVELS);
/** @brief Returns the number of coefficients required for the classification.
*/
size_t getDescriptorSize() const;
/** @brief Returns the block histogram size.
*/
size_t getBlockHistogramSize() const;
/** @brief Sets coefficients for the linear SVM classifier.
*/
void setSVMDetector(const std::vector<float>& detector);
/** @brief Returns coefficients of the classifier trained for people detection (for default window size).
*/
static std::vector<float> getDefaultPeopleDetector();
/** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
*/
static std::vector<float> getPeopleDetector48x96();
/** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
*/
static std::vector<float> getPeopleDetector64x128();
/** @brief Performs object detection without a multi-scale window.
@param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
@param found_locations Left-top corner points of detected objects boundaries.
@param hit_threshold Threshold for the distance between features and SVM classifying plane.
Usually it is 0 and should be specified in the detector coefficients (as the last free
coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
manually here.
@param win_stride Window stride. It must be a multiple of block stride.
@param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
*/
void detect(const GpuMat& img, std::vector<Point>& found_locations,
double hit_threshold=0, Size win_stride=Size(),
Size padding=Size());
/** @brief Performs object detection with a multi-scale window.
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
@param found_locations Detected objects boundaries.
@param hit_threshold Threshold for the distance between features and SVM classifying plane. See
cuda::HOGDescriptor::detect for details.
@param win_stride Window stride. It must be a multiple of block stride.
@param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
@param scale0 Coefficient of the detection window increase.
@param group_threshold Coefficient to regulate the similarity threshold. When detected, some
objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles .
*/
void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
double hit_threshold=0, Size win_stride=Size(),
Size padding=Size(), double scale0=1.05,
int group_threshold=2);
void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
double hit_threshold, Size win_stride, Size padding,
std::vector<HOGConfidence> &conf_out, int group_threshold);
/** @brief Returns block descriptors computed for the whole image.
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
@param win_stride Window stride. It must be a multiple of block stride.
@param descriptors 2D array of descriptors.
@param descr_format Descriptor storage format:
- **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
- **DESCR_FORMAT_COL_BY_COL** - Column-major order.
The function is mainly used to learn the classifier.
*/
void getDescriptors(const GpuMat& img, Size win_stride,
GpuMat& descriptors,
int descr_format=DESCR_FORMAT_COL_BY_COL);
Size win_size;
Size block_size;
Size block_stride;
Size cell_size;
int nbins;
double win_sigma;
double threshold_L2hys;
bool gamma_correction;
int nlevels;
protected:
void computeBlockHistograms(const GpuMat& img);
void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
double getWinSigma() const;
bool checkDetectorSize() const;
static int numPartsWithin(int size, int part_size, int stride);
static Size numPartsWithin(Size size, Size part_size, Size stride);
// Coefficients of the separating plane
float free_coef;
GpuMat detector;
// Results of the last classification step
GpuMat labels, labels_buf;
Mat labels_host;
// Results of the last histogram evaluation step
GpuMat block_hists, block_hists_buf;
// Gradients computation results
GpuMat grad, qangle, grad_buf, qangle_buf;
// Returns a sub-buffer of the required size; reallocates the buffer if necessary.
static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
std::vector<GpuMat> image_scales;
};
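For orientation, a minimal usage sketch (not part of the patch) of the legacy cuda::HOGDescriptor interface declared above; it mirrors the old code that the perf test below stops using. The function name and the fixed win_stride are illustrative.

#include <vector>
#include "opencv2/core/cuda.hpp"
#include "opencv2/cuda.hpp"

// Sketch only: people detection with the legacy (pre-split) CUDA HOG API.
void detectPeopleLegacy(const cv::cuda::GpuMat& d_img)      // CV_8UC1 or CV_8UC4
{
    cv::cuda::HOGDescriptor hog;                            // defaults: 64x128 window, 9 bins
    hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());

    std::vector<cv::Rect> found;
    hog.detectMultiScale(d_img, found);                     // multi-scale detection on the GPU

    cv::cuda::GpuMat descriptors;                           // dense block descriptors (e.g. for training)
    hog.getDescriptors(d_img, cv::Size(8, 8), descriptors); // win_stride must be a multiple of block stride
}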
//////////////////////////// CascadeClassifier ////////////////////////////
/** @brief Cascade classifier class used for object detection. Supports HAAR and LBP cascades.
@note
- A cascade classifier example can be found at
opencv_source_code/samples/gpu/cascadeclassifier.cpp
- An NVIDIA API-specific cascade classifier example can be found at
opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
*/
class CV_EXPORTS CascadeClassifier_CUDA
{
public:
CascadeClassifier_CUDA();
/** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter.
@param filename Name of the file from which the classifier is loaded. Only the old haar classifier
(trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
type of OpenCV XML cascade supported for LBP.
*/
CascadeClassifier_CUDA(const String& filename);
~CascadeClassifier_CUDA();
/** @brief Checks whether the classifier is loaded or not.
*/
bool empty() const;
/** @brief Loads the classifier from a file. The previous content is destroyed.
@param filename Name of the file from which the classifier is loaded. Only the old haar classifier
(trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
type of OpenCV XML cascade supported for LBP.
*/
bool load(const String& filename);
/** @brief Destroys the loaded classifier.
*/
void release();
/** @overload */
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
/** @brief Detects objects of different sizes in the input image.
@param image Matrix of type CV_8U containing an image where objects should be detected.
@param objectsBuf Buffer to store detected objects (rectangles). If it is empty, it is allocated
with the default size. If not empty, the function searches not more than N objects, where
N = sizeof(objectsBuf's data)/sizeof(cv::Rect).
@param maxObjectSize Maximum possible object size. Objects larger than that are ignored. Used for
second signature and supported only for LBP cascades.
@param scaleFactor Parameter specifying how much the image size is reduced at each image scale.
@param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have
to retain it.
@param minSize Minimum possible object size. Objects smaller than that are ignored.
The detected objects are returned as a list of rectangles.
The function returns the number of detected objects, so you can retrieve them as in the following
example:
@code
cuda::CascadeClassifier_CUDA cascade_gpu(...);
Mat image_cpu = imread(...);
GpuMat image_gpu(image_cpu);
GpuMat objbuf;
int detections_number = cascade_gpu.detectMultiScale( image_gpu,
objbuf, 1.2, minNeighbors);
Mat obj_host;
// download only detected number of rectangles
objbuf.colRange(0, detections_number).download(obj_host);
Rect* faces = obj_host.ptr<Rect>();
for(int i = 0; i < detections_number; ++i)
cv::rectangle(image_cpu, faces[i], Scalar(255));
imshow("Faces", image_cpu);
@endcode
@sa CascadeClassifier::detectMultiScale
*/
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
bool findLargestObject;
bool visualizeInPlace;
Size getClassifierSize() const;
private:
struct CascadeClassifierImpl;
CascadeClassifierImpl* impl;
struct HaarCascade;
struct LbpCascade;
friend class CascadeClassifier_CUDA_LBP;
};
//! @} cuda_objdetect
//////////////////////////// Labeling ////////////////////////////
//! @addtogroup cuda


@ -56,7 +56,6 @@
#include "opencv2/cuda.hpp" #include "opencv2/cuda.hpp"
#include "opencv2/calib3d.hpp" #include "opencv2/calib3d.hpp"
#include "opencv2/objdetect.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY #ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined

File diff suppressed because it is too large


@ -47,7 +47,6 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudawarping.hpp" #include "opencv2/cudawarping.hpp"
#include "opencv2/calib3d.hpp" #include "opencv2/calib3d.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/core/private.cuda.hpp" #include "opencv2/core/private.cuda.hpp"
#include "opencv2/core/utility.hpp" #include "opencv2/core/utility.hpp"


@ -60,7 +60,6 @@
#include "opencv2/core.hpp" #include "opencv2/core.hpp"
#include "opencv2/core/opengl.hpp" #include "opencv2/core/opengl.hpp"
#include "opencv2/calib3d.hpp" #include "opencv2/calib3d.hpp"
#include "opencv2/objdetect.hpp"
#include "cvconfig.h" #include "cvconfig.h"


@ -0,0 +1,9 @@
if(IOS OR (NOT HAVE_CUDA AND NOT BUILD_CUDA_STUBS))
ocv_module_disable(cudaobjdetect)
endif()
set(the_description "CUDA-accelerated Object Detection")
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)
ocv_define_module(cudaobjdetect opencv_objdetect opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)


@ -0,0 +1,288 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDAOBJDETECT_HPP__
#define __OPENCV_CUDAOBJDETECT_HPP__
#ifndef __cplusplus
# error cudaobjdetect.hpp header must be compiled as C++
#endif
#include "opencv2/core/cuda.hpp"
/**
@addtogroup cuda
@{
@defgroup cudaobjdetect Object Detection
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cudaobjdetect
//! @{
//
// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector
//
/** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.
@note
- An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/cpp/peopledetect.cpp
- A CUDA example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/gpu/hog.cpp
- (Python) An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/python2/peopledetect.py
*/
class CV_EXPORTS HOG : public Algorithm
{
public:
enum
{
DESCR_FORMAT_ROW_BY_ROW,
DESCR_FORMAT_COL_BY_COL
};
/** @brief Creates the HOG descriptor and detector.
@param win_size Detection window size. Align to block size and block stride.
@param block_size Block size in pixels. Align to cell size. Only (16,16) is supported for now.
@param block_stride Block stride. It must be a multiple of cell size.
@param cell_size Cell size. Only (8, 8) is supported for now.
@param nbins Number of bins. Only 9 bins per cell are supported for now.
*/
static Ptr<HOG> create(Size win_size = Size(64, 128),
Size block_size = Size(16, 16),
Size block_stride = Size(8, 8),
Size cell_size = Size(8, 8),
int nbins = 9);
//! Gaussian smoothing window parameter.
virtual void setWinSigma(double win_sigma) = 0;
virtual double getWinSigma() const = 0;
//! L2-Hys normalization method shrinkage.
virtual void setL2HysThreshold(double threshold_L2hys) = 0;
virtual double getL2HysThreshold() const = 0;
//! Flag to specify whether the gamma correction preprocessing is required or not.
virtual void setGammaCorrection(bool gamma_correction) = 0;
virtual bool getGammaCorrection() const = 0;
//! Maximum number of detection window increases.
virtual void setNumLevels(int nlevels) = 0;
virtual int getNumLevels() const = 0;
//! Threshold for the distance between features and SVM classifying plane.
//! Usually it is 0 and should be specified in the detector coefficients (as the last free
//! coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
//! manually here.
virtual void setHitThreshold(double hit_threshold) = 0;
virtual double getHitThreshold() const = 0;
//! Window stride. It must be a multiple of block stride.
virtual void setWinStride(Size win_stride) = 0;
virtual Size getWinStride() const = 0;
//! Coefficient of the detection window increase.
virtual void setScaleFactor(double scale0) = 0;
virtual double getScaleFactor() const = 0;
//! Coefficient to regulate the similarity threshold. When detected, some
//! objects can be covered by many rectangles. 0 means not to perform grouping.
//! See groupRectangles.
virtual void setGroupThreshold(int group_threshold) = 0;
virtual int getGroupThreshold() const = 0;
//! Descriptor storage format:
//! - **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
//! - **DESCR_FORMAT_COL_BY_COL** - Column-major order.
virtual void setDescriptorFormat(int descr_format) = 0;
virtual int getDescriptorFormat() const = 0;
/** @brief Returns the number of coefficients required for the classification.
*/
virtual size_t getDescriptorSize() const = 0;
/** @brief Returns the block histogram size.
*/
virtual size_t getBlockHistogramSize() const = 0;
/** @brief Sets coefficients for the linear SVM classifier.
*/
virtual void setSVMDetector(InputArray detector) = 0;
/** @brief Returns coefficients of the classifier trained for people detection.
*/
virtual Mat getDefaultPeopleDetector() const = 0;
/** @brief Performs object detection without a multi-scale window.
@param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
@param found_locations Left-top corner points of detected objects boundaries.
@param confidences Optional output array for confidences.
*/
virtual void detect(InputArray img,
std::vector<Point>& found_locations,
std::vector<double>* confidences = NULL) = 0;
/** @brief Performs object detection with a multi-scale window.
@param img Source image. See cuda::HOG::detect for type limitations.
@param found_locations Detected objects boundaries.
@param confidences Optional output array for confidences.
*/
virtual void detectMultiScale(InputArray img,
std::vector<Rect>& found_locations,
std::vector<double>* confidences = NULL) = 0;
/** @brief Returns block descriptors computed for the whole image.
@param img Source image. See cuda::HOG::detect for type limitations.
@param descriptors 2D array of descriptors.
@param stream CUDA stream.
*/
virtual void compute(InputArray img,
OutputArray descriptors,
Stream& stream = Stream::Null()) = 0;
};
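A minimal sketch of the intended use of the new interface (it follows the updated perf test later in this diff; the free function name is illustrative):

#include <vector>
#include "opencv2/core/cuda.hpp"
#include "opencv2/cudaobjdetect.hpp"

// Sketch only: people detection with the new cv::cuda::HOG class.
void detectPeople(const cv::cuda::GpuMat& d_img)            // CV_8UC1 or CV_8UC4
{
    cv::Ptr<cv::cuda::HOG> hog = cv::cuda::HOG::create();   // default 64x128 window, 9 bins
    hog->setSVMDetector(hog->getDefaultPeopleDetector());   // built-in people detector

    std::vector<cv::Rect> found;
    std::vector<double> confidences;
    hog->detectMultiScale(d_img, found, &confidences);      // confidences are optional

    cv::cuda::GpuMat descriptors;                           // block descriptors for classifier training
    hog->compute(d_img, descriptors);
}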
//
// CascadeClassifier
//
/** @brief Cascade classifier class used for object detection. Supports HAAR and LBP cascades.
@note
- A cascade classifier example can be found at
opencv_source_code/samples/gpu/cascadeclassifier.cpp
- An NVIDIA API-specific cascade classifier example can be found at
opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
*/
class CV_EXPORTS CascadeClassifier : public Algorithm
{
public:
/** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter.
@param filename Name of the file from which the classifier is loaded. Only the old haar classifier
(trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
type of OpenCV XML cascade supported for LBP.
*/
static Ptr<CascadeClassifier> create(const String& filename);
/** @overload
*/
static Ptr<CascadeClassifier> create(const FileStorage& file);
//! Maximum possible object size. Objects larger than that are ignored. Used for
//! second signature and supported only for LBP cascades.
virtual void setMaxObjectSize(Size maxObjectSize) = 0;
virtual Size getMaxObjectSize() const = 0;
//! Minimum possible object size. Objects smaller than that are ignored.
virtual void setMinObjectSize(Size minSize) = 0;
virtual Size getMinObjectSize() const = 0;
//! Parameter specifying how much the image size is reduced at each image scale.
virtual void setScaleFactor(double scaleFactor) = 0;
virtual double getScaleFactor() const = 0;
//! Parameter specifying how many neighbors each candidate rectangle should have
//! to retain it.
virtual void setMinNeighbors(int minNeighbors) = 0;
virtual int getMinNeighbors() const = 0;
virtual void setFindLargestObject(bool findLargestObject) = 0;
virtual bool getFindLargestObject() = 0;
virtual void setMaxNumObjects(int maxNumObjects) = 0;
virtual int getMaxNumObjects() const = 0;
virtual Size getClassifierSize() const = 0;
/** @brief Detects objects of different sizes in the input image.
@param image Matrix of type CV_8U containing an image where objects should be detected.
@param objects Buffer to store detected objects (rectangles).
@param stream CUDA stream.
To get final array of detected objects use CascadeClassifier::convert method.
@code
Ptr<cuda::CascadeClassifier> cascade_gpu = cuda::CascadeClassifier::create(...);
Mat image_cpu = imread(...);
GpuMat image_gpu(image_cpu);
GpuMat objbuf;
cascade_gpu->detectMultiScale(image_gpu, objbuf);
std::vector<Rect> faces;
cascade_gpu->convert(objbuf, faces);
for(size_t i = 0; i < faces.size(); ++i)
cv::rectangle(image_cpu, faces[i], Scalar(255));
imshow("Faces", image_cpu);
@endcode
@sa CascadeClassifier::detectMultiScale
*/
virtual void detectMultiScale(InputArray image,
OutputArray objects,
Stream& stream = Stream::Null()) = 0;
/** @brief Converts objects array from internal representation to standard vector.
@param gpu_objects Objects array in internal representation.
@param objects Resulting array.
*/
virtual void convert(OutputArray gpu_objects,
std::vector<Rect>& objects) = 0;
};
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDAOBJDETECT_HPP__ */
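The setters on cuda::CascadeClassifier replace the extra parameters of the old detectMultiScale overloads; below is a hedged sketch of the resulting call sequence (the cascade file name and function name are illustrative).

#include <vector>
#include "opencv2/core/cuda.hpp"
#include "opencv2/cudaobjdetect.hpp"

// Sketch only: configure via setters, detect into a GpuMat, then convert.
std::vector<cv::Rect> detectFaces(const cv::cuda::GpuMat& d_gray)
{
    cv::Ptr<cv::cuda::CascadeClassifier> cascade =
        cv::cuda::CascadeClassifier::create("lbpcascade_frontalface.xml");

    cascade->setScaleFactor(1.2);                 // pyramid scale step
    cascade->setMinNeighbors(4);                  // candidate grouping threshold
    cascade->setMinObjectSize(cv::Size(24, 24));  // ignore smaller candidates

    cv::cuda::GpuMat d_objects;                   // detections in internal representation
    cascade->detectMultiScale(d_gray, d_objects);

    std::vector<cv::Rect> faces;
    cascade->convert(d_objects, faces);           // download into a standard vector
    return faces;
}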


@ -0,0 +1,47 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
using namespace perf;
CV_PERF_TEST_CUDA_MAIN(cudaobjdetect)


@ -71,10 +71,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
const cv::cuda::GpuMat d_img(img);
std::vector<cv::Rect> gpu_found_locations;
-cv::cuda::HOGDescriptor d_hog;
-d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
-TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
+cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
+TEST_CYCLE() d_hog->detectMultiScale(d_img, gpu_found_locations);
SANITY_CHECK(gpu_found_locations);
}
@ -82,8 +82,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
{
std::vector<cv::Rect> cpu_found_locations;
+cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
cv::HOGDescriptor hog;
-hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+hog.setSVMDetector(d_hog->getDefaultPeopleDetector());
TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
@ -105,18 +107,17 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
if (PERF_RUN_CUDA())
{
-cv::cuda::CascadeClassifier_CUDA d_cascade;
-ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
+cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
+    cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
const cv::cuda::GpuMat d_img(img);
cv::cuda::GpuMat objects_buffer;
-int detections_num = 0;
-TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
+TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
-std::vector<cv::Rect> gpu_rects(detections_num);
-cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
-objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
+std::vector<cv::Rect> gpu_rects;
+d_cascade->convert(objects_buffer, gpu_rects);
cv::groupRectangles(gpu_rects, 3, 0.2);
SANITY_CHECK(gpu_rects);
}
@ -144,18 +145,17 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
if (PERF_RUN_CUDA())
{
-cv::cuda::CascadeClassifier_CUDA d_cascade;
-ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
+cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
+    cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
const cv::cuda::GpuMat d_img(img);
cv::cuda::GpuMat objects_buffer;
-int detections_num = 0;
-TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
+TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
-std::vector<cv::Rect> gpu_rects(detections_num);
-cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
-objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
+std::vector<cv::Rect> gpu_rects;
+d_cascade->convert(objects_buffer, gpu_rects);
cv::groupRectangles(gpu_rects, 3, 0.2);
SANITY_CHECK(gpu_rects);
}


@ -0,0 +1,64 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include "opencv2/ts.hpp"
#include "opencv2/ts/cuda_perf.hpp"
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/objdetect.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif
#endif


@ -48,160 +48,185 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA() { throw_no_cuda(); } Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA(const String&) { throw_no_cuda(); } Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
cv::cuda::CascadeClassifier_CUDA::~CascadeClassifier_CUDA() { throw_no_cuda(); }
bool cv::cuda::CascadeClassifier_CUDA::empty() const { throw_no_cuda(); return true; }
bool cv::cuda::CascadeClassifier_CUDA::load(const String&) { throw_no_cuda(); return true; }
Size cv::cuda::CascadeClassifier_CUDA::getClassifierSize() const { throw_no_cuda(); return Size();}
void cv::cuda::CascadeClassifier_CUDA::release() { throw_no_cuda(); }
int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat&, GpuMat&, double, int, Size) {throw_no_cuda(); return -1;}
int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat&, GpuMat&, Size, Size, double, int) {throw_no_cuda(); return -1;}
#else #else
struct cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl //
// CascadeClassifierBase
//
namespace
{ {
public: class CascadeClassifierBase : public cuda::CascadeClassifier
CascadeClassifierImpl(){} {
virtual ~CascadeClassifierImpl(){} public:
CascadeClassifierBase();
virtual unsigned int process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors, virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize, cv::Size maxObjectSize) = 0; virtual Size getMaxObjectSize() const { return maxObjectSize_; }
virtual cv::Size getClassifierCvSize() const = 0; virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
virtual bool read(const String& classifierAsXml) = 0; virtual Size getMinObjectSize() const { return minObjectSize_; }
};
#ifndef HAVE_OPENCV_CUDALEGACY virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
virtual double getScaleFactor() const { return scaleFactor_; }
struct cv::cuda::CascadeClassifier_CUDA::HaarCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
virtual int getMinNeighbors() const { return minNeighbors_; }
virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
virtual bool getFindLargestObject() { return findLargestObject_; }
virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
virtual int getMaxNumObjects() const { return maxNumObjects_; }
protected:
Size maxObjectSize_;
Size minObjectSize_;
double scaleFactor_;
int minNeighbors_;
bool findLargestObject_;
int maxNumObjects_;
};
CascadeClassifierBase::CascadeClassifierBase() :
maxObjectSize_(),
minObjectSize_(),
scaleFactor_(1.2),
minNeighbors_(4),
findLargestObject_(false),
maxNumObjects_(100)
{
}
}
//
// HaarCascade
//
#ifdef HAVE_OPENCV_CUDALEGACY
namespace
{ {
public: class HaarCascade_Impl : public CascadeClassifierBase
HaarCascade()
{ {
throw_no_cuda(); public:
explicit HaarCascade_Impl(const String& filename);
virtual Size getClassifierSize() const;
virtual void detectMultiScale(InputArray image,
OutputArray objects,
Stream& stream);
virtual void convert(OutputArray gpu_objects,
std::vector<Rect>& objects);
private:
NCVStatus load(const String& classifierFile);
NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);
Size lastAllocatedFrameSize;
Ptr<NCVMemStackAllocator> gpuAllocator;
Ptr<NCVMemStackAllocator> cpuAllocator;
cudaDeviceProp devProp;
NCVStatus ncvStat;
Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;
HaarClassifierCascadeDescriptor haar;
Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
};
static void NCVDebugOutputHandler(const String &msg)
{
CV_Error(Error::GpuApiCallError, msg.c_str());
} }
unsigned int process(const GpuMat&, GpuMat&, float, int, bool, bool, cv::Size, cv::Size) HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
{ lastAllocatedFrameSize(-1, -1)
throw_no_cuda();
return 0;
}
cv::Size getClassifierCvSize() const
{
throw_no_cuda();
return cv::Size();
}
bool read(const String&)
{
throw_no_cuda();
return false;
}
};
#else
struct cv::cuda::CascadeClassifier_CUDA::HaarCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
{
public:
HaarCascade() : lastAllocatedFrameSize(-1, -1)
{ {
ncvSetDebugOutputHandler(NCVDebugOutputHandler); ncvSetDebugOutputHandler(NCVDebugOutputHandler);
}
bool read(const String& filename)
{
ncvSafeCall( load(filename) ); ncvSafeCall( load(filename) );
return true;
} }
NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors, Size HaarCascade_Impl::getClassifierSize() const
bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize,
/*out*/unsigned int& numDetections)
{ {
calculateMemReqsAndAllocate(src.size()); return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
NCVMemPtr src_beg;
src_beg.ptr = (void*)src.ptr<Ncv8u>();
src_beg.memtype = NCVMemoryTypeDevice;
NCVMemSegment src_seg;
src_seg.begin = src_beg;
src_seg.size = src.step * src.rows;
NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
CV_Assert(objects.rows == 1);
NCVMemPtr objects_beg;
objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
objects_beg.memtype = NCVMemoryTypeDevice;
NCVMemSegment objects_seg;
objects_seg.begin = objects_beg;
objects_seg.size = objects.step * objects.rows;
NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
NcvSize32u roi;
roi.width = d_src.width();
roi.height = d_src.height();
NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
Ncv32u flags = 0;
flags |= findLargestObject? NCVPipeObjDet_FindLargestObject : 0;
flags |= visualizeInPlace ? NCVPipeObjDet_VisualizeInPlace : 0;
ncvStat = ncvDetectObjectsMultiScale_device(
d_src, roi, d_rects, numDetections, haar, *h_haarStages,
*d_haarStages, *d_haarNodes, *d_haarFeatures,
winMinSize,
minNeighbors,
scaleStep, 1,
flags,
*gpuAllocator, *cpuAllocator, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
return NCV_SUCCESS;
} }
unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors, void HaarCascade_Impl::detectMultiScale(InputArray _image,
bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size /*maxObjectSize*/) OutputArray _objects,
Stream& stream)
{ {
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U); const GpuMat image = _image.getGpuMat();
const int defaultObjSearchNum = 100; CV_Assert( image.depth() == CV_8U);
if (objectsBuf.empty()) CV_Assert( scaleFactor_ > 1 );
CV_Assert( !stream );
Size ncvMinSize = getClassifierSize();
if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height)
{ {
objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type); ncvMinSize.width = minObjectSize_.width;
ncvMinSize.height = minObjectSize_.height;
} }
cv::Size ncvMinSize = this->getClassifierCvSize(); BufferPool pool(stream);
GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);
if (ncvMinSize.width < minSize.width && ncvMinSize.height < minSize.height)
{
ncvMinSize.width = minSize.width;
ncvMinSize.height = minSize.height;
}
unsigned int numDetections; unsigned int numDetections;
ncvSafeCall(this->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections)); ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );
return numDetections; if (numDetections > 0)
{
objectsBuf.colRange(0, numDetections).copyTo(_objects);
}
else
{
_objects.release();
}
} }
cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); } void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
{
if (_gpu_objects.empty())
{
objects.clear();
return;
}
private: Mat gpu_objects;
static void NCVDebugOutputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); } if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
{
_gpu_objects.getGpuMat().download(gpu_objects);
}
else
{
gpu_objects = _gpu_objects.getMat();
}
NCVStatus load(const String& classifierFile) CV_Assert( gpu_objects.rows == 1 );
CV_Assert( gpu_objects.type() == DataType<Rect>::type );
Rect* ptr = gpu_objects.ptr<Rect>();
objects.assign(ptr, ptr + gpu_objects.cols);
}
NCVStatus HaarCascade_Impl::load(const String& classifierFile)
{ {
int devId = cv::cuda::getDevice(); int devId = cv::cuda::getDevice();
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR); ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
@ -246,7 +271,7 @@ private:
return NCV_SUCCESS; return NCV_SUCCESS;
} }
NCVStatus calculateMemReqsAndAllocate(const Size& frameSize) NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
{ {
if (lastAllocatedFrameSize == frameSize) if (lastAllocatedFrameSize == frameSize)
{ {
@ -289,88 +314,62 @@ private:
return NCV_SUCCESS; return NCV_SUCCESS;
} }
cudaDeviceProp devProp; NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
NCVStatus ncvStat; {
calculateMemReqsAndAllocate(src.size());
Ptr<NCVMemNativeAllocator> gpuCascadeAllocator; NCVMemPtr src_beg;
Ptr<NCVMemNativeAllocator> cpuCascadeAllocator; src_beg.ptr = (void*)src.ptr<Ncv8u>();
src_beg.memtype = NCVMemoryTypeDevice;
Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages; NCVMemSegment src_seg;
Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes; src_seg.begin = src_beg;
Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures; src_seg.size = src.step * src.rows;
HaarClassifierCascadeDescriptor haar; NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages; CV_Assert(objects.rows == 1);
Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
Size lastAllocatedFrameSize; NCVMemPtr objects_beg;
objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
objects_beg.memtype = NCVMemoryTypeDevice;
Ptr<NCVMemStackAllocator> gpuAllocator; NCVMemSegment objects_seg;
Ptr<NCVMemStackAllocator> cpuAllocator; objects_seg.begin = objects_beg;
objects_seg.size = objects.step * objects.rows;
NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
virtual ~HaarCascade(){} NcvSize32u roi;
}; roi.width = d_src.width();
roi.height = d_src.height();
NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
Ncv32u flags = 0;
flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;
ncvStat = ncvDetectObjectsMultiScale_device(
d_src, roi, d_rects, numDetections, haar, *h_haarStages,
*d_haarStages, *d_haarNodes, *d_haarFeatures,
winMinSize,
minNeighbors_,
scaleFactor_, 1,
flags,
*gpuAllocator, *cpuAllocator, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
return NCV_SUCCESS;
}
}
#endif #endif
cv::Size operator -(const cv::Size& a, const cv::Size& b) //
{ // LbpCascade
return cv::Size(a.width - b.width, a.height - b.height); //
}
cv::Size operator +(const cv::Size& a, const int& i)
{
return cv::Size(a.width + i, a.height + i);
}
cv::Size operator *(const cv::Size& a, const float& f)
{
return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
}
cv::Size operator /(const cv::Size& a, const float& f)
{
return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
}
bool operator <=(const cv::Size& a, const cv::Size& b)
{
return a.width <= b.width && a.height <= b.width;
}
struct PyrLavel
{
PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
{
do
{
order = _order;
scale = pow(_scale, order);
sFrame = frame / scale;
workArea = sFrame - window + 1;
sWindow = window * scale;
_order++;
} while (sWindow <= minObjectSize);
}
bool isFeasible(cv::Size maxObj)
{
return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
}
PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
{
return PyrLavel(order + 1, factor, frame, window, minObjectSize);
}
int order;
float scale;
cv::Size sFrame;
cv::Size workArea;
cv::Size sWindow;
};
namespace cv { namespace cuda { namespace device namespace cv { namespace cuda { namespace device
{ {
@ -394,42 +393,154 @@ namespace cv { namespace cuda { namespace device
unsigned int* classified, unsigned int* classified,
PtrStepSzi integral); PtrStepSzi integral);
void connectedConmonents(PtrStepSz<int4> candidates, int ncandidates, PtrStepSz<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses); void connectedConmonents(PtrStepSz<int4> candidates,
int ncandidates,
PtrStepSz<int4> objects,
int groupThreshold,
float grouping_eps,
unsigned int* nclasses);
} }
}}} }}}
struct cv::cuda::CascadeClassifier_CUDA::LbpCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl namespace
{ {
public: cv::Size operator -(const cv::Size& a, const cv::Size& b)
struct Stage
{ {
int first; return cv::Size(a.width - b.width, a.height - b.height);
int ntrees; }
float threshold;
cv::Size operator +(const cv::Size& a, const int& i)
{
return cv::Size(a.width + i, a.height + i);
}
cv::Size operator *(const cv::Size& a, const float& f)
{
return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
}
cv::Size operator /(const cv::Size& a, const float& f)
{
return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
}
bool operator <=(const cv::Size& a, const cv::Size& b)
{
return a.width <= b.width && a.height <= b.width;
}
struct PyrLavel
{
PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
{
do
{
order = _order;
scale = pow(_scale, order);
sFrame = frame / scale;
workArea = sFrame - window + 1;
sWindow = window * scale;
_order++;
} while (sWindow <= minObjectSize);
}
bool isFeasible(cv::Size maxObj)
{
return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
}
PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
{
return PyrLavel(order + 1, factor, frame, window, minObjectSize);
}
int order;
float scale;
cv::Size sFrame;
cv::Size workArea;
cv::Size sWindow;
}; };
LbpCascade(){} class LbpCascade_Impl : public CascadeClassifierBase
virtual ~LbpCascade(){}
virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool /*findLargestObject*/,
bool /*visualizeInPlace*/, cv::Size minObjectSize, cv::Size maxObjectSize)
{ {
CV_Assert(scaleFactor > 1 && image.depth() == CV_8U); public:
explicit LbpCascade_Impl(const FileStorage& file);
virtual Size getClassifierSize() const { return NxM; }
virtual void detectMultiScale(InputArray image,
OutputArray objects,
Stream& stream);
virtual void convert(OutputArray gpu_objects,
std::vector<Rect>& objects);
private:
bool load(const FileNode &root);
void allocateBuffers(cv::Size frame);
private:
struct Stage
{
int first;
int ntrees;
float threshold;
};
enum stage { BOOST = 0 };
enum feature { LBP = 1, HAAR = 2 };
static const stage stageType = BOOST;
static const feature featureType = LBP;
cv::Size NxM;
bool isStumps;
int ncategories;
int subsetSize;
int nodeStep;
// gpu representation of classifier
GpuMat stage_mat;
GpuMat trees_mat;
GpuMat nodes_mat;
GpuMat leaves_mat;
GpuMat subsets_mat;
GpuMat features_mat;
GpuMat integral;
GpuMat integralBuffer;
GpuMat resuzeBuffer;
GpuMat candidates;
static const int integralFactor = 4;
};
LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file)
{
load(file.getFirstTopLevelNode());
}
void LbpCascade_Impl::detectMultiScale(InputArray _image,
OutputArray _objects,
Stream& stream)
{
const GpuMat image = _image.getGpuMat();
CV_Assert( image.depth() == CV_8U);
CV_Assert( scaleFactor_ > 1 );
CV_Assert( !stream );
// const int defaultObjSearchNum = 100;
const float grouping_eps = 0.2f; const float grouping_eps = 0.2f;
if( !objects.empty() && objects.depth() == CV_32S) BufferPool pool(stream);
objects.reshape(4, 1); GpuMat objects = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);
else
objects.create(1 , image.cols >> 4, CV_32SC4);
// used for debug // used for debug
// candidates.setTo(cv::Scalar::all(0)); // candidates.setTo(cv::Scalar::all(0));
// objects.setTo(cv::Scalar::all(0)); // objects.setTo(cv::Scalar::all(0));
if (maxObjectSize == cv::Size()) if (maxObjectSize_ == cv::Size())
maxObjectSize = image.size(); maxObjectSize_ = image.size();
allocateBuffers(image.size()); allocateBuffers(image.size());
@ -437,9 +548,9 @@ public:
GpuMat dclassified(1, 1, CV_32S); GpuMat dclassified(1, 1, CV_32S);
cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) ); cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
PyrLavel level(0, scaleFactor, image.size(), NxM, minObjectSize); PyrLavel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);
while (level.isFeasible(maxObjectSize)) while (level.isFeasible(maxObjectSize_))
{ {
int acc = level.sFrame.width + 1; int acc = level.sFrame.width + 1;
float iniScale = level.scale; float iniScale = level.scale;
@ -449,7 +560,7 @@ public:
int total = 0, prev = 0; int total = 0, prev = 0;
while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize)) while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_))
{ {
// create suitable matrix headers
GpuMat src = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height)); GpuMat src = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
@ -464,7 +575,7 @@ public:
total += totalWidth * (level.workArea.height / step); total += totalWidth * (level.workArea.height / step);
// go to next pyramid level
level = level.next(scaleFactor, image.size(), NxM, minObjectSize); level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);
area = level.workArea; area = level.workArea;
step = (1 + (level.scale <= 2.f)); step = (1 + (level.scale <= 2.f));
@ -472,60 +583,55 @@ public:
acc += level.sFrame.width + 1; acc += level.sFrame.width + 1;
} }
device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral); leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
} }
if (groupThreshold <= 0 || objects.empty()) if (minNeighbors_ <= 0 || objects.empty())
return 0; return;
cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) ); cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>()); device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());
cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) ); cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
return classified;
}
virtual cv::Size getClassifierCvSize() const { return NxM; } if (classified > 0)
bool read(const String& classifierAsXml)
{
FileStorage fs(classifierAsXml, FileStorage::READ);
return fs.isOpened() ? read(fs.getFirstTopLevelNode()) : false;
}
private:
void allocateBuffers(cv::Size frame)
{
if (frame == cv::Size())
return;
if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
{ {
resuzeBuffer.create(frame, CV_8UC1); objects.colRange(0, classified).copyTo(_objects);
}
integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1); else
{
#ifdef HAVE_OPENCV_CUDALEGACY _objects.release();
NcvSize32u roiSize;
roiSize.width = frame.width;
roiSize.height = frame.height;
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
Ncv32u bufSize;
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
integralBuffer.create(1, bufSize, CV_8UC1);
#endif
candidates.create(1 , frame.width >> 1, CV_32SC4);
} }
} }
-    bool read(const FileNode &root)
+    void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
+    {
+        if (_gpu_objects.empty())
+        {
+            objects.clear();
+            return;
+        }
+
+        Mat gpu_objects;
+        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_objects.getGpuMat().download(gpu_objects);
+        }
+        else
+        {
+            gpu_objects = _gpu_objects.getMat();
+        }
+
+        CV_Assert( gpu_objects.rows == 1 );
+        CV_Assert( gpu_objects.type() == DataType<Rect>::type );
+
+        Rect* ptr = gpu_objects.ptr<Rect>();
+        objects.assign(ptr, ptr + gpu_objects.cols);
+    }
+
+    bool LbpCascade_Impl::load(const FileNode &root)
     {
         const char *CUDA_CC_STAGE_TYPE       = "stageType";
         const char *CUDA_CC_FEATURE_TYPE     = "featureType";
@ -666,92 +772,90 @@ private:
         return true;
     }

-    enum stage { BOOST = 0 };
-    enum feature { LBP = 1, HAAR = 2 };
-    static const stage stageType = BOOST;
-    static const feature featureType = LBP;
-
-    cv::Size NxM;
-    bool isStumps;
-    int ncategories;
-    int subsetSize;
-    int nodeStep;
-
-    // gpu representation of classifier
-    GpuMat stage_mat;
-    GpuMat trees_mat;
-    GpuMat nodes_mat;
-    GpuMat leaves_mat;
-    GpuMat subsets_mat;
-    GpuMat features_mat;
-
-    GpuMat integral;
-    GpuMat integralBuffer;
-    GpuMat resuzeBuffer;
-
-    GpuMat candidates;
-    static const int integralFactor = 4;
-};
-
-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA()
-    : findLargestObject(false), visualizeInPlace(false), impl(0) {}
-
-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA(const String& filename)
-    : findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); }
-
-cv::cuda::CascadeClassifier_CUDA::~CascadeClassifier_CUDA() { release(); }
-
-void cv::cuda::CascadeClassifier_CUDA::release() { if (impl) { delete impl; impl = 0; } }
-
-bool cv::cuda::CascadeClassifier_CUDA::empty() const { return impl == 0; }
-
-Size cv::cuda::CascadeClassifier_CUDA::getClassifierSize() const
-{
-    return this->empty() ? Size() : impl->getClassifierCvSize();
-}
-
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
-{
-    CV_Assert( !this->empty());
-    return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, cv::Size());
-}
-
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize, double scaleFactor, int minNeighbors)
-{
-    CV_Assert( !this->empty());
-    return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, maxObjectSize);
-}
-
-bool cv::cuda::CascadeClassifier_CUDA::load(const String& filename)
-{
-    release();
+    void LbpCascade_Impl::allocateBuffers(cv::Size frame)
+    {
+        if (frame == cv::Size())
+            return;
+
+        if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
+        {
+            resuzeBuffer.create(frame, CV_8UC1);
+
+            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
+
+        #ifdef HAVE_OPENCV_CUDALEGACY
+            NcvSize32u roiSize;
+            roiSize.width = frame.width;
+            roiSize.height = frame.height;
+
+            cudaDeviceProp prop;
+            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
+
+            Ncv32u bufSize;
+            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
+            integralBuffer.create(1, bufSize, CV_8UC1);
+        #endif
+
+            candidates.create(1 , frame.width >> 1, CV_32SC4);
+        }
+    }
+}
+
+//
+// create
+//
+
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
+{
     String fext = filename.substr(filename.find_last_of(".") + 1);
     fext = fext.toLowerCase();

     if (fext == "nvbin")
     {
-        impl = new HaarCascade();
-        return impl->read(filename);
+    #ifndef HAVE_OPENCV_CUDALEGACY
+        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
+        return Ptr<cuda::CascadeClassifier>();
+    #else
+        return makePtr<HaarCascade_Impl>(filename);
+    #endif
     }

     FileStorage fs(filename, FileStorage::READ);

     if (!fs.isOpened())
     {
-        impl = new HaarCascade();
-        return impl->read(filename);
+    #ifndef HAVE_OPENCV_CUDALEGACY
+        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
+        return Ptr<cuda::CascadeClassifier>();
+    #else
+        return makePtr<HaarCascade_Impl>(filename);
+    #endif
     }

     const char *CUDA_CC_LBP = "LBP";
     String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
     if (featureTypeStr == CUDA_CC_LBP)
-        impl = new LbpCascade();
+    {
+        return makePtr<LbpCascade_Impl>(fs);
+    }
     else
-        impl = new HaarCascade();
+    {
+    #ifndef HAVE_OPENCV_CUDALEGACY
+        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
+        return Ptr<cuda::CascadeClassifier>();
+    #else
+        return makePtr<HaarCascade_Impl>(filename);
+    #endif
+    }

-    impl->read(filename);
-    return !this->empty();
+    CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier");
+    return Ptr<cuda::CascadeClassifier>();
 }
+
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
+{
+    return makePtr<LbpCascade_Impl>(file);
+}

 #endif
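For orientation, here is a minimal usage sketch of the factory-based API introduced above (not part of the patch; the cascade and image file names are placeholders and error handling is omitted). It chains create() -> detectMultiScale() -> convert(), exactly as the updated sample and tests below do.

// Minimal sketch, assuming an LBP cascade file and a test image on disk (placeholder names).
#include <vector>
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/imgcodecs.hpp"

int main()
{
    // Factory replaces the old CascadeClassifier_CUDA::load().
    cv::Ptr<cv::cuda::CascadeClassifier> cascade =
        cv::cuda::CascadeClassifier::create("lbpcascade_frontalface.xml");

    // Upload a grayscale frame to the GPU.
    cv::cuda::GpuMat d_gray(cv::imread("frame.png", cv::IMREAD_GRAYSCALE));

    // Detections come back in a GpuMat buffer and are converted to Rects on the host.
    cv::cuda::GpuMat d_objects;
    cascade->detectMultiScale(d_gray, d_objects);

    std::vector<cv::Rect> objects;
    cascade->convert(d_objects, objects);

    return 0;
}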

File diff suppressed because it is too large

View File

@ -0,0 +1,62 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__
#include <limits>
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/core/private.cuda.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDALEGACY
# include "opencv2/cudalegacy/private.hpp"
#endif
#endif /* __OPENCV_PRECOMP_H__ */

View File

@ -0,0 +1,45 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
CV_CUDA_TEST_MAIN("gpu")

View File

@ -48,9 +48,10 @@ using namespace cvtest;
 //#define DUMP

-struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescriptor
+struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
     cv::cuda::DeviceInfo devInfo;
+    cv::Ptr<cv::cuda::HOG> hog;

 #ifdef DUMP
     std::ofstream f;
@ -69,23 +70,13 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
         devInfo = GetParam();
         cv::cuda::setDevice(devInfo.deviceID());
+
+        hog = cv::cuda::HOG::create();
     }

 #ifdef DUMP
-    void dump(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
+    void dump(const std::vector<cv::Point>& locations)
     {
-        f.write((char*)&blockHists.rows, sizeof(blockHists.rows));
-        f.write((char*)&blockHists.cols, sizeof(blockHists.cols));
-
-        for (int i = 0; i < blockHists.rows; ++i)
-        {
-            for (int j = 0; j < blockHists.cols; ++j)
-            {
-                float val = blockHists.at<float>(i, j);
-                f.write((char*)&val, sizeof(val));
-            }
-        }
-
         int nlocations = locations.size();
         f.write((char*)&nlocations, sizeof(nlocations));
@ -93,21 +84,18 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
             f.write((char*)&locations[i], sizeof(locations[i]));
     }
 #else
-    void compare(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
+    void compare(const std::vector<cv::Point>& locations)
     {
+        // skip block_hists check
         int rows, cols;
         f.read((char*)&rows, sizeof(rows));
         f.read((char*)&cols, sizeof(cols));
-        ASSERT_EQ(rows, blockHists.rows);
-        ASSERT_EQ(cols, blockHists.cols);
-
-        for (int i = 0; i < blockHists.rows; ++i)
+        for (int i = 0; i < rows; ++i)
         {
-            for (int j = 0; j < blockHists.cols; ++j)
+            for (int j = 0; j < cols; ++j)
             {
                 float val;
                 f.read((char*)&val, sizeof(val));
-                ASSERT_NEAR(val, blockHists.at<float>(i, j), 1e-3);
             }
         }
@ -126,54 +114,41 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
     void testDetect(const cv::Mat& img)
     {
-        gamma_correction = false;
-        setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+        hog->setGammaCorrection(false);
+        hog->setSVMDetector(hog->getDefaultPeopleDetector());

         std::vector<cv::Point> locations;

         // Test detect
-        detect(loadMat(img), locations, 0);
+        hog->detect(loadMat(img), locations);

 #ifdef DUMP
-        dump(cv::Mat(block_hists), locations);
+        dump(locations);
 #else
-        compare(cv::Mat(block_hists), locations);
+        compare(locations);
 #endif

         // Test detect on smaller image
         cv::Mat img2;
         cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
-        detect(loadMat(img2), locations, 0);
+        hog->detect(loadMat(img2), locations);

 #ifdef DUMP
-        dump(cv::Mat(block_hists), locations);
+        dump(locations);
 #else
-        compare(cv::Mat(block_hists), locations);
+        compare(locations);
 #endif

         // Test detect on greater image
         cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
-        detect(loadMat(img2), locations, 0);
+        hog->detect(loadMat(img2), locations);

 #ifdef DUMP
-        dump(cv::Mat(block_hists), locations);
+        dump(locations);
 #else
-        compare(cv::Mat(block_hists), locations);
+        compare(locations);
 #endif
     }
-
-    // Does not compare border value, as interpolation leads to delta
-    void compare_inner_parts(cv::Mat d1, cv::Mat d2)
-    {
-        for (int i = 1; i < blocks_per_win_y - 1; ++i)
-            for (int j = 1; j < blocks_per_win_x - 1; ++j)
-                for (int k = 0; k < block_hist_size; ++k)
-                {
-                    float a = d1.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
-                    float b = d2.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
-                    ASSERT_FLOAT_EQ(a, b);
-                }
-    }
 };

 // desabled while resize does not fixed
@ -182,13 +157,8 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
     cv::Mat img_rgb = readImage("hog/road.png");
     ASSERT_FALSE(img_rgb.empty());

-#ifdef DUMP
     f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
     ASSERT_TRUE(f.is_open());
-#else
-    f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
-    ASSERT_TRUE(f.is_open());
-#endif

     // Test on color image
     cv::Mat img;
@ -198,8 +168,6 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
     // Test on gray image
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
     testDetect(img);
-
-    f.close();
 }

 CUDA_TEST_P(HOG, GetDescriptors)
@ -216,8 +184,14 @@ CUDA_TEST_P(HOG, GetDescriptors)
     // Convert train images into feature vectors (train table)
     cv::cuda::GpuMat descriptors, descriptors_by_cols;
-    getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
-    getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
+
+    hog->setWinStride(Size(64, 128));
+
+    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_ROW_BY_ROW);
+    hog->compute(d_img, descriptors);
+
+    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_COL_BY_COL);
+    hog->compute(d_img, descriptors_by_cols);

     // Check size of the result train table
     wins_per_img_x = 3;
@ -242,48 +216,6 @@ CUDA_TEST_P(HOG, GetDescriptors)
                 ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
                           r[(x * blocks_per_win_y + y) * block_hist_size + k]);
             }
-
-    /* Now we want to extract the same feature vectors, but from single images. NOTE: results will
-    be defferent, due to border values interpolation. Using of many small images is slower, however we
-    wont't call getDescriptors and will use computeBlockHistograms instead of. computeBlockHistograms
-    works good, it can be checked in the gpu_hog sample */
-
-    img_rgb = readImage("hog/positive1.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    // Everything is fine with interpolation for left top subimage
-    ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
-
-    img_rgb = readImage("hog/positive2.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
-
-    img_rgb = readImage("hog/negative1.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
-
-    img_rgb = readImage("hog/negative2.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
-
-    img_rgb = readImage("hog/positive3.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
-
-    img_rgb = readImage("hog/negative3.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
 }

 INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, HOG, ALL_DEVICES);
@ -310,12 +242,12 @@ CUDA_TEST_P(CalTech, HOG)
     cv::cuda::GpuMat d_img(img);
     cv::Mat markedImage(img.clone());

-    cv::cuda::HOGDescriptor d_hog;
-    d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
-    d_hog.nlevels = d_hog.nlevels + 32;
+    cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+    d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
+    d_hog->setNumLevels(d_hog->getNumLevels() + 32);

     std::vector<cv::Rect> found_locations;
-    d_hog.detectMultiScale(d_img, found_locations);
+    d_hog->detectMultiScale(d_img, found_locations);

 #if defined (LOG_CASCADE_STATISTIC)
     for (int i = 0; i < (int)found_locations.size(); i++)
@ -326,7 +258,8 @@ CUDA_TEST_P(CalTech, HOG)
         cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
     }

-    cv::imshow("Res", markedImage); cv::waitKey();
+    cv::imshow("Res", markedImage);
+    cv::waitKey();
 #endif
 }
@ -354,9 +287,15 @@ PARAM_TEST_CASE(LBP_Read_classifier, cv::cuda::DeviceInfo, int)
 CUDA_TEST_P(LBP_Read_classifier, Accuracy)
 {
-    cv::cuda::CascadeClassifier_CUDA classifier;
     std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml";
-    ASSERT_TRUE(classifier.load(classifierXmlPath));
+
+    cv::Ptr<cv::cuda::CascadeClassifier> d_cascade;
+
+    ASSERT_NO_THROW(
+        d_cascade = cv::cuda::CascadeClassifier::create(classifierXmlPath);
+    );
+
+    ASSERT_FALSE(d_cascade.empty());
 }

 INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_Read_classifier,
@ -396,29 +335,28 @@ CUDA_TEST_P(LBP_classify, Accuracy)
     for (; it != rects.end(); ++it)
         cv::rectangle(markedImage, *it, cv::Scalar(255, 0, 0));

-    cv::cuda::CascadeClassifier_CUDA gpuClassifier;
-    ASSERT_TRUE(gpuClassifier.load(classifierXmlPath));
+    cv::Ptr<cv::cuda::CascadeClassifier> gpuClassifier =
+        cv::cuda::CascadeClassifier::create(classifierXmlPath);

-    cv::cuda::GpuMat gpu_rects;
     cv::cuda::GpuMat tested(grey);
-    int count = gpuClassifier.detectMultiScale(tested, gpu_rects);
+    cv::cuda::GpuMat gpu_rects_buf;
+    gpuClassifier->detectMultiScale(tested, gpu_rects_buf);
+
+    std::vector<cv::Rect> gpu_rects;
+    gpuClassifier->convert(gpu_rects_buf, gpu_rects);

 #if defined (LOG_CASCADE_STATISTIC)
-    cv::Mat downloaded(gpu_rects);
-    const cv::Rect* faces = downloaded.ptr<cv::Rect>();
-    for (int i = 0; i < count; i++)
+    for (size_t i = 0; i < gpu_rects.size(); i++)
     {
-        cv::Rect r = faces[i];
+        cv::Rect r = gpu_rects[i];
         std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl;
         cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
     }
-#endif

-#if defined (LOG_CASCADE_STATISTIC)
-    cv::imshow("Res", markedImage); cv::waitKey();
+    cv::imshow("Res", markedImage);
+    cv::waitKey();
 #endif
-
-    (void)count;
 }

 INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_classify,

View File

@ -0,0 +1,64 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
#include <fstream>
#include "opencv2/ts.hpp"
#include "opencv2/ts/cuda_test.hpp"
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/objdetect.hpp"
#include "cvconfig.h"
#endif

View File

@ -3,7 +3,7 @@ SET(OPENCV_CUDA_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc op
                                   opencv_calib3d opencv_cuda opencv_superres
                                   opencv_cudaarithm opencv_cudafilters opencv_cudawarping opencv_cudaimgproc
                                   opencv_cudafeatures2d opencv_cudaoptflow opencv_cudabgsegm
-                                  opencv_cudastereo opencv_cudalegacy)
+                                  opencv_cudastereo opencv_cudalegacy opencv_cudaobjdetect)

 ocv_check_dependencies(${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS})

View File

@ -9,7 +9,7 @@
 #include "opencv2/objdetect/objdetect.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/cuda.hpp"
+#include "opencv2/cudaobjdetect.hpp"
 #include "opencv2/cudaimgproc.hpp"
 #include "opencv2/cudawarping.hpp"
@ -173,13 +173,9 @@ int main(int argc, const char *argv[])
         }
     }

-    CascadeClassifier_CUDA cascade_gpu;
-    if (!cascade_gpu.load(cascadeName))
-    {
-        return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
-    }
+    Ptr<cuda::CascadeClassifier> cascade_gpu = cuda::CascadeClassifier::create(cascadeName);

-    CascadeClassifier cascade_cpu;
+    cv::CascadeClassifier cascade_cpu;
     if (!cascade_cpu.load(cascadeName))
     {
         return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
@ -206,8 +202,8 @@ int main(int argc, const char *argv[])
     namedWindow("result", 1);

-    Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp;
-    vector<Rect> facesBuf_cpu;
+    Mat frame, frame_cpu, gray_cpu, resized_cpu, frameDisp;
+    vector<Rect> faces;

     GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;
@ -218,7 +214,6 @@ int main(int argc, const char *argv[])
     bool filterRects = true;
     bool helpScreen = false;
-    int detections_num;

     for (;;)
     {
         if (isInputCamera || isInputVideo)
@ -241,40 +236,26 @@ int main(int argc, const char *argv[])
             if (useGPU)
             {
-                //cascade_gpu.visualizeInPlace = true;
-                cascade_gpu.findLargestObject = findLargestObject;
+                cascade_gpu->setFindLargestObject(findLargestObject);
+                cascade_gpu->setScaleFactor(1.2);
+                cascade_gpu->setMinNeighbors((filterRects || findLargestObject) ? 4 : 0);

-                detections_num = cascade_gpu.detectMultiScale(resized_gpu, facesBuf_gpu, 1.2,
-                                                              (filterRects || findLargestObject) ? 4 : 0);
-                facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
+                cascade_gpu->detectMultiScale(resized_gpu, facesBuf_gpu);
+                cascade_gpu->convert(facesBuf_gpu, faces);
             }
             else
             {
-                Size minSize = cascade_gpu.getClassifierSize();
-                cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2,
+                Size minSize = cascade_gpu->getClassifierSize();
+                cascade_cpu.detectMultiScale(resized_cpu, faces, 1.2,
                                              (filterRects || findLargestObject) ? 4 : 0,
                                              (findLargestObject ? CASCADE_FIND_BIGGEST_OBJECT : 0)
                                                 | CASCADE_SCALE_IMAGE,
                                              minSize);
-                detections_num = (int)facesBuf_cpu.size();
             }

-            if (!useGPU && detections_num)
-            {
-                for (int i = 0; i < detections_num; ++i)
-                {
-                    rectangle(resized_cpu, facesBuf_cpu[i], Scalar(255));
-                }
-            }
-
-            if (useGPU)
-            {
-                resized_gpu.download(resized_cpu);
-
-                for (int i = 0; i < detections_num; ++i)
-                {
-                    rectangle(resized_cpu, faces_downloaded.ptr<cv::Rect>()[i], Scalar(255));
-                }
+            for (size_t i = 0; i < faces.size(); ++i)
+            {
+                rectangle(resized_cpu, faces[i], Scalar(255));
             }

             tm.stop();
@ -283,16 +264,15 @@ int main(int argc, const char *argv[])
             //print detections to console
             cout << setfill(' ') << setprecision(2);
-            cout << setw(6) << fixed << fps << " FPS, " << detections_num << " det";
-            if ((filterRects || findLargestObject) && detections_num > 0)
+            cout << setw(6) << fixed << fps << " FPS, " << faces.size() << " det";
+            if ((filterRects || findLargestObject) && !faces.empty())
             {
-                Rect *faceRects = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
-                for (int i = 0; i < min(detections_num, 2); ++i)
+                for (size_t i = 0; i < faces.size(); ++i)
                 {
-                    cout << ", [" << setw(4) << faceRects[i].x
-                         << ", " << setw(4) << faceRects[i].y
-                         << ", " << setw(4) << faceRects[i].width
-                         << ", " << setw(4) << faceRects[i].height << "]";
+                    cout << ", [" << setw(4) << faces[i].x
+                         << ", " << setw(4) << faces[i].y
+                         << ", " << setw(4) << faces[i].width
+                         << ", " << setw(4) << faces[i].height << "]";
                 }
             }
             cout << endl;

View File

@ -5,7 +5,7 @@
 #include <iomanip>
 #include <stdexcept>
 #include <opencv2/core/utility.hpp>
-#include "opencv2/cuda.hpp"
+#include "opencv2/cudaobjdetect.hpp"
 #include "opencv2/highgui.hpp"
 #include "opencv2/objdetect.hpp"
 #include "opencv2/imgproc.hpp"
@ -252,19 +252,13 @@ void App::run()
     Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
     Size win_stride(args.win_stride_width, args.win_stride_height);

-    // Create HOG descriptors and detectors here
-    vector<float> detector;
-    if (win_size == Size(64, 128))
-        detector = cv::cuda::HOGDescriptor::getPeopleDetector64x128();
-    else
-        detector = cv::cuda::HOGDescriptor::getPeopleDetector48x96();
-
-    cv::cuda::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
-                                    cv::cuda::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
-                                    cv::cuda::HOGDescriptor::DEFAULT_NLEVELS);
-    cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
-                              HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
-    gpu_hog.setSVMDetector(detector);
+    cv::Ptr<cv::cuda::HOG> gpu_hog = cv::cuda::HOG::create(win_size);
+    cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9);
+
+    // Create HOG descriptors and detectors here
+    Mat detector = gpu_hog->getDefaultPeopleDetector();
+
+    gpu_hog->setSVMDetector(detector);
     cpu_hog.setSVMDetector(detector);

     while (running)
@ -315,9 +309,6 @@ void App::run()
         else img = img_aux;
         img_to_show = img;

-        gpu_hog.nlevels = nlevels;
-        cpu_hog.nlevels = nlevels;
-
         vector<Rect> found;

         // Perform HOG classification
@ -325,11 +316,19 @@ void App::run()
         if (use_gpu)
         {
             gpu_img.upload(img);
-            gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
-                                     Size(0, 0), scale, gr_threshold);
+            gpu_hog->setNumLevels(nlevels);
+            gpu_hog->setHitThreshold(hit_threshold);
+            gpu_hog->setWinStride(win_stride);
+            gpu_hog->setScaleFactor(scale);
+            gpu_hog->setGroupThreshold(gr_threshold);
+            gpu_hog->detectMultiScale(gpu_img, found);
         }
-        else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
+        else
+        {
+            cpu_hog.nlevels = nlevels;
+            cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
                                      Size(0, 0), scale, gr_threshold);
+        }
         hogWorkEnd();

         // Draw positive classified windows
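For reference, a minimal sketch of the migrated GPU HOG path in isolation (not part of the patch): the new cv::cuda::HOG is obtained from a factory and configured through setters before detectMultiScale(), as the sample above does. The image path and parameter values are placeholders.

// Minimal sketch, assuming a test image on disk (placeholder name and illustrative parameters).
#include <vector>
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/imgcodecs.hpp"

int main()
{
    // Factory + setters replace the old cv::cuda::HOGDescriptor constructor arguments.
    cv::Ptr<cv::cuda::HOG> hog = cv::cuda::HOG::create();
    hog->setSVMDetector(hog->getDefaultPeopleDetector());
    hog->setNumLevels(64);          // illustrative values, not taken from the patch
    hog->setScaleFactor(1.05);
    hog->setGroupThreshold(8);

    cv::cuda::GpuMat d_img(cv::imread("pedestrians.png", cv::IMREAD_GRAYSCALE));

    std::vector<cv::Rect> found;
    hog->detectMultiScale(d_img, found);

    return 0;
}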