fixed compilation with latest master changes

2013-03-14 13:49:48 +04:00 · 2013-03-14 13:49:48 +04:00 · a476664144
commit a476664144
parent 6f11dc03b9
12 changed files with 65 additions and 254 deletions
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@ -55,142 +55,6 @@
 #include "opencv2/features2d.hpp"

 namespace cv { namespace gpu {
-
-//////////////////////////////// CudaMem ////////////////////////////////
-// CudaMem is limited cv::Mat with page locked memory allocation.
-// Page locked memory is only needed for async and faster coping to GPU.
-// It is convertable to cv::Mat header without reference counting
-// so you can use it with other opencv functions.
-
-// Page-locks the matrix m memory and maps it for the device(s)
-CV_EXPORTS void registerPageLocked(Mat& m);
-// Unmaps the memory of matrix m, and makes it pageable again.
-CV_EXPORTS void unregisterPageLocked(Mat& m);
-
-class CV_EXPORTS CudaMem
-{
-public:
-    enum  { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2, ALLOC_WRITE_COMBINED = 4 };
-
-    CudaMem();
-    CudaMem(const CudaMem& m);
-
-    CudaMem(int rows, int cols, int type, int _alloc_type = ALLOC_PAGE_LOCKED);
-    CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
-
-
-    //! creates from cv::Mat with coping data
-    explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED);
-
-    ~CudaMem();
-
-    CudaMem& operator = (const CudaMem& m);
-
-    //! returns deep copy of the matrix, i.e. the data is copied
-    CudaMem clone() const;
-
-    //! allocates new matrix data unless the matrix already has specified size and type.
-    void create(int rows, int cols, int type, int alloc_type = ALLOC_PAGE_LOCKED);
-    void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
-
-    //! decrements reference counter and released memory if needed.
-    void release();
-
-    //! returns matrix header with disabled reference counting for CudaMem data.
-    Mat createMatHeader() const;
-    operator Mat() const;
-
-    //! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
-    GpuMat createGpuMatHeader() const;
-    operator GpuMat() const;
-
-    //returns if host memory can be mapperd to gpu address space;
-    static bool canMapHostMemory();
-
-    // Please see cv::Mat for descriptions
-    bool isContinuous() const;
-    size_t elemSize() const;
-    size_t elemSize1() const;
-    int type() const;
-    int depth() const;
-    int channels() const;
-    size_t step1() const;
-    Size size() const;
-    bool empty() const;
-
-
-    // Please see cv::Mat for descriptions
-    int flags;
-    int rows, cols;
-    size_t step;
-
-    uchar* data;
-    int* refcount;
-
-    uchar* datastart;
-    uchar* dataend;
-
-    int alloc_type;
-};
-
-//////////////////////////////// CudaStream ////////////////////////////////
-// Encapculates Cuda Stream. Provides interface for async coping.
-// Passed to each function that supports async kernel execution.
-// Reference counting is enabled
-
-class CV_EXPORTS Stream
-{
-public:
-    Stream();
-    ~Stream();
-
-    Stream(const Stream&);
-    Stream& operator =(const Stream&);
-
-    bool queryIfComplete();
-    void waitForCompletion();
-
-    //! downloads asynchronously
-    // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat)
-    void enqueueDownload(const GpuMat& src, CudaMem& dst);
-    void enqueueDownload(const GpuMat& src, Mat& dst);
-
-    //! uploads asynchronously
-    // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI)
-    void enqueueUpload(const CudaMem& src, GpuMat& dst);
-    void enqueueUpload(const Mat& src, GpuMat& dst);
-
-    //! copy asynchronously
-    void enqueueCopy(const GpuMat& src, GpuMat& dst);
-
-    //! memory set asynchronously
-    void enqueueMemSet(GpuMat& src, Scalar val);
-    void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask);
-
-    //! converts matrix type, ex from float to uchar depending on type
-    void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0);
-
-    //! adds a callback to be called on the host after all currently enqueued items in the stream have completed
-    typedef void (*StreamCallback)(Stream& stream, int status, void* userData);
-    void enqueueHostCallback(StreamCallback callback, void* userData);
-
-    static Stream& Null();
-
-    operator bool() const;
-
-private:
-    struct Impl;
-
-    explicit Stream(Impl* impl);
-    void create();
-    void release();
-
-    Impl *impl;
-
-    friend struct StreamAccessor;
-};
-
-
 //////////////////////////////// Filter Engine ////////////////////////////////

 /*!
@ -1522,97 +1386,6 @@ private:
    friend class CascadeClassifier_GPU_LBP;
 };

-// ======================== GPU version for soft cascade ===================== //
-
-class CV_EXPORTS ChannelsProcessor
-{
-public:
-    enum
-    {
-        GENERIC   = 1 << 4,
-        SEPARABLE = 2 << 4
-    };
-
-    // Appends specified number of HOG first-order features integrals into given vector.
-    // Param frame is an input 3-channel bgr image.
-    // Param channels is a GPU matrix of optionally shrinked channels
-    // Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution.
-    virtual void apply(InputArray frame, OutputArray channels, Stream& stream = Stream::Null()) = 0;
-
-    // Creates a specific preprocessor implementation.
-    // Param shrinkage is a resizing factor. Resize is applied before the computing integral sum
-    // Param bins is a number of HOG-like channels.
-    // Param flags is a channel computing extra flags.
-    static cv::Ptr<ChannelsProcessor> create(const int shrinkage, const int bins, const int flags = GENERIC);
-
-    virtual ~ChannelsProcessor();
-
-protected:
-    ChannelsProcessor();
-};
-
-// Implementation of soft (stage-less) cascaded detector.
-class CV_EXPORTS SCascade : public cv::Algorithm
-{
-public:
-
-    // Representation of detectors result.
-    struct CV_EXPORTS Detection
-    {
-        ushort x;
-        ushort y;
-        ushort w;
-        ushort h;
-        float confidence;
-        int kind;
-
-        enum {PEDESTRIAN = 0};
-    };
-
-    enum { NO_REJECT = 1, DOLLAR = 2, /*PASCAL = 4,*/ DEFAULT = NO_REJECT, NMS_MASK = 0xF};
-
-    // An empty cascade will be created.
-    // Param minScale is a minimum scale relative to the original size of the image on which cascade will be applied.
-    // Param minScale is a maximum scale relative to the original size of the image on which cascade will be applied.
-    // Param scales is a number of scales from minScale to maxScale.
-    // Param flags is an extra tuning flags.
-    SCascade(const double minScale = 0.4, const double maxScale = 5., const int scales = 55,
-        const int flags = NO_REJECT || ChannelsProcessor::GENERIC);
-
-    virtual ~SCascade();
-
-    cv::AlgorithmInfo* info() const;
-
-    // Load cascade from FileNode.
-    // Param fn is a root node for cascade. Should be <cascade>.
-    virtual bool load(const FileNode& fn);
-
-    // Load cascade config.
-    virtual void read(const FileNode& fn);
-
-    // Return the matrix of of detected objects.
-    // Param image is a frame on which detector will be applied.
-    // Param rois is a regions of interests mask generated by genRoi.
-    //    Only the objects that fall into one of the regions will be returned.
-    // Param objects is an output array of Detections represented as GpuMat of detections (SCascade::Detection)
-    //    The first element of the matrix is  actually a count of detections.
-    // Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution
-    virtual void detect(InputArray image, InputArray rois, OutputArray objects, Stream& stream = Stream::Null()) const;
-
-private:
-
-    struct Fields;
-    Fields* fields;
-
-    double minScale;
-    double maxScale;
-    int scales;
-
-    int flags;
-};
-
-CV_EXPORTS bool initModule_gpu(void);
-
 ////////////////////////////////// SURF //////////////////////////////////////////

 class CV_EXPORTS SURF_GPU
--- a/modules/softcascade/CMakeLists.txt
+++ b/modules/softcascade/CMakeLists.txt
@ -1,3 +1,3 @@
 set(the_description "Soft Cascade detection and training")
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4310 -Wundef -Wsign-promo -Wmissing-declarations -Wmissing-prototypes)
 ocv_define_module(softcascade opencv_core opencv_imgproc opencv_ml)
-ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4310 -Wundef)
--- a/modules/softcascade/doc/softcascade_cuda.rst
+++ b/modules/softcascade/doc/softcascade_cuda.rst
@ -30,28 +30,28 @@ Implementation of soft (stageless) cascaded detector. ::


 softcascade::SCascade::~SCascade
---------------------------
+---------------------------------
 Destructor for SCascade.

-.. ocv:function:: gpu::SCascade::~SCascade()
+.. ocv:function:: softcascade::SCascade::~SCascade()



 softcascade::SCascade::load
--------------------------
+----------------------------
 Load cascade from FileNode.

-.. ocv:function:: bool gpu::SCascade::load(const FileNode& fn)
+.. ocv:function:: bool softcascade::SCascade::load(const FileNode& fn)

    :param fn: File node from which the soft cascade are read.



 softcascade::SCascade::detect
--------------------------
+------------------------------
 Apply cascade to an input frame and return the vector of Decection objcts.

-.. ocv:function:: void gpu::SCascade::detect(InputArray image, InputArray rois, OutputArray objects, Stream& stream = Stream::Null()) const
+.. ocv:function:: void softcascade::SCascade::detect(InputArray image, InputArray rois, OutputArray objects, cv::gpu::Stream& stream = cv::gpu::Stream::Null()) const

    :param image: a frame on which detector will be applied.

--- a/modules/softcascade/include/opencv2/softcascade.hpp
+++ b/modules/softcascade/include/opencv2/softcascade.hpp
@ -44,6 +44,7 @@
 #define __OPENCV_SOFTCASCADE_HPP__

 #include "opencv2/core.hpp"
+#include "opencv2/core/gpumat.hpp"

 namespace cv { namespace softcascade {

--- a/modules/softcascade/src/cuda/channels.cu
+++ b/modules/softcascade/src/cuda/channels.cu
@ -42,6 +42,9 @@

 #include "opencv2/core/cuda_devptrs.hpp"

+namespace cv { namespace softcascade { namespace internal {
+void error(const char *error_string, const char *file, const int line, const char *func);
+}}}
 #if defined(__GNUC__)
    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
 #else /* defined(__CUDACC__) || defined(__MSVC__) */
@ -50,7 +53,7 @@

 static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
 {
-    // if (cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), file, line, func);
+    if (cudaSuccess != err) cv::softcascade::internal::error(cudaGetErrorString(err), file, line, func);
 }

 __host__ __device__ __forceinline__ int divUp(int total, int grain)
@ -490,16 +493,30 @@ namespace cv { namespace softcascade { namespace device
        B2Y        = 1868
    };

-    template <int bidx> static __device__ __forceinline__ unsigned char RGB2GrayConvert(uint src)
+    template <int bidx> static __device__ __forceinline__ unsigned char RGB2GrayConvert(unsigned char b, unsigned char g, unsigned char r)
    {
-        uint b = 0xffu & (src >> (bidx * 8));
-        uint g = 0xffu & (src >> 8);
-        uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
+        // uint b = 0xffu & (src >> (bidx * 8));
+        // uint g = 0xffu & (src >> 8);
+        // uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
        return CV_DESCALE((uint)(b * B2Y + g * G2Y + r * R2Y), yuv_shift);
    }

+    __global__ void device_transform(const cv::gpu::PtrStepSz<uchar3> bgr, cv::gpu::PtrStepSzb gray)
+    {
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+        const uchar3 colored = (uchar3)(bgr.ptr(y))[x];
+
+        gray.ptr(y)[x] = RGB2GrayConvert<0>(colored.x, colored.y, colored.z);
+    }
+
+    ///////
    void transform(const cv::gpu::PtrStepSz<uchar3>& bgr, cv::gpu::PtrStepSzb gray)
    {
-
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(bgr.cols, block.x), divUp(bgr.rows, block.y));
+        device_transform<<<grid, block>>>(bgr, gray);
+        cudaSafeCall(cudaDeviceSynchronize());
    }
 }}}
--- a/modules/softcascade/src/detector_cuda.cpp
+++ b/modules/softcascade/src/detector_cuda.cpp
@ -536,7 +536,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp

    flds.mask.create( rois.cols / shr, rois.rows / shr, rois.type());

-    device::shrink(rois, flds.genRoiTmp);
+    device::shrink(rois, flds.mask);
    //cv::gpu::transpose(flds.genRoiTmp, flds.mask, s);

    if (type == CV_8UC3)
@ -594,15 +594,16 @@ struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor

    virtual void apply(InputArray _frame, OutputArray _shrunk, cv::gpu::Stream& s = cv::gpu::Stream::Null())
    {
-        const cv::gpu::GpuMat frame = _frame.getGpuMat();
+        bgr = _frame.getGpuMat();
        //cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);

-        _shrunk.create(frame.rows * (4 + bins) / shrinkage, frame.cols / shrinkage, CV_8UC1);
+        _shrunk.create(bgr.rows * (4 + bins) / shrinkage, bgr.cols / shrinkage, CV_8UC1);
        cv::gpu::GpuMat shrunk = _shrunk.getGpuMat();

-        channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
+        channels.create(bgr.rows * (4 + bins), bgr.cols, CV_8UC1);
        setZero(channels, s);

+        gray.create(bgr.size(), CV_8UC1);
        cv::softcascade::device::transform(bgr, gray); //cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
        cv::softcascade::device::gray2hog(gray, channels(cv::Rect(0, 0, bgr.cols, bgr.rows * (bins + 1))), bins);

--- a/modules/softcascade/src/precomp.hpp
+++ b/modules/softcascade/src/precomp.hpp
@ -56,6 +56,7 @@

 namespace cv { namespace softcascade { namespace internal
 {
+
 namespace rnd {

 typedef cv::RNG_MT19937 engine;
--- a/modules/softcascade/src/softcascade_init.cpp
+++ b/modules/softcascade/src/softcascade_init.cpp
@ -63,4 +63,22 @@ bool initModule_softcascade(void)
    return (sc1->info() != 0) && (sc->info() != 0);
 }

+namespace internal {
+void error(const char *error_string, const char *file, const int line, const char *func)
+{
+    int code = CV_GpuApiCallError;
+
+    if (std::uncaught_exception())
+    {
+        const char* errorStr = cvErrorStr(code);
+        const char* function = func ? func : "unknown function";
+
+        std::cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line;
+        std::cerr.flush();
+    }
+    else
+        cv::error( cv::Exception(code, error_string, func, file, line) );
+}
+}
+
 } }
--- a/modules/softcascade/test/test_precomp.hpp
+++ b/modules/softcascade/test/test_precomp.hpp
@ -55,5 +55,6 @@
 # include "opencv2/softcascade.hpp"
 # include "opencv2/imgproc.hpp"
 # include "opencv2/highgui.hpp"
+# include "utility.hpp"

 #endif
--- a/modules/softcascade/test/utility.hpp
+++ b/modules/softcascade/test/utility.hpp
@ -42,10 +42,9 @@
 #ifndef __OPENCV_SOFTCASCADE_TEST_UTILITY_HPP__
 #define __OPENCV_SOFTCASCADE_TEST_UTILITY_HPP__

-#include "opencv2/core/core.hpp"
+#include "opencv2/core.hpp"
 #include "opencv2/core/gpumat.hpp"
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/ts.hpp"

 //////////////////////////////////////////////////////////////////////
 // Gpu devices
--- a/samples/cpp/peopledetect.cpp
+++ b/samples/cpp/peopledetect.cpp
@ -1,7 +1,7 @@
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/objdetect/objdetect.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include <opencv2/softcascade/softcascade.hpp>
+#include "opencv2/imgproc.hpp"
+#include "opencv2/objdetect.hpp"
+#include "opencv2/highgui.hpp"
+#include <opencv2/softcascade.hpp>

 #include <iostream>
 #include <vector>
--- a/samples/gpu/softcascade.cpp
+++ b/samples/gpu/softcascade.cpp
@ -1,6 +1,6 @@
-#include <opencv2/gpu/gpu.hpp>
-#include <opencv2/softcascade/softcascade.hpp>
-#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/gpu.hpp>
+#include <opencv2/softcascade.hpp>
+#include <opencv2/highgui.hpp>
 #include <iostream>

 int main(int argc, char** argv)