Remove the debug detect-at-scale method

2012-11-14 14:21:22 +04:00
parent 72e2b8b370
commit 8acfbde68e
6 changed files with 53 additions and 106 deletions
--- a/modules/gpu/src/cuda/isf-sc.cu
+++ b/modules/gpu/src/cuda/isf-sc.cu
@@ -45,15 +45,6 @@
 #include <stdio.h>
 #include <float.h>

-// #define LOG_CUDA_CASCADE
-
-#if defined LOG_CUDA_CASCADE
-# define dprintf(format, ...) \
-            do { printf(format, __VA_ARGS__); } while (0)
-#else
-# define dprintf(format, ...)
-#endif
-
 namespace cv { namespace gpu { namespace device {
 namespace icf {

@@ -254,12 +245,12 @@ __global__ void soft_cascade(const CascadeInvoker<Policy> invoker, Detection* ob

 template<typename Policy>
 void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
-    PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale, const cudaStream_t& stream) const
+    PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const
 {
    int fw = roi.rows;
    int fh = roi.cols;

-    dim3 grid(fw, fh / Policy::STA_Y, (scale == -1) ? downscales : 1);
+    dim3 grid(fw, fh / Policy::STA_Y, downscales);

    uint* ctr = (uint*)(counter.ptr(0));
    Detection* det = (Detection*)objects.ptr();
@@ -268,26 +259,16 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi&
    cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
    cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));

-    cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<float2>();
-    cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step));
+    cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<typename Policy::roi_type>();
+    cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / Policy::STA_Y, roi.rows, roi.step));

    const CascadeInvoker<Policy> inv = *this;

-    if (scale == -1)
-    {
-        soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, 0);
-        cudaSafeCall( cudaGetLastError());
+    soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, 0);
+    cudaSafeCall( cudaGetLastError());

-        grid = dim3(fw, fh / Policy::STA_Y, scales - downscales);
-        soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, downscales);
-    }
-    else
-    {
-        if (scale >= downscales)
-            soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, scale);
-        else
-            soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, scale);
-    }
+    grid = dim3(fw, fh / Policy::STA_Y, scales - downscales);
+    soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, downscales);

    if (!stream)
    {
@@ -297,7 +278,7 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi&
 }

 template void CascadeInvoker<GK107PolicyX4>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
-    PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale, const cudaStream_t& stream) const;
+    PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const;

 }
 }}}
--- a/modules/gpu/src/icf.hpp
+++ b/modules/gpu/src/icf.hpp
@@ -118,9 +118,10 @@ struct __align__(16) Detection
 struct GK107PolicyX4
 {
    enum {WARP = 32, STA_X = WARP, STA_Y = 8, SHRINKAGE = 4};
+    typedef float2 roi_type;
    static const dim3 block()
    {
-        return dim3(GK107PolicyX4::STA_X, GK107PolicyX4::STA_Y);
+        return dim3(STA_X, STA_Y);
    }
 };

@@ -146,7 +147,7 @@ struct CascadeInvoker
    int scales;

    void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
-        PtrStepSzi counter, const int downscales, const int csale = -1, const cudaStream_t& stream = 0) const;
+        PtrStepSzi counter, const int downscales, const cudaStream_t& stream = 0) const;

    template<bool isUp>
    __device void detect(Detection* objects, const uint ndetections, uint* ctr, const int downscales) const;
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@@ -311,13 +311,13 @@ struct cv::gpu::SCascade::Fields
        leaves.upload(hleaves);
    }

-    void detect(int scale, const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const
+    void detect(const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const
    {
        cudaMemset(count.data, 0, sizeof(Detection));
        cudaSafeCall( cudaGetLastError());
        device::icf::CascadeInvoker<device::icf::GK107PolicyX4> invoker
        = device::icf::CascadeInvoker<device::icf::GK107PolicyX4>(levels, octaves, stages, nodes, leaves);
-        invoker(roi, hogluv, objects, count, downscales, scale, stream);
+        invoker(roi, hogluv, objects, count, downscales, stream);
    }

    void preprocess(const cv::gpu::GpuMat& colored, Stream& s)
@@ -521,36 +521,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
    objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols -  sizeof(Detection), 1));
    cudaStream_t stream = StreamAccessor::getStream(s);

-    flds.detect(-1, rois, tmp, objects, stream);
-}
-
-void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, const int level, Stream& s) const
-{
-    CV_Assert(fields);
-
-    const GpuMat colored = image.getGpuMat();
-    // only color images are supperted
-    CV_Assert(colored.type() == CV_8UC3 || colored.type() == CV_32SC1);
-
-    Fields& flds = *fields;
-    if (colored.type() == CV_8UC3)
-    {
-        // only this window size allowed
-        // CV_Assert(colored.cols == Fields::FRAME_WIDTH && colored.rows == Fields::FRAME_HEIGHT);
-        flds.preprocess(colored, s);
-    }
-    else
-    {
-        colored.copyTo(flds.hogluv);
-    }
-
-    GpuMat rois = _rois.getGpuMat(), objects = _objects.getGpuMat();
-
-    GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
-    objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols -  sizeof(Detection), 1));
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    flds.detect(level, rois, tmp, objects, stream);
+    flds.detect(rois, tmp, objects, stream);
 }

 void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const