Remove the debug detect-at-scale method
This commit is contained in:
@@ -45,15 +45,6 @@
|
||||
#include <stdio.h>
|
||||
#include <float.h>
|
||||
|
||||
// #define LOG_CUDA_CASCADE
|
||||
|
||||
#if defined LOG_CUDA_CASCADE
|
||||
# define dprintf(format, ...) \
|
||||
do { printf(format, __VA_ARGS__); } while (0)
|
||||
#else
|
||||
# define dprintf(format, ...)
|
||||
#endif
|
||||
|
||||
namespace cv { namespace gpu { namespace device {
|
||||
namespace icf {
|
||||
|
||||
@@ -254,12 +245,12 @@ __global__ void soft_cascade(const CascadeInvoker<Policy> invoker, Detection* ob
|
||||
|
||||
template<typename Policy>
|
||||
void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
|
||||
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale, const cudaStream_t& stream) const
|
||||
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const
|
||||
{
|
||||
int fw = roi.rows;
|
||||
int fh = roi.cols;
|
||||
|
||||
dim3 grid(fw, fh / Policy::STA_Y, (scale == -1) ? downscales : 1);
|
||||
dim3 grid(fw, fh / Policy::STA_Y, downscales);
|
||||
|
||||
uint* ctr = (uint*)(counter.ptr(0));
|
||||
Detection* det = (Detection*)objects.ptr();
|
||||
@@ -268,26 +259,16 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi&
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
|
||||
|
||||
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<float2>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step));
|
||||
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<typename Policy::roi_type>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / Policy::STA_Y, roi.rows, roi.step));
|
||||
|
||||
const CascadeInvoker<Policy> inv = *this;
|
||||
|
||||
if (scale == -1)
|
||||
{
|
||||
soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, 0);
|
||||
cudaSafeCall( cudaGetLastError());
|
||||
soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, 0);
|
||||
cudaSafeCall( cudaGetLastError());
|
||||
|
||||
grid = dim3(fw, fh / Policy::STA_Y, scales - downscales);
|
||||
soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, downscales);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (scale >= downscales)
|
||||
soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, scale);
|
||||
else
|
||||
soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, scale);
|
||||
}
|
||||
grid = dim3(fw, fh / Policy::STA_Y, scales - downscales);
|
||||
soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, downscales);
|
||||
|
||||
if (!stream)
|
||||
{
|
||||
@@ -297,7 +278,7 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi&
|
||||
}
|
||||
|
||||
template void CascadeInvoker<GK107PolicyX4>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
|
||||
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale, const cudaStream_t& stream) const;
|
||||
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const;
|
||||
|
||||
}
|
||||
}}}
|
@@ -118,9 +118,10 @@ struct __align__(16) Detection
|
||||
struct GK107PolicyX4
|
||||
{
|
||||
enum {WARP = 32, STA_X = WARP, STA_Y = 8, SHRINKAGE = 4};
|
||||
typedef float2 roi_type;
|
||||
static const dim3 block()
|
||||
{
|
||||
return dim3(GK107PolicyX4::STA_X, GK107PolicyX4::STA_Y);
|
||||
return dim3(STA_X, STA_Y);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -146,7 +147,7 @@ struct CascadeInvoker
|
||||
int scales;
|
||||
|
||||
void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
|
||||
PtrStepSzi counter, const int downscales, const int csale = -1, const cudaStream_t& stream = 0) const;
|
||||
PtrStepSzi counter, const int downscales, const cudaStream_t& stream = 0) const;
|
||||
|
||||
template<bool isUp>
|
||||
__device void detect(Detection* objects, const uint ndetections, uint* ctr, const int downscales) const;
|
||||
|
@@ -311,13 +311,13 @@ struct cv::gpu::SCascade::Fields
|
||||
leaves.upload(hleaves);
|
||||
}
|
||||
|
||||
void detect(int scale, const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const
|
||||
void detect(const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const
|
||||
{
|
||||
cudaMemset(count.data, 0, sizeof(Detection));
|
||||
cudaSafeCall( cudaGetLastError());
|
||||
device::icf::CascadeInvoker<device::icf::GK107PolicyX4> invoker
|
||||
= device::icf::CascadeInvoker<device::icf::GK107PolicyX4>(levels, octaves, stages, nodes, leaves);
|
||||
invoker(roi, hogluv, objects, count, downscales, scale, stream);
|
||||
invoker(roi, hogluv, objects, count, downscales, stream);
|
||||
}
|
||||
|
||||
void preprocess(const cv::gpu::GpuMat& colored, Stream& s)
|
||||
@@ -521,36 +521,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
|
||||
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
flds.detect(-1, rois, tmp, objects, stream);
|
||||
}
|
||||
|
||||
void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, const int level, Stream& s) const
|
||||
{
|
||||
CV_Assert(fields);
|
||||
|
||||
const GpuMat colored = image.getGpuMat();
|
||||
// only color images are supported
|
||||
CV_Assert(colored.type() == CV_8UC3 || colored.type() == CV_32SC1);
|
||||
|
||||
Fields& flds = *fields;
|
||||
if (colored.type() == CV_8UC3)
|
||||
{
|
||||
// only this window size allowed
|
||||
// CV_Assert(colored.cols == Fields::FRAME_WIDTH && colored.rows == Fields::FRAME_HEIGHT);
|
||||
flds.preprocess(colored, s);
|
||||
}
|
||||
else
|
||||
{
|
||||
colored.copyTo(flds.hogluv);
|
||||
}
|
||||
|
||||
GpuMat rois = _rois.getGpuMat(), objects = _objects.getGpuMat();
|
||||
|
||||
GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
|
||||
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
flds.detect(level, rois, tmp, objects, stream);
|
||||
flds.detect(rois, tmp, objects, stream);
|
||||
}
|
||||
|
||||
void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const
|
||||
|
Reference in New Issue
Block a user