remove debug detect at scale method

This commit is contained in:
marina.kolpakova
2012-11-14 14:21:22 +04:00
parent 72e2b8b370
commit 8acfbde68e
6 changed files with 53 additions and 106 deletions

View File

@@ -45,15 +45,6 @@
#include <stdio.h>
#include <float.h>
// #define LOG_CUDA_CASCADE
#if defined LOG_CUDA_CASCADE
# define dprintf(format, ...) \
do { printf(format, __VA_ARGS__); } while (0)
#else
# define dprintf(format, ...)
#endif
namespace cv { namespace gpu { namespace device {
namespace icf {
@@ -254,12 +245,12 @@ __global__ void soft_cascade(const CascadeInvoker<Policy> invoker, Detection* ob
template<typename Policy>
void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale, const cudaStream_t& stream) const
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const
{
int fw = roi.rows;
int fh = roi.cols;
dim3 grid(fw, fh / Policy::STA_Y, (scale == -1) ? downscales : 1);
dim3 grid(fw, fh / Policy::STA_Y, downscales);
uint* ctr = (uint*)(counter.ptr(0));
Detection* det = (Detection*)objects.ptr();
@@ -268,26 +259,16 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi&
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<float2>();
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step));
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<typename Policy::roi_type>();
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / Policy::STA_Y, roi.rows, roi.step));
const CascadeInvoker<Policy> inv = *this;
if (scale == -1)
{
soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, 0);
cudaSafeCall( cudaGetLastError());
soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, 0);
cudaSafeCall( cudaGetLastError());
grid = dim3(fw, fh / Policy::STA_Y, scales - downscales);
soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, downscales);
}
else
{
if (scale >= downscales)
soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, scale);
else
soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, scale);
}
grid = dim3(fw, fh / Policy::STA_Y, scales - downscales);
soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv, det, max_det, ctr, downscales);
if (!stream)
{
@@ -297,7 +278,7 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi&
}
template void CascadeInvoker<GK107PolicyX4>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale, const cudaStream_t& stream) const;
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const;
}
}}}

View File

@@ -118,9 +118,10 @@ struct __align__(16) Detection
struct GK107PolicyX4
{
enum {WARP = 32, STA_X = WARP, STA_Y = 8, SHRINKAGE = 4};
typedef float2 roi_type;
static const dim3 block()
{
return dim3(GK107PolicyX4::STA_X, GK107PolicyX4::STA_Y);
return dim3(STA_X, STA_Y);
}
};
@@ -146,7 +147,7 @@ struct CascadeInvoker
int scales;
void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter, const int downscales, const int csale = -1, const cudaStream_t& stream = 0) const;
PtrStepSzi counter, const int downscales, const cudaStream_t& stream = 0) const;
template<bool isUp>
__device void detect(Detection* objects, const uint ndetections, uint* ctr, const int downscales) const;

View File

@@ -311,13 +311,13 @@ struct cv::gpu::SCascade::Fields
leaves.upload(hleaves);
}
void detect(int scale, const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const
void detect(const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const
{
cudaMemset(count.data, 0, sizeof(Detection));
cudaSafeCall( cudaGetLastError());
device::icf::CascadeInvoker<device::icf::GK107PolicyX4> invoker
= device::icf::CascadeInvoker<device::icf::GK107PolicyX4>(levels, octaves, stages, nodes, leaves);
invoker(roi, hogluv, objects, count, downscales, scale, stream);
invoker(roi, hogluv, objects, count, downscales, stream);
}
void preprocess(const cv::gpu::GpuMat& colored, Stream& s)
@@ -521,36 +521,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
cudaStream_t stream = StreamAccessor::getStream(s);
flds.detect(-1, rois, tmp, objects, stream);
}
void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, const int level, Stream& s) const
{
CV_Assert(fields);
const GpuMat colored = image.getGpuMat();
// only color images are supperted
CV_Assert(colored.type() == CV_8UC3 || colored.type() == CV_32SC1);
Fields& flds = *fields;
if (colored.type() == CV_8UC3)
{
// only this window size allowed
// CV_Assert(colored.cols == Fields::FRAME_WIDTH && colored.rows == Fields::FRAME_HEIGHT);
flds.preprocess(colored, s);
}
else
{
colored.copyTo(flds.hogluv);
}
GpuMat rois = _rois.getGpuMat(), objects = _objects.getGpuMat();
GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
cudaStream_t stream = StreamAccessor::getStream(s);
flds.detect(level, rois, tmp, objects, stream);
flds.detect(rois, tmp, objects, stream);
}
void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const