fix CUDA support for streams for NMS; refactor tests
This commit is contained in:
parent
60c0e41ba5
commit
bd3179bda8
@ -134,7 +134,8 @@ namespace icf {
|
||||
}
|
||||
}
|
||||
|
||||
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed)
|
||||
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections,
|
||||
PtrStepSzb suppressed, cudaStream_t stream)
|
||||
{
|
||||
int block = 192;
|
||||
int grid = 1;
|
||||
@ -146,7 +147,7 @@ namespace icf {
|
||||
overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0));
|
||||
collect<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0), (uint*)suppressed.ptr(0), ((uint4*)suppressed.ptr(0)) + 1);
|
||||
|
||||
// if (!stream)
|
||||
if (!stream)
|
||||
{
|
||||
cudaSafeCall( cudaGetLastError());
|
||||
cudaSafeCall( cudaDeviceSynchronize());
|
||||
@ -330,15 +331,15 @@ __global__ void soft_cascade(const CascadeInvoker<Policy> invoker, Detection* ob
|
||||
|
||||
template<typename Policy>
|
||||
void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
|
||||
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const
|
||||
PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const
|
||||
{
|
||||
int fw = roi.rows;
|
||||
int fh = roi.cols;
|
||||
|
||||
dim3 grid(fw, fh / Policy::STA_Y, downscales);
|
||||
|
||||
uint* ctr = (uint*)(counter.ptr(0));
|
||||
Detection* det = (Detection*)objects.ptr();
|
||||
uint* ctr = (uint*)(objects.ptr(0));
|
||||
Detection* det = ((Detection*)objects.ptr(0)) + 1;
|
||||
uint max_det = objects.cols / sizeof(Detection);
|
||||
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
|
||||
@ -363,7 +364,7 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi&
|
||||
}
|
||||
|
||||
template void CascadeInvoker<GK107PolicyX4>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
|
||||
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const;
|
||||
PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const;
|
||||
|
||||
}
|
||||
}}}
|
@ -147,7 +147,7 @@ struct CascadeInvoker
|
||||
int scales;
|
||||
|
||||
void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
|
||||
PtrStepSzi counter, const int downscales, const cudaStream_t& stream = 0) const;
|
||||
const int downscales, const cudaStream_t& stream = 0) const;
|
||||
|
||||
template<bool isUp>
|
||||
__device void detect(Detection* objects, const uint ndetections, uint* ctr, const int downscales) const;
|
||||
|
@ -86,7 +86,8 @@ namespace icf {
|
||||
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
|
||||
const int fw, const int fh, const int bins, cudaStream_t stream);
|
||||
|
||||
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed);
|
||||
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections,
|
||||
PtrStepSzb suppressed, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace imgproc {
|
||||
@ -328,13 +329,20 @@ struct cv::gpu::SCascade::Fields
|
||||
leaves.upload(hleaves);
|
||||
}
|
||||
|
||||
void detect(const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const
|
||||
void detect(const cv::gpu::GpuMat& roi, cv::gpu::GpuMat& objects, Stream& s) const
|
||||
{
|
||||
cudaMemset(count.data, 0, sizeof(Detection));
|
||||
if (s)
|
||||
s.enqueueMemSet(objects, 0);
|
||||
else
|
||||
cudaMemset(objects.data, 0, sizeof(Detection));
|
||||
|
||||
cudaSafeCall( cudaGetLastError());
|
||||
|
||||
device::icf::CascadeInvoker<device::icf::GK107PolicyX4> invoker
|
||||
= device::icf::CascadeInvoker<device::icf::GK107PolicyX4>(levels, stages, nodes, leaves);
|
||||
invoker(roi, hogluv, objects, count, downscales, stream);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
invoker(roi, hogluv, objects, downscales, stream);
|
||||
}
|
||||
|
||||
void preprocess(const cv::gpu::GpuMat& colored, Stream& s)
|
||||
@ -356,6 +364,26 @@ struct cv::gpu::SCascade::Fields
|
||||
integrate(fh, fw, s);
|
||||
}
|
||||
|
||||
void suppress(GpuMat& objects, Stream& s)
|
||||
{
|
||||
GpuMat ndetections = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
|
||||
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
|
||||
|
||||
if (s)
|
||||
{
|
||||
s.enqueueMemSet(overlaps, 0);
|
||||
s.enqueueMemSet(suppressed, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
overlaps.setTo(0);
|
||||
suppressed.setTo(0);
|
||||
}
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
device::icf::suppress(objects, overlaps, ndetections, suppressed, stream);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typedef std::vector<device::icf::Octave>::const_iterator octIt_t;
|
||||
@ -442,17 +470,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
#include <iostream>
|
||||
public:
|
||||
void suppress(GpuMat& ndetections, GpuMat& objects)
|
||||
{
|
||||
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
|
||||
overlaps.setTo(0);
|
||||
suppressed.setTo(0);
|
||||
|
||||
device::icf::suppress(objects, overlaps, ndetections, suppressed);
|
||||
// std::cout << cv::Mat(overlaps) << std::endl;
|
||||
}
|
||||
|
||||
// scales range
|
||||
float minScale;
|
||||
@ -547,20 +565,18 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
|
||||
}
|
||||
else
|
||||
{
|
||||
colored.copyTo(flds.hogluv);
|
||||
if (s)
|
||||
s.enqueueCopy(colored, flds.hogluv);
|
||||
else
|
||||
colored.copyTo(flds.hogluv);
|
||||
}
|
||||
|
||||
GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
|
||||
|
||||
GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
|
||||
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
flds.detect(rois, tmp, objects, stream);
|
||||
flds.detect(rois, objects, s);
|
||||
|
||||
if (rejCriteria != NO_REJECT)
|
||||
{
|
||||
flds.suppress(tmp, objects);
|
||||
GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
|
||||
flds.suppress(objects, s);
|
||||
flds.suppressed.copyTo(spr);
|
||||
}
|
||||
}
|
||||
|
@ -47,7 +47,7 @@
|
||||
using cv::gpu::GpuMat;
|
||||
|
||||
// show detection results on input image with cv::imshow
|
||||
#define SHOW_DETECTIONS
|
||||
// #define SHOW_DETECTIONS
|
||||
|
||||
#if defined SHOW_DETECTIONS
|
||||
# define SHOW(res) \
|
||||
@ -99,6 +99,35 @@ namespace {
|
||||
return std::string(s);
|
||||
}
|
||||
|
||||
static void print(std::ostream &out, const Detection& d)
|
||||
{
|
||||
#if defined SHOW_DETECTIONS
|
||||
out << "\x1b[32m[ detection]\x1b[0m ("
|
||||
<< std::setw(4) << d.x
|
||||
<< " "
|
||||
<< std::setw(4) << d.y
|
||||
<< ") ("
|
||||
<< std::setw(4) << d.w
|
||||
<< " "
|
||||
<< std::setw(4) << d.h
|
||||
<< ") "
|
||||
<< std::setw(12) << d.confidence
|
||||
<< std::endl;
|
||||
#else
|
||||
(void)out; (void)d;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void printTotal(std::ostream &out, int detbytes)
|
||||
{
|
||||
#if defined SHOW_DETECTIONS
|
||||
out << "\x1b[32m[ ]\x1b[0m Total detections " << (detbytes / sizeof(Detection)) << std::endl;
|
||||
#else
|
||||
(void)out; (void)detbytes;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined SHOW_DETECTIONS
|
||||
static std::string getImageName(int level)
|
||||
{
|
||||
time_t rawtime;
|
||||
@ -112,32 +141,13 @@ namespace {
|
||||
return "gpu_rec_level_" + itoa(level)+ "_" + std::string(buffer) + ".png";
|
||||
}
|
||||
|
||||
static void print(std::ostream &out, const Detection& d)
|
||||
{
|
||||
out << "\x1b[32m[ detection]\x1b[0m ("
|
||||
<< std::setw(4) << d.x
|
||||
<< " "
|
||||
<< std::setw(4) << d.y
|
||||
<< ") ("
|
||||
<< std::setw(4) << d.w
|
||||
<< " "
|
||||
<< std::setw(4) << d.h
|
||||
<< ") "
|
||||
<< std::setw(12) << d.confidence
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
static void printTotal(std::ostream &out, int detbytes)
|
||||
{
|
||||
out << "\x1b[32m[ ]\x1b[0m Total detections " << (detbytes / sizeof(Detection)) << std::endl;
|
||||
}
|
||||
|
||||
static void writeResult(const cv::Mat& result, const int level)
|
||||
{
|
||||
std::string path = cv::tempfile(getImageName(level).c_str());
|
||||
cv::imwrite(path, result);
|
||||
std::cout << "\x1b[32m" << "[ ]" << std::endl << "[ stored in]"<< "\x1b[0m" << path << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef ::testing::TestWithParam<std::tr1::tuple<cv::gpu::DeviceInfo, std::string, std::string, int> > SCascadeTestRoi;
|
||||
|
Loading…
x
Reference in New Issue
Block a user