integrate NMS (Dollar's criteria)
This commit is contained in:
parent
d2e88e1d4d
commit
60c0e41ba5
@ -88,19 +88,23 @@ namespace icf {
|
|||||||
return (w < 0 || h < 0)? 0.f : (float)(w * h);
|
return (w < 0 || h < 0)? 0.f : (float)(w * h);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void overlap(const uint* n, const Detection* detections, uchar* overlaps)
|
texture<uint4, cudaTextureType2D, cudaReadModeElementType> tdetections;
|
||||||
|
|
||||||
|
__global__ void overlap(const uint* n, uchar* overlaps)
|
||||||
{
|
{
|
||||||
const int idx = threadIdx.x;
|
const int idx = threadIdx.x;
|
||||||
const int total = *n;
|
const int total = *n;
|
||||||
|
|
||||||
for (int i = idx; i < total; i += 192)
|
for (int i = idx + 1; i < total; i += 192)
|
||||||
{
|
{
|
||||||
const Detection& a = detections[i];
|
const uint4 _a = tex2D(tdetections, i, 0);
|
||||||
|
const Detection& a = *((Detection*)(&_a));
|
||||||
bool excluded = false;
|
bool excluded = false;
|
||||||
|
|
||||||
for (int j = i + 1; j < total; ++j)
|
for (int j = i + 1; j < total; ++j)
|
||||||
{
|
{
|
||||||
const Detection& b = detections[j];
|
const uint4 _b = tex2D(tdetections, j, 0);
|
||||||
|
const Detection& b = *((Detection*)(&_b));
|
||||||
float ovl = overlapArea(a, b) / ::min(a.w * a.h, b.w * b.h);
|
float ovl = overlapArea(a, b) / ::min(a.w * a.h, b.w * b.h);
|
||||||
|
|
||||||
if (ovl > 0.65f)
|
if (ovl > 0.65f)
|
||||||
@ -115,7 +119,7 @@ namespace icf {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void collect(const uint* n, const Detection* detections, uchar* overlaps)
|
__global__ void collect(const uint* n, uchar* overlaps, uint* ctr, uint4* suppressed)
|
||||||
{
|
{
|
||||||
const int idx = threadIdx.x;
|
const int idx = threadIdx.x;
|
||||||
const int total = *n;
|
const int total = *n;
|
||||||
@ -124,19 +128,24 @@ namespace icf {
|
|||||||
{
|
{
|
||||||
if (!overlaps[i])
|
if (!overlaps[i])
|
||||||
{
|
{
|
||||||
const Detection& det = detections[i];
|
int oidx = atomicInc(ctr, 50);
|
||||||
// printf("%d: %d %d %d %d %f\n", i, det.x, det.y, det.w, det.h, det.confidence );
|
suppressed[oidx] = tex2D(tdetections, i + 1, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections)
|
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed)
|
||||||
{
|
{
|
||||||
int block = 192;
|
int block = 192;
|
||||||
int grid = 1;
|
int grid = 1;
|
||||||
|
|
||||||
overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uint4>();
|
||||||
collect<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
|
size_t offset;
|
||||||
|
cudaSafeCall( cudaBindTexture2D(&offset, tdetections, objects.data, desc, objects.cols / sizeof(uint4), objects.rows, objects.step));
|
||||||
|
|
||||||
|
overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0));
|
||||||
|
collect<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0), (uint*)suppressed.ptr(0), ((uint4*)suppressed.ptr(0)) + 1);
|
||||||
|
|
||||||
// if (!stream)
|
// if (!stream)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaGetLastError());
|
cudaSafeCall( cudaGetLastError());
|
||||||
|
@ -86,7 +86,7 @@ namespace icf {
|
|||||||
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
|
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
|
||||||
const int fw, const int fh, const int bins, cudaStream_t stream);
|
const int fw, const int fh, const int bins, cudaStream_t stream);
|
||||||
|
|
||||||
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections);
|
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace imgproc {
|
namespace imgproc {
|
||||||
@ -312,6 +312,7 @@ struct cv::gpu::SCascade::Fields
|
|||||||
hogluv.setTo(cv::Scalar::all(0));
|
hogluv.setTo(cv::Scalar::all(0));
|
||||||
|
|
||||||
overlaps.create(1, 5000, CV_8UC1);
|
overlaps.create(1, 5000, CV_8UC1);
|
||||||
|
suppressed.create(1, sizeof(Detection) * 51, CV_8UC1);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -447,7 +448,9 @@ public:
|
|||||||
{
|
{
|
||||||
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
|
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
|
||||||
overlaps.setTo(0);
|
overlaps.setTo(0);
|
||||||
device::icf::suppress(objects, overlaps, ndetections);
|
suppressed.setTo(0);
|
||||||
|
|
||||||
|
device::icf::suppress(objects, overlaps, ndetections, suppressed);
|
||||||
// std::cout << cv::Mat(overlaps) << std::endl;
|
// std::cout << cv::Mat(overlaps) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -484,6 +487,9 @@ public:
|
|||||||
// used for area overlap computing during
|
// used for area overlap computing during
|
||||||
GpuMat overlaps;
|
GpuMat overlaps;
|
||||||
|
|
||||||
|
// used for suppression
|
||||||
|
GpuMat suppressed;
|
||||||
|
|
||||||
// Cascade from xml
|
// Cascade from xml
|
||||||
GpuMat octaves;
|
GpuMat octaves;
|
||||||
GpuMat stages;
|
GpuMat stages;
|
||||||
@ -525,7 +531,6 @@ bool cv::gpu::SCascade::load(const FileNode& fn)
|
|||||||
void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, Stream& s) const
|
void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, Stream& s) const
|
||||||
{
|
{
|
||||||
CV_Assert(fields);
|
CV_Assert(fields);
|
||||||
|
|
||||||
const GpuMat colored = image.getGpuMat();
|
const GpuMat colored = image.getGpuMat();
|
||||||
|
|
||||||
// only color images are supperted
|
// only color images are supperted
|
||||||
@ -545,6 +550,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
|
|||||||
colored.copyTo(flds.hogluv);
|
colored.copyTo(flds.hogluv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
|
||||||
|
|
||||||
GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
|
GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
|
||||||
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
|
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
|
||||||
@ -552,8 +558,11 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
|
|||||||
|
|
||||||
flds.detect(rois, tmp, objects, stream);
|
flds.detect(rois, tmp, objects, stream);
|
||||||
|
|
||||||
// if (rejCriteria != NO_REJECT)
|
if (rejCriteria != NO_REJECT)
|
||||||
|
{
|
||||||
flds.suppress(tmp, objects);
|
flds.suppress(tmp, objects);
|
||||||
|
flds.suppressed.copyTo(spr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const
|
void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const
|
||||||
|
Loading…
x
Reference in New Issue
Block a user