[~] Refactored, cleaned up, and consolidated the code of GPU examples (cascadeclassifier and cascadeclassifier_nvidia_api)

This commit is contained in:
Anton Obukhov
2011-04-07 12:59:01 +00:00
parent daac469b83
commit 07d19c2c6f
4 changed files with 293 additions and 259 deletions

View File

@@ -62,16 +62,22 @@ int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& , GpuMat& ,
#else
struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
{
{
CascadeClassifierImpl(const string& filename) : lastAllocatedFrameSize(-1, -1)
{
ncvSetDebugOutputHandler(NCVDebugOutputHandler);
ncvSetDebugOutputHandler(NCVDebugOutputHandler);
if (ncvStat != load(filename))
{
CV_Error(CV_GpuApiCallError, "Error in GPU cacade load");
}
NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors, bool findLargestObject, bool visualizeInPlace, NcvSize32u ncvMinSize, /*out*/unsigned int& numDetections)
{
calculateMemReqsAndAllocate(src.size());
}
}
NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
bool findLargestObject, bool visualizeInPlace, NcvSize32u ncvMinSize,
/*out*/unsigned int& numDetections)
{
calculateMemReqsAndAllocate(src.size());
NCVMemPtr src_beg;
src_beg.ptr = (void*)src.ptr<Ncv8u>();
@@ -81,14 +87,8 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
src_seg.begin = src_beg;
src_seg.size = src.step * src.rows;
NCVMatrixReuse<Ncv8u> d_src(src_seg, devProp.textureAlignment, src.cols, src.rows, src.step, true);
ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
//NCVMatrixAlloc<Ncv8u> d_src(*gpuAllocator, src.cols, src.rows);
//ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
//NCVMatrixAlloc<Ncv8u> h_src(*cpuAllocator, src.cols, src.rows);
//ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NCVMatrixReuse<Ncv8u> d_src(src_seg, devProp.textureAlignment, src.cols, src.rows, src.step, true);
ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
CV_Assert(objects.rows == 1);
@@ -100,10 +100,8 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
objects_seg.begin = objects_beg;
objects_seg.size = objects.step * objects.rows;
NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
//NCVVectorAlloc<NcvRect32u> d_rects(*gpuAllocator, 100);
//ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
NcvSize32u roi;
roi.width = d_src.width();
roi.height = d_src.height();
@@ -111,7 +109,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
Ncv32u flags = 0;
flags |= findLargestObject? NCVPipeObjDet_FindLargestObject : 0;
flags |= visualizeInPlace ? NCVPipeObjDet_VisualizeInPlace : 0;
ncvStat = ncvDetectObjectsMultiScale_device(
d_src, roi, d_rects, numDetections, haar, *h_haarStages,
*d_haarStages, *d_haarNodes, *d_haarFeatures,
@@ -122,24 +120,28 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
*gpuAllocator, *cpuAllocator, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
return NCV_SUCCESS;
}
////
NcvSize32u getClassifierSize() const { return haar.ClassifierSize; }
cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); }
private:
static void NCVDebugOutputHandler(const char* msg) { CV_Error(CV_GpuApiCallError, msg); }
NCVStatus load(const string& classifierFile)
{
int devId = cv::gpu::getDevice();
{
int devId = cv::gpu::getDevice();
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
// Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment);
gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment);
cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment);
ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
@@ -149,12 +151,12 @@ private:
ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR);
h_haarStages = new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages);
h_haarStages = new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages);
h_haarNodes = new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes);
h_haarFeatures = new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures);
ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(h_haarFeatures->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures);
@@ -165,7 +167,7 @@ private:
d_haarFeatures = new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures);
ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(d_haarFeatures->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
ncvStat = h_haarStages->copySolid(*d_haarStages, 0);
@@ -173,31 +175,33 @@ private:
ncvStat = h_haarNodes->copySolid(*d_haarNodes, 0);
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
ncvStat = h_haarFeatures->copySolid(*d_haarFeatures, 0);
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
return NCV_SUCCESS;
}
////
NCVStatus calculateMemReqsAndAllocate(const Size& frameSize)
{
{
if (lastAllocatedFrameSize == frameSize)
{
return NCV_SUCCESS;
}
// Calculate memory requirements and create real allocators
NCVMemStackAllocator gpuCounter(devProp.textureAlignment);
NCVMemStackAllocator cpuCounter(devProp.textureAlignment);
ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR);
NCVMatrixAlloc<Ncv8u> d_src(gpuCounter, frameSize.width, frameSize.height);
NCVMatrixAlloc<Ncv8u> h_src(cpuCounter, frameSize.width, frameSize.height);
ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NCVVectorAlloc<NcvRect32u> d_rects(gpuCounter, 100);
NCVVectorAlloc<NcvRect32u> d_rects(gpuCounter, 100);
ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NcvSize32u roi;
@@ -209,23 +213,23 @@ private:
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
gpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment);
gpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment);
cpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), devProp.textureAlignment);
ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);
return NCV_SUCCESS;
}
////
cudaDeviceProp devProp;
NCVStatus ncvStat;
Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;
@@ -237,96 +241,103 @@ private:
Size lastAllocatedFrameSize;
Ptr<NCVMemStackAllocator> gpuAllocator;
Ptr<NCVMemStackAllocator> gpuAllocator;
Ptr<NCVMemStackAllocator> cpuAllocator;
};
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() : findLargestObject(false), visualizeInPlace(false), impl(0) {}
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string& filename) : findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); }
cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { release(); }
bool cv::gpu::CascadeClassifier_GPU::empty() const { return impl == 0; }
void cv::gpu::CascadeClassifier_GPU::release() { if (impl) { delete impl; impl = 0; } }
bool cv::gpu::CascadeClassifier_GPU::load(const string& filename)
{
{
release();
impl = new CascadeClassifierImpl(filename);
return !this->empty();
return !this->empty();
}
Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const
{
return this->empty() ? Size() : impl->getClassifierCvSize();
}
int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
{
{
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
CV_Assert( !this->empty());
const int defaultObjSearchNum = 100;
if (objectsBuf.empty())
{
objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
}
NcvSize32u ncvMinSize = impl->getClassifierSize();
if (ncvMinSize.width < (unsigned)minSize.width && ncvMinSize.height < (unsigned)minSize.height)
{
ncvMinSize.width = minSize.width;
ncvMinSize.height = minSize.height;
}
}
unsigned int numDetections;
NCVStatus ncvStat = impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections);
NCVStatus ncvStat = impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections);
if (ncvStat != NCV_SUCCESS)
{
CV_Error(CV_GpuApiCallError, "Error in face detectioln");
}
return numDetections;
}
struct RectConvert
{
Rect operator()(const NcvRect32u& nr) const { return Rect(nr.x, nr.y, nr.width, nr.height); }
NcvRect32u operator()(const Rect& nr) const
{
NcvRect32u rect;
rect.x = nr.x;
rect.y = nr.y;
rect.width = nr.width;
rect.height = nr.height;
return rect;
}
Rect operator()(const NcvRect32u& nr) const { return Rect(nr.x, nr.y, nr.width, nr.height); }
NcvRect32u operator()(const Rect& nr) const
{
NcvRect32u rect;
rect.x = nr.x;
rect.y = nr.y;
rect.width = nr.width;
rect.height = nr.height;
return rect;
}
};
void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights)
{
vector<Rect> rects(hypotheses.size());
std::transform(hypotheses.begin(), hypotheses.end(), rects.begin(), RectConvert());
if (weights)
{
vector<int> weights_int;
weights_int.assign(weights->begin(), weights->end());
cv::groupRectangles(rects, weights_int, groupThreshold, eps);
}
else
{
cv::groupRectangles(rects, groupThreshold, eps);
}
std::transform(rects.begin(), rects.end(), hypotheses.begin(), RectConvert());
hypotheses.resize(rects.size());
vector<Rect> rects(hypotheses.size());
std::transform(hypotheses.begin(), hypotheses.end(), rects.begin(), RectConvert());
if (weights)
{
vector<int> weights_int;
weights_int.assign(weights->begin(), weights->end());
cv::groupRectangles(rects, weights_int, groupThreshold, eps);
}
else
{
cv::groupRectangles(rects, groupThreshold, eps);
}
std::transform(rects.begin(), rects.end(), hypotheses.begin(), RectConvert());
hypotheses.resize(rects.size());
}
#if 1 /* loadFromXML implementation switch */
NCVStatus loadFromXML(const std::string &filename,
HaarClassifierCascadeDescriptor &haar,
std::vector<HaarStage64> &haarStages,
std::vector<HaarClassifierNode128> &haarClassifierNodes,
NCVStatus loadFromXML(const std::string &filename,
HaarClassifierCascadeDescriptor &haar,
std::vector<HaarStage64> &haarStages,
std::vector<HaarClassifierNode128> &haarClassifierNodes,
std::vector<HaarFeature64> &haarFeatures)
{
NCVStatus ncvStat;
@@ -347,12 +358,12 @@ NCVStatus loadFromXML(const std::string &filename,
haarStages.resize(0);
haarClassifierNodes.resize(0);
haarFeatures.resize(0);
Ptr<CvHaarClassifierCascade> oldCascade = (CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0);
if (oldCascade.empty())
{
return NCV_HAAR_XML_LOADING_EXCEPTION;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
haar.ClassifierSize.width = oldCascade->orig_window_size.width;
haar.ClassifierSize.height = oldCascade->orig_window_size.height;
@@ -384,14 +395,14 @@ NCVStatus loadFromXML(const std::string &filename,
HaarClassifierNodeDescriptor32 nodeLeft;
if ( tree->left[n] <= 0 )
{
{
Ncv32f leftVal = tree->alpha[-tree->left[n]];
ncvStat = nodeLeft.create(leftVal);
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
bIsLeftNodeLeaf = true;
}
else
{
{
Ncv32u leftNodeOffset = tree->left[n];
nodeLeft.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + leftNodeOffset - 1));
haar.bHasStumpsOnly = false;
@@ -419,8 +430,8 @@ NCVStatus loadFromXML(const std::string &filename,
Ncv32u featureId = 0;
for(int l = 0; l < CV_HAAR_FEATURE_MAX; ++l) //by rects
{
Ncv32u rectX = feature->rect[l].r.x;
{
Ncv32u rectX = feature->rect[l].r.x;
Ncv32u rectY = feature->rect[l].r.y;
Ncv32u rectWidth = feature->rect[l].r.width;
Ncv32u rectHeight = feature->rect[l].r.height;
@@ -441,7 +452,7 @@ NCVStatus loadFromXML(const std::string &filename,
HaarFeatureDescriptor32 tmpFeatureDesc;
ncvStat = tmpFeatureDesc.create(haar.bNeedsTiltedII, bIsLeftNodeLeaf, bIsRightNodeLeaf,
featureId, haarFeatures.size() - featureId);
featureId, haarFeatures.size() - featureId);
ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
curNode.setFeatureDesc(tmpFeatureDesc);
@@ -466,8 +477,6 @@ NCVStatus loadFromXML(const std::string &filename,
haarStages.push_back(curStage);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//fill in cascade stats
haar.NumStages = haarStages.size();
haar.NumClassifierRootNodes = haarClassifierNodes.size();
@@ -496,6 +505,7 @@ NCVStatus loadFromXML(const std::string &filename,
}
haarClassifierNodes[i].setRightNodeDesc(nodeRight);
}
for (Ncv32u i=0; i<h_TmpClassifierNotRootNodes.size(); i++)
{
HaarFeatureDescriptor32 featureDesc = h_TmpClassifierNotRootNodes[i].getFeatureDesc();
@@ -522,8 +532,6 @@ NCVStatus loadFromXML(const std::string &filename,
return NCV_SUCCESS;
}
////
#else /* loadFromXML implementation switch */
#include "e:/devNPP-OpenCV/src/external/_rapidxml-1.13/rapidxml.hpp"
@@ -793,5 +801,3 @@ NCVStatus loadFromXML(const std::string &filename,
#endif /* loadFromXML implementation switch */
#endif /* HAVE_CUDA */