First version of CascadeClassifier_GPU.

Only for VS2008 now.
Sample for it.
new NPP_staging for VS2008 only
This commit is contained in:
Anatoly Baksheev
2011-01-13 13:04:00 +00:00
parent 31e582e314
commit 1a94186195
17 changed files with 6067 additions and 186 deletions

View File

@@ -0,0 +1,362 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <cstdio>
#include <cuda_runtime.h>
#define CV_NO_BACKWARD_COMPATIBILITY
#include "opencv2/opencv.hpp"
#include "NCVHaarObjectDetection.hpp"
using namespace cv;
using namespace std;
// Frame size requested from a camera source; main() also uses it as the
// reference size when deriving the minimum object size in LargestFace mode.
const Size preferredVideoFrameSize(640, 480);
// Cascade file loaded when the program is started without arguments.
string preferredClassifier = "haarcascade_frontalface_alt.xml";
// Title of the display window created in main().
string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed";
// Prints the command-line usage string to stdout.
void printSyntax(void)
{
    static const char usage[] =
        "Syntax: FaceDetectionFeed.exe [-c cameranum | -v filename] classifier.xml\n";
    fputs(usage, stdout);
}
// printf-style text overlay.
//   img       - image the text is drawn onto
//   lineOffsY - zero-based text line index; each line is 1.5x the height of
//               the glyph "T" in the chosen font
//   color     - text colour
//   format    - printf format string followed by its arguments
// NOTE: relies on the MSVC CRT functions _vscprintf / vsprintf_s.
void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...)
{
    const int fontFace = CV_FONT_HERSHEY_PLAIN;
    const double fontScale = 1;

    // Height of a reference glyph determines the line pitch.
    int baseline;
    const Size glyphSize = cv::getTextSize("T", fontFace, fontScale, 1, &baseline);

    va_list args;
    va_start(args, format);

    // Size the buffer exactly: formatted length plus the terminating NUL.
    const int bufLen = _vscprintf(format, args) + 1;
    vector<char> text(bufLen);
    vsprintf_s(&text[0], bufLen, format, args);

    const int lineY = 3 * glyphSize.height * (lineOffsY + 1) / 2;
    putText(img, &text[0], Point(1, lineY), fontFace, fontScale, color);

    va_end(args);
}
// Runs one detection pass over a single-channel 8-bit frame.
//
// The function serves two purposes, selected by gpuAllocator.isCounting():
//  * counting pass  - srcdst must be NULL; every block between
//    NCV_SKIP_COND_BEGIN/END is skipped so only the allocations are replayed.
//  * real pass      - srcdst must be non-NULL; the frame is copied to the
//    device, processed, and copied back into *srcdst.
//
// Parameters:
//   srcdst                 - input/output frame (NULL in the counting pass)
//   width, height          - frame dimensions used for all buffers
//   bShowAllHypotheses     - when set, 0 is passed instead of 4 as the
//                            neighbour threshold of the detector
//   bLargestFace           - adds NCVPipeObjDet_FindLargestObject to the flags
//   haar, d_*/h_* vectors  - cascade descriptor and its device/host data
//   gpuAllocator/cpuAllocator - allocators for device and host temporaries
//   devProp                - properties of the device (major/minor are used)
// Returns NCV_SUCCESS, or the first failing status.
NCVStatus process(Mat *srcdst,
Ncv32u width, Ncv32u height,
NcvBool bShowAllHypotheses, NcvBool bLargestFace,
HaarClassifierCascadeDescriptor &haar,
NCVVector<HaarStage64> &d_haarStages, NCVVector<HaarClassifierNode128> &d_haarNodes,
NCVVector<HaarFeature64> &d_haarFeatures, NCVVector<HaarStage64> &h_haarStages,
INCVMemAllocator &gpuAllocator,
INCVMemAllocator &cpuAllocator,
cudaDeviceProp &devProp)
{
// srcdst must be NULL exactly when the allocator is in counting mode.
ncvAssertReturn(!((srcdst == NULL) ^ gpuAllocator.isCounting()), NCV_NULL_PTR);
NCVStatus ncvStat;
NCV_SET_SKIP_COND(gpuAllocator.isCounting());
// Device and host copies of the frame, plus room for up to 100 rectangles.
NCVMatrixAlloc<Ncv8u> d_src(gpuAllocator, width, height);
ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NCVMatrixAlloc<Ncv8u> h_src(cpuAllocator, width, height);
ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100);
ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
// cv::Mat header constructed over h_src's pointer and stride.
// NOTE(review): copyTo() below writes into h_src only if *srcdst has exactly
// width x height, CV_8U - confirm callers guarantee this.
Mat h_src_hdr(Size(width, height), CV_8U, h_src.ptr(), h_src.stride());
NCV_SKIP_COND_BEGIN
// Upload: frame -> host staging buffer -> device.
(*srcdst).copyTo(h_src_hdr);
ncvStat = h_src.copySolid(d_src, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
NCV_SKIP_COND_END
NcvSize32u roi;
roi.width = d_src.width();
roi.height = d_src.height();
Ncv32u numDetections;
// Multi-scale detection; the literals 1.2f and 1 are presumably the scale
// step and pixel step - TODO confirm against ncvDetectObjectsMultiScale_device.
// NCVPipeObjDet_VisualizeInPlace is always requested; presumably results are
// drawn into d_src, which is why d_rects is never read back here.
ncvStat = ncvDetectObjectsMultiScale_device(
d_src, roi, d_rects, numDetections, haar, h_haarStages,
d_haarStages, d_haarNodes, d_haarFeatures,
haar.ClassifierSize,
bShowAllHypotheses ? 0 : 4,
1.2f, 1,
(bLargestFace ? NCVPipeObjDet_FindLargestObject : 0) | NCVPipeObjDet_VisualizeInPlace,
gpuAllocator, cpuAllocator, devProp.major, devProp.minor, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
NCV_SKIP_COND_BEGIN
// Download: device -> host staging buffer -> caller's frame.
ncvStat = d_src.copySolid(h_src, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
h_src_hdr.copyTo(*srcdst);
NCV_SKIP_COND_END
return NCV_SUCCESS;
}
// Entry point of the face-detection sample.
//
// Usage: FaceDetectionFeed.exe [-c cameranum | -v filename] classifier.xml
// With no arguments, camera 0 and the preferred classifier are used.
//
// Flow: open the video source, load the cascade for both OpenCV (CPU) and NCV
// (GPU), run a counting pass of process() to size the stack allocators, then
// loop over frames, detecting with whichever implementation is selected.
// Returns 0 on normal exit and -1 on any initialisation or processing error.
int main( int argc, const char** argv )
{
    NCVStatus ncvStat;

    printf("NVIDIA Computer Vision SDK\n");
    printf("Face Detection in video and live feed\n");
    printf("=========================================\n");
    printf(" Esc - Quit\n");
    printf(" Space - Switch between NCV and OpenCV\n");
    printf(" L - Switch between FullSearch and LargestFace modes\n");
    printf(" U - Toggle unfiltered hypotheses visualization in FullSearch\n");

    // Either no arguments at all, or exactly "<-c N | -v file> classifier.xml".
    if (argc != 4 && argc != 1)
        return printSyntax(), -1;

    VideoCapture capture;
    Size frameSize;

    if (argc == 1 || strcmp(argv[1], "-c") == 0)
    {
        // Camera input is specified.
        // The camera number is argv[2] of the 4-argument form; the former
        // test against argc == 3 could never be true, so "-c N" was ignored.
        int camIdx = (argc == 4) ? atoi(argv[2]) : 0;
        if(!capture.open(camIdx))
            return printf("Error opening camera\n"), -1;

        capture.set(CV_CAP_PROP_FRAME_WIDTH, preferredVideoFrameSize.width);
        capture.set(CV_CAP_PROP_FRAME_HEIGHT, preferredVideoFrameSize.height);
        capture.set(CV_CAP_PROP_FPS, 25);
        frameSize = preferredVideoFrameSize;
    }
    else if (strcmp(argv[1], "-v") == 0)
    {
        // Video file input (avi)
        if(!capture.open(argv[2]))
            return printf("Error opening video file\n"), -1;

        frameSize.width = (int)capture.get(CV_CAP_PROP_FRAME_WIDTH);
        frameSize.height = (int)capture.get(CV_CAP_PROP_FRAME_HEIGHT);
    }
    else
        return printSyntax(), -1;

    NcvBool bUseOpenCV = true;
    NcvBool bLargestFace = true;
    NcvBool bShowAllHypotheses = false;

    string classifierFile = (argc == 1) ? preferredClassifier : argv[3];

    CascadeClassifier classifierOpenCV;
    if (!classifierOpenCV.load(classifierFile))
        return printf("Error (in OpenCV) opening classifier\n"), printSyntax(), -1;

    int devId;
    ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);
    cudaDeviceProp devProp;
    ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1);
    printf("Using GPU %d %s, arch=%d.%d\n", devId, devProp.name, devProp.major, devProp.minor);

    //==============================================================================
    //
    // Load the classifier from file (assuming its size is about 1 mb)
    // using a simple allocator
    //
    //==============================================================================

    NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice);
    ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1);
    NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned);
    ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1);

    Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
    ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", -1);

    NCVVectorAlloc<HaarStage64> h_haarStages(cpuCascadeAllocator, haarNumStages);
    ncvAssertPrintReturn(h_haarStages.isMemAllocated(), "Error in cascade CPU allocator", -1);
    NCVVectorAlloc<HaarClassifierNode128> h_haarNodes(cpuCascadeAllocator, haarNumNodes);
    ncvAssertPrintReturn(h_haarNodes.isMemAllocated(), "Error in cascade CPU allocator", -1);
    NCVVectorAlloc<HaarFeature64> h_haarFeatures(cpuCascadeAllocator, haarNumFeatures);
    ncvAssertPrintReturn(h_haarFeatures.isMemAllocated(), "Error in cascade CPU allocator", -1);

    HaarClassifierCascadeDescriptor haar;
    ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, h_haarStages, h_haarNodes, h_haarFeatures);
    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", -1);

    NCVVectorAlloc<HaarStage64> d_haarStages(gpuCascadeAllocator, haarNumStages);
    ncvAssertPrintReturn(d_haarStages.isMemAllocated(), "Error in cascade GPU allocator", -1);
    NCVVectorAlloc<HaarClassifierNode128> d_haarNodes(gpuCascadeAllocator, haarNumNodes);
    ncvAssertPrintReturn(d_haarNodes.isMemAllocated(), "Error in cascade GPU allocator", -1);
    NCVVectorAlloc<HaarFeature64> d_haarFeatures(gpuCascadeAllocator, haarNumFeatures);
    ncvAssertPrintReturn(d_haarFeatures.isMemAllocated(), "Error in cascade GPU allocator", -1);

    ncvStat = h_haarStages.copySolid(d_haarStages, 0);
    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
    ncvStat = h_haarNodes.copySolid(d_haarNodes, 0);
    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
    ncvStat = h_haarFeatures.copySolid(d_haarFeatures, 0);
    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);

    //==============================================================================
    //
    // Calculate memory requirements and create real allocators
    //
    //==============================================================================

    NCVMemStackAllocator gpuCounter(devProp.textureAlignment);
    ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", -1);
    NCVMemStackAllocator cpuCounter(devProp.textureAlignment);
    ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", -1);

    // Dry run with counting allocators: no data is touched, only sizes recorded.
    ncvStat = process(NULL, frameSize.width, frameSize.height,
                      false, false, haar,
                      d_haarStages, d_haarNodes,
                      d_haarFeatures, h_haarStages,
                      gpuCounter, cpuCounter, devProp);
    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);

    NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment);
    ncvAssertPrintReturn(gpuAllocator.isInitialized(), "Error creating GPU memory allocator", -1);
    NCVMemStackAllocator cpuAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), devProp.textureAlignment);
    ncvAssertPrintReturn(cpuAllocator.isInitialized(), "Error creating CPU memory allocator", -1);

    printf("Initialized for frame size [%dx%d]\n", frameSize.width, frameSize.height);

    //==============================================================================
    //
    // Main processing loop
    //
    //==============================================================================

    namedWindow(wndTitle, 1);

    Mat frame, gray, frameDisp;

    for(;;)
    {
        // For camera and video file, capture the next image
        capture >> frame;
        if (frame.empty())
            break;

        cvtColor(frame, gray, CV_BGR2GRAY);

        // process
        NcvSize32u minSize = haar.ClassifierSize;
        if (bLargestFace)
        {
            // Scale the minimum object size up so the CPU path starts its
            // search from a window roughly 1/2.5 of the frame.
            Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width;
            Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height;
            Ncv32u ratioSmallest = std::min(ratioX, ratioY);
            ratioSmallest = (Ncv32u)std::max(ratioSmallest / 2.5f, 1.f);
            minSize.width *= ratioSmallest;
            minSize.height *= ratioSmallest;
        }

        NcvTimer timer = ncvStartTimer();

        if (!bUseOpenCV)
        {
            ncvStat = process(&gray, frameSize.width, frameSize.height,
                              bShowAllHypotheses, bLargestFace, haar,
                              d_haarStages, d_haarNodes,
                              d_haarFeatures, h_haarStages,
                              gpuAllocator, cpuAllocator, devProp);
            // This is the real detection pass, not the counting pass.
            ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in GPU processing pass", -1);
        }
        else
        {
            vector<Rect> rectsOpenCV;

            classifierOpenCV.detectMultiScale(
                gray,
                rectsOpenCV,
                1.2f,
                bShowAllHypotheses && !bLargestFace ? 0 : 4,
                (bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE,
                Size(minSize.width, minSize.height));

            for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt)
                rectangle(gray, rectsOpenCV[rt], Scalar(255));
        }

        Ncv32f avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);

        cvtColor(gray, frameDisp, CV_GRAY2BGR);

        imagePrintf(frameDisp, 0, CV_RGB(255, 0,0), "Space - Switch NCV%s / OpenCV%s", bUseOpenCV?"":" (ON)", bUseOpenCV?" (ON)":"");
        imagePrintf(frameDisp, 1, CV_RGB(255, 0,0), "L - Switch FullSearch%s / LargestFace%s modes", bLargestFace?"":" (ON)", bLargestFace?" (ON)":"");
        imagePrintf(frameDisp, 2, CV_RGB(255, 0,0), "U - Toggle unfiltered hypotheses visualization in FullSearch %s", bShowAllHypotheses?"(ON)":"(OFF)");
        imagePrintf(frameDisp, 3, CV_RGB(118,185,0), " Running at %f FPS on %s", 1000.0f / avgTime, bUseOpenCV?"CPU":"GPU");

        cv::imshow(wndTitle, frameDisp);

        switch (cvWaitKey(1))
        {
        case ' ':
            bUseOpenCV = !bUseOpenCV;
            break;
        case 'L':case 'l':
            bLargestFace = !bLargestFace;
            break;
        case 'U':case 'u':
            bShowAllHypotheses = !bShowAllHypotheses;
            break;
        case 27:
            return 0;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,571 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <precomp.hpp>
#if !defined (HAVE_CUDA)
#else /* !defined (HAVE_CUDA) */
#include <stdarg.h>
#include "NCV.hpp"
//==============================================================================
//
// Error handling helpers
//
//==============================================================================
// Default debug sink: prints the message verbatim to stdout.
static void stdioDebugOutput(const char *msg)
{
printf("%s", msg);
}
// Currently installed debug sink; replaced through ncvSetDebugOutputHandler().
static NCVDebugOutputHandler *debugOutputHandler = stdioDebugOutput;
// Formats a printf-style message into a fixed 1024-byte buffer (longer
// messages are truncated) and forwards it to the installed debug handler.
// If the handler was set to NULL the message is silently dropped instead of
// calling through a null function pointer.
// NOTE: uses the MSVC CRT function vsnprintf_s.
void ncvDebugOutput(const char *msg, ...)
{
    if (debugOutputHandler == NULL)
    {
        return;
    }

    const int K_DEBUG_STRING_MAXLEN = 1024;
    char buffer[K_DEBUG_STRING_MAXLEN];

    va_list args;
    va_start(args, msg);
    vsnprintf_s(buffer, K_DEBUG_STRING_MAXLEN, K_DEBUG_STRING_MAXLEN-1, msg, args);
    va_end (args);

    debugOutputHandler(buffer);
}
// Installs func as the sink that ncvDebugOutput() forwards formatted
// messages to. The pointer is stored as-is, without validation.
void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
{
debugOutputHandler = func;
}
//==============================================================================
//
// Memory wrappers and helpers
//
//==============================================================================
// Stores the textureAlignment property of the current CUDA device in
// `alignment`. Returns NCV_CUDA_ERROR (leaving `alignment` untouched) if the
// device or its properties cannot be queried, NCV_SUCCESS otherwise.
NCVStatus GPUAlignmentValue(Ncv32u &alignment)
{
    int deviceId;
    ncvAssertCUDAReturn(cudaGetDevice(&deviceId), NCV_CUDA_ERROR);

    cudaDeviceProp deviceProps;
    ncvAssertCUDAReturn(cudaGetDeviceProperties(&deviceProps, deviceId), NCV_CUDA_ERROR);

    alignment = deviceProps.textureAlignment;
    return NCV_SUCCESS;
}
// Rounds `what` up to the next multiple of `alignment`.
// The mask arithmetic is only meaningful when `alignment` is a power of two;
// all computation is done in 32-bit unsigned arithmetic.
Ncv32u alignUp(Ncv32u what, Ncv32u alignment)
{
    const Ncv32u lowBits = alignment - 1;
    return (what + lowBits) & ~lowBits;
}
void NCVMemPtr::clear()
{
ptr = NULL;
memtype = NCVMemoryTypeNone;
}
void NCVMemSegment::clear()
{
begin.clear();
size = 0;
}
// Copies `sz` bytes from `src` to `dst`, choosing the transfer primitive from
// the residence of both buffers:
//   host   -> host   : memcpy
//   device -> host   : cudaMemcpy / cudaMemcpyAsync (DeviceToHost)
//   host   -> device : cudaMemcpy / cudaMemcpyAsync (HostToDevice)
//   device -> device : cudaMemcpy / cudaMemcpyAsync (DeviceToDevice)
// The asynchronous variant is used whenever `cuStream` is non-zero.
// Returns NCV_MEM_RESIDENCE_ERROR if either memory type is neither host
// (pageable or pinned) nor device, NCV_CUDA_ERROR if the CUDA call fails,
// and NCV_SUCCESS otherwise.
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, const void *src, NCVMemoryType srcType, size_t sz, cudaStream_t cuStream)
{
    const bool dstOnHost = (dstType == NCVMemoryTypeHostPageable) || (dstType == NCVMemoryTypeHostPinned);
    const bool srcOnHost = (srcType == NCVMemoryTypeHostPageable) || (srcType == NCVMemoryTypeHostPinned);
    const bool dstOnDevice = (dstType == NCVMemoryTypeDevice);
    const bool srcOnDevice = (srcType == NCVMemoryTypeDevice);

    // Anything that is neither host nor device memory cannot be copied.
    if (!(dstOnHost || dstOnDevice) || !(srcOnHost || srcOnDevice))
    {
        return NCV_MEM_RESIDENCE_ERROR;
    }

    // Pure host transfer never involves the CUDA runtime.
    if (dstOnHost && srcOnHost)
    {
        memcpy(dst, src, sz);
        return NCV_SUCCESS;
    }

    cudaMemcpyKind direction;
    if (dstOnHost)
    {
        direction = cudaMemcpyDeviceToHost;
    }
    else if (srcOnHost)
    {
        direction = cudaMemcpyHostToDevice;
    }
    else
    {
        direction = cudaMemcpyDeviceToDevice;
    }

    if (cuStream != 0)
    {
        ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, direction, cuStream), NCV_CUDA_ERROR);
    }
    else
    {
        ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, direction), NCV_CUDA_ERROR);
    }
    return NCV_SUCCESS;
}
//===================================================================
//
// NCVMemStackAllocator class members implementation
//
//===================================================================
// Constructs a counting allocator: memory type None, no backing storage.
// Such an instance only tracks sizes (see alloc() and maxSize()).
// A diagnostic is printed if `alignment` is not a power of two.
NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
    :
    currentSize(0),
    _maxSize(0),
    allocBegin(NULL),
    begin(NULL),
    _memType(NCVMemoryTypeNone),
    _alignment(alignment)
{
    const Ncv32u lowBits = alignment - 1;
    ncvAssertPrintCheck((alignment & lowBits) == 0, "NCVMemStackAllocator ctor:: alignment not power of 2");
}
// Constructs an allocator backed by one block of `capacity` bytes of the
// requested memory type (device, pinned host or pageable host).
//
// A zero capacity performs no allocation; allocBegin is set to the sentinel
// address 0x1 so that isInitialized() still reports true. On allocation
// failure allocBegin stays NULL, which isInitialized() reports as false.
// begin/end are always initialised, including on the early-return paths taken
// when a CUDA allocation fails.
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment)
    :
    currentSize(0),
    _maxSize(0),
    allocBegin(NULL),
    begin(NULL),
    _memType(memT),
    _alignment(alignment)
{
    // Assigned here rather than in the initialiser list so the list keeps the
    // same members, in the same order, as the counting constructor.
    end = NULL;

    NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;
    ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: _alignment not power of 2");

    if (capacity != 0)
    {
        switch (memT)
        {
        case NCVMemoryTypeDevice:
            ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
            break;
        case NCVMemoryTypeHostPinned:
            ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
            break;
        case NCVMemoryTypeHostPageable:
            allocBegin = (Ncv8u *)malloc(capacity);
            break;
        }
    }
    else
    {
        // Nothing to allocate: mark the allocator as valid with a sentinel
        // instead of overwriting (and leaking) a real zero-byte allocation.
        allocBegin = (Ncv8u *)(0x1);
    }

    if (!isCounting())
    {
        begin = allocBegin;
        end = begin + capacity;
    }
}
// Releases the backing block, if any.
// Prints a diagnostic when segments are still outstanding (currentSize != 0)
// but frees the memory regardless. The 0x1 sentinel that the constructor
// installs for zero-capacity allocators is not a real allocation and is
// therefore never handed to cudaFree / cudaFreeHost / free.
NCVMemStackAllocator::~NCVMemStackAllocator()
{
    if (allocBegin != NULL)
    {
        ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");

        if (allocBegin != (Ncv8u *)(0x1))
        {
            switch (_memType)
            {
            case NCVMemoryTypeDevice:
                ncvAssertCUDAReturn(cudaFree(allocBegin), );
                break;
            case NCVMemoryTypeHostPinned:
                ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
                break;
            case NCVMemoryTypeHostPageable:
                free(allocBegin);
                break;
            }
        }

        allocBegin = NULL;
    }
}
// Carves the next `size` bytes (rounded up to the allocator alignment) off
// the top of the stack and describes them in `seg`.
//
// In counting mode no capacity check is made; only the running and peak
// sizes are tracked so that maxSize() can later size a real allocator.
//
// Returns NCV_ALLOCATOR_BAD_ALLOC if the allocator is not initialised and
// NCV_ALLOCATOR_INSUFFICIENT_CAPACITY if the request does not fit. In both
// cases `seg` is left cleared and the allocator's accounting is unchanged.
NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size)
{
    seg.clear();
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);

    size = alignUp(size, this->_alignment);

    // Validate before touching any bookkeeping so a rejected request does not
    // leave currentSize/_maxSize inflated.
    if (!isCounting())
    {
        size_t availSize = end - begin;
        ncvAssertReturn(size <= availSize, NCV_ALLOCATOR_INSUFFICIENT_CAPACITY);
    }

    this->currentSize += size;
    this->_maxSize = std::max(this->_maxSize, this->currentSize);

    seg.begin.ptr = begin;
    seg.begin.memtype = this->_memType;
    seg.size = size;
    begin += size;

    return NCV_SUCCESS;
}
// Returns `seg` to the allocator. Segments must be released in the reverse
// order of allocation: `seg` has to be the one currently on top of the stack,
// otherwise NCV_ALLOCATOR_DEALLOC_ORDER is returned.
// Other failures: NCV_ALLOCATOR_BAD_ALLOC (allocator not initialised) and
// NCV_ALLOCATOR_BAD_DEALLOC (foreign memory type, NULL segment outside
// counting mode, or stack pointer dropping below the start of the block).
// On success `seg` is cleared.
NCVStatus NCVMemStackAllocator::dealloc(NCVMemSegment &seg)
{
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
    ncvAssertReturn(this->_memType == seg.begin.memtype, NCV_ALLOCATOR_BAD_DEALLOC);
    ncvAssertReturn(isCounting() || seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC);

    // Only the most recently allocated segment may be released.
    ncvAssertReturn(begin - seg.size == seg.begin.ptr, NCV_ALLOCATOR_DEALLOC_ORDER);

    this->begin -= seg.size;
    this->currentSize -= seg.size;
    seg.clear();

    ncvAssertReturn(this->begin >= this->allocBegin, NCV_ALLOCATOR_BAD_DEALLOC);
    return NCV_SUCCESS;
}
// True when the allocator is usable: either it is a counting allocator with a
// power-of-two alignment, or it owns a non-NULL backing block.
// (The alignment test applies to the counting case only, matching the
// precedence of the original `a && b || c` expression.)
NcvBool NCVMemStackAllocator::isInitialized(void) const
{
    const NcvBool bPow2Alignment = (_alignment & (_alignment - 1)) == 0;
    if (bPow2Alignment && isCounting())
    {
        return true;
    }
    return allocBegin != NULL;
}
// A stack allocator is in counting mode when its memory type is None.
NcvBool NCVMemStackAllocator::isCounting(void) const
{
    return NCVMemoryTypeNone == _memType;
}
// Memory type this allocator hands out (None for a counting allocator).
NCVMemoryType NCVMemStackAllocator::memType(void) const
{
    return _memType;
}
// Alignment, in bytes, that every allocation size is rounded up to.
Ncv32u NCVMemStackAllocator::alignment(void) const
{
    return _alignment;
}
size_t NCVMemStackAllocator::maxSize(void) const
{
return this->_maxSize;
}
//===================================================================
//
// NCVMemNativeAllocator class members implementation
//
//===================================================================
// Constructs an allocator that forwards each request to the native
// allocation routine of the given memory type.
//
// _alignment is zeroed first so that isInitialized() (which tests
// _alignment != 0) reliably reports failure when construction bails out:
// either because memT is None (counting is not supported here) or because
// the device alignment could not be queried.
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT)
    :
    currentSize(0),
    _maxSize(0),
    _memType(memT)
{
    this->_alignment = 0;
    ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
    ncvAssertPrintReturn(NCV_SUCCESS == GPUAlignmentValue(this->_alignment), "NCVMemNativeAllocator ctor:: couldn't get device _alignment", );
}
// Owns no memory itself; only reports segments that were never deallocated.
NCVMemNativeAllocator::~NCVMemNativeAllocator()
{
    ncvAssertPrintCheck(this->currentSize == 0, "NCVMemNativeAllocator dtor:: detected memory leak");
}
// Allocates `size` bytes with the native routine for this allocator's memory
// type and describes the result in `seg`.
//
// Bookkeeping uses the size rounded up to the allocator alignment, while
// seg.size records the size actually requested; dealloc() applies the same
// rounding.
//
// Returns NCV_ALLOCATOR_BAD_ALLOC if the allocator is not initialised, the
// memory type is not one of device / pinned / pageable, or malloc fails for a
// non-zero request; NCV_CUDA_ERROR if a CUDA allocation fails. `seg` is left
// cleared on failure.
NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size)
{
    seg.clear();
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);

    switch (this->_memType)
    {
    case NCVMemoryTypeDevice:
        ncvAssertCUDAReturn(cudaMalloc(&seg.begin.ptr, size), NCV_CUDA_ERROR);
        break;
    case NCVMemoryTypeHostPinned:
        ncvAssertCUDAReturn(cudaMallocHost(&seg.begin.ptr, size), NCV_CUDA_ERROR);
        break;
    case NCVMemoryTypeHostPageable:
        seg.begin.ptr = (Ncv8u *)malloc(size);
        // malloc(0) may legitimately return NULL; only a failed non-empty
        // request is an error.
        ncvAssertReturn(seg.begin.ptr != NULL || size == 0, NCV_ALLOCATOR_BAD_ALLOC);
        break;
    default:
        // No native routine exists for this memory type.
        ncvAssertReturn(false, NCV_ALLOCATOR_BAD_ALLOC);
    }

    this->currentSize += alignUp(size, this->_alignment);
    this->_maxSize = std::max(this->_maxSize, this->currentSize);

    seg.begin.memtype = this->_memType;
    seg.size = size;

    return NCV_SUCCESS;
}
// Frees a segment previously obtained from alloc() and clears `seg`.
// The aligned segment size is subtracted from the running total before the
// native free routine is invoked.
// Returns NCV_ALLOCATOR_BAD_ALLOC (allocator not initialised),
// NCV_ALLOCATOR_BAD_DEALLOC (foreign memory type, NULL pointer, or size
// larger than what is currently accounted for) or NCV_CUDA_ERROR.
NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg)
{
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
    ncvAssertReturn(this->_memType == seg.begin.memtype, NCV_ALLOCATOR_BAD_DEALLOC);
    ncvAssertReturn(NULL != seg.begin.ptr, NCV_ALLOCATOR_BAD_DEALLOC);

    const Ncv32u alignedSize = alignUp(seg.size, this->_alignment);
    ncvAssertReturn(currentSize >= alignedSize, NCV_ALLOCATOR_BAD_DEALLOC);
    currentSize -= alignedSize;

    if (this->_memType == NCVMemoryTypeDevice)
    {
        ncvAssertCUDAReturn(cudaFree(seg.begin.ptr), NCV_CUDA_ERROR);
    }
    else if (this->_memType == NCVMemoryTypeHostPinned)
    {
        ncvAssertCUDAReturn(cudaFreeHost(seg.begin.ptr), NCV_CUDA_ERROR);
    }
    else if (this->_memType == NCVMemoryTypeHostPageable)
    {
        free(seg.begin.ptr);
    }

    seg.clear();
    return NCV_SUCCESS;
}
// The allocator counts as initialised once a non-zero alignment is stored.
NcvBool NCVMemNativeAllocator::isInitialized(void) const
{
    return 0 != _alignment;
}
// The native allocator never operates in counting mode (its constructor
// rejects NCVMemoryTypeNone), so this always reports false.
NcvBool NCVMemNativeAllocator::isCounting(void) const
{
return false;
}
// Memory type passed to the constructor.
NCVMemoryType NCVMemNativeAllocator::memType(void) const
{
    return _memType;
}
// Alignment value obtained from GPUAlignmentValue() at construction.
Ncv32u NCVMemNativeAllocator::alignment(void) const
{
    return _alignment;
}
size_t NCVMemNativeAllocator::maxSize(void) const
{
return this->_maxSize;
}
//===================================================================
//
// Time and timer routines
//
//===================================================================
typedef struct _NcvTimeMoment NcvTimeMoment;
#if defined(_WIN32) || defined(_WIN64)
#include <Windows.h>
// Windows time stamp: a performance-counter reading together with the counter
// frequency sampled at the same time.
typedef struct _NcvTimeMoment
{
    LONGLONG moment;   // QueryPerformanceCounter value
    LONGLONG freq;     // QueryPerformanceFrequency value
} NcvTimeMoment;
// Samples the performance-counter frequency and then the counter itself
// into *t.
static void _ncvQueryMoment(NcvTimeMoment *t)
{
    LARGE_INTEGER *pFreq = (LARGE_INTEGER *)&t->freq;
    LARGE_INTEGER *pMoment = (LARGE_INTEGER *)&t->moment;
    QueryPerformanceFrequency(pFreq);
    QueryPerformanceCounter(pMoment);
}
// Converts a single counter reading to microseconds using its own frequency.
double _ncvMomentToMicroseconds(NcvTimeMoment *t)
{
    const double scaledTicks = 1000000.0 * t->moment;
    return scaledTicks / t->freq;
}
// Elapsed time from *t1 to *t2 in microseconds. The tick difference is
// divided by the mean of the two sampled frequencies (hence the factor 2 over
// their sum).
double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
{
    const LONGLONG elapsedTicks = (t2->moment) - (t1->moment);
    const LONGLONG freqSum = t1->freq + t2->freq;
    return 1000000.0 * 2 * elapsedTicks / freqSum;
}
// Elapsed time from *t1 to *t2 in milliseconds, using the mean of the two
// sampled counter frequencies.
double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
{
    const LONGLONG elapsedTicks = (t2->moment) - (t1->moment);
    const LONGLONG freqSum = t1->freq + t2->freq;
    return 1000.0 * 2 * elapsedTicks / freqSum;
}
#elif defined(__unix__)
#include <sys/time.h>
// Unix time stamp: the two output values of gettimeofday().
typedef struct _NcvTimeMoment
{
struct timeval tv;
struct timezone tz;
} NcvTimeMoment;
// Fills *t with the current time of day via gettimeofday().
// The return value of gettimeofday() is not checked.
void _ncvQueryMoment(NcvTimeMoment *t)
{
gettimeofday(& t->tv, & t->tz);
}
// Converts a time stamp to microseconds: seconds * 1e6 + microseconds.
double _ncvMomentToMicroseconds(NcvTimeMoment *t)
{
    const double fromSeconds = 1000000.0 * t->tv.tv_sec;
    const double fromMicros = (double)t->tv.tv_usec;
    return fromSeconds + fromMicros;
}
// Elapsed time from *t1 to *t2 in microseconds.
double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
{
    return (((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000000 + (double)t2->tv.tv_usec - (double)t1->tv.tv_usec);
}
// Elapsed time from *t1 to *t2 in milliseconds.
// ncvEndQueryTimerMs() calls this function; it previously existed only in the
// Windows branch, so the file could not be built for unix targets.
double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
{
    return (((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000.0 + ((double)t2->tv.tv_usec - (double)t1->tv.tv_usec) / 1000.0);
}
#endif //#if defined(_WIN32) || defined(_WIN64)
// Timer state: the start moment and the moment recorded when it is queried.
struct _NcvTimer
{
    NcvTimeMoment t1;   // set by ncvStartTimer()
    NcvTimeMoment t2;   // set by ncvEndQueryTimerUs() / ncvEndQueryTimerMs()
};
// Allocates a timer and records the current moment as its start.
// Returns NULL if the timer cannot be allocated. The timer is released by
// ncvEndQueryTimerUs() or ncvEndQueryTimerMs().
NcvTimer ncvStartTimer(void)
{
    struct _NcvTimer *t = (struct _NcvTimer *)malloc(sizeof(struct _NcvTimer));
    if (t == NULL)
    {
        return NULL;
    }
    _ncvQueryMoment(&t->t1);
    return t;
}
// Stops timer `t`, frees it, and returns the elapsed time in microseconds.
// `t` must not be used after this call. A NULL timer yields 0.0.
double ncvEndQueryTimerUs(NcvTimer t)
{
    if (t == NULL)
    {
        return 0.0;
    }
    double res;
    _ncvQueryMoment(&t->t2);
    res = _ncvMomentsDiffToMicroseconds(&t->t1, &t->t2);
    free(t);
    return res;
}
// Stops timer `t`, frees it, and returns the elapsed time in milliseconds.
// `t` must not be used after this call. A NULL timer yields 0.0.
double ncvEndQueryTimerMs(NcvTimer t)
{
    if (t == NULL)
    {
        return 0.0;
    }
    double res;
    _ncvQueryMoment(&t->t2);
    res = _ncvMomentsDiffToMilliseconds(&t->t1, &t->t2);
    free(t);
    return res;
}
#endif /* !defined (HAVE_CUDA) */

View File

@@ -0,0 +1,837 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef _ncv_hpp_
#define _ncv_hpp_
#include <cuda_runtime.h>
#include "npp_staging.h"
//==============================================================================
//
// Alignment macros
//
//==============================================================================
// Supplies __align__(n) when it is not already defined and the translation
// unit is not compiled with __CUDACC__ defined.
// NOTE: on platforms that define neither _WIN32/_WIN64 nor __unix__ no
// definition is provided here.
#if !defined(__align__) && !defined(__CUDACC__)
#if defined(_WIN32) || defined(_WIN64)
#define __align__(n) __declspec(align(n))
#elif defined(__unix__)
#define __align__(n) __attribute__((__aligned__(n)))
#endif
#endif
//==============================================================================
//
// Integral and compound types of guaranteed size
//
//==============================================================================
// Scalar aliases; their sizes are verified by the compile-time assertions
// that follow the compound types.
typedef bool NcvBool;
typedef long long Ncv64s;
typedef unsigned long long Ncv64u;
typedef int Ncv32s;
typedef unsigned int Ncv32u;
typedef short Ncv16s;
typedef unsigned short Ncv16u;
// Plain `char` has implementation-defined signedness; spell out `signed` so
// the "s" type is signed on every compiler and target.
typedef signed char Ncv8s;
typedef unsigned char Ncv8u;
typedef float Ncv32f;
typedef double Ncv64f;
// Rectangle with 8-bit unsigned fields; a compile-time assertion below checks
// that the whole struct has the size of one Ncv32u.
typedef struct
{
Ncv8u x;
Ncv8u y;
Ncv8u width;
Ncv8u height;
} NcvRect8u;
// Rectangle with 32-bit signed fields.
typedef struct
{
Ncv32s x; ///< x-coordinate of upper left corner.
Ncv32s y; ///< y-coordinate of upper left corner.
Ncv32s width; ///< Rectangle width.
Ncv32s height; ///< Rectangle height.
} NcvRect32s;
// Rectangle with 32-bit unsigned fields.
typedef struct
{
Ncv32u x; ///< x-coordinate of upper left corner.
Ncv32u y; ///< y-coordinate of upper left corner.
Ncv32u width; ///< Rectangle width.
Ncv32u height; ///< Rectangle height.
} NcvRect32u;
// Two-dimensional size with 32-bit signed fields.
typedef struct
{
Ncv32s width; ///< Width.
Ncv32s height; ///< Height.
} NcvSize32s;
// Two-dimensional size with 32-bit unsigned fields.
typedef struct
{
Ncv32u width; ///< Width.
Ncv32u height; ///< Height.
} NcvSize32u;
// Compile-time checks that the scalar aliases have the advertised widths and
// that the compound types contain no padding.
NPPST_CT_ASSERT(sizeof(NcvBool) <= 4);
NPPST_CT_ASSERT(sizeof(Ncv64s) == 8);
NPPST_CT_ASSERT(sizeof(Ncv64u) == 8);
NPPST_CT_ASSERT(sizeof(Ncv32s) == 4);
NPPST_CT_ASSERT(sizeof(Ncv32u) == 4);
NPPST_CT_ASSERT(sizeof(Ncv16s) == 2);
NPPST_CT_ASSERT(sizeof(Ncv16u) == 2);
NPPST_CT_ASSERT(sizeof(Ncv8s) == 1);
NPPST_CT_ASSERT(sizeof(Ncv8u) == 1);
NPPST_CT_ASSERT(sizeof(Ncv32f) == 4);
NPPST_CT_ASSERT(sizeof(Ncv64f) == 8);
NPPST_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
NPPST_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
NPPST_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
// NcvSize32s was the only compound type left unchecked.
NPPST_CT_ASSERT(sizeof(NcvSize32s) == 2 * sizeof(Ncv32s));
NPPST_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
//==============================================================================
//
// Persistent constants
//
//==============================================================================
// Warp width and its base-2 logarithm; the width is derived from the
// logarithm so the two constants cannot drift apart.
const Ncv32u K_LOG2_WARP_SIZE = 5;
const Ncv32u K_WARP_SIZE = 1u << K_LOG2_WARP_SIZE;
//==============================================================================
//
// Error handling
//
//==============================================================================
// Two-level stringification: the outer macro expands its argument (for
// example __LINE__) before the inner one turns it into a string literal.
#define NCV_CT_PREP_STRINGIZE_AUX(x) #x
#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x)
// printf-style diagnostic output, routed through the installed handler.
void ncvDebugOutput(const char *msg, ...);
// Signature of a debug sink: receives one already-formatted message.
typedef void NCVDebugOutputHandler(const char* msg);
// Replaces the debug sink used by ncvDebugOutput().
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
// Expression macro: evaluates to true when `pred` holds; otherwise prints
// `msg` with the file name and line number and evaluates to false.
// `msg` must be a string literal, since it is concatenated with adjacent
// literals.
#define ncvAssertPrintCheck(pred, msg) \
((pred) ? true : (ncvDebugOutput("\n%s\n", \
"NCV Assertion Failed: " msg ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__) \
), false))
// Statement macro: if `pred` fails, prints `msg` (see ncvAssertPrintCheck)
// and returns `err` from the enclosing function. `err` may be left empty in
// functions without a return value. The "if (...) ; else return" form lets
// the semicolon written at the use site terminate the statement.
#define ncvAssertPrintReturn(pred, msg, err) \
if (ncvAssertPrintCheck(pred, msg)) ; else return err
// Statement macro: if `pred` fails, prints `err` as an integer together with
// the file name and line number, then returns `err` from the enclosing
// function. `err` is expanded twice, so it should be free of side effects.
#define ncvAssertReturn(pred, err) \
do \
{ \
if (!(pred)) \
{ \
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: retcode=", (int)err, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
return err; \
} \
} while (0)
// Statement macro: evaluates `ncvOp` once into a local NCVStatus; if it is
// not NCV_SUCCESS, prints the status with file name and line number and
// returns that status from the enclosing function.
#define ncvAssertReturnNcvStat(ncvOp) \
do \
{ \
NCVStatus _ncvStat = ncvOp; \
if (NCV_SUCCESS != _ncvStat) \
{ \
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: NcvStat=", (int)_ncvStat, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
return _ncvStat; \
} \
} while (0)
/**
 * Statement macro: evaluates a CUDA runtime call once, then queries
 * cudaGetLastError(). If either reports a failure, a diagnostic with the file
 * name and line number is printed and errCode is returned from the enclosing
 * function.
 *
 * The diagnostic prints the first failing status: the call's own result takes
 * precedence, otherwise the value from cudaGetLastError(). The two codes are
 * not OR-ed together, because cudaError_t values are enumerators rather than
 * bit flags and their bitwise OR does not identify any real error.
 */
#define ncvAssertCUDAReturn(cudacall, errCode) \
    do \
    { \
        cudaError_t resCall = cudacall; \
        cudaError_t resGLE = cudaGetLastError(); \
        if (cudaSuccess != resCall || cudaSuccess != resGLE) \
        { \
            ncvDebugOutput("\n%s%d%s\n", "NCV CUDA Assertion Failed: cudaError_t=", (int)((cudaSuccess != resCall) ? resCall : resGLE), ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
            return errCode; \
        } \
    } while (0)
/**
* Return-codes for status notification, errors and warnings.
* NCV_SUCCESS is the first enumerator (value 0); the assertion macros in this
* header compare against it and print the other codes as integers.
*/
enum NCVStatus
{
// Success
NCV_SUCCESS,
// General errors
NCV_CUDA_ERROR,
NCV_NPP_ERROR,
NCV_FILE_ERROR,
NCV_NULL_PTR,
NCV_INCONSISTENT_INPUT,
NCV_TEXTURE_BIND_ERROR,
NCV_DIMENSIONS_INVALID,
// Invalid-argument errors
NCV_INVALID_ROI,
NCV_INVALID_STEP,
NCV_INVALID_SCALE,
// Memory allocator errors
NCV_ALLOCATOR_NOT_INITIALIZED,
NCV_ALLOCATOR_BAD_ALLOC,
NCV_ALLOCATOR_BAD_DEALLOC,
NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
NCV_ALLOCATOR_DEALLOC_ORDER,
NCV_ALLOCATOR_BAD_REUSE,
// Memory copy / residence errors
NCV_MEM_COPY_ERROR,
NCV_MEM_RESIDENCE_ERROR,
NCV_MEM_INSUFFICIENT_CAPACITY,
// Haar classifier errors
NCV_HAAR_INVALID_PIXEL_STEP,
NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
NCV_HAAR_TOO_LARGE_FEATURES,
NCV_HAAR_XML_LOADING_EXCEPTION,
// Not-implemented functionality
NCV_NOIMPL_HAAR_TILTED_FEATURES,
// Warnings
NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
};
// Declares the local boolean flag that gates NCV_SKIP_COND_BEGIN/END regions,
// initialised to x. Must appear once per scope before the other macros.
#define NCV_SET_SKIP_COND(x) \
bool __ncv_skip_cond = x
// Assigns a new value to the flag declared by NCV_SET_SKIP_COND.
#define NCV_RESET_SKIP_COND(x) \
__ncv_skip_cond = x
// Opens a region that runs only while the skip flag is false.
#define NCV_SKIP_COND_BEGIN \
if (!__ncv_skip_cond) {
// Closes the region opened by NCV_SKIP_COND_BEGIN.
#define NCV_SKIP_COND_END \
}
//==============================================================================
//
// Timer
//
//==============================================================================
// Opaque timer handle; struct _NcvTimer is defined outside this header section.
typedef struct _NcvTimer *NcvTimer;
// Creates/starts a timer and returns its handle.
NcvTimer ncvStartTimer(void);
// NOTE(review): presumably ends timer t and returns the elapsed time in
// microseconds; whether t remains valid afterwards is not visible here - confirm.
double ncvEndQueryTimerUs(NcvTimer t);
// NOTE(review): presumably the same operation reporting milliseconds - confirm.
double ncvEndQueryTimerMs(NcvTimer t);
//==============================================================================
//
// Memory management classes template compound types
//
//==============================================================================
/**
* Alignment of GPU memory chunks in bytes.
* The value is delivered through the 'alignment' reference; the NCVStatus
* return value reports whether the query succeeded.
*/
NCVStatus GPUAlignmentValue(Ncv32u &alignment);
/**
* Calculates the aligned top bound value.
* NOTE(review): presumably rounds 'what' up to the next multiple of
* 'alignment'; whether 'alignment' must be a power of two is not visible
* here - confirm against the implementation.
*/
Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
/**
* NCVMemoryType
* Tags the residence of a memory block. NCVMemoryTypeNone is the value that
* clear() assigns to empty vectors and matrices in this header.
*/
enum NCVMemoryType
{
NCVMemoryTypeNone,
NCVMemoryTypeHostPageable,
NCVMemoryTypeHostPinned,
NCVMemoryTypeDevice
};
/**
* NCVMemPtr
* A raw pointer paired with the memory type it refers to.
*/
struct NCVMemPtr
{
// Start address of the memory block.
void *ptr;
// Residence of the memory that ptr refers to.
NCVMemoryType memtype;
// Resets the members (defined outside this header section).
void clear();
};
/**
* NCVMemSegment
* A typed pointer plus a size, describing one contiguous memory range.
*/
struct NCVMemSegment
{
// Start of the segment together with its memory type.
NCVMemPtr begin;
// Size of the segment in bytes (the vector and matrix wrappers compare it
// against element counts multiplied by sizeof(T)).
size_t size;
// Resets the members (defined outside this header section).
void clear();
};
/**
* INCVMemAllocator (Interface)
* Abstract allocator used by NCVVectorAlloc and NCVMatrixAlloc.
*/
class INCVMemAllocator
{
public:
// Pure virtual destructor; an inline definition is supplied right after the class.
virtual ~INCVMemAllocator() = 0;
// Allocates 'size' bytes and describes the result in 'seg'.
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size) = 0;
// Releases the memory described by 'seg'.
virtual NCVStatus dealloc(NCVMemSegment &seg) = 0;
// NOTE(review): presumably reports whether the allocator was constructed successfully - confirm.
virtual NcvBool isInitialized(void) const = 0;
// NOTE(review): presumably true for an allocator that only measures the
// required capacity without handing out real memory - confirm. The *Alloc
// wrappers treat a counting allocator as "memory allocated" even when the
// segment pointer is NULL.
virtual NcvBool isCounting(void) const = 0;
// Memory type of the blocks this allocator hands out.
virtual NCVMemoryType memType(void) const = 0;
// Alignment in bytes; NCVMatrixAlloc rounds row pitches up to it.
virtual Ncv32u alignment(void) const = 0;
// NOTE(review): presumably the peak number of bytes in use so far - confirm.
virtual size_t maxSize(void) const = 0;
};
// A pure virtual destructor still needs a body because derived destructors call it.
inline INCVMemAllocator::~INCVMemAllocator() {}
/**
* NCVMemStackAllocator
* INCVMemAllocator implementation; member definitions live outside this header.
* NOTE(review): the name and the NCV_ALLOCATOR_DEALLOC_ORDER status suggest
* stack (LIFO) allocation from one pre-reserved block - confirm.
*/
class NCVMemStackAllocator : public INCVMemAllocator
{
// Default construction and copying are disabled (private, class-default access).
NCVMemStackAllocator();
NCVMemStackAllocator(const NCVMemStackAllocator &);
public:
// Constructs an allocator from an alignment only, with no memory type or capacity.
// NOTE(review): presumably the "counting" mode reported by isCounting() - confirm.
explicit NCVMemStackAllocator(Ncv32u alignment);
// Constructs an allocator for the given memory type, capacity and alignment.
NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment);
virtual ~NCVMemStackAllocator();
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
virtual NCVStatus dealloc(NCVMemSegment &seg);
virtual NcvBool isInitialized(void) const;
virtual NcvBool isCounting(void) const;
virtual NCVMemoryType memType(void) const;
virtual Ncv32u alignment(void) const;
virtual size_t maxSize(void) const;
private:
// Memory type and alignment reported by memType() / alignment().
NCVMemoryType _memType;
Ncv32u _alignment;
// Byte pointers into the managed range.
// NOTE(review): exact roles (base of block / current top / end) are not visible here - confirm.
Ncv8u *allocBegin;
Ncv8u *begin;
Ncv8u *end;
// Usage counters; _maxSize is presumably what maxSize() returns - confirm.
size_t currentSize;
size_t _maxSize;
};
/**
* NCVMemNativeAllocator
* INCVMemAllocator implementation constructed from a memory type only; member
* definitions live outside this header.
* NOTE(review): presumably forwards each request to the platform's native
* allocation routine for that memory type - confirm.
*/
class NCVMemNativeAllocator : public INCVMemAllocator
{
public:
// Constructs an allocator for the given memory type.
NCVMemNativeAllocator(NCVMemoryType memT);
virtual ~NCVMemNativeAllocator();
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
virtual NCVStatus dealloc(NCVMemSegment &seg);
virtual NcvBool isInitialized(void) const;
virtual NcvBool isCounting(void) const;
virtual NCVMemoryType memType(void) const;
virtual Ncv32u alignment(void) const;
virtual size_t maxSize(void) const;
private:
// Default construction and copying are disabled.
NCVMemNativeAllocator();
NCVMemNativeAllocator(const NCVMemNativeAllocator &);
// Memory type and alignment reported by memType() / alignment().
NCVMemoryType _memType;
Ncv32u _alignment;
// Usage counters; _maxSize is presumably what maxSize() returns - confirm.
size_t currentSize;
size_t _maxSize;
};
/**
* Copy dispatcher.
* Copies 'sz' bytes from 'src' to 'dst'; the memory type of each side is
* passed explicitly, and 'cuStream' is the CUDA stream handed to the copy.
* NOTE(review): presumably selects the host or CUDA copy routine from the two
* memory types; whether the copy is asynchronous with respect to the host is
* not visible here - confirm against the implementation.
*/
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
const void *src, NCVMemoryType srcType,
size_t sz, cudaStream_t cuStream);
/**
 * NCVVector (1D)
 *
 * Non-owning base view over a contiguous array of T: a pointer, an element
 * count and the memory type the pointer belongs to. Derived classes fill in
 * the protected members; this class never allocates or frees memory.
 */
template <class T>
class NCVVector
{
    // Copy construction is disabled: declared private and never defined.
    NCVVector(const NCVVector &);

public:

    NCVVector()
    {
        clear();
    }

    virtual ~NCVVector() {}

    // Puts the view into the empty state (no pointer, zero length, no memory type).
    void clear()
    {
        _memtype = NCVMemoryTypeNone;
        _length = 0;
        _ptr = NULL;
    }

    /**
     * Copies raw bytes from this vector into dst through memSegCopyHelper.
     *
     * howMuch is a byte count. When it is 0 the whole vector is copied and
     * both vectors must have the same length; otherwise both vectors must be
     * at least howMuch bytes long.
     *
     * Returns NCV_MEM_COPY_ERROR on a size mismatch, NCV_NULL_PTR when a side
     * with a real memory type has a NULL pointer, otherwise the status of the
     * copy helper (NCV_SUCCESS without copying when this vector has no memory type).
     */
    NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0)
    {
        if (howMuch != 0)
        {
            // Explicit byte count: it has to fit into both vectors.
            ncvAssertReturn(howMuch > 0 &&
                            dst._length * sizeof(T) >= howMuch &&
                            this->_length * sizeof(T) >= howMuch, NCV_MEM_COPY_ERROR);
        }
        else
        {
            // Whole-vector copy: lengths have to agree exactly.
            ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR);
            howMuch = this->_length * sizeof(T);
        }

        // A NULL pointer is tolerated only on a side that has no memory type.
        const bool srcUsable = (this->_ptr != NULL) || (this->_memtype == NCVMemoryTypeNone);
        const bool dstUsable = (dst._ptr != NULL) || (dst._memtype == NCVMemoryTypeNone);
        ncvAssertReturn(srcUsable && dstUsable, NCV_NULL_PTR);

        if (this->_memtype == NCVMemoryTypeNone)
        {
            // Nothing to copy from a source without a memory type.
            return NCV_SUCCESS;
        }

        return memSegCopyHelper(dst._ptr, dst._memtype,
                                this->_ptr, this->_memtype,
                                howMuch, cuStream);
    }

    T *ptr() const {return this->_ptr;}
    size_t length() const {return this->_length;}
    NCVMemoryType memType() const {return this->_memtype;}

protected:

    T *_ptr;                 // first element
    size_t _length;          // number of elements (not bytes)
    NCVMemoryType _memtype;  // residence of the memory behind _ptr
};
/**
 * NCVVectorAlloc
 *
 * NCVVector that obtains its storage from an INCVMemAllocator in the
 * constructor and hands it back in the destructor. Construction does not
 * throw; callers query isMemAllocated() to find out whether it succeeded.
 */
template <class T>
class NCVVectorAlloc : public NCVVector<T>
{
    // Default construction and copying are disabled.
    NCVVectorAlloc();
    NCVVectorAlloc(const NCVVectorAlloc &);

public:

    // Requests length * sizeof(T) bytes from the allocator. On failure a
    // diagnostic is printed and the vector stays in the cleared state.
    NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length)
        :
        allocator(allocator)
    {
        this->clear();
        this->allocatedMem.clear();

        const NCVStatus ncvStat = allocator.alloc(this->allocatedMem, length * sizeof(T));
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", );

        // Publish the freshly allocated segment through the base-class view.
        this->_memtype = this->allocatedMem.begin.memtype;
        this->_length = length;
        this->_ptr = (T *)this->allocatedMem.begin.ptr;
    }

    // Returns the segment to the allocator; a failure is only reported, not propagated.
    ~NCVVectorAlloc()
    {
        const NCVStatus ncvStat = allocator.dealloc(this->allocatedMem);
        ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed");
        this->clear();
    }

    // True when real memory was obtained, or when the allocator is a counting
    // one (in which case a NULL segment pointer is still treated as success).
    NcvBool isMemAllocated() const
    {
        const bool hasPointer = (this->allocatedMem.begin.ptr != NULL);
        return hasPointer || (this->allocator.isCounting());
    }

    Ncv32u getAllocatorsAlignment() const
    {
        return allocator.alignment();
    }

    // Returns a copy of the segment descriptor obtained from the allocator.
    NCVMemSegment getSegment() const
    {
        return allocatedMem;
    }

private:

    INCVMemAllocator &allocator;   // allocator that owns the storage
    NCVMemSegment allocatedMem;    // segment returned by allocator.alloc()
};
/**
 * NCVVectorReuse
 *
 * NCVVector view bound to an already existing memory segment. Nothing is
 * allocated or freed; isMemReused() reports whether the binding succeeded.
 */
template <class T>
class NCVVectorReuse : public NCVVector<T>
{
    // Default construction and copying are disabled.
    NCVVectorReuse();
    NCVVectorReuse(const NCVVectorReuse &);

public:

    // Binds to the whole segment; the length is the number of complete T
    // elements that fit into it.
    explicit NCVVectorReuse(const NCVMemSegment &memSegment)
    {
        this->bReused = false;
        this->clear();

        this->_memtype = memSegment.begin.memtype;
        this->_ptr = (T *)memSegment.begin.ptr;
        this->_length = memSegment.size / sizeof(T);

        this->bReused = true;
    }

    // Binds to the first 'length' elements of the segment. If the segment is
    // too small a diagnostic is printed and the vector stays cleared with
    // isMemReused() == false.
    NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)
    {
        this->bReused = false;
        this->clear();

        ncvAssertPrintReturn(length * sizeof(T) <= memSegment.size,
            "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );

        this->_memtype = memSegment.begin.memtype;
        this->_ptr = (T *)memSegment.begin.ptr;
        this->_length = length;

        this->bReused = true;
    }

    NcvBool isMemReused() const
    {
        return this->bReused;
    }

private:

    NcvBool bReused;   // set once the view has been bound successfully
};
/**
 * NCVMatrix (2D)
 *
 * Non-owning base view over a pitched 2D array of T: pointer, width and
 * height in elements, row pitch in bytes, and the memory type of the pointer.
 * Derived classes fill in the protected members; this class never allocates
 * or frees memory.
 */
template <class T>
class NCVMatrix
{
    // Copy construction is disabled: declared private and never defined.
    NCVMatrix(const NCVMatrix &);

public:

    NCVMatrix()
    {
        clear();
    }

    virtual ~NCVMatrix() {}

    // Puts the view into the empty state.
    void clear()
    {
        _ptr = NULL;
        _pitch = 0;
        _width = 0;
        _height = 0;
        _memtype = NCVMemoryTypeNone;
    }

    // Row stride in elements (pitch in bytes divided by the element size).
    Ncv32u stride() const
    {
        return _pitch / sizeof(T);
    }

    /**
     * Copies raw bytes (including row padding) from this matrix into dst
     * through memSegCopyHelper.
     *
     * howMuch is a byte count. When it is 0 the whole pitched area is copied
     * and both matrices must have identical pitch and height; otherwise both
     * pitched areas must be at least howMuch bytes large.
     *
     * Returns NCV_MEM_COPY_ERROR on a size mismatch, NCV_NULL_PTR when a side
     * with a real memory type has a NULL pointer, otherwise the status of the
     * copy helper (NCV_SUCCESS without copying when this matrix has no memory type).
     */
    NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0)
    {
        // Pitch and height are 32-bit; widen before multiplying so that areas
        // of 4 GiB and more do not wrap around where size_t is 64-bit.
        const size_t srcBytes = (size_t)this->_pitch * this->_height;
        const size_t dstBytes = (size_t)dst._pitch * dst._height;

        if (howMuch == 0)
        {
            ncvAssertReturn(dst._pitch == this->_pitch &&
                            dst._height == this->_height, NCV_MEM_COPY_ERROR);
            howMuch = srcBytes;
        }
        else
        {
            ncvAssertReturn(dstBytes >= howMuch &&
                            srcBytes >= howMuch &&
                            howMuch > 0, NCV_MEM_COPY_ERROR);
        }

        // A NULL pointer is tolerated only on a side that has no memory type.
        ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
                        (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);

        NCVStatus ncvStat = NCV_SUCCESS;
        if (this->_memtype != NCVMemoryTypeNone)
        {
            ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
                                       this->_ptr, this->_memtype,
                                       howMuch, cuStream);
        }

        return ncvStat;
    }

    T *ptr() const {return this->_ptr;}
    Ncv32u width() const {return this->_width;}
    Ncv32u height() const {return this->_height;}
    Ncv32u pitch() const {return this->_pitch;}
    NCVMemoryType memType() const {return this->_memtype;}

protected:

    T *_ptr;                 // first element of the first row
    Ncv32u _width;           // elements per row
    Ncv32u _height;          // number of rows
    Ncv32u _pitch;           // distance between consecutive rows, in bytes
    NCVMemoryType _memtype;  // residence of the memory behind _ptr
};
/**
 * NCVMatrixAlloc
 *
 * NCVMatrix that obtains pitched storage from an INCVMemAllocator in the
 * constructor and hands it back in the destructor. Construction does not
 * throw; callers query isMemAllocated() to find out whether it succeeded.
 */
template <class T>
class NCVMatrixAlloc : public NCVMatrix<T>
{
    // Default construction and copying are disabled.
    NCVMatrixAlloc();
    NCVMatrixAlloc(const NCVMatrixAlloc &);

public:

    /**
     * Allocates a width x height matrix.
     *
     * pitch is the row pitch in bytes. 0 selects the row size rounded up to
     * the allocator's alignment; a non-zero value must be at least that large
     * and a multiple of the alignment (checked with a bit mask, so the
     * alignment is expected to be a power of two).
     *
     * On any failure a diagnostic is printed and the matrix stays cleared.
     */
    NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
        :
        allocator(allocator)
    {
        NCVStatus ncvStat;

        this->clear();
        this->allocatedMem.clear();

        Ncv32u widthBytes = width * sizeof(T);
        Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment());

        if (pitch != 0)
        {
            ncvAssertPrintReturn(pitch >= pitchBytes &&
                (pitch & (allocator.alignment() - 1)) == 0,
                "NCVMatrixAlloc ctor:: incorrect pitch passed", );
            pitchBytes = pitch;
        }

        // Widen before multiplying: the product of two 32-bit values can exceed
        // 32 bits, and alloc() takes a size_t, so a truncated request would
        // silently under-allocate.
        size_t requiredAllocSize = (size_t)pitchBytes * height;

        ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", );

        this->_ptr = (T *)this->allocatedMem.begin.ptr;
        this->_width = width;
        this->_height = height;
        this->_pitch = pitchBytes;
        this->_memtype = this->allocatedMem.begin.memtype;
    }

    // Returns the segment to the allocator; a failure is only reported, not propagated.
    ~NCVMatrixAlloc()
    {
        NCVStatus ncvStat;

        ncvStat = allocator.dealloc(this->allocatedMem);
        ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed");

        this->clear();
    }

    // True when real memory was obtained, or when the allocator is a counting
    // one (in which case a NULL segment pointer is still treated as success).
    NcvBool isMemAllocated() const
    {
        return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
    }

    Ncv32u getAllocatorsAlignment() const
    {
        return allocator.alignment();
    }

    // Returns a copy of the segment descriptor obtained from the allocator.
    NCVMemSegment getSegment() const
    {
        return allocatedMem;
    }

private:

    INCVMemAllocator &allocator;   // allocator that owns the storage
    NCVMemSegment allocatedMem;    // segment returned by allocator.alloc()
};
/**
 * NCVMatrixReuse
 *
 * NCVMatrix view bound to an already existing memory segment. Nothing is
 * allocated or freed; isMemReused() reports whether the binding succeeded.
 */
template <class T>
class NCVMatrixReuse : public NCVMatrix<T>
{
    // Default construction and copying are disabled.
    NCVMatrixReuse();
    NCVMatrixReuse(const NCVMatrixReuse &);

public:

    /**
     * Binds a width x height matrix to memSegment.
     *
     * pitch is the row pitch in bytes. 0 selects the row size rounded up to
     * 'alignment'. A non-zero pitch must be at least that large and a multiple
     * of 'alignment' (bit-mask test, so a power-of-two alignment is expected),
     * unless bSkipPitchCheck is set, in which case it only has to cover one
     * unpadded row.
     *
     * On any failed check a diagnostic is printed and the matrix stays cleared
     * with isMemReused() == false.
     */
    NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false)
    {
        this->bReused = false;
        this->clear();

        Ncv32u widthBytes = width * sizeof(T);
        Ncv32u pitchBytes = alignUp(widthBytes, alignment);

        if (pitch != 0)
        {
            if (!bSkipPitchCheck)
            {
                ncvAssertPrintReturn(pitch >= pitchBytes &&
                    (pitch & (alignment - 1)) == 0,
                    "NCVMatrixReuse ctor:: incorrect pitch passed", );
            }
            else
            {
                ncvAssertPrintReturn(pitch >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", );
            }
            pitchBytes = pitch;
        }

        // Widen before multiplying: a 32-bit product could wrap around and make
        // an oversized matrix appear to fit into the segment.
        ncvAssertPrintReturn((size_t)pitchBytes * height <= memSegment.size,
            "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );

        this->_width = width;
        this->_height = height;
        this->_pitch = pitchBytes;
        this->_ptr = (T *)memSegment.begin.ptr;
        this->_memtype = memSegment.begin.memtype;

        this->bReused = true;
    }

    NcvBool isMemReused() const
    {
        return this->bReused;
    }

private:

    NcvBool bReused;   // set once the view has been bound successfully
};
#endif // _ncv_hpp_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,501 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
////////////////////////////////////////////////////////////////////////////////
//
// NVIDIA CUDA implementation of Viola-Jones Object Detection Framework
//
// The algorithm and code are explained in the upcoming GPU Computing Gems
// chapter in detail:
//
// Anton Obukhov, "Haar Classifiers for Object Detection with CUDA"
// PDF URL placeholder
// email: aobukhov@nvidia.com, devsupport@nvidia.com
//
// Credits for help with the code to:
// Alexey Mendelenko, Cyril Crassin, and Mikhail Smirnov.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _ncvhaarobjectdetection_hpp_
#define _ncvhaarobjectdetection_hpp_
#include <string>
#include "NCV.hpp"
//==============================================================================
//
// Guaranteed size cross-platform classifier structures
//
//==============================================================================
/**
 * HaarFeature64
 *
 * One rectangle of a Haar feature packed into 64 bits: _ui2.x holds an
 * NcvRect8u (x, y, width, height - one byte each) and _ui2.y holds the
 * rectangle weight as the bit pattern of a 32-bit float.
 */
struct HaarFeature64
{
    uint2 _ui2;

// Largest value that fits into one 8-bit rectangle field.
#define HaarFeature64_CreateCheck_MaxRectField                  0xFF

    /**
     * Stores the rectangle. Every field is kept in 8 bits, so all four values
     * are range-checked; an out-of-range value yields
     * NCV_HAAR_TOO_LARGE_FEATURES instead of being silently truncated.
     * clsWidth and clsHeight are accepted for interface compatibility and are
     * not used.
     */
    __host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u clsWidth, Ncv32u clsHeight)
    {
        (void)clsWidth;
        (void)clsHeight;

        ncvAssertReturn(rectX <= HaarFeature64_CreateCheck_MaxRectField &&
                        rectY <= HaarFeature64_CreateCheck_MaxRectField &&
                        rectWidth <= HaarFeature64_CreateCheck_MaxRectField &&
                        rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);

        // View the first 32-bit word as four packed bytes.
        NcvRect8u *packedRect = (NcvRect8u*)&(this->_ui2.x);
        packedRect->x = (Ncv8u)rectX;
        packedRect->y = (Ncv8u)rectY;
        packedRect->width = (Ncv8u)rectWidth;
        packedRect->height = (Ncv8u)rectHeight;
        return NCV_SUCCESS;
    }

    // Stores the weight as a raw float bit pattern in the second word.
    __host__ NCVStatus setWeight(Ncv32f weight)
    {
        ((Ncv32f*)&(this->_ui2.y))[0] = weight;
        return NCV_SUCCESS;
    }

    // Unpacks the four 8-bit rectangle fields into the 32-bit outputs.
    __device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight)
    {
        NcvRect8u tmpRect = *(NcvRect8u*)(&this->_ui2.x);
        *rectX = tmpRect.x;
        *rectY = tmpRect.y;
        *rectWidth = tmpRect.width;
        *rectHeight = tmpRect.height;
    }

    // Reinterprets the second word as the stored float weight.
    __device__ __host__ Ncv32f getWeight(void)
    {
        return *(Ncv32f*)(&this->_ui2.y);
    }
};
/**
 * HaarFeatureDescriptor32
 *
 * Describes the feature rectangles of one classifier node in a single 32-bit
 * word: bit 31 is the "tilted" flag, bits 24..30 hold the number of features
 * and bits 0..23 hold the offset of the first feature.
 */
struct HaarFeatureDescriptor32
{
private:

#define HaarFeatureDescriptor32_Interpret_MaskFlagTilted        0x80000000
#define HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures      0x7F
#define HaarFeatureDescriptor32_NumFeatures_Shift               24
#define HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset    0x00FFFFFF

    Ncv32u desc;

public:

    // Packs the three fields into the descriptor. The descriptor is left
    // untouched when either value does not fit into its bit field.
    __host__ NCVStatus create(NcvBool bTilted, Ncv32u numFeatures, Ncv32u offsetFeatures)
    {
        if (numFeatures > HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures)
        {
            return NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER;
        }
        if (offsetFeatures > HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset)
        {
            return NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE;
        }

        // Assemble the word locally, then store it in one go.
        Ncv32u packed = offsetFeatures;
        packed |= (numFeatures << HaarFeatureDescriptor32_NumFeatures_Shift);
        if (bTilted)
        {
            packed |= HaarFeatureDescriptor32_Interpret_MaskFlagTilted;
        }
        this->desc = packed;
        return NCV_SUCCESS;
    }

    __device__ __host__ NcvBool isTilted(void)
    {
        return (this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagTilted) != 0;
    }

    // Bits 24..30: shift the field down first, then mask off the tilted flag.
    __device__ __host__ Ncv32u getNumFeatures(void)
    {
        return (this->desc >> HaarFeatureDescriptor32_NumFeatures_Shift) & HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures;
    }

    // Bits 0..23.
    __device__ __host__ Ncv32u getFeaturesOffset(void)
    {
        return this->desc & HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset;
    }
};
/**
 * HaarClassifierNodeDescriptor32
 *
 * A 32-bit word that is either a leaf value (raw float bits) or the offset of
 * the next classifier node. Bit 30 is the discriminator: set means "offset",
 * clear means "leaf".
 */
struct HaarClassifierNodeDescriptor32
{
    uint1 _ui1;

#define HaarClassifierNodeDescriptor32_Interpret_MaskSwitch         (1 << 30)

    // Stores a leaf value. A float whose bit pattern has bit 30 set cannot be
    // told apart from an offset and is rejected.
    __host__ NCVStatus create(Ncv32f leafValue)
    {
        const Ncv32u leafBits = *(Ncv32u *)&leafValue;
        if (leafBits & HaarClassifierNodeDescriptor32_Interpret_MaskSwitch)
        {
            return NCV_HAAR_XML_LOADING_EXCEPTION;
        }
        *(Ncv32f *)&this->_ui1 = leafValue;
        return NCV_SUCCESS;
    }

    // Stores a next-node offset and marks the word with the switch bit. The
    // offset has to stay below the switch bit.
    __host__ NCVStatus create(Ncv32u offsetHaarClassifierNode)
    {
        if (offsetHaarClassifierNode >= HaarClassifierNodeDescriptor32_Interpret_MaskSwitch)
        {
            return NCV_HAAR_XML_LOADING_EXCEPTION;
        }
        this->_ui1.x = offsetHaarClassifierNode | HaarClassifierNodeDescriptor32_Interpret_MaskSwitch;
        return NCV_SUCCESS;
    }

    // A descriptor is a leaf when the switch bit is clear.
    __device__ __host__ NcvBool isLeaf(void)
    {
        return (this->_ui1.x & HaarClassifierNodeDescriptor32_Interpret_MaskSwitch) == 0;
    }

    // Host-side read of the leaf value through pointer reinterpretation.
    __host__ Ncv32f getLeafValueHost(void)
    {
        return *(Ncv32f *)&this->_ui1.x;
    }

#ifdef __CUDACC__
    // Device-side read of the leaf value; only compiled by the CUDA compiler.
    __device__ Ncv32f getLeafValue(void)
    {
        return __int_as_float(this->_ui1.x);
    }
#endif

    // Offset of the next node: the stored word with the switch bit removed.
    __device__ __host__ Ncv32u getNextNodeOffset(void)
    {
        return (this->_ui1.x & ~HaarClassifierNodeDescriptor32_Interpret_MaskSwitch);
    }
};
/**
 * HaarClassifierNode128
 *
 * One classifier node packed into four 32-bit words:
 *   x - feature descriptor, y - threshold (float bits),
 *   z - left child descriptor, w - right child descriptor.
 * All accessors reinterpret the stored bits; no value conversion takes place.
 */
struct HaarClassifierNode128
{
    uint4 _ui4;

    __host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f)
    {
        const Ncv32u *bits = reinterpret_cast<Ncv32u *>(&f);
        this->_ui4.x = *bits;
        return NCV_SUCCESS;
    }

    __host__ NCVStatus setThreshold(Ncv32f t)
    {
        const Ncv32u *bits = reinterpret_cast<Ncv32u *>(&t);
        this->_ui4.y = *bits;
        return NCV_SUCCESS;
    }

    __host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl)
    {
        const Ncv32u *bits = reinterpret_cast<Ncv32u *>(&nl);
        this->_ui4.z = *bits;
        return NCV_SUCCESS;
    }

    __host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr)
    {
        const Ncv32u *bits = reinterpret_cast<Ncv32u *>(&nr);
        this->_ui4.w = *bits;
        return NCV_SUCCESS;
    }

    __host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void)
    {
        return *reinterpret_cast<HaarFeatureDescriptor32 *>(&this->_ui4.x);
    }

    __host__ __device__ Ncv32f getThreshold(void)
    {
        return *reinterpret_cast<Ncv32f *>(&this->_ui4.y);
    }

    __host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void)
    {
        return *reinterpret_cast<HaarClassifierNodeDescriptor32 *>(&this->_ui4.z);
    }

    __host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void)
    {
        return *reinterpret_cast<HaarClassifierNodeDescriptor32 *>(&this->_ui4.w);
    }
};
/**
 * HaarStage64
 *
 * One cascade stage packed into 64 bits: _ui2.x holds the stage threshold as
 * raw float bits; _ui2.y holds the offset of the first classifier root node
 * in its upper 16 bits and the number of root nodes in its lower 16 bits.
 */
struct HaarStage64
{
#define HaarStage64_Interpret_MaskRootNodes         0x0000FFFF
#define HaarStage64_Interpret_MaskRootNodeOffset    0xFFFF0000
#define HaarStage64_Interpret_ShiftRootNodeOffset   16

    uint2 _ui2;

    __host__ NCVStatus setStageThreshold(Ncv32f t)
    {
        const Ncv32u *thresholdBits = (Ncv32u *)&t;
        this->_ui2.x = *thresholdBits;
        return NCV_SUCCESS;
    }

    // Writes the upper 16 bits of _ui2.y and keeps the node count below them.
    __host__ NCVStatus setStartClassifierRootNodeOffset(Ncv32u val)
    {
        if (val > (HaarStage64_Interpret_MaskRootNodeOffset >> HaarStage64_Interpret_ShiftRootNodeOffset))
        {
            return NCV_HAAR_XML_LOADING_EXCEPTION;
        }
        const Ncv32u keptNodeCount = this->_ui2.y & HaarStage64_Interpret_MaskRootNodes;
        this->_ui2.y = (val << HaarStage64_Interpret_ShiftRootNodeOffset) | keptNodeCount;
        return NCV_SUCCESS;
    }

    // Writes the lower 16 bits of _ui2.y and keeps the offset above them.
    __host__ NCVStatus setNumClassifierRootNodes(Ncv32u val)
    {
        if (val > HaarStage64_Interpret_MaskRootNodes)
        {
            return NCV_HAAR_XML_LOADING_EXCEPTION;
        }
        const Ncv32u keptOffset = this->_ui2.y & HaarStage64_Interpret_MaskRootNodeOffset;
        this->_ui2.y = val | keptOffset;
        return NCV_SUCCESS;
    }

    __host__ __device__ Ncv32f getStageThreshold(void)
    {
        return *(Ncv32f*)&this->_ui2.x;
    }

    __host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void)
    {
        return (this->_ui2.y >> HaarStage64_Interpret_ShiftRootNodeOffset);
    }

    __host__ __device__ Ncv32u getNumClassifierRootNodes(void)
    {
        return (this->_ui2.y & HaarStage64_Interpret_MaskRootNodes);
    }
};
// Compile-time checks: each packed classifier structure must have exactly the
// byte size encoded in its name (64 bits = 8 bytes, 32 = 4, 128 = 16), since
// the accessors reinterpret the raw words directly.
NPPST_CT_ASSERT(sizeof(HaarFeature64) == 8);
NPPST_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
NPPST_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
NPPST_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
NPPST_CT_ASSERT(sizeof(HaarStage64) == 8);
//==============================================================================
//
// Classifier cascade descriptor
//
//==============================================================================
// Summary of a loaded cascade: element counts plus a few global properties.
// NOTE(review): the per-field meanings below are read off the field names -
// confirm against the loader implementation.
struct HaarClassifierCascadeDescriptor
{
// Number of stages in the cascade.
Ncv32u NumStages;
// Number of classifier root nodes.
Ncv32u NumClassifierRootNodes;
// Total number of classifier nodes (presumably root and non-root).
Ncv32u NumClassifierTotalNodes;
// Number of feature rectangles.
Ncv32u NumFeatures;
// Size of the classifier window.
NcvSize32u ClassifierSize;
// Presumably set when the cascade contains tilted features and therefore needs a tilted integral image.
NcvBool bNeedsTiltedII;
// Presumably set when every classifier is a single-node tree (stump).
NcvBool bHasStumpsOnly;
};
//==============================================================================
//
// Functional interface
//
//==============================================================================
// Bit flags for the 'flags' argument of ncvDetectObjectsMultiScale_device.
// The values are distinct powers of two, so they can be combined with '|'.
enum
{
NCVPipeObjDet_Default = 0x000,
NCVPipeObjDet_UseFairImageScaling = 0x001,
NCVPipeObjDet_FindLargestObject = 0x002,
NCVPipeObjDet_VisualizeInPlace = 0x004,
};
// Multi-scale object detection entry point (declaration only; the
// implementation is not part of this header).
// The inline comments on the parameters give their documented default values.
// NOTE(review): the d_/h_ prefixes presumably mark device- and host-resident
// containers, dstNumRects presumably receives the number of rectangles written
// to d_dstRects, and devPropMajor/devPropMinor presumably carry the device's
// compute capability - confirm against the implementation.
NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
NcvSize32u srcRoi,
NCVVector<NcvRect32u> &d_dstRects,
Ncv32u &dstNumRects,
HaarClassifierCascadeDescriptor &haar,
NCVVector<HaarStage64> &h_HaarStages,
NCVVector<HaarStage64> &d_HaarStages,
NCVVector<HaarClassifierNode128> &d_HaarNodes,
NCVVector<HaarFeature64> &d_HaarFeatures,
NcvSize32u minObjSize,
Ncv32u minNeighbors, //default 4
Ncv32f scaleStep, //default 1.2f
Ncv32u pixelStep, //default 1
Ncv32u flags, //default NCVPipeObjDet_Default
INCVMemAllocator &gpuAllocator,
INCVMemAllocator &cpuAllocator,
Ncv32u devPropMajor,
Ncv32u devPropMinor,
cudaStream_t cuStream);
// All-ones 32-bit sentinel.
// NOTE(review): presumably marks a pixel-mask entry that holds no valid anchor - confirm.
#define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF
// NOTE(review): presumably the border width in pixels used by the standard-deviation computation - confirm.
#define HAAR_STDDEV_BORDER 1
// Applies the classifier cascade to an integral image (declaration only; the
// implementation is not part of this header). numDetections is an output
// reference.
// NOTE(review): d_pixelMask is presumably both input and output (anchors to
// test / anchors that passed), and the d_/h_ prefixes presumably mark device-
// and host-resident containers - confirm against the implementation.
NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
NCVMatrix<Ncv32f> &d_weights,
NCVMatrixAlloc<Ncv32u> &d_pixelMask,
Ncv32u &numDetections,
HaarClassifierCascadeDescriptor &haar,
NCVVector<HaarStage64> &h_HaarStages,
NCVVector<HaarStage64> &d_HaarStages,
NCVVector<HaarClassifierNode128> &d_HaarNodes,
NCVVector<HaarFeature64> &d_HaarFeatures,
NcvBool bMaskElements,
NcvSize32u anchorsRoi,
Ncv32u pixelStep,
Ncv32f scaleArea,
INCVMemAllocator &gpuAllocator,
INCVMemAllocator &cpuAllocator,
Ncv32u devPropMajor,
Ncv32u devPropMinor,
cudaStream_t cuStream);
// Host counterpart of ncvApplyHaarClassifierCascade_device (declaration only):
// same leading parameters with h_-prefixed containers, and no allocator,
// device-property or stream arguments.
// NOTE(review): presumably a CPU reference implementation producing the same
// results as the device version - confirm.
NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
NCVMatrix<Ncv32f> &h_weights,
NCVMatrixAlloc<Ncv32u> &h_pixelMask,
Ncv32u &numDetections,
HaarClassifierCascadeDescriptor &haar,
NCVVector<HaarStage64> &h_HaarStages,
NCVVector<HaarClassifierNode128> &h_HaarNodes,
NCVVector<HaarFeature64> &h_HaarFeatures,
NcvBool bMaskElements,
NcvSize32u anchorsRoi,
Ncv32u pixelStep,
Ncv32f scaleArea);
// Rectangle drawing helpers (declarations only). Four variants: 8-bit and
// 32-bit pixels, each for a d_-prefixed and an h_-prefixed destination; the
// _device variants additionally take a CUDA stream.
// NOTE(review): presumably draws the outlines of numRects rectangles in
// 'color' into a dstWidth x dstHeight image; whether dstStride is measured in
// elements or bytes is not visible here - confirm against the implementation.
NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
Ncv32u dstHeight,
NcvRect32u *d_rects,
Ncv32u numRects,
Ncv8u color,
cudaStream_t cuStream);
// 32-bit pixel variant for a d_-prefixed destination.
NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
Ncv32u dstHeight,
NcvRect32u *d_rects,
Ncv32u numRects,
Ncv32u color,
cudaStream_t cuStream);
// 8-bit pixel variant for an h_-prefixed destination.
NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
Ncv32u dstHeight,
NcvRect32u *h_rects,
Ncv32u numRects,
Ncv8u color);
// 32-bit pixel variant for an h_-prefixed destination.
NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
Ncv32u dstHeight,
NcvRect32u *h_rects,
Ncv32u numRects,
Ncv32u color);
// NOTE(review): presumably the relative tolerance used when deciding whether
// two detected rectangles are similar enough to be grouped - confirm.
#define RECT_SIMILARITY_PROPORTION 0.2f
// Appends detections to the hypotheses vector (declaration only).
// totalDetections is an in/out reference.
// NOTE(review): presumably converts the first numPixelMaskDetections entries
// of pixelMask into rectangles of size rectWidth x rectHeight scaled by
// curScale, appends them to 'hypotheses', and limits the total to
// totalMaxDetections (cf. NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW) -
// confirm against the implementation.
NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
Ncv32u numPixelMaskDetections,
NCVVector<NcvRect32u> &hypotheses,
Ncv32u &totalDetections,
Ncv32u totalMaxDetections,
Ncv32u rectWidth,
Ncv32u rectHeight,
Ncv32f curScale,
cudaStream_t cuStream);
// Host variant with the same parameters except for the CUDA stream.
NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
Ncv32u numPixelMaskDetections,
NCVVector<NcvRect32u> &hypotheses,
Ncv32u &totalDetections,
Ncv32u totalMaxDetections,
Ncv32u rectWidth,
Ncv32u rectHeight,
Ncv32f curScale);
// Filters the hypotheses vector on the host (declaration only).
// numHypotheses is an in/out reference; hypothesesWeights is passed by pointer.
// NOTE(review): presumably groups overlapping rectangles, keeps groups with at
// least minNeighbors members, and treats a NULL hypothesesWeights as "weights
// not requested" - confirm against the implementation.
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
Ncv32u &numHypotheses,
Ncv32u minNeighbors,
Ncv32f intersectEps,
NCVVector<Ncv32u> *hypothesesWeights);
// Reports the element counts of the cascade stored in 'filename' through the
// three output references.
// NOTE(review): presumably meant to be called first so the caller can size
// the vectors passed to ncvHaarLoadFromFile_host - confirm.
NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
Ncv32u &numNodes, Ncv32u &numFeatures);
// Loads a cascade from 'filename' into the descriptor and the three h_-prefixed vectors.
// NOTE(review): supported file formats are not visible here (the status code
// NCV_HAAR_XML_LOADING_EXCEPTION suggests XML is one of them) - confirm.
NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
HaarClassifierCascadeDescriptor &haar,
NCVVector<HaarStage64> &h_HaarStages,
NCVVector<HaarClassifierNode128> &h_HaarNodes,
NCVVector<HaarFeature64> &h_HaarFeatures);
// Stores a cascade to 'filename'; note that 'haar' is taken by value here.
// NOTE(review): "NVBIN" is presumably a project-specific binary format - confirm.
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
HaarClassifierCascadeDescriptor haar,
NCVVector<HaarStage64> &h_HaarStages,
NCVVector<HaarClassifierNode128> &h_HaarNodes,
NCVVector<HaarFeature64> &h_HaarFeatures);
#endif // _ncvhaarobjectdetection_hpp_

View File

@@ -0,0 +1,174 @@
////////////////////////////////////////////////////////////////////////////////
// The Loki Library
// Copyright (c) 2001 by Andrei Alexandrescu
// This code accompanies the book:
// Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design
// Patterns Applied". Copyright (c) 2001. Addison-Wesley.
// Permission to use, copy, modify, distribute and sell this software for any
// purpose is hereby granted without fee, provided that the above copyright
// notice appear in all copies and that both that copyright notice and this
// permission notice appear in supporting documentation.
// The author or Addison-Welsey Longman make no representations about the
// suitability of this software for any purpose. It is provided "as is"
// without express or implied warranty.
// http://loki-lib.sourceforge.net/index.php?n=Main.License
////////////////////////////////////////////////////////////////////////////////
#ifndef _ncvruntimetemplates_hpp_
#define _ncvruntimetemplates_hpp_
#include <stdarg.h>
#include <vector>
namespace Loki
{
    //==============================================================================
    // class NullType
    // Placeholder meaning "no type here"; serves as the end marker of a typelist.
    //==============================================================================
    class NullType {};

    //==============================================================================
    // class template Typelist
    // A cons cell for compile-time lists of types.
    // Nested types:
    //     Head - the first element
    //     Tail - the remainder (another Typelist, or NullType at the end)
    //==============================================================================
    template <class HeadT, class TailT>
    struct Typelist
    {
        typedef HeadT Head;
        typedef TailT Tail;
    };

    //==============================================================================
    // class template Int2Type
    // Maps every integral constant to its own distinct type.
    // Int2Type<v>::value is an enumerator equal to v.
    //==============================================================================
    template <int v>
    struct Int2Type
    {
        enum { value = v };
    };

    namespace TL
    {
        //==============================================================================
        // class template TypeAt
        // TypeAt<TList, index>::Result is the type stored at position 'index'
        // (zero-based) of the typelist TList.
        // An out-of-bounds index leaves the primary template undefined and
        // therefore fails at compile time.
        //==============================================================================
        template <class TList, unsigned int index> struct TypeAt;

        // Index 0: the answer is the head of the list.
        template <class First, class Rest>
        struct TypeAt<Typelist<First, Rest>, 0>
        {
            typedef First Result;
        };

        // Index n > 0: skip the head and look up n - 1 in the tail.
        template <class First, class Rest, unsigned int n>
        struct TypeAt<Typelist<First, Rest>, n>
        {
            typedef typename TypeAt<Rest, n - 1>::Result Result;
        };
    }
}
////////////////////////////////////////////////////////////////////////////////
// Runtime boolean template instance dispatcher
// Cyril Crassin <cyril.crassin@icare3d.org>
// NVIDIA, 2010
////////////////////////////////////////////////////////////////////////////////
namespace NCVRuntimeTemplateBool
{
    // Turns a list of runtime booleans into compile-time template arguments.
    // Each recursion step consumes one runtime value and prepends the matching
    // Loki::Int2Type<true/false> to the typelist; when no values are left the
    // finished typelist is handed to the user-supplied functor.
    template<typename TList, int NumArguments, class Func>
    struct KernelCaller
    {
        // Convenience entry point: reads NumArguments int values from the
        // variable argument list that follows 'dummy' and starts the dispatch.
        static void call(Func &functor, int dummy, ...)
        {
            // Runtime values collected from the argument list, in call order
            std::vector<int> templateParamList;

            va_list listPointer;
            va_start(listPointer, dummy);
            int remaining = NumArguments;
            while (remaining > 0)
            {
                templateParamList.push_back(va_arg(listPointer, int));
                --remaining;
            }
            va_end(listPointer);

            // Hand over to the recursive typelist builder
            call(functor, templateParamList);
        }

        // Recursive step: removes the last value from the list and continues
        // with a typelist extended by the corresponding compile-time constant.
        static void call( Func &functor, std::vector<int> &templateParamList)
        {
            // Both continuations are instantiated at compile time; the runtime
            // value merely selects which of them is executed.
            typedef KernelCaller<
                Loki::Typelist<Loki::Int2Type<true>, TList >,
                NumArguments-1, Func > CallerTrue;
            typedef KernelCaller<
                Loki::Typelist<Loki::Int2Type<false>, TList >,
                NumArguments-1, Func > CallerFalse;

            const int val = templateParamList.back();
            templateParamList.pop_back();

            if (val)
            {
                CallerTrue::call(functor, templateParamList);
            }
            else
            {
                CallerFalse::call(functor, templateParamList);
            }
        }
    };

    // Specialization for "no values left": the typelist is complete, so the
    // functor is invoked with it.
    template<class TList, class Func>
    struct KernelCaller<TList, 0, Func>
    {
        static void call(Func &functor)
        {
            // A TList object is passed only so that the functor's method
            // template parameter can be deduced
            functor.call(TList());
        }

        // End of the recursion; the remaining parameter list is not used.
        static void call(Func &functor, std::vector<int> &templateParams)
        {
            (void)templateParams;
            call(functor);
        }
    };
}
#endif //_ncvruntimetemplates_hpp_