1) NPP_staging added as sources; binaries removed.
2) NVIDIA tests for GPU. 3) Face detection (FD) sample that uses NVIDIA's interface.
This commit is contained in:
@@ -1,362 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include <cstdio>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#define CV_NO_BACKWARD_COMPATIBILITY
|
||||
|
||||
#include "opencv2/opencv.hpp"
|
||||
|
||||
#include "NCVHaarObjectDetection.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace std;
|
||||
|
||||
const Size preferredVideoFrameSize(640, 480);
|
||||
|
||||
string preferredClassifier = "haarcascade_frontalface_alt.xml";
|
||||
string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed";
|
||||
|
||||
|
||||
// Writes the one-line command-line usage summary to standard output.
void printSyntax(void)
{
    static const char usage[] =
        "Syntax: FaceDetectionFeed.exe [-c cameranum | -v filename] classifier.xml\n";
    fputs(usage, stdout);
}
|
||||
|
||||
|
||||
// Draws a printf-style formatted message onto an image.
//
//   img       - image the text is rendered into (modified in place by putText)
//   lineOffsY - zero-based text row; each row is 1.5x the height of the glyph "T"
//   color     - text color passed straight to putText
//   format    - printf-style format string, followed by its arguments
//
// Nothing is drawn if the message cannot be formatted.
void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...)
{
    const int fontFace = CV_FONT_HERSHEY_PLAIN;
    const double fontScale = 1;

    int baseline = 0;
    const Size textSize = cv::getTextSize("T", fontFace, fontScale, 1, &baseline);

    va_list arg_ptr;

    // Pass 1: measure the formatted length (excluding the terminating NUL).
    // Uses the standard vsnprintf instead of the MSVC-only _vscprintf.
    va_start(arg_ptr, format);
    const int len = vsnprintf(NULL, 0, format, arg_ptr);
    va_end(arg_ptr);

    // A negative length signals a formatting error; bail out instead of
    // indexing into an empty buffer.
    if (len < 0)
        return;

    // Pass 2: format for real. The argument list is restarted because a
    // va_list must not be reused after it has been consumed by a v*printf call.
    vector<char> strBuf(len + 1);
    va_start(arg_ptr, format);
    vsnprintf(&strBuf[0], strBuf.size(), format, arg_ptr);
    va_end(arg_ptr);

    // Left margin of 1 pixel; vertical position is the bottom of row lineOffsY.
    const Point org(1, 3 * textSize.height * (lineOffsY + 1) / 2);
    putText(img, &strBuf[0], org, fontFace, fontScale, color);
}
|
||||
|
||||
|
||||
// Runs one NCV Haar-cascade detection pass over a single 8-bit frame.
//
// The function is used in two modes, selected by gpuAllocator.isCounting():
//   * counting pass - srcdst must be NULL; the statements wrapped in
//     NCV_SKIP_COND_BEGIN/END are presumably skipped (verify against the macro
//     definitions), so only the allocations are performed.
//   * real pass     - srcdst must be non-NULL; the frame is uploaded, processed
//     and copied back into *srcdst.
// Any other combination fails the first assertion with NCV_NULL_PTR.
//
//   srcdst                      - input frame, overwritten with the result (or NULL, see above)
//   width, height               - dimensions used for the device/host staging buffers
//   bShowAllHypotheses          - when set, 0 is passed as the neighbor threshold instead of 4
//   bLargestFace                - when set, adds NCVPipeObjDet_FindLargestObject to the flags
//   haar, d_haar*, h_haarStages - classifier cascade descriptor and its data
//   gpuAllocator, cpuAllocator  - allocators backing all temporary buffers
//   devProp                     - device properties; only major/minor are read here
//
// Returns NCV_SUCCESS, or the first failing status.
NCVStatus process(Mat *srcdst,
                  Ncv32u width, Ncv32u height,
                  NcvBool bShowAllHypotheses, NcvBool bLargestFace,
                  HaarClassifierCascadeDescriptor &haar,
                  NCVVector<HaarStage64> &d_haarStages, NCVVector<HaarClassifierNode128> &d_haarNodes,
                  NCVVector<HaarFeature64> &d_haarFeatures, NCVVector<HaarStage64> &h_haarStages,
                  INCVMemAllocator &gpuAllocator,
                  INCVMemAllocator &cpuAllocator,
                  cudaDeviceProp &devProp)
{
    // A frame must be supplied if and only if this is not a counting pass.
    ncvAssertReturn(!((srcdst == NULL) ^ gpuAllocator.isCounting()), NCV_NULL_PTR);

    NCVStatus status;

    NCV_SET_SKIP_COND(gpuAllocator.isCounting());

    // Staging buffers; the allocation order is kept as-is because the
    // allocators are handed out by the caller and may be order-sensitive.
    NCVMatrixAlloc<Ncv8u> d_frame(gpuAllocator, width, height);
    ncvAssertReturn(d_frame.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
    NCVMatrixAlloc<Ncv8u> h_frame(cpuAllocator, width, height);
    ncvAssertReturn(h_frame.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
    NCVVectorAlloc<NcvRect32u> d_detections(gpuAllocator, 100);
    ncvAssertReturn(d_detections.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

    // OpenCV header wrapping the host staging buffer (no data is copied here).
    Mat hostView(Size(width, height), CV_8U, h_frame.ptr(), h_frame.stride());

    NCV_SKIP_COND_BEGIN

    // Host frame -> host staging buffer -> device buffer.
    srcdst->copyTo(hostView);

    status = h_frame.copySolid(d_frame, 0);
    ncvAssertReturnNcvStat(status);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

    NCV_SKIP_COND_END

    // Search the whole frame.
    NcvSize32u fullRoi;
    fullRoi.width = d_frame.width();
    fullRoi.height = d_frame.height();

    const int minNeighbors = bShowAllHypotheses ? 0 : 4;
    const int pipelineFlags =
        (bLargestFace ? NCVPipeObjDet_FindLargestObject : 0) | NCVPipeObjDet_VisualizeInPlace;

    Ncv32u detectionCount;
    status = ncvDetectObjectsMultiScale_device(
        d_frame, fullRoi, d_detections, detectionCount, haar, h_haarStages,
        d_haarStages, d_haarNodes, d_haarFeatures,
        haar.ClassifierSize,
        minNeighbors,
        1.2f, 1,
        pipelineFlags,
        gpuAllocator, cpuAllocator, devProp.major, devProp.minor, 0);
    ncvAssertReturnNcvStat(status);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

    NCV_SKIP_COND_BEGIN

    // Device buffer -> host staging buffer -> caller's frame.
    status = d_frame.copySolid(h_frame, 0);
    ncvAssertReturnNcvStat(status);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

    hostView.copyTo(*srcdst);

    NCV_SKIP_COND_END

    return NCV_SUCCESS;
}
|
||||
|
||||
|
||||
int main( int argc, const char** argv )
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
|
||||
printf("NVIDIA Computer Vision SDK\n");
|
||||
printf("Face Detection in video and live feed\n");
|
||||
printf("=========================================\n");
|
||||
printf(" Esc - Quit\n");
|
||||
printf(" Space - Switch between NCV and OpenCV\n");
|
||||
printf(" L - Switch between FullSearch and LargestFace modes\n");
|
||||
printf(" U - Toggle unfiltered hypotheses visualization in FullSearch\n");
|
||||
|
||||
if (argc != 4 && argc != 1)
|
||||
return printSyntax(), -1;
|
||||
|
||||
VideoCapture capture;
|
||||
Size frameSize;
|
||||
|
||||
if (argc == 1 || strcmp(argv[1], "-c") == 0)
|
||||
{
|
||||
// Camera input is specified
|
||||
int camIdx = (argc == 3) ? atoi(argv[2]) : 0;
|
||||
if(!capture.open(camIdx))
|
||||
return printf("Error opening camera\n"), -1;
|
||||
|
||||
capture.set(CV_CAP_PROP_FRAME_WIDTH, preferredVideoFrameSize.width);
|
||||
capture.set(CV_CAP_PROP_FRAME_HEIGHT, preferredVideoFrameSize.height);
|
||||
capture.set(CV_CAP_PROP_FPS, 25);
|
||||
frameSize = preferredVideoFrameSize;
|
||||
}
|
||||
else if (strcmp(argv[1], "-v") == 0)
|
||||
{
|
||||
// Video file input (avi)
|
||||
if(!capture.open(argv[2]))
|
||||
return printf("Error opening video file\n"), -1;
|
||||
|
||||
frameSize.width = (int)capture.get(CV_CAP_PROP_FRAME_WIDTH);
|
||||
frameSize.height = (int)capture.get(CV_CAP_PROP_FRAME_HEIGHT);
|
||||
}
|
||||
else
|
||||
return printSyntax(), -1;
|
||||
|
||||
NcvBool bUseOpenCV = true;
|
||||
NcvBool bLargestFace = true;
|
||||
NcvBool bShowAllHypotheses = false;
|
||||
|
||||
string classifierFile = (argc == 1) ? preferredClassifier : argv[3];
|
||||
|
||||
CascadeClassifier classifierOpenCV;
|
||||
if (!classifierOpenCV.load(classifierFile))
|
||||
return printf("Error (in OpenCV) opening classifier\n"), printSyntax(), -1;
|
||||
|
||||
int devId;
|
||||
ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);
|
||||
cudaDeviceProp devProp;
|
||||
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1);
|
||||
printf("Using GPU %d %s, arch=%d.%d\n", devId, devProp.name, devProp.major, devProp.minor);
|
||||
|
||||
//==============================================================================
|
||||
//
|
||||
// Load the classifier from file (assuming its size is about 1 mb)
|
||||
// using a simple allocator
|
||||
//
|
||||
//==============================================================================
|
||||
|
||||
NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice);
|
||||
ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1);
|
||||
NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned);
|
||||
ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1);
|
||||
|
||||
Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
|
||||
ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
|
||||
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", -1);
|
||||
|
||||
NCVVectorAlloc<HaarStage64> h_haarStages(cpuCascadeAllocator, haarNumStages);
|
||||
ncvAssertPrintReturn(h_haarStages.isMemAllocated(), "Error in cascade CPU allocator", -1);
|
||||
NCVVectorAlloc<HaarClassifierNode128> h_haarNodes(cpuCascadeAllocator, haarNumNodes);
|
||||
ncvAssertPrintReturn(h_haarNodes.isMemAllocated(), "Error in cascade CPU allocator", -1);
|
||||
NCVVectorAlloc<HaarFeature64> h_haarFeatures(cpuCascadeAllocator, haarNumFeatures);
|
||||
ncvAssertPrintReturn(h_haarFeatures.isMemAllocated(), "Error in cascade CPU allocator", -1);
|
||||
|
||||
HaarClassifierCascadeDescriptor haar;
|
||||
ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, h_haarStages, h_haarNodes, h_haarFeatures);
|
||||
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", -1);
|
||||
|
||||
NCVVectorAlloc<HaarStage64> d_haarStages(gpuCascadeAllocator, haarNumStages);
|
||||
ncvAssertPrintReturn(d_haarStages.isMemAllocated(), "Error in cascade GPU allocator", -1);
|
||||
NCVVectorAlloc<HaarClassifierNode128> d_haarNodes(gpuCascadeAllocator, haarNumNodes);
|
||||
ncvAssertPrintReturn(d_haarNodes.isMemAllocated(), "Error in cascade GPU allocator", -1);
|
||||
NCVVectorAlloc<HaarFeature64> d_haarFeatures(gpuCascadeAllocator, haarNumFeatures);
|
||||
ncvAssertPrintReturn(d_haarFeatures.isMemAllocated(), "Error in cascade GPU allocator", -1);
|
||||
|
||||
ncvStat = h_haarStages.copySolid(d_haarStages, 0);
|
||||
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
|
||||
ncvStat = h_haarNodes.copySolid(d_haarNodes, 0);
|
||||
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
|
||||
ncvStat = h_haarFeatures.copySolid(d_haarFeatures, 0);
|
||||
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
|
||||
|
||||
//==============================================================================
|
||||
//
|
||||
// Calculate memory requirements and create real allocators
|
||||
//
|
||||
//==============================================================================
|
||||
|
||||
NCVMemStackAllocator gpuCounter(devProp.textureAlignment);
|
||||
ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", -1);
|
||||
NCVMemStackAllocator cpuCounter(devProp.textureAlignment);
|
||||
ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", -1);
|
||||
|
||||
ncvStat = process(NULL, frameSize.width, frameSize.height,
|
||||
false, false, haar,
|
||||
d_haarStages, d_haarNodes,
|
||||
d_haarFeatures, h_haarStages,
|
||||
gpuCounter, cpuCounter, devProp);
|
||||
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);
|
||||
|
||||
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment);
|
||||
ncvAssertPrintReturn(gpuAllocator.isInitialized(), "Error creating GPU memory allocator", -1);
|
||||
NCVMemStackAllocator cpuAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), devProp.textureAlignment);
|
||||
ncvAssertPrintReturn(cpuAllocator.isInitialized(), "Error creating CPU memory allocator", -1);
|
||||
|
||||
printf("Initialized for frame size [%dx%d]\n", frameSize.width, frameSize.height);
|
||||
|
||||
//==============================================================================
|
||||
//
|
||||
// Main processing loop
|
||||
//
|
||||
//==============================================================================
|
||||
|
||||
namedWindow(wndTitle, 1);
|
||||
|
||||
Mat frame, gray, frameDisp;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
// For camera and video file, capture the next image
|
||||
capture >> frame;
|
||||
if (frame.empty())
|
||||
break;
|
||||
|
||||
cvtColor(frame, gray, CV_BGR2GRAY);
|
||||
|
||||
// process
|
||||
NcvSize32u minSize = haar.ClassifierSize;
|
||||
if (bLargestFace)
|
||||
{
|
||||
Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width;
|
||||
Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height;
|
||||
Ncv32u ratioSmallest = std::min(ratioX, ratioY);
|
||||
ratioSmallest = (Ncv32u)std::max(ratioSmallest / 2.5f, 1.f);
|
||||
minSize.width *= ratioSmallest;
|
||||
minSize.height *= ratioSmallest;
|
||||
}
|
||||
|
||||
NcvTimer timer = ncvStartTimer();
|
||||
|
||||
if (!bUseOpenCV)
|
||||
{
|
||||
ncvStat = process(&gray, frameSize.width, frameSize.height,
|
||||
bShowAllHypotheses, bLargestFace, haar,
|
||||
d_haarStages, d_haarNodes,
|
||||
d_haarFeatures, h_haarStages,
|
||||
gpuAllocator, cpuAllocator, devProp);
|
||||
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
vector<Rect> rectsOpenCV;
|
||||
|
||||
classifierOpenCV.detectMultiScale(
|
||||
gray,
|
||||
rectsOpenCV,
|
||||
1.2f,
|
||||
bShowAllHypotheses && !bLargestFace ? 0 : 4,
|
||||
(bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE,
|
||||
Size(minSize.width, minSize.height));
|
||||
|
||||
for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt)
|
||||
rectangle(gray, rectsOpenCV[rt], Scalar(255));
|
||||
}
|
||||
|
||||
Ncv32f avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);
|
||||
|
||||
cvtColor(gray, frameDisp, CV_GRAY2BGR);
|
||||
|
||||
imagePrintf(frameDisp, 0, CV_RGB(255, 0,0), "Space - Switch NCV%s / OpenCV%s", bUseOpenCV?"":" (ON)", bUseOpenCV?" (ON)":"");
|
||||
imagePrintf(frameDisp, 1, CV_RGB(255, 0,0), "L - Switch FullSearch%s / LargestFace%s modes", bLargestFace?"":" (ON)", bLargestFace?" (ON)":"");
|
||||
imagePrintf(frameDisp, 2, CV_RGB(255, 0,0), "U - Toggle unfiltered hypotheses visualization in FullSearch %s", bShowAllHypotheses?"(ON)":"(OFF)");
|
||||
imagePrintf(frameDisp, 3, CV_RGB(118,185,0), " Running at %f FPS on %s", 1000.0f / avgTime, bUseOpenCV?"CPU":"GPU");
|
||||
|
||||
cv::imshow(wndTitle, frameDisp);
|
||||
|
||||
switch (cvWaitKey(1))
|
||||
{
|
||||
case ' ':
|
||||
bUseOpenCV = !bUseOpenCV;
|
||||
break;
|
||||
case 'L':case 'l':
|
||||
bLargestFace = !bLargestFace;
|
||||
break;
|
||||
case 'U':case 'u':
|
||||
bShowAllHypotheses = !bShowAllHypotheses;
|
||||
break;
|
||||
case 27:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@@ -57,8 +57,8 @@
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "npp.h"
|
||||
#include "NCV.hpp"
|
||||
#include "NPP_staging/NPP_staging.hpp"
|
||||
#include "NCVRuntimeTemplates.hpp"
|
||||
#include "NCVHaarObjectDetection.hpp"
|
||||
|
||||
@@ -970,8 +970,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
Ncv32f scaleArea,
|
||||
INCVMemAllocator &gpuAllocator,
|
||||
INCVMemAllocator &cpuAllocator,
|
||||
Ncv32u devPropMajor,
|
||||
Ncv32u devPropMinor,
|
||||
cudaDeviceProp &devProp,
|
||||
cudaStream_t cuStream)
|
||||
{
|
||||
ncvAssertReturn(d_integralImage.memType() == d_weights.memType() &&
|
||||
@@ -1077,15 +1076,15 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
Ncv32f scaleAreaPixels = scaleArea * ((haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER) *
|
||||
(haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER));
|
||||
|
||||
NcvBool bTexCacheCascade = devPropMajor < 2;
|
||||
NcvBool bTexCacheCascade = devProp.major < 2;
|
||||
NcvBool bTexCacheIImg = true; //this works better even on Fermi so far
|
||||
NcvBool bDoAtomicCompaction = devPropMajor >= 2 || (devPropMajor == 1 && devPropMinor >= 3);
|
||||
NcvBool bDoAtomicCompaction = devProp.major >= 2 || (devProp.major == 1 && devProp.minor >= 3);
|
||||
|
||||
NCVVector<Ncv32u> *d_ptrNowData = &d_vecPixelMask;
|
||||
NCVVector<Ncv32u> *d_ptrNowTmp = &d_vecPixelMaskTmp;
|
||||
|
||||
Ncv32u szNppCompactTmpBuf;
|
||||
nppsStCompactGetSize_32u(d_vecPixelMask.length(), &szNppCompactTmpBuf);
|
||||
nppsStCompactGetSize_32u(d_vecPixelMask.length(), &szNppCompactTmpBuf, devProp);
|
||||
if (bDoAtomicCompaction)
|
||||
{
|
||||
szNppCompactTmpBuf = 0;
|
||||
@@ -1185,11 +1184,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
}
|
||||
else
|
||||
{
|
||||
NppStStatus nppSt;
|
||||
NCVStatus nppSt;
|
||||
nppSt = nppsStCompact_32u(d_ptrNowTmp->ptr(), d_vecPixelMask.length(),
|
||||
d_ptrNowData->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
|
||||
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
|
||||
ncvAssertReturn(nppSt == NPPST_SUCCESS, NCV_NPP_ERROR);
|
||||
}
|
||||
numDetections = *hp_numDet;
|
||||
}
|
||||
@@ -1240,11 +1239,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
}
|
||||
else
|
||||
{
|
||||
NppStStatus nppSt;
|
||||
NCVStatus nppSt;
|
||||
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), d_vecPixelMask.length(),
|
||||
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
|
||||
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
|
||||
ncvAssertReturnNcvStat(nppSt);
|
||||
}
|
||||
|
||||
swap(d_ptrNowData, d_ptrNowTmp);
|
||||
@@ -1310,11 +1309,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
}
|
||||
else
|
||||
{
|
||||
NppStStatus nppSt;
|
||||
NCVStatus nppSt;
|
||||
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), numDetections,
|
||||
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
|
||||
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
|
||||
ncvAssertReturnNcvStat(nppSt);
|
||||
}
|
||||
|
||||
swap(d_ptrNowData, d_ptrNowTmp);
|
||||
@@ -1371,11 +1370,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
}
|
||||
else
|
||||
{
|
||||
NppStStatus nppSt;
|
||||
NCVStatus nppSt;
|
||||
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), numDetections,
|
||||
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
|
||||
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
|
||||
ncvAssertReturnNcvStat(nppSt);
|
||||
}
|
||||
|
||||
swap(d_ptrNowData, d_ptrNowTmp);
|
||||
@@ -1715,8 +1714,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
|
||||
INCVMemAllocator &gpuAllocator,
|
||||
INCVMemAllocator &cpuAllocator,
|
||||
Ncv32u devPropMajor,
|
||||
Ncv32u devPropMinor,
|
||||
cudaDeviceProp &devProp,
|
||||
cudaStream_t cuStream)
|
||||
{
|
||||
ncvAssertReturn(d_srcImg.memType() == d_dstRects.memType() &&
|
||||
@@ -1773,12 +1771,12 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
NCVVectorAlloc<NcvRect32u> h_hypothesesIntermediate(cpuAllocator, d_srcImg.width() * d_srcImg.height());
|
||||
ncvAssertReturn(h_hypothesesIntermediate.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
|
||||
NppStStatus nppStat;
|
||||
NCVStatus nppStat;
|
||||
Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;
|
||||
nppStat = nppiStIntegralGetSize_8u32u(NppStSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufIntegral);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
nppStat = nppiStSqrIntegralGetSize_8u64u(NppStSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufSqIntegral);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufIntegral, devProp);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufSqIntegral, devProp);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
NCVVectorAlloc<Ncv8u> d_tmpIIbuf(gpuAllocator, std::max(szTmpBufIntegral, szTmpBufSqIntegral));
|
||||
ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
|
||||
@@ -1786,15 +1784,15 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
|
||||
nppStat = nppiStIntegral_8u32u_C1R(d_srcImg.ptr(), d_srcImg.pitch(),
|
||||
d_integralImage.ptr(), d_integralImage.pitch(),
|
||||
NppStSize32u(d_srcImg.width(), d_srcImg.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufIntegral);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
NcvSize32u(d_srcImg.width(), d_srcImg.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufIntegral, devProp);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
nppStat = nppiStSqrIntegral_8u64u_C1R(d_srcImg.ptr(), d_srcImg.pitch(),
|
||||
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
|
||||
NppStSize32u(d_srcImg.width(), d_srcImg.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufSqIntegral);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
NcvSize32u(d_srcImg.width(), d_srcImg.height()),
|
||||
d_tmpIIbuf.ptr(), szTmpBufSqIntegral, devProp);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
@@ -1859,7 +1857,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
Ncv32u scale = scalesVector[i];
|
||||
|
||||
NcvSize32u srcRoi, scaledIIRoi, searchRoi;
|
||||
NppStSize32u srcIIRoi;
|
||||
NcvSize32u srcIIRoi;
|
||||
srcRoi.width = d_srcImg.width();
|
||||
srcRoi.height = d_srcImg.height();
|
||||
srcIIRoi.width = srcRoi.width + 1;
|
||||
@@ -1875,15 +1873,15 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
d_integralImage.ptr(), d_integralImage.pitch(),
|
||||
d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),
|
||||
srcIIRoi, scale, true);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
nppStat = nppiStDownsampleNearest_64u_C1R(
|
||||
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
|
||||
d_scaledSqIntegralImage.ptr(), d_scaledSqIntegralImage.pitch(),
|
||||
srcIIRoi, scale, true);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
const NppStRect32u rect(
|
||||
const NcvRect32u rect(
|
||||
HAAR_STDDEV_BORDER,
|
||||
HAAR_STDDEV_BORDER,
|
||||
haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER,
|
||||
@@ -1892,9 +1890,9 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),
|
||||
d_scaledSqIntegralImage.ptr(), d_scaledSqIntegralImage.pitch(),
|
||||
d_rectStdDev.ptr(), d_rectStdDev.pitch(),
|
||||
NppStSize32u(searchRoi.width, searchRoi.height), rect,
|
||||
NcvSize32u(searchRoi.width, searchRoi.height), rect,
|
||||
(Ncv32f)scale*scale, true);
|
||||
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
NCV_SKIP_COND_END
|
||||
|
||||
@@ -1904,8 +1902,8 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
detectionsOnThisScale,
|
||||
haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,
|
||||
searchRoi, pixelStep, (Ncv32f)scale*scale,
|
||||
gpuAllocator, cpuAllocator, devPropMajor, devPropMinor, cuStream);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
|
||||
gpuAllocator, cpuAllocator, devProp, cuStream);
|
||||
ncvAssertReturnNcvStat(nppStat);
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
@@ -2250,6 +2248,10 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
|
||||
return ncvStat;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
|
||||
Ncv32u &numHypotheses,
|
||||
Ncv32u minNeighbors,
|
||||
@@ -2539,7 +2541,7 @@ NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
|
||||
}
|
||||
|
||||
|
||||
NCVStatus ncvHaarStoreNVBIN_host(std::string &filename,
|
||||
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
|
||||
HaarClassifierCascadeDescriptor haar,
|
||||
NCVVector<HaarStage64> &h_HaarStages,
|
||||
NCVVector<HaarClassifierNode128> &h_HaarNodes,
|
||||
|
@@ -75,13 +75,13 @@ struct HaarFeature64
|
||||
|
||||
#define HaarFeature64_CreateCheck_MaxRectField 0xFF
|
||||
|
||||
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u clsWidth, Ncv32u clsHeight)
|
||||
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)
|
||||
{
|
||||
ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
|
||||
((NcvRect8u*)&(this->_ui2.x))->x = rectX;
|
||||
((NcvRect8u*)&(this->_ui2.x))->y = rectY;
|
||||
((NcvRect8u*)&(this->_ui2.x))->width = rectWidth;
|
||||
((NcvRect8u*)&(this->_ui2.x))->height = rectHeight;
|
||||
((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX;
|
||||
((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY;
|
||||
((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth;
|
||||
((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight;
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -306,11 +306,11 @@ struct HaarStage64
|
||||
};
|
||||
|
||||
|
||||
NPPST_CT_ASSERT(sizeof(HaarFeature64) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
|
||||
NPPST_CT_ASSERT(sizeof(HaarStage64) == 8);
|
||||
NCV_CT_ASSERT(sizeof(HaarFeature64) == 8);
|
||||
NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
|
||||
NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
|
||||
NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
|
||||
NCV_CT_ASSERT(sizeof(HaarStage64) == 8);
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@@ -347,7 +347,7 @@ enum
|
||||
NCVPipeObjDet_VisualizeInPlace = 0x004,
|
||||
};
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
NcvSize32u srcRoi,
|
||||
NCVVector<NcvRect32u> &d_dstRects,
|
||||
@@ -367,15 +367,14 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||
|
||||
INCVMemAllocator &gpuAllocator,
|
||||
INCVMemAllocator &cpuAllocator,
|
||||
Ncv32u devPropMajor,
|
||||
Ncv32u devPropMinor,
|
||||
cudaDeviceProp &devProp,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
#define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF
|
||||
#define HAAR_STDDEV_BORDER 1
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
|
||||
NCVMatrix<Ncv32f> &d_weights,
|
||||
NCVMatrixAlloc<Ncv32u> &d_pixelMask,
|
||||
@@ -391,11 +390,10 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
|
||||
Ncv32f scaleArea,
|
||||
INCVMemAllocator &gpuAllocator,
|
||||
INCVMemAllocator &cpuAllocator,
|
||||
Ncv32u devPropMajor,
|
||||
Ncv32u devPropMinor,
|
||||
cudaDeviceProp &devProp,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
|
||||
NCVMatrix<Ncv32f> &h_weights,
|
||||
NCVMatrixAlloc<Ncv32u> &h_pixelMask,
|
||||
@@ -409,7 +407,7 @@ NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
|
||||
Ncv32u pixelStep,
|
||||
Ncv32f scaleArea);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
|
||||
Ncv32u dstStride,
|
||||
Ncv32u dstWidth,
|
||||
@@ -419,7 +417,7 @@ NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
|
||||
Ncv8u color,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
|
||||
Ncv32u dstStride,
|
||||
Ncv32u dstWidth,
|
||||
@@ -429,7 +427,7 @@ NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
|
||||
Ncv32u color,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
|
||||
Ncv32u dstStride,
|
||||
Ncv32u dstWidth,
|
||||
@@ -438,7 +436,7 @@ NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
|
||||
Ncv32u numRects,
|
||||
Ncv8u color);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
|
||||
Ncv32u dstStride,
|
||||
Ncv32u dstWidth,
|
||||
@@ -450,7 +448,7 @@ NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
|
||||
|
||||
#define RECT_SIMILARITY_PROPORTION 0.2f
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
|
||||
Ncv32u numPixelMaskDetections,
|
||||
NCVVector<NcvRect32u> &hypotheses,
|
||||
@@ -461,7 +459,7 @@ NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
|
||||
Ncv32f curScale,
|
||||
cudaStream_t cuStream);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
|
||||
Ncv32u numPixelMaskDetections,
|
||||
NCVVector<NcvRect32u> &hypotheses,
|
||||
@@ -471,18 +469,18 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
|
||||
Ncv32u rectHeight,
|
||||
Ncv32f curScale);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
|
||||
Ncv32u &numHypotheses,
|
||||
Ncv32u minNeighbors,
|
||||
Ncv32f intersectEps,
|
||||
NCVVector<Ncv32u> *hypothesesWeights);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
|
||||
Ncv32u &numNodes, Ncv32u &numFeatures);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
|
||||
HaarClassifierCascadeDescriptor &haar,
|
||||
NCVVector<HaarStage64> &h_HaarStages,
|
||||
@@ -490,6 +488,7 @@ NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
|
||||
NCVVector<HaarFeature64> &h_HaarFeatures);
|
||||
|
||||
|
||||
NCV_EXPORTS
|
||||
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
|
||||
HaarClassifierCascadeDescriptor haar,
|
||||
NCVVector<HaarStage64> &h_HaarStages,
|
||||
|
1704
modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu
Normal file
1704
modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu
Normal file
File diff suppressed because it is too large
Load Diff
637
modules/gpu/src/nvidia/NPP_staging/NPP_staging.hpp
Normal file
637
modules/gpu/src/nvidia/NPP_staging/NPP_staging.hpp
Normal file
@@ -0,0 +1,637 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef _npp_staging_hpp_
|
||||
#define _npp_staging_hpp_
|
||||
|
||||
#include "NCV.hpp"
|
||||
|
||||
|
||||
/**
|
||||
* \file NPP_staging.hpp
|
||||
* NPP Staging Library
|
||||
*/
|
||||
|
||||
|
||||
/** \defgroup core_npp NPPST Core
|
||||
* Basic functions for CUDA stream management.
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Gets the active CUDA stream used by NPPST
|
||||
* NOT THREAD SAFE
|
||||
* \return Current CUDA stream
|
||||
*/
|
||||
cudaStream_t nppStGetActiveCUDAstream();
|
||||
|
||||
|
||||
/**
|
||||
* Sets the active CUDA stream used by NPPST
|
||||
* NOT THREAD SAFE
|
||||
* \param cudaStream [IN] CUDA stream to become current
|
||||
* \return CUDA stream that was active before the call
|
||||
*/
|
||||
cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
/** \defgroup nppi NPPST Image Processing
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest in the source image
|
||||
* \param scale [IN] Downsampling scale factor (positive integer)
|
||||
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,
|
||||
Ncv32u *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,
|
||||
Ncv32s *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
|
||||
Ncv32f *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,
|
||||
Ncv64u *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,
|
||||
Ncv64s *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
|
||||
* \see nppiStDownsampleNearest_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
|
||||
Ncv64f *d_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale,
|
||||
NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest in the source image
|
||||
* \param scale [IN] Downsampling scale factor (positive integer)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,
|
||||
Ncv32u *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,
|
||||
Ncv32s *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
|
||||
Ncv32f *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,
|
||||
Ncv64u *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,
|
||||
Ncv64s *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
|
||||
* \see nppiStDownsampleNearest_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStDownsampleNearest_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,
|
||||
Ncv64f *h_dst, Ncv32u dstStep,
|
||||
NcvSize32u srcRoi, Ncv32u scale);
|
||||
|
||||
|
||||
/**
|
||||
* Computes standard deviation for each rectangular region of the input image using integral images.
|
||||
*
|
||||
* \param d_sum [IN] Integral image pointer (CUDA device memory)
|
||||
* \param sumStep [IN] Integral image line step
|
||||
* \param d_sqsum [IN] Squared integral image pointer (CUDA device memory)
|
||||
* \param sqsumStep [IN] Squared integral image line step
|
||||
* \param d_norm [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
|
||||
* \param normStep [IN] Stddev image line step
|
||||
* \param roi [IN] Region of interest in the source image
|
||||
* \param rect [IN] Rectangular region to calculate stddev over
|
||||
* \param scaleArea [IN] Multiplication factor to account for the decimated scale
|
||||
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,
|
||||
Ncv64u *d_sqsum, Ncv32u sqsumStep,
|
||||
Ncv32f *d_norm, Ncv32u normStep,
|
||||
NcvSize32u roi, NcvRect32u rect,
|
||||
Ncv32f scaleArea, NcvBool readThruTexture);
|
||||
|
||||
|
||||
/**
|
||||
* Computes standard deviation for each rectangular region of the input image using integral images. Host implementation
|
||||
*
|
||||
* \param h_sum [IN] Integral image pointer (Host or pinned memory)
|
||||
* \param sumStep [IN] Integral image line step
|
||||
* \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)
|
||||
* \param sqsumStep [IN] Squared integral image line step
|
||||
* \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
|
||||
* \param normStep [IN] Stddev image line step
|
||||
* \param roi [IN] Region of interest in the source image
|
||||
* \param rect [IN] Rectangular region to calculate stddev over
|
||||
* \param scaleArea [IN] Multiplication factor to account for the decimated scale
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStRectStdDev_32f_C1R_host(Ncv32u *h_sum, Ncv32u sumStep,
|
||||
Ncv64u *h_sqsum, Ncv32u sqsumStep,
|
||||
Ncv32f *h_norm, Ncv32u normStep,
|
||||
NcvSize32u roi, NcvRect32u rect,
|
||||
Ncv32f scaleArea);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit unsigned pixels, single channel
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStride [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
|
||||
* \param dstStride [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32u_C1R(Ncv32u *d_src, Ncv32u srcStride,
|
||||
Ncv32u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit signed pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32s_C1R(Ncv32s *d_src, Ncv32u srcStride,
|
||||
Ncv32s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit float pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32f_C1R(Ncv32f *d_src, Ncv32u srcStride,
|
||||
Ncv32f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit unsigned pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64u_C1R(Ncv64u *d_src, Ncv32u srcStride,
|
||||
Ncv64u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit signed pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64s_C1R(Ncv64s *d_src, Ncv32u srcStride,
|
||||
Ncv64s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit float pixels, single channel
|
||||
* \see nppiStTranspose_32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64f_C1R(Ncv64f *d_src, Ncv32u srcStride,
|
||||
Ncv64f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit unsigned pixels, single channel. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStride [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
|
||||
* \param dstStride [IN] Destination image line step
|
||||
* \param srcRoi [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStride,
|
||||
Ncv32u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit signed pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStride,
|
||||
Ncv32s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 32-bit float pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStride,
|
||||
Ncv32f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit unsigned pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStride,
|
||||
Ncv64u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit signed pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStride,
|
||||
Ncv64s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Transposes an image. 64-bit float pixels, single channel. Host implementation
|
||||
* \see nppiStTranspose_32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStTranspose_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStride,
|
||||
Ncv64f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for integral image creation
|
||||
*
|
||||
* \param roiSize [IN] Size of the input image
|
||||
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
|
||||
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for integral image creation
|
||||
* \see nppiStIntegralGetSize_8u32u
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination integral image pointer (CUDA device memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
|
||||
* \param bufSize [IN] Size of the pBuffer in bytes
|
||||
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,
|
||||
Ncv32u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
|
||||
Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image
|
||||
* \see nppiStIntegral_8u32u_C1R
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
|
||||
Ncv32f *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
|
||||
Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStIntegral_8u32u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
|
||||
Ncv32u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
|
||||
|
||||
|
||||
/**
|
||||
* Creates an integral image representation for the input image. Host implementation
|
||||
* \see nppiStIntegral_8u32u_C1R_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStIntegral_32f32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
|
||||
Ncv32f *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for squared integral image creation
|
||||
*
|
||||
* \param roiSize [IN] Size of the input image
|
||||
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
|
||||
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a squared integral image representation for the input image
|
||||
*
|
||||
* \param d_src [IN] Source image pointer (CUDA device memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
|
||||
* \param bufSize [IN] Size of the pBuffer in bytes
|
||||
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,
|
||||
Ncv64u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
|
||||
Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a squared integral image representation for the input image. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source image pointer (Host or pinned memory)
|
||||
* \param srcStep [IN] Source image line step
|
||||
* \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)
|
||||
* \param dstStep [IN] Destination image line step
|
||||
* \param roiSize [IN] Region of interest of the source image
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
|
||||
Ncv64u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
/** \defgroup npps NPPST Signal Processing
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values
|
||||
*
|
||||
* \param srcLen [IN] Length of the input vector in elements
|
||||
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
|
||||
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for vector compaction. 32-bit signed values
|
||||
* \see nppsStCompactGetSize_32u
|
||||
*/
|
||||
NCVStatus nppsStCompactGetSize_32s(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the size of the temporary buffer for vector compaction. 32-bit float values
|
||||
* \see nppsStCompactGetSize_32u
|
||||
*/
|
||||
NCVStatus nppsStCompactGetSize_32f(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values
|
||||
*
|
||||
* \param d_src [IN] Source vector pointer (CUDA device memory)
|
||||
* \param srcLen [IN] Source vector length
|
||||
* \param d_dst [OUT] Destination vector pointer (CUDA device memory)
|
||||
* \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL)
|
||||
* \param elemRemove [IN] The value to be removed
|
||||
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
|
||||
* \param bufSize [IN] Size of the pBuffer in bytes
|
||||
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,
|
||||
Ncv32u *d_dst, Ncv32u *p_dstLen,
|
||||
Ncv32u elemRemove, Ncv8u *pBuffer,
|
||||
Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit signed values
|
||||
* \see nppsStCompact_32u
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,
|
||||
Ncv32s *d_dst, Ncv32u *p_dstLen,
|
||||
Ncv32s elemRemove, Ncv8u *pBuffer,
|
||||
Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit float values
|
||||
* \see nppsStCompact_32u
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,
|
||||
Ncv32f *d_dst, Ncv32u *p_dstLen,
|
||||
Ncv32f elemRemove, Ncv8u *pBuffer,
|
||||
Ncv32u bufSize, cudaDeviceProp &devProp);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation
|
||||
*
|
||||
* \param h_src [IN] Source vector pointer (Host or pinned memory)
|
||||
* \param srcLen [IN] Source vector length
|
||||
* \param h_dst [OUT] Destination vector pointer (Host or pinned memory)
|
||||
* \param dstLen [OUT] Pointer to the destination vector length (can be NULL)
|
||||
* \param elemRemove [IN] The value to be removed
|
||||
*
|
||||
* \return NCV status code
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
|
||||
Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation
|
||||
* \see nppsStCompact_32u_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,
|
||||
Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove);
|
||||
|
||||
|
||||
/**
|
||||
* Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation
|
||||
* \see nppsStCompact_32u_host
|
||||
*/
|
||||
NCV_EXPORTS
|
||||
NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
|
||||
Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove);
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
#endif // _npp_staging_hpp_
|
@@ -40,15 +40,13 @@
|
||||
//M*/
|
||||
|
||||
|
||||
#include <precomp.hpp>
|
||||
|
||||
|
||||
#if !defined (HAVE_CUDA)
|
||||
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
|
||||
#include <ios>
|
||||
#include <stdarg.h>
|
||||
#include "NCV.hpp"
|
||||
|
||||
@@ -94,17 +92,6 @@ void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
|
||||
//==============================================================================
|
||||
|
||||
|
||||
NCVStatus GPUAlignmentValue(Ncv32u &alignment)
|
||||
{
|
||||
int curDev;
|
||||
cudaDeviceProp curProp;
|
||||
ncvAssertCUDAReturn(cudaGetDevice(&curDev), NCV_CUDA_ERROR);
|
||||
ncvAssertCUDAReturn(cudaGetDeviceProperties(&curProp, curDev), NCV_CUDA_ERROR);
|
||||
alignment = curProp.textureAlignment; //GPUAlignmentValue(curProp.major);
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
Ncv32u alignUp(Ncv32u what, Ncv32u alignment)
|
||||
{
|
||||
Ncv32u alignMask = alignment-1;
|
||||
@@ -216,7 +203,7 @@ NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
|
||||
}
|
||||
|
||||
|
||||
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment)
|
||||
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr)
|
||||
:
|
||||
currentSize(0),
|
||||
_maxSize(0),
|
||||
@@ -229,17 +216,26 @@ NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity,
|
||||
|
||||
allocBegin = NULL;
|
||||
|
||||
switch (memT)
|
||||
if (reusePtr == NULL)
|
||||
{
|
||||
case NCVMemoryTypeDevice:
|
||||
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPinned:
|
||||
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPageable:
|
||||
allocBegin = (Ncv8u *)malloc(capacity);
|
||||
break;
|
||||
bReusesMemory = false;
|
||||
switch (memT)
|
||||
{
|
||||
case NCVMemoryTypeDevice:
|
||||
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPinned:
|
||||
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPageable:
|
||||
allocBegin = (Ncv8u *)malloc(capacity);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
bReusesMemory = true;
|
||||
allocBegin = (Ncv8u *)reusePtr;
|
||||
}
|
||||
|
||||
if (capacity == 0)
|
||||
@@ -260,18 +256,23 @@ NCVMemStackAllocator::~NCVMemStackAllocator()
|
||||
if (allocBegin != NULL)
|
||||
{
|
||||
ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");
|
||||
switch (_memType)
|
||||
|
||||
if (!bReusesMemory)
|
||||
{
|
||||
case NCVMemoryTypeDevice:
|
||||
ncvAssertCUDAReturn(cudaFree(allocBegin), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPinned:
|
||||
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPageable:
|
||||
free(allocBegin);
|
||||
break;
|
||||
switch (_memType)
|
||||
{
|
||||
case NCVMemoryTypeDevice:
|
||||
ncvAssertCUDAReturn(cudaFree(allocBegin), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPinned:
|
||||
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
|
||||
break;
|
||||
case NCVMemoryTypeHostPageable:
|
||||
free(allocBegin);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
allocBegin = NULL;
|
||||
}
|
||||
}
|
||||
@@ -356,14 +357,14 @@ size_t NCVMemStackAllocator::maxSize(void) const
|
||||
//===================================================================
|
||||
|
||||
|
||||
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT)
|
||||
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment)
|
||||
:
|
||||
currentSize(0),
|
||||
_maxSize(0),
|
||||
_memType(memT)
|
||||
_memType(memT),
|
||||
_alignment(alignment)
|
||||
{
|
||||
ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
|
||||
ncvAssertPrintReturn(NCV_SUCCESS == GPUAlignmentValue(this->_alignment), "NCVMemNativeAllocator ctor:: couldn't get device _alignment", );
|
||||
}
|
||||
|
||||
|
@@ -42,8 +42,49 @@
|
||||
#ifndef _ncv_hpp_
|
||||
#define _ncv_hpp_
|
||||
|
||||
#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS //&& !defined(__CUDACC__)
|
||||
#define NCV_EXPORTS __declspec(dllexport)
|
||||
#else
|
||||
#define NCV_EXPORTS
|
||||
#endif
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "npp_staging.h"
|
||||
|
||||
|
||||
//==============================================================================
|
||||
//
|
||||
// Compile-time assert functionality
|
||||
//
|
||||
//==============================================================================
|
||||
|
||||
|
||||
/**
|
||||
* Compile-time assert namespace
|
||||
*/
|
||||
namespace NcvCTprep
|
||||
{
|
||||
// Primary template is declared but never defined, so CT_ASSERT_FAILURE<false>
// is an incomplete type and cannot be used as an operand of sizeof.
template <bool x>
|
||||
struct CT_ASSERT_FAILURE;
|
||||
|
||||
// Only the `true` case receives a definition, which makes it a complete type.
template <>
|
||||
struct CT_ASSERT_FAILURE<true> {};
|
||||
|
||||
// Empty wrapper parameterized by an integer; NCV_CT_ASSERT instantiates it
// with sizeof(CT_ASSERT_FAILURE<...>) inside a typedef.
template <int x>
|
||||
struct assertTest{};
|
||||
}
|
||||
|
||||
|
||||
#define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
|
||||
#define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro
|
||||
|
||||
|
||||
/**
|
||||
* Performs a compile-time assertion of a condition at file scope
|
||||
*/
|
||||
#define NCV_CT_ASSERT(X) \
|
||||
typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
|
||||
NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
|
||||
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@@ -82,62 +123,72 @@ typedef float Ncv32f;
|
||||
typedef double Ncv64f;
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvRect8u
|
||||
{
|
||||
Ncv8u x;
|
||||
Ncv8u y;
|
||||
Ncv8u width;
|
||||
Ncv8u height;
|
||||
} NcvRect8u;
|
||||
NcvRect8u() : x(0), y(0), width(0), height(0) {};
|
||||
NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {}
|
||||
};
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvRect32s // Rectangle whose position and extents are stored as Ncv32s values
|
||||
{
|
||||
Ncv32s x; ///< x-coordinate of upper left corner.
|
||||
Ncv32s y; ///< y-coordinate of upper left corner.
|
||||
Ncv32s width; ///< Rectangle width.
|
||||
Ncv32s height; ///< Rectangle height.
|
||||
} NcvRect32s;
|
||||
NcvRect32s() : x(0), y(0), width(0), height(0) {}; ///< Default constructor: sets all four fields to zero.
|
||||
NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height) : x(x), y(y), width(width), height(height) {} ///< Field-wise constructor; parameters share the member names, so x(x) initializes the member from the parameter.
|
||||
};
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvRect32u // Rectangle whose position and extents are stored as Ncv32u values
|
||||
{
|
||||
Ncv32u x; ///< x-coordinate of upper left corner.
|
||||
Ncv32u y; ///< y-coordinate of upper left corner.
|
||||
Ncv32u width; ///< Rectangle width.
|
||||
Ncv32u height; ///< Rectangle height.
|
||||
} NcvRect32u;
|
||||
NcvRect32u() : x(0), y(0), width(0), height(0) {}; ///< Default constructor: sets all four fields to zero.
|
||||
NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height) : x(x), y(y), width(width), height(height) {} ///< Field-wise constructor; parameters share the member names, so x(x) initializes the member from the parameter.
|
||||
};
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvSize32s // Width/height pair stored as Ncv32s values
|
||||
{
|
||||
Ncv32s width; ///< Rectangle width.
|
||||
Ncv32s height; ///< Rectangle height.
|
||||
} NcvSize32s;
|
||||
NcvSize32s() : width(0), height(0) {}; ///< Default constructor: sets width and height to zero.
|
||||
NcvSize32s(Ncv32s width, Ncv32s height) : width(width), height(height) {} ///< Field-wise constructor; parameters share the member names, so width(width) initializes the member from the parameter.
|
||||
};
|
||||
|
||||
|
||||
typedef struct
|
||||
struct NcvSize32u // Width/height pair stored as Ncv32u values
|
||||
{
|
||||
Ncv32u width; ///< Rectangle width.
|
||||
Ncv32u height; ///< Rectangle height.
|
||||
} NcvSize32u;
|
||||
NcvSize32u() : width(0), height(0) {}; ///< Default constructor: sets width and height to zero.
|
||||
NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {} ///< Field-wise constructor; parameters share the member names, so width(width) initializes the member from the parameter.
|
||||
};
|
||||
|
||||
|
||||
NPPST_CT_ASSERT(sizeof(NcvBool) <= 4);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv64s) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv64u) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv32s) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv32u) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv16s) == 2);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv16u) == 2);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv8s) == 1);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv8u) == 1);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv32f) == 4);
|
||||
NPPST_CT_ASSERT(sizeof(Ncv64f) == 8);
|
||||
NPPST_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
|
||||
NPPST_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
|
||||
NPPST_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
|
||||
NPPST_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
|
||||
NCV_CT_ASSERT(sizeof(NcvBool) <= 4);
|
||||
NCV_CT_ASSERT(sizeof(Ncv64s) == 8);
|
||||
NCV_CT_ASSERT(sizeof(Ncv64u) == 8);
|
||||
NCV_CT_ASSERT(sizeof(Ncv32s) == 4);
|
||||
NCV_CT_ASSERT(sizeof(Ncv32u) == 4);
|
||||
NCV_CT_ASSERT(sizeof(Ncv16s) == 2);
|
||||
NCV_CT_ASSERT(sizeof(Ncv16u) == 2);
|
||||
NCV_CT_ASSERT(sizeof(Ncv8s) == 1);
|
||||
NCV_CT_ASSERT(sizeof(Ncv8u) == 1);
|
||||
NCV_CT_ASSERT(sizeof(Ncv32f) == 4);
|
||||
NCV_CT_ASSERT(sizeof(Ncv64f) == 8);
|
||||
NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
|
||||
NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
|
||||
NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
|
||||
NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@@ -162,13 +213,13 @@ const Ncv32u K_LOG2_WARP_SIZE = 5;
|
||||
#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x)
|
||||
|
||||
|
||||
void ncvDebugOutput(const char *msg, ...);
|
||||
NCV_EXPORTS void ncvDebugOutput(const char *msg, ...);
|
||||
|
||||
|
||||
typedef void NCVDebugOutputHandler(const char* msg);
|
||||
|
||||
|
||||
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
||||
NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
||||
|
||||
|
||||
#define ncvAssertPrintCheck(pred, msg) \
|
||||
@@ -222,6 +273,7 @@ void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
||||
*/
|
||||
enum NCVStatus
|
||||
{
|
||||
//NCV statuses
|
||||
NCV_SUCCESS,
|
||||
|
||||
NCV_CUDA_ERROR,
|
||||
@@ -257,6 +309,24 @@ enum NCVStatus
|
||||
NCV_NOIMPL_HAAR_TILTED_FEATURES,
|
||||
|
||||
NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
|
||||
|
||||
//NPP statuses
|
||||
NPPST_SUCCESS = NCV_SUCCESS, ///< Successful operation (same as NPP_NO_ERROR)
|
||||
NPPST_ERROR, ///< Unknown error
|
||||
NPPST_CUDA_KERNEL_EXECUTION_ERROR, ///< CUDA kernel execution error
|
||||
NPPST_NULL_POINTER_ERROR, ///< NULL pointer argument error
|
||||
NPPST_TEXTURE_BIND_ERROR, ///< CUDA texture binding error or non-zero offset returned
|
||||
NPPST_MEMCPY_ERROR, ///< CUDA memory copy error
|
||||
NPPST_MEM_ALLOC_ERR, ///< CUDA memory allocation error
|
||||
NPPST_MEMFREE_ERR, ///< CUDA memory deallocation error
|
||||
|
||||
//NPPST statuses
|
||||
NPPST_INVALID_ROI, ///< Invalid region of interest argument
|
||||
NPPST_INVALID_STEP, ///< Invalid image lines step argument (check sign, alignment, relation to image width)
|
||||
NPPST_INVALID_SCALE, ///< Invalid scale parameter passed
|
||||
NPPST_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer
|
||||
NPPST_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)
|
||||
NPPST_MEM_INTERNAL_ERROR, ///< Internal memory management error
|
||||
};
|
||||
|
||||
|
||||
@@ -285,11 +355,11 @@ enum NCVStatus
|
||||
|
||||
typedef struct _NcvTimer *NcvTimer;
|
||||
|
||||
NcvTimer ncvStartTimer(void);
|
||||
NCV_EXPORTS NcvTimer ncvStartTimer(void);
|
||||
|
||||
double ncvEndQueryTimerUs(NcvTimer t);
|
||||
NCV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t);
|
||||
|
||||
double ncvEndQueryTimerMs(NcvTimer t);
|
||||
NCV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);
|
||||
|
||||
|
||||
//==============================================================================
|
||||
@@ -299,16 +369,10 @@ double ncvEndQueryTimerMs(NcvTimer t);
|
||||
//==============================================================================
|
||||
|
||||
|
||||
/**
|
||||
* Alignment of GPU memory chunks in bytes
|
||||
*/
|
||||
NCVStatus GPUAlignmentValue(Ncv32u &alignment);
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the aligned top bound value
|
||||
*/
|
||||
Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
|
||||
NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
|
||||
|
||||
|
||||
/**
|
||||
@@ -326,7 +390,7 @@ enum NCVMemoryType
|
||||
/**
|
||||
* NCVMemPtr
|
||||
*/
|
||||
struct NCVMemPtr
|
||||
struct NCV_EXPORTS NCVMemPtr
|
||||
{
|
||||
void *ptr;
|
||||
NCVMemoryType memtype;
|
||||
@@ -337,7 +401,7 @@ struct NCVMemPtr
|
||||
/**
|
||||
* NCVMemSegment
|
||||
*/
|
||||
struct NCVMemSegment
|
||||
struct NCV_EXPORTS NCVMemSegment
|
||||
{
|
||||
NCVMemPtr begin;
|
||||
size_t size;
|
||||
@@ -348,7 +412,7 @@ struct NCVMemSegment
|
||||
/**
|
||||
* INCVMemAllocator (Interface)
|
||||
*/
|
||||
class INCVMemAllocator
|
||||
class NCV_EXPORTS INCVMemAllocator
|
||||
{
|
||||
public:
|
||||
virtual ~INCVMemAllocator() = 0;
|
||||
@@ -370,7 +434,7 @@ inline INCVMemAllocator::~INCVMemAllocator() {}
|
||||
/**
|
||||
* NCVMemStackAllocator
|
||||
*/
|
||||
class NCVMemStackAllocator : public INCVMemAllocator
|
||||
class NCV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator
|
||||
{
|
||||
NCVMemStackAllocator();
|
||||
NCVMemStackAllocator(const NCVMemStackAllocator &);
|
||||
@@ -378,7 +442,7 @@ class NCVMemStackAllocator : public INCVMemAllocator
|
||||
public:
|
||||
|
||||
explicit NCVMemStackAllocator(Ncv32u alignment);
|
||||
NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment);
|
||||
NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr=NULL);
|
||||
virtual ~NCVMemStackAllocator();
|
||||
|
||||
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
|
||||
@@ -400,17 +464,18 @@ private:
|
||||
Ncv8u *end;
|
||||
size_t currentSize;
|
||||
size_t _maxSize;
|
||||
NcvBool bReusesMemory;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* NCVMemNativeAllocator
|
||||
*/
|
||||
class NCVMemNativeAllocator : public INCVMemAllocator
|
||||
class NCV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator
|
||||
{
|
||||
public:
|
||||
|
||||
NCVMemNativeAllocator(NCVMemoryType memT);
|
||||
NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment);
|
||||
virtual ~NCVMemNativeAllocator();
|
||||
|
||||
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
|
||||
@@ -438,9 +503,9 @@ private:
|
||||
/**
|
||||
* Copy dispatcher
|
||||
*/
|
||||
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
|
||||
const void *src, NCVMemoryType srcType,
|
||||
size_t sz, cudaStream_t cuStream);
|
||||
NCV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
|
||||
const void *src, NCVMemoryType srcType,
|
||||
size_t sz, cudaStream_t cuStream);
|
||||
|
||||
|
||||
/**
|
||||
@@ -514,6 +579,7 @@ class NCVVectorAlloc : public NCVVector<T>
|
||||
{
|
||||
NCVVectorAlloc();
|
||||
NCVVectorAlloc(const NCVVectorAlloc &);
|
||||
NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&);
|
||||
|
||||
public:
|
||||
|
||||
@@ -563,8 +629,7 @@ public:
|
||||
return allocatedMem;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
private:
|
||||
INCVMemAllocator &allocator;
|
||||
NCVMemSegment allocatedMem;
|
||||
};
|
||||
@@ -707,7 +772,7 @@ class NCVMatrixAlloc : public NCVMatrix<T>
|
||||
{
|
||||
NCVMatrixAlloc();
|
||||
NCVMatrixAlloc(const NCVMatrixAlloc &);
|
||||
|
||||
NCVMatrixAlloc& operator=(const NCVMatrixAlloc &);
|
||||
public:
|
||||
|
||||
NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
|
@@ -1,3 +1,51 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef _ncvruntimetemplates_hpp_
|
||||
#define _ncvruntimetemplates_hpp_
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <vector>
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// The Loki Library
|
||||
// Copyright (c) 2001 by Andrei Alexandrescu
|
||||
@@ -14,13 +62,6 @@
|
||||
// http://loki-lib.sourceforge.net/index.php?n=Main.License
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _ncvruntimetemplates_hpp_
|
||||
#define _ncvruntimetemplates_hpp_
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace Loki
|
||||
{
|
||||
//==============================================================================
|
Reference in New Issue
Block a user