First version of CascadeClassifier_GPU.
Only for VS2008 now. Sample for it. new NPP_staging for VS2008 only
This commit is contained in:
parent
31e582e314
commit
1a94186195
Binary file not shown.
42
3rdparty/NPP_staging/npp_staging.h
vendored
42
3rdparty/NPP_staging/npp_staging.h
vendored
@ -188,14 +188,14 @@ struct NppStSize32u
|
|||||||
enum NppStStatus
|
enum NppStStatus
|
||||||
{
|
{
|
||||||
//already present in NPP
|
//already present in NPP
|
||||||
/* NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR)
|
//NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR)
|
||||||
NPP_ERROR = -1, ///< Unknown error
|
//NPP_ERROR = -1, ///< Unknown error
|
||||||
NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error
|
//NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error
|
||||||
NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error
|
//NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error
|
||||||
NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned
|
//NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned
|
||||||
NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error
|
//NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error
|
||||||
NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error
|
//NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error
|
||||||
NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error*/
|
//NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error
|
||||||
|
|
||||||
//to be added
|
//to be added
|
||||||
NPP_INVALID_ROI, ///< Invalid region of interest argument
|
NPP_INVALID_ROI, ///< Invalid region of interest argument
|
||||||
@ -244,7 +244,7 @@ extern "C" {
|
|||||||
|
|
||||||
/** \defgroup core_npp NPP Core
|
/** \defgroup core_npp NPP Core
|
||||||
* Basic functions for CUDA streams management.
|
* Basic functions for CUDA streams management.
|
||||||
* WARNING: These functions couldn't be exported from NPP_staging library, so they can't be used
|
* WARNING: These functions couldn't be exported into DLL, so they can be used only with static version of NPP_staging
|
||||||
* @{
|
* @{
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -569,6 +569,13 @@ NppStStatus nppiStTranspose_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStride,
|
|||||||
NppStStatus nppiStIntegralGetSize_8u32u(NppStSize32u roiSize, NppSt32u *pBufsize);
|
NppStStatus nppiStIntegralGetSize_8u32u(NppStSize32u roiSize, NppSt32u *pBufsize);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the size of the temporary buffer for integral image creation
|
||||||
|
* \see nppiStIntegralGetSize_8u32u
|
||||||
|
*/
|
||||||
|
NppStStatus nppiStIntegralGetSize_32f32f(NppStSize32u roiSize, NppSt32u *pBufsize);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an integral image representation for the input image
|
* Creates an integral image representation for the input image
|
||||||
*
|
*
|
||||||
@ -587,6 +594,15 @@ NppStStatus nppiStIntegral_8u32u_C1R(NppSt8u *d_src, NppSt32u srcStep,
|
|||||||
NppSt8u *pBuffer, NppSt32u bufSize);
|
NppSt8u *pBuffer, NppSt32u bufSize);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an integral image representation for the input image
|
||||||
|
* \see nppiStIntegral_8u32u_C1R
|
||||||
|
*/
|
||||||
|
NppStStatus nppiStIntegral_32f32f_C1R(NppSt32f *d_src, NppSt32u srcStep,
|
||||||
|
NppSt32f *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
|
||||||
|
NppSt8u *pBuffer, NppSt32u bufSize);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an integral image representation for the input image. Host implementation
|
* Creates an integral image representation for the input image. Host implementation
|
||||||
*
|
*
|
||||||
@ -602,6 +618,14 @@ NppStStatus nppiStIntegral_8u32u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,
|
|||||||
NppSt32u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
|
NppSt32u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an integral image representation for the input image. Host implementation
|
||||||
|
* \see nppiStIntegral_8u32u_C1R_host
|
||||||
|
*/
|
||||||
|
NppStStatus nppiStIntegral_32f32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,
|
||||||
|
NppSt32f *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates the size of the temporary buffer for squared integral image creation
|
* Calculates the size of the temporary buffer for squared integral image creation
|
||||||
*
|
*
|
||||||
|
@ -35,6 +35,13 @@ source_group("Include" FILES ${lib_hdrs})
|
|||||||
file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
|
file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
|
||||||
source_group("Device" FILES ${lib_device_hdrs})
|
source_group("Device" FILES ${lib_device_hdrs})
|
||||||
|
|
||||||
|
if (HAVE_CUDA AND MSVC)
|
||||||
|
file(GLOB ncv_srcs "src/nvidia/*.cpp")
|
||||||
|
file(GLOB ncv_hdrs "src/nvidia/*.h*")
|
||||||
|
file(GLOB ncv_cuda "src/nvidia/*.cu")
|
||||||
|
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
|
||||||
|
endif()
|
||||||
|
|
||||||
if (HAVE_CUDA)
|
if (HAVE_CUDA)
|
||||||
get_filename_component(_path_to_findnpp "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
get_filename_component(_path_to_findnpp "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
||||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${_path_to_findnpp})
|
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${_path_to_findnpp})
|
||||||
@ -68,19 +75,16 @@ if (HAVE_CUDA)
|
|||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
CUDA_COMPILE(cuda_objs ${lib_cuda})
|
include(FindNPP_staging.cmake)
|
||||||
|
include_directories(${NPPST_INC})
|
||||||
|
|
||||||
|
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
|
||||||
#CUDA_BUILD_CLEAN_TARGET()
|
#CUDA_BUILD_CLEAN_TARGET()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
|
||||||
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${cuda_objs})
|
|
||||||
|
|
||||||
IF (HAVE_CUDA)
|
|
||||||
include(FindNPP_staging.cmake)
|
|
||||||
include_directories(${NPPST_INC})
|
|
||||||
target_link_libraries(${the_target} ${NPPST_LIB})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(PCHSupport_FOUND)
|
if(PCHSupport_FOUND)
|
||||||
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
|
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
|
||||||
@ -114,6 +118,7 @@ target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
|
|||||||
|
|
||||||
if (HAVE_CUDA)
|
if (HAVE_CUDA)
|
||||||
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
|
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
|
||||||
|
target_link_libraries(${the_target} ${NPPST_LIB})
|
||||||
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
|
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -1380,87 +1380,39 @@ namespace cv
|
|||||||
explicit BruteForceMatcher_GPU(L2<T> /*d*/) : BruteForceMatcher_GPU_base(L2Dist) {}
|
explicit BruteForceMatcher_GPU(L2<T> /*d*/) : BruteForceMatcher_GPU_base(L2Dist) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////// CascadeClassifier //////////////////////////////////////////
|
////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
|
||||||
// The cascade classifier class for object detection.
|
// The cascade classifier class for object detection.
|
||||||
class CV_EXPORTS CascadeClassifier
|
class CV_EXPORTS CascadeClassifier_GPU
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
struct CV_EXPORTS DTreeNode
|
CascadeClassifier_GPU();
|
||||||
{
|
CascadeClassifier_GPU(const string& filename);
|
||||||
int featureIdx;
|
~CascadeClassifier_GPU();
|
||||||
float threshold; // for ordered features only
|
|
||||||
int left;
|
|
||||||
int right;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CV_EXPORTS DTree
|
|
||||||
{
|
|
||||||
int nodeCount;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CV_EXPORTS Stage
|
|
||||||
{
|
|
||||||
int first;
|
|
||||||
int ntrees;
|
|
||||||
float threshold;
|
|
||||||
};
|
|
||||||
|
|
||||||
enum { BOOST = 0 };
|
|
||||||
enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2,FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 };
|
|
||||||
|
|
||||||
CascadeClassifier();
|
|
||||||
CascadeClassifier(const string& filename);
|
|
||||||
~CascadeClassifier();
|
|
||||||
|
|
||||||
bool empty() const;
|
bool empty() const;
|
||||||
bool load(const string& filename);
|
bool load(const string& filename);
|
||||||
bool read(const FileNode& node);
|
void release();
|
||||||
|
|
||||||
|
/* returns number of detected objects */
|
||||||
|
int detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size());
|
||||||
|
|
||||||
|
bool findLargestObject;
|
||||||
|
bool visualizeInPlace;
|
||||||
|
|
||||||
void detectMultiScale( const Mat& image, vector<Rect>& objects, double scaleFactor=1.1,
|
Size getClassifierSize() const;
|
||||||
int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size());
|
private:
|
||||||
|
|
||||||
bool setImage( Ptr<FeatureEvaluator>&, const Mat& );
|
struct CascadeClassifierImpl;
|
||||||
int runAt( Ptr<FeatureEvaluator>&, Point );
|
CascadeClassifierImpl* impl;
|
||||||
|
|
||||||
bool isStumpBased;
|
|
||||||
|
|
||||||
int stageType;
|
|
||||||
int featureType;
|
|
||||||
int ncategories;
|
|
||||||
Size origWinSize;
|
|
||||||
|
|
||||||
vector<Stage> stages;
|
|
||||||
vector<DTree> classifiers;
|
|
||||||
vector<DTreeNode> nodes;
|
|
||||||
vector<float> leaves;
|
|
||||||
vector<int> subsets;
|
|
||||||
|
|
||||||
Ptr<FeatureEvaluator> feval;
|
|
||||||
Ptr<CvHaarClassifierCascade> oldCascade;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////// SURF //////////////////////////////////////////
|
////////////////////////////////// SURF //////////////////////////////////////////
|
||||||
|
|
||||||
struct CV_EXPORTS SURFParams_GPU
|
struct CV_EXPORTS SURFParams_GPU
|
||||||
{
|
{
|
||||||
SURFParams_GPU() :
|
SURFParams_GPU() : threshold(0.1f), nOctaves(4), nIntervals(4), initialScale(2.f),
|
||||||
threshold(0.1f),
|
l1(3.f/1.5f), l2(5.f/1.5f), l3(3.f/1.5f), l4(1.f/1.5f),
|
||||||
nOctaves(4),
|
edgeScale(0.81f), initialStep(1), extended(true), featuresRatio(0.01f) {}
|
||||||
nIntervals(4),
|
|
||||||
initialScale(2.f),
|
|
||||||
|
|
||||||
l1(3.f/1.5f),
|
|
||||||
l2(5.f/1.5f),
|
|
||||||
l3(3.f/1.5f),
|
|
||||||
l4(1.f/1.5f),
|
|
||||||
edgeScale(0.81f),
|
|
||||||
initialStep(1),
|
|
||||||
|
|
||||||
extended(true),
|
|
||||||
|
|
||||||
featuresRatio(0.01f)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
//! The interest operator threshold
|
//! The interest operator threshold
|
||||||
float threshold;
|
float threshold;
|
||||||
|
@ -170,8 +170,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
|
|||||||
|
|
||||||
if (src.type() == CV_8UC1)
|
if (src.type() == CV_8UC1)
|
||||||
{
|
{
|
||||||
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
|
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
|
||||||
nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -186,8 +185,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
|
|||||||
pValues3[1] = nppLut3[1].ptr<Npp32s>();
|
pValues3[1] = nppLut3[1].ptr<Npp32s>();
|
||||||
pValues3[2] = nppLut3[2].ptr<Npp32s>();
|
pValues3[2] = nppLut3[2].ptr<Npp32s>();
|
||||||
}
|
}
|
||||||
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
|
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, pValues3, lvls.pLevels3, lvls.nValues3) );
|
||||||
pValues3, lvls.pLevels3, lvls.nValues3) );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,69 +42,751 @@
|
|||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::gpu;
|
using namespace cv::gpu;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#if !defined (HAVE_CUDA)
|
|
||||||
|
|
||||||
cv::gpu::CascadeClassifier::CascadeClassifier() { throw_nogpu(); }
|
#if !defined (HAVE_CUDA) || (defined(_MSC_VER) && _MSC_VER != 1500) || !defined(_MSC_VER)
|
||||||
cv::gpu::CascadeClassifier::CascadeClassifier(const string&) { throw_nogpu(); }
|
|
||||||
cv::gpu::CascadeClassifier::~CascadeClassifier() { throw_nogpu(); }
|
|
||||||
|
|
||||||
bool cv::gpu::CascadeClassifier::empty() const { throw_nogpu(); return true; }
|
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); }
|
||||||
bool cv::gpu::CascadeClassifier::load(const string& filename) { throw_nogpu(); return true; }
|
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string&) { throw_nogpu(); }
|
||||||
bool cv::gpu::CascadeClassifier::read(const FileNode& node) { throw_nogpu(); return true; }
|
cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { throw_nogpu(); }
|
||||||
|
|
||||||
void cv::gpu::CascadeClassifier::detectMultiScale( const Mat&, vector<Rect>&, double, int, int, Size, Size) { throw_nogpu(); }
|
bool cv::gpu::CascadeClassifier_GPU::empty() const { throw_nogpu(); return true; }
|
||||||
|
bool cv::gpu::CascadeClassifier_GPU::load(const string&) { throw_nogpu(); return true; }
|
||||||
|
Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const { throw_nogpu(); return Size(); }
|
||||||
|
|
||||||
|
int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& , GpuMat& , double , int , Size) { throw_nogpu(); return 0; }
|
||||||
|
|
||||||
|
#if defined (HAVE_CUDA)
|
||||||
|
NCVStatus loadFromXML(const string&, HaarClassifierCascadeDescriptor&, vector<HaarStage64>&,
|
||||||
|
vector<HaarClassifierNode128>&, vector<HaarFeature64>&) { throw_nogpu(); return NCVStatus(); }
|
||||||
|
|
||||||
|
void groupRectangles(vector<NcvRect32u>&, int, double, vector<Ncv32u>*) { throw_nogpu(); }
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
|
||||||
|
{
|
||||||
|
CascadeClassifierImpl(const string& filename) : lastAllocatedFrameSize(-1, -1)
|
||||||
|
{
|
||||||
|
ncvSetDebugOutputHandler(NCVDebugOutputHandler);
|
||||||
|
if (ncvStat != load(filename))
|
||||||
|
CV_Error(CV_GpuApiCallError, "Error in GPU cacade load");
|
||||||
|
}
|
||||||
|
NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors, bool findLargestObject, bool visualizeInPlace, NcvSize32u ncvMinSize, /*out*/unsigned int& numDetections)
|
||||||
|
{
|
||||||
|
calculateMemReqsAndAllocate(src.size());
|
||||||
|
|
||||||
cv::gpu::CascadeClassifier::CascadeClassifier()
|
NCVMemPtr src_beg;
|
||||||
{
|
src_beg.ptr = (void*)src.ptr<Ncv8u>();
|
||||||
|
src_beg.memtype = NCVMemoryTypeDevice;
|
||||||
|
|
||||||
}
|
NCVMemSegment src_seg;
|
||||||
|
src_seg.begin = src_beg;
|
||||||
|
src_seg.size = src.step * src.rows;
|
||||||
|
|
||||||
cv::gpu::CascadeClassifier::CascadeClassifier(const string& filename)
|
NCVMatrixReuse<Ncv8u> d_src(src_seg, devProp.textureAlignment, src.cols, src.rows, src.step, true);
|
||||||
{
|
|
||||||
|
//NCVMatrixAlloc<Ncv8u> d_src(*gpuAllocator, src.cols, src.rows);
|
||||||
|
//ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
|
||||||
}
|
//NCVMatrixAlloc<Ncv8u> h_src(*cpuAllocator, src.cols, src.rows);
|
||||||
|
//ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
|
||||||
cv::gpu::CascadeClassifier::~CascadeClassifier()
|
CV_Assert(objects.rows == 1);
|
||||||
{
|
|
||||||
|
NCVMemPtr objects_beg;
|
||||||
|
objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
|
||||||
|
objects_beg.memtype = NCVMemoryTypeDevice;
|
||||||
|
|
||||||
|
NCVMemSegment objects_seg;
|
||||||
|
objects_seg.begin = objects_beg;
|
||||||
|
objects_seg.size = objects.step * objects.rows;
|
||||||
|
NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
|
||||||
|
//NCVVectorAlloc<NcvRect32u> d_rects(*gpuAllocator, 100);
|
||||||
|
//ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
|
||||||
|
NcvSize32u roi;
|
||||||
|
roi.width = d_src.width();
|
||||||
|
roi.height = d_src.height();
|
||||||
|
|
||||||
|
Ncv32u flags = 0;
|
||||||
|
flags |= findLargestObject? NCVPipeObjDet_FindLargestObject : 0;
|
||||||
|
flags |= visualizeInPlace ? NCVPipeObjDet_VisualizeInPlace : 0;
|
||||||
|
|
||||||
|
ncvStat = ncvDetectObjectsMultiScale_device(
|
||||||
|
d_src, roi, d_rects, numDetections, haar, *h_haarStages,
|
||||||
|
*d_haarStages, *d_haarNodes, *d_haarFeatures,
|
||||||
|
ncvMinSize,
|
||||||
|
minNeighbors,
|
||||||
|
scaleStep, 1,
|
||||||
|
flags,
|
||||||
|
*gpuAllocator, *cpuAllocator, devProp.major, devProp.minor, 0);
|
||||||
|
ncvAssertReturnNcvStat(ncvStat);
|
||||||
|
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
////
|
||||||
|
|
||||||
|
NcvSize32u getClassifierSize() const { return haar.ClassifierSize; }
|
||||||
|
cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); }
|
||||||
|
private:
|
||||||
|
|
||||||
|
static void NCVDebugOutputHandler(const char* msg) { CV_Error(CV_GpuApiCallError, msg); }
|
||||||
|
|
||||||
|
NCVStatus load(const string& classifierFile)
|
||||||
|
{
|
||||||
|
int devId = cv::gpu::getDevice();
|
||||||
|
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
// Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
|
||||||
|
gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice);
|
||||||
|
cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned);
|
||||||
|
|
||||||
|
ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
|
||||||
|
ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
|
||||||
|
ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR);
|
||||||
|
|
||||||
|
h_haarStages = new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages);
|
||||||
|
h_haarNodes = new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes);
|
||||||
|
h_haarFeatures = new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures);
|
||||||
|
|
||||||
|
ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
|
||||||
|
ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
|
||||||
|
ncvAssertPrintReturn(h_haarFeatures->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", NCV_FILE_ERROR);
|
||||||
|
|
||||||
|
d_haarStages = new NCVVectorAlloc<HaarStage64>(*gpuCascadeAllocator, haarNumStages);
|
||||||
|
d_haarNodes = new NCVVectorAlloc<HaarClassifierNode128>(*gpuCascadeAllocator, haarNumNodes);
|
||||||
|
d_haarFeatures = new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures);
|
||||||
|
|
||||||
|
ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
|
||||||
|
ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
|
||||||
|
ncvAssertPrintReturn(d_haarFeatures->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
ncvStat = h_haarStages->copySolid(*d_haarStages, 0);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
|
||||||
|
ncvStat = h_haarNodes->copySolid(*d_haarNodes, 0);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
|
||||||
|
ncvStat = h_haarFeatures->copySolid(*d_haarFeatures, 0);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
////
|
||||||
|
|
||||||
|
NCVStatus calculateMemReqsAndAllocate(const Size& frameSize)
|
||||||
|
{
|
||||||
|
if (lastAllocatedFrameSize == frameSize)
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
|
||||||
|
// Calculate memory requirements and create real allocators
|
||||||
|
NCVMemStackAllocator gpuCounter(devProp.textureAlignment);
|
||||||
|
NCVMemStackAllocator cpuCounter(devProp.textureAlignment);
|
||||||
|
|
||||||
|
ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
|
||||||
|
ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
NCVMatrixAlloc<Ncv8u> d_src(gpuCounter, frameSize.width, frameSize.height);
|
||||||
|
NCVMatrixAlloc<Ncv8u> h_src(cpuCounter, frameSize.width, frameSize.height);
|
||||||
|
|
||||||
|
ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
|
||||||
|
NCVVectorAlloc<NcvRect32u> d_rects(gpuCounter, 100);
|
||||||
|
ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
|
||||||
|
NcvSize32u roi;
|
||||||
|
roi.width = d_src.width();
|
||||||
|
roi.height = d_src.height();
|
||||||
|
Ncv32u numDetections;
|
||||||
|
ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
|
||||||
|
*d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp.major, devProp.minor, 0);
|
||||||
|
|
||||||
|
ncvAssertReturnNcvStat(ncvStat);
|
||||||
|
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
gpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment);
|
||||||
|
cpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), devProp.textureAlignment);
|
||||||
|
|
||||||
|
ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
|
||||||
|
ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
////
|
||||||
|
|
||||||
|
cudaDeviceProp devProp;
|
||||||
|
NCVStatus ncvStat;
|
||||||
|
|
||||||
|
Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
|
||||||
|
Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
|
||||||
|
|
||||||
|
Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
|
||||||
|
Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
|
||||||
|
Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;
|
||||||
|
|
||||||
|
HaarClassifierCascadeDescriptor haar;
|
||||||
|
|
||||||
|
Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
|
||||||
|
Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
|
||||||
|
Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
|
||||||
|
|
||||||
|
Size lastAllocatedFrameSize;
|
||||||
|
|
||||||
|
Ptr<NCVMemStackAllocator> gpuAllocator;
|
||||||
|
Ptr<NCVMemStackAllocator> cpuAllocator;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() : findLargestObject(false), visualizeInPlace(false), impl(0) {}
|
||||||
|
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string& filename) : findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); }
|
||||||
|
cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { release(); }
|
||||||
|
bool cv::gpu::CascadeClassifier_GPU::empty() const { return impl == 0; }
|
||||||
|
|
||||||
|
void cv::gpu::CascadeClassifier_GPU::release() { if (impl) { delete impl; impl = 0; } }
|
||||||
|
|
||||||
|
bool cv::gpu::CascadeClassifier_GPU::load(const string& filename)
|
||||||
|
{
|
||||||
|
release();
|
||||||
|
impl = new CascadeClassifierImpl(filename);
|
||||||
|
return !this->empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool cv::gpu::CascadeClassifier::empty() const
|
Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const
|
||||||
{
|
{
|
||||||
int *a = (int*)&nppiStTranspose_32u_C1R;
|
return this->empty() ? Size() : impl->getClassifierCvSize();
|
||||||
return *a == 0xFFFFF;
|
}
|
||||||
return true;
|
|
||||||
|
int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
|
||||||
|
{
|
||||||
|
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
|
||||||
|
CV_Assert( !this->empty());
|
||||||
|
|
||||||
|
const int defaultObjSearchNum = 100;
|
||||||
|
if (objectsBuf.empty())
|
||||||
|
objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
|
||||||
|
|
||||||
|
NcvSize32u ncvMinSize = impl->getClassifierSize();
|
||||||
|
|
||||||
|
if (ncvMinSize.width < (unsigned)minSize.width && ncvMinSize.height < (unsigned)minSize.height)
|
||||||
|
{
|
||||||
|
ncvMinSize.width = minSize.width;
|
||||||
|
ncvMinSize.height = minSize.height;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int numDetections;
|
||||||
|
NCVStatus ncvStat = impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections);
|
||||||
|
if (ncvStat != NCV_SUCCESS)
|
||||||
|
CV_Error(CV_GpuApiCallError, "Error in face detectioln");
|
||||||
|
|
||||||
|
return numDetections;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool cv::gpu::CascadeClassifier::load(const string& filename)
|
struct RectConvert
|
||||||
|
{
|
||||||
|
Rect operator()(const NcvRect32u& nr) const { return Rect(nr.x, nr.y, nr.width, nr.height); }
|
||||||
|
NcvRect32u operator()(const Rect& nr) const
|
||||||
|
{
|
||||||
|
NcvRect32u rect;
|
||||||
|
rect.x = nr.x;
|
||||||
|
rect.y = nr.y;
|
||||||
|
rect.width = nr.width;
|
||||||
|
rect.height = nr.height;
|
||||||
|
return rect;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights)
|
||||||
|
{
|
||||||
|
vector<Rect> rects(hypotheses.size());
|
||||||
|
std::transform(hypotheses.begin(), hypotheses.end(), rects.begin(), RectConvert());
|
||||||
|
|
||||||
|
if (weights)
|
||||||
|
{
|
||||||
|
vector<int> weights_int;
|
||||||
|
weights_int.assign(weights->begin(), weights->end());
|
||||||
|
cv::groupRectangles(rects, weights_int, groupThreshold, eps);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cv::groupRectangles(rects, groupThreshold, eps);
|
||||||
|
}
|
||||||
|
std::transform(rects.begin(), rects.end(), hypotheses.begin(), RectConvert());
|
||||||
|
hypotheses.resize(rects.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if 1 /* loadFromXML implementation switch */
|
||||||
|
|
||||||
|
NCVStatus loadFromXML(const std::string &filename,
|
||||||
|
HaarClassifierCascadeDescriptor &haar,
|
||||||
|
std::vector<HaarStage64> &haarStages,
|
||||||
|
std::vector<HaarClassifierNode128> &haarClassifierNodes,
|
||||||
|
std::vector<HaarFeature64> &haarFeatures)
|
||||||
{
|
{
|
||||||
return true;
|
NCVStatus ncvStat;
|
||||||
|
|
||||||
|
haar.NumStages = 0;
|
||||||
|
haar.NumClassifierRootNodes = 0;
|
||||||
|
haar.NumClassifierTotalNodes = 0;
|
||||||
|
haar.NumFeatures = 0;
|
||||||
|
haar.ClassifierSize.width = 0;
|
||||||
|
haar.ClassifierSize.height = 0;
|
||||||
|
haar.bHasStumpsOnly = true;
|
||||||
|
haar.bNeedsTiltedII = false;
|
||||||
|
Ncv32u curMaxTreeDepth;
|
||||||
|
|
||||||
|
std::vector<char> xmlFileCont;
|
||||||
|
|
||||||
|
std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
|
||||||
|
haarStages.resize(0);
|
||||||
|
haarClassifierNodes.resize(0);
|
||||||
|
haarFeatures.resize(0);
|
||||||
|
|
||||||
|
Ptr<CvHaarClassifierCascade> oldCascade = (CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0);
|
||||||
|
if (oldCascade.empty())
|
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
haar.ClassifierSize.width = oldCascade->orig_window_size.width;
|
||||||
|
haar.ClassifierSize.height = oldCascade->orig_window_size.height;
|
||||||
|
|
||||||
|
int stagesCound = oldCascade->count;
|
||||||
|
for(int s = 0; s < stagesCound; ++s) // by stages
|
||||||
|
{
|
||||||
|
HaarStage64 curStage;
|
||||||
|
curStage.setStartClassifierRootNodeOffset(haarClassifierNodes.size());
|
||||||
|
|
||||||
|
curStage.setStageThreshold(oldCascade->stage_classifier[s].threshold);
|
||||||
|
|
||||||
|
int treesCount = oldCascade->stage_classifier[s].count;
|
||||||
|
for(int t = 0; t < treesCount; ++t) // bytrees
|
||||||
|
{
|
||||||
|
Ncv32u nodeId = 0;
|
||||||
|
CvHaarClassifier* tree = &oldCascade->stage_classifier[s].classifier[t];
|
||||||
|
|
||||||
|
int nodesCount = tree->count;
|
||||||
|
for(int n = 0; n < nodesCount; ++n) //by features
|
||||||
|
{
|
||||||
|
CvHaarFeature* feature = &tree->haar_feature[n];
|
||||||
|
|
||||||
|
HaarClassifierNode128 curNode;
|
||||||
|
curNode.setThreshold(tree->threshold[n]);
|
||||||
|
|
||||||
|
HaarClassifierNodeDescriptor32 nodeLeft;
|
||||||
|
if ( tree->left[n] <= 0 )
|
||||||
|
{
|
||||||
|
Ncv32f leftVal = tree->alpha[-tree->left[n]];
|
||||||
|
ncvStat = nodeLeft.create(leftVal);
|
||||||
|
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Ncv32u leftNodeOffset = tree->left[n];
|
||||||
|
nodeLeft.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + leftNodeOffset - 1));
|
||||||
|
haar.bHasStumpsOnly = false;
|
||||||
|
}
|
||||||
|
curNode.setLeftNodeDesc(nodeLeft);
|
||||||
|
|
||||||
|
HaarClassifierNodeDescriptor32 nodeRight;
|
||||||
|
if ( tree->right[n] <= 0 )
|
||||||
|
{
|
||||||
|
Ncv32f rightVal = tree->alpha[-tree->right[n]];
|
||||||
|
ncvStat = nodeRight.create(rightVal);
|
||||||
|
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Ncv32u rightNodeOffset = tree->right[n];
|
||||||
|
nodeRight.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + rightNodeOffset - 1));
|
||||||
|
haar.bHasStumpsOnly = false;
|
||||||
|
}
|
||||||
|
curNode.setRightNodeDesc(nodeRight);
|
||||||
|
|
||||||
|
Ncv32u tiltedVal = feature->tilted;
|
||||||
|
haar.bNeedsTiltedII = (tiltedVal != 0);
|
||||||
|
|
||||||
|
Ncv32u featureId = 0;
|
||||||
|
for(int l = 0; l < CV_HAAR_FEATURE_MAX; ++l) //by rects
|
||||||
|
{
|
||||||
|
Ncv32u rectX = feature->rect[l].r.x;
|
||||||
|
Ncv32u rectY = feature->rect[l].r.y;
|
||||||
|
Ncv32u rectWidth = feature->rect[l].r.width;
|
||||||
|
Ncv32u rectHeight = feature->rect[l].r.height;
|
||||||
|
|
||||||
|
Ncv32f rectWeight = feature->rect[l].weight;
|
||||||
|
|
||||||
|
if (rectWeight == 0/* && rectX == 0 &&rectY == 0 && rectWidth == 0 && rectHeight == 0*/)
|
||||||
|
break;
|
||||||
|
|
||||||
|
HaarFeature64 curFeature;
|
||||||
|
ncvStat = curFeature.setRect(rectX, rectY, rectWidth, rectHeight, haar.ClassifierSize.width, haar.ClassifierSize.height);
|
||||||
|
curFeature.setWeight(rectWeight);
|
||||||
|
ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
|
||||||
|
haarFeatures.push_back(curFeature);
|
||||||
|
|
||||||
|
featureId++;
|
||||||
|
}
|
||||||
|
|
||||||
|
HaarFeatureDescriptor32 tmpFeatureDesc;
|
||||||
|
ncvStat = tmpFeatureDesc.create(haar.bNeedsTiltedII, featureId, haarFeatures.size() - featureId);
|
||||||
|
ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
|
||||||
|
curNode.setFeatureDesc(tmpFeatureDesc);
|
||||||
|
|
||||||
|
if (!nodeId)
|
||||||
|
{
|
||||||
|
//root node
|
||||||
|
haarClassifierNodes.push_back(curNode);
|
||||||
|
curMaxTreeDepth = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//other node
|
||||||
|
h_TmpClassifierNotRootNodes.push_back(curNode);
|
||||||
|
curMaxTreeDepth++;
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeId++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
curStage.setNumClassifierRootNodes(treesCount);
|
||||||
|
haarStages.push_back(curStage);
|
||||||
|
}
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
//fill in cascade stats
|
||||||
|
haar.NumStages = haarStages.size();
|
||||||
|
haar.NumClassifierRootNodes = haarClassifierNodes.size();
|
||||||
|
haar.NumClassifierTotalNodes = haar.NumClassifierRootNodes + h_TmpClassifierNotRootNodes.size();
|
||||||
|
haar.NumFeatures = haarFeatures.size();
|
||||||
|
|
||||||
|
//merge root and leaf nodes in one classifiers array
|
||||||
|
Ncv32u offsetRoot = haarClassifierNodes.size();
|
||||||
|
for (Ncv32u i=0; i<haarClassifierNodes.size(); i++)
|
||||||
|
{
|
||||||
|
HaarClassifierNodeDescriptor32 nodeLeft = haarClassifierNodes[i].getLeftNodeDesc();
|
||||||
|
if (!nodeLeft.isLeaf())
|
||||||
|
{
|
||||||
|
Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
|
||||||
|
nodeLeft.create(newOffset);
|
||||||
|
}
|
||||||
|
haarClassifierNodes[i].setLeftNodeDesc(nodeLeft);
|
||||||
|
|
||||||
|
HaarClassifierNodeDescriptor32 nodeRight = haarClassifierNodes[i].getRightNodeDesc();
|
||||||
|
if (!nodeRight.isLeaf())
|
||||||
|
{
|
||||||
|
Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
|
||||||
|
nodeRight.create(newOffset);
|
||||||
|
}
|
||||||
|
haarClassifierNodes[i].setRightNodeDesc(nodeRight);
|
||||||
|
}
|
||||||
|
for (Ncv32u i=0; i<h_TmpClassifierNotRootNodes.size(); i++)
|
||||||
|
{
|
||||||
|
HaarClassifierNodeDescriptor32 nodeLeft = h_TmpClassifierNotRootNodes[i].getLeftNodeDesc();
|
||||||
|
if (!nodeLeft.isLeaf())
|
||||||
|
{
|
||||||
|
Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
|
||||||
|
nodeLeft.create(newOffset);
|
||||||
|
}
|
||||||
|
h_TmpClassifierNotRootNodes[i].setLeftNodeDesc(nodeLeft);
|
||||||
|
|
||||||
|
HaarClassifierNodeDescriptor32 nodeRight = h_TmpClassifierNotRootNodes[i].getRightNodeDesc();
|
||||||
|
if (!nodeRight.isLeaf())
|
||||||
|
{
|
||||||
|
Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
|
||||||
|
nodeRight.create(newOffset);
|
||||||
|
}
|
||||||
|
h_TmpClassifierNotRootNodes[i].setRightNodeDesc(nodeRight);
|
||||||
|
|
||||||
|
haarClassifierNodes.push_back(h_TmpClassifierNotRootNodes[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NCV_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool cv::gpu::CascadeClassifier::read(const FileNode& node)
|
////
|
||||||
|
|
||||||
|
#else /* loadFromXML implementation switch */
|
||||||
|
|
||||||
|
#include "e:/devNPP-OpenCV/src/external/_rapidxml-1.13/rapidxml.hpp"
|
||||||
|
|
||||||
|
NCVStatus loadFromXML(const std::string &filename,
|
||||||
|
HaarClassifierCascadeDescriptor &haar,
|
||||||
|
std::vector<HaarStage64> &haarStages,
|
||||||
|
std::vector<HaarClassifierNode128> &haarClassifierNodes,
|
||||||
|
std::vector<HaarFeature64> &haarFeatures)
|
||||||
{
|
{
|
||||||
return true;
|
NCVStatus ncvStat;
|
||||||
|
|
||||||
|
haar.NumStages = 0;
|
||||||
|
haar.NumClassifierRootNodes = 0;
|
||||||
|
haar.NumClassifierTotalNodes = 0;
|
||||||
|
haar.NumFeatures = 0;
|
||||||
|
haar.ClassifierSize.width = 0;
|
||||||
|
haar.ClassifierSize.height = 0;
|
||||||
|
haar.bNeedsTiltedII = false;
|
||||||
|
haar.bHasStumpsOnly = false;
|
||||||
|
|
||||||
|
FILE *fp;
|
||||||
|
fopen_s(&fp, filename.c_str(), "r");
|
||||||
|
ncvAssertReturn(fp != NULL, NCV_FILE_ERROR);
|
||||||
|
|
||||||
|
//get file size
|
||||||
|
fseek(fp, 0, SEEK_END);
|
||||||
|
Ncv32u xmlSize = ftell(fp);
|
||||||
|
fseek(fp, 0, SEEK_SET);
|
||||||
|
|
||||||
|
//load file to vector
|
||||||
|
std::vector<char> xmlFileCont;
|
||||||
|
xmlFileCont.resize(xmlSize+1);
|
||||||
|
memset(&xmlFileCont[0], 0, xmlSize+1);
|
||||||
|
fread_s(&xmlFileCont[0], xmlSize, 1, xmlSize, fp);
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
haar.bHasStumpsOnly = true;
|
||||||
|
haar.bNeedsTiltedII = false;
|
||||||
|
Ncv32u curMaxTreeDepth;
|
||||||
|
|
||||||
|
std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
|
||||||
|
haarStages.resize(0);
|
||||||
|
haarClassifierNodes.resize(0);
|
||||||
|
haarFeatures.resize(0);
|
||||||
|
|
||||||
|
//XML loading and OpenCV XML classifier syntax verification
|
||||||
|
try
|
||||||
|
{
|
||||||
|
rapidxml::xml_document<> doc;
|
||||||
|
doc.parse<0>(&xmlFileCont[0]);
|
||||||
|
|
||||||
|
//opencv_storage
|
||||||
|
rapidxml::xml_node<> *parserGlobal = doc.first_node();
|
||||||
|
ncvAssertReturn(!strcmp(parserGlobal->name(), "opencv_storage"), NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
|
||||||
|
//classifier type
|
||||||
|
parserGlobal = parserGlobal->first_node();
|
||||||
|
ncvAssertReturn(parserGlobal, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
rapidxml::xml_attribute<> *attr = parserGlobal->first_attribute("type_id");
|
||||||
|
ncvAssertReturn(!strcmp(attr->value(), "opencv-haar-classifier"), NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
|
||||||
|
//classifier size
|
||||||
|
parserGlobal = parserGlobal->first_node("size");
|
||||||
|
ncvAssertReturn(parserGlobal, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
sscanf_s(parserGlobal->value(), "%d %d", &(haar.ClassifierSize.width), &(haar.ClassifierSize.height));
|
||||||
|
|
||||||
|
//parse stages
|
||||||
|
parserGlobal = parserGlobal->next_sibling("stages");
|
||||||
|
ncvAssertReturn(parserGlobal, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
parserGlobal = parserGlobal->first_node("_");
|
||||||
|
ncvAssertReturn(parserGlobal, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
|
||||||
|
while (parserGlobal)
|
||||||
|
{
|
||||||
|
HaarStage64 curStage;
|
||||||
|
curStage.setStartClassifierRootNodeOffset(haarClassifierNodes.size());
|
||||||
|
Ncv32u tmpNumClassifierRootNodes = 0;
|
||||||
|
|
||||||
|
rapidxml::xml_node<> *parserStageThreshold = parserGlobal->first_node("stage_threshold");
|
||||||
|
ncvAssertReturn(parserStageThreshold, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
Ncv32f tmpStageThreshold;
|
||||||
|
sscanf_s(parserStageThreshold->value(), "%f", &tmpStageThreshold);
|
||||||
|
curStage.setStageThreshold(tmpStageThreshold);
|
||||||
|
|
||||||
|
//parse trees
|
||||||
|
rapidxml::xml_node<> *parserTree;
|
||||||
|
parserTree = parserGlobal->first_node("trees");
|
||||||
|
ncvAssertReturn(parserTree, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
parserTree = parserTree->first_node("_");
|
||||||
|
ncvAssertReturn(parserTree, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
|
||||||
|
while (parserTree)
|
||||||
|
{
|
||||||
|
rapidxml::xml_node<> *parserNode;
|
||||||
|
parserNode = parserTree->first_node("_");
|
||||||
|
ncvAssertReturn(parserNode, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
Ncv32u nodeId = 0;
|
||||||
|
|
||||||
|
while (parserNode)
|
||||||
|
{
|
||||||
|
HaarClassifierNode128 curNode;
|
||||||
|
|
||||||
|
rapidxml::xml_node<> *parserNodeThreshold = parserNode->first_node("threshold");
|
||||||
|
ncvAssertReturn(parserNodeThreshold, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
Ncv32f tmpThreshold;
|
||||||
|
sscanf_s(parserNodeThreshold->value(), "%f", &tmpThreshold);
|
||||||
|
curNode.setThreshold(tmpThreshold);
|
||||||
|
|
||||||
|
rapidxml::xml_node<> *parserNodeLeft = parserNode->first_node("left_val");
|
||||||
|
HaarClassifierNodeDescriptor32 nodeLeft;
|
||||||
|
if (parserNodeLeft)
|
||||||
|
{
|
||||||
|
Ncv32f leftVal;
|
||||||
|
sscanf_s(parserNodeLeft->value(), "%f", &leftVal);
|
||||||
|
ncvStat = nodeLeft.create(leftVal);
|
||||||
|
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
parserNodeLeft = parserNode->first_node("left_node");
|
||||||
|
ncvAssertReturn(parserNodeLeft, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
Ncv32u leftNodeOffset;
|
||||||
|
sscanf_s(parserNodeLeft->value(), "%d", &leftNodeOffset);
|
||||||
|
nodeLeft.create(h_TmpClassifierNotRootNodes.size() + leftNodeOffset - 1);
|
||||||
|
haar.bHasStumpsOnly = false;
|
||||||
|
}
|
||||||
|
curNode.setLeftNodeDesc(nodeLeft);
|
||||||
|
|
||||||
|
rapidxml::xml_node<> *parserNodeRight = parserNode->first_node("right_val");
|
||||||
|
HaarClassifierNodeDescriptor32 nodeRight;
|
||||||
|
if (parserNodeRight)
|
||||||
|
{
|
||||||
|
Ncv32f rightVal;
|
||||||
|
sscanf_s(parserNodeRight->value(), "%f", &rightVal);
|
||||||
|
ncvStat = nodeRight.create(rightVal);
|
||||||
|
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
parserNodeRight = parserNode->first_node("right_node");
|
||||||
|
ncvAssertReturn(parserNodeRight, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
Ncv32u rightNodeOffset;
|
||||||
|
sscanf_s(parserNodeRight->value(), "%d", &rightNodeOffset);
|
||||||
|
nodeRight.create(h_TmpClassifierNotRootNodes.size() + rightNodeOffset - 1);
|
||||||
|
haar.bHasStumpsOnly = false;
|
||||||
|
}
|
||||||
|
curNode.setRightNodeDesc(nodeRight);
|
||||||
|
|
||||||
|
rapidxml::xml_node<> *parserNodeFeatures = parserNode->first_node("feature");
|
||||||
|
ncvAssertReturn(parserNodeFeatures, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
|
||||||
|
rapidxml::xml_node<> *parserNodeFeaturesTilted = parserNodeFeatures->first_node("tilted");
|
||||||
|
ncvAssertReturn(parserNodeFeaturesTilted, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
Ncv32u tiltedVal;
|
||||||
|
sscanf_s(parserNodeFeaturesTilted->value(), "%d", &tiltedVal);
|
||||||
|
haar.bNeedsTiltedII = (tiltedVal != 0);
|
||||||
|
|
||||||
|
rapidxml::xml_node<> *parserNodeFeaturesRects = parserNodeFeatures->first_node("rects");
|
||||||
|
ncvAssertReturn(parserNodeFeaturesRects, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
parserNodeFeaturesRects = parserNodeFeaturesRects->first_node("_");
|
||||||
|
ncvAssertReturn(parserNodeFeaturesRects, NCV_HAAR_XML_LOADING_EXCEPTION);
|
||||||
|
Ncv32u featureId = 0;
|
||||||
|
|
||||||
|
while (parserNodeFeaturesRects)
|
||||||
|
{
|
||||||
|
Ncv32u rectX, rectY, rectWidth, rectHeight;
|
||||||
|
Ncv32f rectWeight;
|
||||||
|
sscanf_s(parserNodeFeaturesRects->value(), "%d %d %d %d %f", &rectX, &rectY, &rectWidth, &rectHeight, &rectWeight);
|
||||||
|
HaarFeature64 curFeature;
|
||||||
|
ncvStat = curFeature.setRect(rectX, rectY, rectWidth, rectHeight, haar.ClassifierSize.width, haar.ClassifierSize.height);
|
||||||
|
curFeature.setWeight(rectWeight);
|
||||||
|
ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
|
||||||
|
haarFeatures.push_back(curFeature);
|
||||||
|
|
||||||
|
parserNodeFeaturesRects = parserNodeFeaturesRects->next_sibling("_");
|
||||||
|
featureId++;
|
||||||
|
}
|
||||||
|
|
||||||
|
HaarFeatureDescriptor32 tmpFeatureDesc;
|
||||||
|
ncvStat = tmpFeatureDesc.create(haar.bNeedsTiltedII, featureId, haarFeatures.size() - featureId);
|
||||||
|
ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
|
||||||
|
curNode.setFeatureDesc(tmpFeatureDesc);
|
||||||
|
|
||||||
|
if (!nodeId)
|
||||||
|
{
|
||||||
|
//root node
|
||||||
|
haarClassifierNodes.push_back(curNode);
|
||||||
|
curMaxTreeDepth = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//other node
|
||||||
|
h_TmpClassifierNotRootNodes.push_back(curNode);
|
||||||
|
curMaxTreeDepth++;
|
||||||
|
}
|
||||||
|
|
||||||
|
parserNode = parserNode->next_sibling("_");
|
||||||
|
nodeId++;
|
||||||
|
}
|
||||||
|
|
||||||
|
parserTree = parserTree->next_sibling("_");
|
||||||
|
tmpNumClassifierRootNodes++;
|
||||||
|
}
|
||||||
|
|
||||||
|
curStage.setNumClassifierRootNodes(tmpNumClassifierRootNodes);
|
||||||
|
haarStages.push_back(curStage);
|
||||||
|
|
||||||
|
parserGlobal = parserGlobal->next_sibling("_");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
//fill in cascade stats
|
||||||
|
haar.NumStages = haarStages.size();
|
||||||
|
haar.NumClassifierRootNodes = haarClassifierNodes.size();
|
||||||
|
haar.NumClassifierTotalNodes = haar.NumClassifierRootNodes + h_TmpClassifierNotRootNodes.size();
|
||||||
|
haar.NumFeatures = haarFeatures.size();
|
||||||
|
|
||||||
|
//merge root and leaf nodes in one classifiers array
|
||||||
|
Ncv32u offsetRoot = haarClassifierNodes.size();
|
||||||
|
for (Ncv32u i=0; i<haarClassifierNodes.size(); i++)
|
||||||
|
{
|
||||||
|
HaarClassifierNodeDescriptor32 nodeLeft = haarClassifierNodes[i].getLeftNodeDesc();
|
||||||
|
if (!nodeLeft.isLeaf())
|
||||||
|
{
|
||||||
|
Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
|
||||||
|
nodeLeft.create(newOffset);
|
||||||
|
}
|
||||||
|
haarClassifierNodes[i].setLeftNodeDesc(nodeLeft);
|
||||||
|
|
||||||
|
HaarClassifierNodeDescriptor32 nodeRight = haarClassifierNodes[i].getRightNodeDesc();
|
||||||
|
if (!nodeRight.isLeaf())
|
||||||
|
{
|
||||||
|
Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
|
||||||
|
nodeRight.create(newOffset);
|
||||||
|
}
|
||||||
|
haarClassifierNodes[i].setRightNodeDesc(nodeRight);
|
||||||
|
}
|
||||||
|
for (Ncv32u i=0; i<h_TmpClassifierNotRootNodes.size(); i++)
|
||||||
|
{
|
||||||
|
HaarClassifierNodeDescriptor32 nodeLeft = h_TmpClassifierNotRootNodes[i].getLeftNodeDesc();
|
||||||
|
if (!nodeLeft.isLeaf())
|
||||||
|
{
|
||||||
|
Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
|
||||||
|
nodeLeft.create(newOffset);
|
||||||
|
}
|
||||||
|
h_TmpClassifierNotRootNodes[i].setLeftNodeDesc(nodeLeft);
|
||||||
|
|
||||||
|
HaarClassifierNodeDescriptor32 nodeRight = h_TmpClassifierNotRootNodes[i].getRightNodeDesc();
|
||||||
|
if (!nodeRight.isLeaf())
|
||||||
|
{
|
||||||
|
Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
|
||||||
|
nodeRight.create(newOffset);
|
||||||
|
}
|
||||||
|
h_TmpClassifierNotRootNodes[i].setRightNodeDesc(nodeRight);
|
||||||
|
|
||||||
|
haarClassifierNodes.push_back(h_TmpClassifierNotRootNodes[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NCV_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects, double scaleFactor,
|
#endif /* loadFromXML implementation switch */
|
||||||
int minNeighbors, int flags, Size minSize, Size maxSize)
|
|
||||||
|
|
||||||
{
|
#endif /* HAVE_CUDA */
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
@ -62,7 +62,7 @@ namespace cv
|
|||||||
BORDER_REPLICATE_GPU,
|
BORDER_REPLICATE_GPU,
|
||||||
BORDER_CONSTANT_GPU
|
BORDER_CONSTANT_GPU
|
||||||
};
|
};
|
||||||
|
|
||||||
// Converts CPU border extrapolation mode into GPU internal analogue.
|
// Converts CPU border extrapolation mode into GPU internal analogue.
|
||||||
// Returns true if the GPU analogue exists, false otherwise.
|
// Returns true if the GPU analogue exists, false otherwise.
|
||||||
bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
|
bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
|
||||||
@ -105,8 +105,28 @@ namespace cv
|
|||||||
const textureReference* tex;
|
const textureReference* tex;
|
||||||
cudaSafeCall( cudaGetTextureReference(&tex, name) );
|
cudaSafeCall( cudaGetTextureReference(&tex, name) );
|
||||||
cudaSafeCall( cudaUnbindTexture(tex) );
|
cudaSafeCall( cudaUnbindTexture(tex) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct KeyPoint_GPU
|
||||||
|
{
|
||||||
|
float x;
|
||||||
|
float y;
|
||||||
|
float size;
|
||||||
|
float response;
|
||||||
|
float angle;
|
||||||
|
float octave;
|
||||||
|
};
|
||||||
|
|
||||||
|
enum KeypointLayout
|
||||||
|
{
|
||||||
|
SF_X,
|
||||||
|
SF_Y,
|
||||||
|
SF_SIZE,
|
||||||
|
SF_RESPONSE,
|
||||||
|
SF_ANGLE,
|
||||||
|
SF_OCTAVE,
|
||||||
|
SF_FEATURE_STRIDE
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,29 +47,7 @@ namespace cv
|
|||||||
{
|
{
|
||||||
namespace gpu
|
namespace gpu
|
||||||
{
|
{
|
||||||
namespace surf
|
|
||||||
{
|
|
||||||
struct KeyPoint_GPU
|
|
||||||
{
|
|
||||||
float x;
|
|
||||||
float y;
|
|
||||||
float size;
|
|
||||||
float response;
|
|
||||||
float angle;
|
|
||||||
float octave;
|
|
||||||
};
|
|
||||||
|
|
||||||
enum KeypointLayout
|
|
||||||
{
|
|
||||||
SF_X,
|
|
||||||
SF_Y,
|
|
||||||
SF_SIZE,
|
|
||||||
SF_RESPONSE,
|
|
||||||
SF_ANGLE,
|
|
||||||
SF_OCTAVE,
|
|
||||||
SF_FEATURE_STRIDE
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -82,21 +82,16 @@ void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu()
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep,
|
typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
|
||||||
NppiSize oSizeROI, int nScaleFactor);
|
typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, int nDstStep, NppiSize oSizeROI);
|
||||||
typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst,
|
typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
|
||||||
int nDstStep, NppiSize oSizeROI);
|
|
||||||
typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst,
|
|
||||||
int nDstStep, NppiSize oSizeROI);
|
|
||||||
|
|
||||||
void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
|
void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
|
||||||
npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
|
npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
|
||||||
npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
|
npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
|
||||||
{
|
{
|
||||||
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
|
|
||||||
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
|
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
|
||||||
|
|
||||||
dst.create( src1.size(), src1.type() );
|
dst.create( src1.size(), src1.type() );
|
||||||
|
|
||||||
NppiSize sz;
|
NppiSize sz;
|
||||||
@ -106,24 +101,16 @@ namespace
|
|||||||
switch (src1.type())
|
switch (src1.type())
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step,
|
nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0) );
|
||||||
src2.ptr<Npp8u>(), src2.step,
|
|
||||||
dst.ptr<Npp8u>(), dst.step, sz, 0) );
|
|
||||||
break;
|
break;
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step,
|
nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0) );
|
||||||
src2.ptr<Npp8u>(), src2.step,
|
|
||||||
dst.ptr<Npp8u>(), dst.step, sz, 0) );
|
|
||||||
break;
|
break;
|
||||||
case CV_32SC1:
|
case CV_32SC1:
|
||||||
nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step,
|
nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz) );
|
||||||
src2.ptr<Npp32s>(), src2.step,
|
|
||||||
dst.ptr<Npp32s>(), dst.step, sz) );
|
|
||||||
break;
|
break;
|
||||||
case CV_32FC1:
|
case CV_32FC1:
|
||||||
nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step,
|
nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz) );
|
||||||
src2.ptr<Npp32f>(), src2.step,
|
|
||||||
dst.ptr<Npp32f>(), dst.step, sz) );
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Assert(!"Unsupported source type");
|
CV_Assert(!"Unsupported source type");
|
||||||
@ -133,16 +120,15 @@ namespace
|
|||||||
template<int SCN> struct NppArithmScalarFunc;
|
template<int SCN> struct NppArithmScalarFunc;
|
||||||
template<> struct NppArithmScalarFunc<1>
|
template<> struct NppArithmScalarFunc<1>
|
||||||
{
|
{
|
||||||
typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst,
|
typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst, int nDstStep, NppiSize oSizeROI);
|
||||||
int nDstStep, NppiSize oSizeROI);
|
|
||||||
};
|
};
|
||||||
template<> struct NppArithmScalarFunc<2>
|
template<> struct NppArithmScalarFunc<2>
|
||||||
{
|
{
|
||||||
typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst,
|
typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst, int nDstStep, NppiSize oSizeROI);
|
||||||
int nDstStep, NppiSize oSizeROI);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
|
template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
|
||||||
|
|
||||||
template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
|
template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
|
||||||
{
|
{
|
||||||
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
|
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
|
||||||
@ -254,24 +240,16 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
|
|||||||
switch (src1.type())
|
switch (src1.type())
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step,
|
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz) );
|
||||||
src2.ptr<Npp8u>(), src2.step,
|
|
||||||
dst.ptr<Npp8u>(), dst.step, sz) );
|
|
||||||
break;
|
break;
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step,
|
nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz) );
|
||||||
src2.ptr<Npp8u>(), src2.step,
|
|
||||||
dst.ptr<Npp8u>(), dst.step, sz) );
|
|
||||||
break;
|
break;
|
||||||
case CV_32SC1:
|
case CV_32SC1:
|
||||||
nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step,
|
nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz) );
|
||||||
src2.ptr<Npp32s>(), src2.step,
|
|
||||||
dst.ptr<Npp32s>(), dst.step, sz) );
|
|
||||||
break;
|
break;
|
||||||
case CV_32FC1:
|
case CV_32FC1:
|
||||||
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step,
|
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz) );
|
||||||
src2.ptr<Npp32f>(), src2.step,
|
|
||||||
dst.ptr<Npp32f>(), dst.step, sz) );
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Assert(!"Unsupported source type");
|
CV_Assert(!"Unsupported source type");
|
||||||
|
362
modules/gpu/src/nvidia/FaceDetectionFeed.cpp_NvidiaAPI_sample
Normal file
362
modules/gpu/src/nvidia/FaceDetectionFeed.cpp_NvidiaAPI_sample
Normal file
@ -0,0 +1,362 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
|
#define CV_NO_BACKWARD_COMPATIBILITY
|
||||||
|
|
||||||
|
#include "opencv2/opencv.hpp"
|
||||||
|
|
||||||
|
#include "NCVHaarObjectDetection.hpp"
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
const Size preferredVideoFrameSize(640, 480);
|
||||||
|
|
||||||
|
string preferredClassifier = "haarcascade_frontalface_alt.xml";
|
||||||
|
string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed";
|
||||||
|
|
||||||
|
|
||||||
|
void printSyntax(void)
|
||||||
|
{
|
||||||
|
printf("Syntax: FaceDetectionFeed.exe [-c cameranum | -v filename] classifier.xml\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...)
|
||||||
|
{
|
||||||
|
int fontFace = CV_FONT_HERSHEY_PLAIN;
|
||||||
|
double fontScale = 1;
|
||||||
|
|
||||||
|
int baseline;
|
||||||
|
Size textSize = cv::getTextSize("T", fontFace, fontScale, 1, &baseline);
|
||||||
|
|
||||||
|
va_list arg_ptr;
|
||||||
|
va_start(arg_ptr, format);
|
||||||
|
int len = _vscprintf(format, arg_ptr) + 1;
|
||||||
|
|
||||||
|
vector<char> strBuf(len);
|
||||||
|
vsprintf_s(&strBuf[0], len, format, arg_ptr);
|
||||||
|
|
||||||
|
Point org(1, 3 * textSize.height * (lineOffsY + 1) / 2);
|
||||||
|
putText(img, &strBuf[0], org, fontFace, fontScale, color);
|
||||||
|
va_end(arg_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus process(Mat *srcdst,
|
||||||
|
Ncv32u width, Ncv32u height,
|
||||||
|
NcvBool bShowAllHypotheses, NcvBool bLargestFace,
|
||||||
|
HaarClassifierCascadeDescriptor &haar,
|
||||||
|
NCVVector<HaarStage64> &d_haarStages, NCVVector<HaarClassifierNode128> &d_haarNodes,
|
||||||
|
NCVVector<HaarFeature64> &d_haarFeatures, NCVVector<HaarStage64> &h_haarStages,
|
||||||
|
INCVMemAllocator &gpuAllocator,
|
||||||
|
INCVMemAllocator &cpuAllocator,
|
||||||
|
cudaDeviceProp &devProp)
|
||||||
|
{
|
||||||
|
ncvAssertReturn(!((srcdst == NULL) ^ gpuAllocator.isCounting()), NCV_NULL_PTR);
|
||||||
|
|
||||||
|
NCVStatus ncvStat;
|
||||||
|
|
||||||
|
NCV_SET_SKIP_COND(gpuAllocator.isCounting());
|
||||||
|
|
||||||
|
NCVMatrixAlloc<Ncv8u> d_src(gpuAllocator, width, height);
|
||||||
|
ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
NCVMatrixAlloc<Ncv8u> h_src(cpuAllocator, width, height);
|
||||||
|
ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100);
|
||||||
|
ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
|
||||||
|
Mat h_src_hdr(Size(width, height), CV_8U, h_src.ptr(), h_src.stride());
|
||||||
|
|
||||||
|
NCV_SKIP_COND_BEGIN
|
||||||
|
|
||||||
|
(*srcdst).copyTo(h_src_hdr);
|
||||||
|
|
||||||
|
ncvStat = h_src.copySolid(d_src, 0);
|
||||||
|
ncvAssertReturnNcvStat(ncvStat);
|
||||||
|
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
NCV_SKIP_COND_END
|
||||||
|
|
||||||
|
NcvSize32u roi;
|
||||||
|
roi.width = d_src.width();
|
||||||
|
roi.height = d_src.height();
|
||||||
|
|
||||||
|
Ncv32u numDetections;
|
||||||
|
ncvStat = ncvDetectObjectsMultiScale_device(
|
||||||
|
d_src, roi, d_rects, numDetections, haar, h_haarStages,
|
||||||
|
d_haarStages, d_haarNodes, d_haarFeatures,
|
||||||
|
haar.ClassifierSize,
|
||||||
|
bShowAllHypotheses ? 0 : 4,
|
||||||
|
1.2f, 1,
|
||||||
|
(bLargestFace ? NCVPipeObjDet_FindLargestObject : 0) | NCVPipeObjDet_VisualizeInPlace,
|
||||||
|
gpuAllocator, cpuAllocator, devProp.major, devProp.minor, 0);
|
||||||
|
ncvAssertReturnNcvStat(ncvStat);
|
||||||
|
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
NCV_SKIP_COND_BEGIN
|
||||||
|
|
||||||
|
ncvStat = d_src.copySolid(h_src, 0);
|
||||||
|
ncvAssertReturnNcvStat(ncvStat);
|
||||||
|
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
|
||||||
|
|
||||||
|
h_src_hdr.copyTo(*srcdst);
|
||||||
|
|
||||||
|
NCV_SKIP_COND_END
|
||||||
|
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int main( int argc, const char** argv )
|
||||||
|
{
|
||||||
|
NCVStatus ncvStat;
|
||||||
|
|
||||||
|
printf("NVIDIA Computer Vision SDK\n");
|
||||||
|
printf("Face Detection in video and live feed\n");
|
||||||
|
printf("=========================================\n");
|
||||||
|
printf(" Esc - Quit\n");
|
||||||
|
printf(" Space - Switch between NCV and OpenCV\n");
|
||||||
|
printf(" L - Switch between FullSearch and LargestFace modes\n");
|
||||||
|
printf(" U - Toggle unfiltered hypotheses visualization in FullSearch\n");
|
||||||
|
|
||||||
|
if (argc != 4 && argc != 1)
|
||||||
|
return printSyntax(), -1;
|
||||||
|
|
||||||
|
VideoCapture capture;
|
||||||
|
Size frameSize;
|
||||||
|
|
||||||
|
if (argc == 1 || strcmp(argv[1], "-c") == 0)
|
||||||
|
{
|
||||||
|
// Camera input is specified
|
||||||
|
int camIdx = (argc == 3) ? atoi(argv[2]) : 0;
|
||||||
|
if(!capture.open(camIdx))
|
||||||
|
return printf("Error opening camera\n"), -1;
|
||||||
|
|
||||||
|
capture.set(CV_CAP_PROP_FRAME_WIDTH, preferredVideoFrameSize.width);
|
||||||
|
capture.set(CV_CAP_PROP_FRAME_HEIGHT, preferredVideoFrameSize.height);
|
||||||
|
capture.set(CV_CAP_PROP_FPS, 25);
|
||||||
|
frameSize = preferredVideoFrameSize;
|
||||||
|
}
|
||||||
|
else if (strcmp(argv[1], "-v") == 0)
|
||||||
|
{
|
||||||
|
// Video file input (avi)
|
||||||
|
if(!capture.open(argv[2]))
|
||||||
|
return printf("Error opening video file\n"), -1;
|
||||||
|
|
||||||
|
frameSize.width = (int)capture.get(CV_CAP_PROP_FRAME_WIDTH);
|
||||||
|
frameSize.height = (int)capture.get(CV_CAP_PROP_FRAME_HEIGHT);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return printSyntax(), -1;
|
||||||
|
|
||||||
|
NcvBool bUseOpenCV = true;
|
||||||
|
NcvBool bLargestFace = true;
|
||||||
|
NcvBool bShowAllHypotheses = false;
|
||||||
|
|
||||||
|
string classifierFile = (argc == 1) ? preferredClassifier : argv[3];
|
||||||
|
|
||||||
|
CascadeClassifier classifierOpenCV;
|
||||||
|
if (!classifierOpenCV.load(classifierFile))
|
||||||
|
return printf("Error (in OpenCV) opening classifier\n"), printSyntax(), -1;
|
||||||
|
|
||||||
|
int devId;
|
||||||
|
ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);
|
||||||
|
cudaDeviceProp devProp;
|
||||||
|
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1);
|
||||||
|
printf("Using GPU %d %s, arch=%d.%d\n", devId, devProp.name, devProp.major, devProp.minor);
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Load the classifier from file (assuming its size is about 1 mb)
|
||||||
|
// using a simple allocator
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice);
|
||||||
|
ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1);
|
||||||
|
NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned);
|
||||||
|
ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1);
|
||||||
|
|
||||||
|
Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
|
||||||
|
ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", -1);
|
||||||
|
|
||||||
|
NCVVectorAlloc<HaarStage64> h_haarStages(cpuCascadeAllocator, haarNumStages);
|
||||||
|
ncvAssertPrintReturn(h_haarStages.isMemAllocated(), "Error in cascade CPU allocator", -1);
|
||||||
|
NCVVectorAlloc<HaarClassifierNode128> h_haarNodes(cpuCascadeAllocator, haarNumNodes);
|
||||||
|
ncvAssertPrintReturn(h_haarNodes.isMemAllocated(), "Error in cascade CPU allocator", -1);
|
||||||
|
NCVVectorAlloc<HaarFeature64> h_haarFeatures(cpuCascadeAllocator, haarNumFeatures);
|
||||||
|
ncvAssertPrintReturn(h_haarFeatures.isMemAllocated(), "Error in cascade CPU allocator", -1);
|
||||||
|
|
||||||
|
HaarClassifierCascadeDescriptor haar;
|
||||||
|
ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, h_haarStages, h_haarNodes, h_haarFeatures);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", -1);
|
||||||
|
|
||||||
|
NCVVectorAlloc<HaarStage64> d_haarStages(gpuCascadeAllocator, haarNumStages);
|
||||||
|
ncvAssertPrintReturn(d_haarStages.isMemAllocated(), "Error in cascade GPU allocator", -1);
|
||||||
|
NCVVectorAlloc<HaarClassifierNode128> d_haarNodes(gpuCascadeAllocator, haarNumNodes);
|
||||||
|
ncvAssertPrintReturn(d_haarNodes.isMemAllocated(), "Error in cascade GPU allocator", -1);
|
||||||
|
NCVVectorAlloc<HaarFeature64> d_haarFeatures(gpuCascadeAllocator, haarNumFeatures);
|
||||||
|
ncvAssertPrintReturn(d_haarFeatures.isMemAllocated(), "Error in cascade GPU allocator", -1);
|
||||||
|
|
||||||
|
ncvStat = h_haarStages.copySolid(d_haarStages, 0);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
|
||||||
|
ncvStat = h_haarNodes.copySolid(d_haarNodes, 0);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
|
||||||
|
ncvStat = h_haarFeatures.copySolid(d_haarFeatures, 0);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Calculate memory requirements and create real allocators
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
NCVMemStackAllocator gpuCounter(devProp.textureAlignment);
|
||||||
|
ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", -1);
|
||||||
|
NCVMemStackAllocator cpuCounter(devProp.textureAlignment);
|
||||||
|
ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", -1);
|
||||||
|
|
||||||
|
ncvStat = process(NULL, frameSize.width, frameSize.height,
|
||||||
|
false, false, haar,
|
||||||
|
d_haarStages, d_haarNodes,
|
||||||
|
d_haarFeatures, h_haarStages,
|
||||||
|
gpuCounter, cpuCounter, devProp);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);
|
||||||
|
|
||||||
|
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment);
|
||||||
|
ncvAssertPrintReturn(gpuAllocator.isInitialized(), "Error creating GPU memory allocator", -1);
|
||||||
|
NCVMemStackAllocator cpuAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), devProp.textureAlignment);
|
||||||
|
ncvAssertPrintReturn(cpuAllocator.isInitialized(), "Error creating CPU memory allocator", -1);
|
||||||
|
|
||||||
|
printf("Initialized for frame size [%dx%d]\n", frameSize.width, frameSize.height);
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Main processing loop
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
namedWindow(wndTitle, 1);
|
||||||
|
|
||||||
|
Mat frame, gray, frameDisp;
|
||||||
|
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
// For camera and video file, capture the next image
|
||||||
|
capture >> frame;
|
||||||
|
if (frame.empty())
|
||||||
|
break;
|
||||||
|
|
||||||
|
cvtColor(frame, gray, CV_BGR2GRAY);
|
||||||
|
|
||||||
|
// process
|
||||||
|
NcvSize32u minSize = haar.ClassifierSize;
|
||||||
|
if (bLargestFace)
|
||||||
|
{
|
||||||
|
Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width;
|
||||||
|
Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height;
|
||||||
|
Ncv32u ratioSmallest = std::min(ratioX, ratioY);
|
||||||
|
ratioSmallest = (Ncv32u)std::max(ratioSmallest / 2.5f, 1.f);
|
||||||
|
minSize.width *= ratioSmallest;
|
||||||
|
minSize.height *= ratioSmallest;
|
||||||
|
}
|
||||||
|
|
||||||
|
NcvTimer timer = ncvStartTimer();
|
||||||
|
|
||||||
|
if (!bUseOpenCV)
|
||||||
|
{
|
||||||
|
ncvStat = process(&gray, frameSize.width, frameSize.height,
|
||||||
|
bShowAllHypotheses, bLargestFace, haar,
|
||||||
|
d_haarStages, d_haarNodes,
|
||||||
|
d_haarFeatures, h_haarStages,
|
||||||
|
gpuAllocator, cpuAllocator, devProp);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vector<Rect> rectsOpenCV;
|
||||||
|
|
||||||
|
classifierOpenCV.detectMultiScale(
|
||||||
|
gray,
|
||||||
|
rectsOpenCV,
|
||||||
|
1.2f,
|
||||||
|
bShowAllHypotheses && !bLargestFace ? 0 : 4,
|
||||||
|
(bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE,
|
||||||
|
Size(minSize.width, minSize.height));
|
||||||
|
|
||||||
|
for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt)
|
||||||
|
rectangle(gray, rectsOpenCV[rt], Scalar(255));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ncv32f avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);
|
||||||
|
|
||||||
|
cvtColor(gray, frameDisp, CV_GRAY2BGR);
|
||||||
|
|
||||||
|
imagePrintf(frameDisp, 0, CV_RGB(255, 0,0), "Space - Switch NCV%s / OpenCV%s", bUseOpenCV?"":" (ON)", bUseOpenCV?" (ON)":"");
|
||||||
|
imagePrintf(frameDisp, 1, CV_RGB(255, 0,0), "L - Switch FullSearch%s / LargestFace%s modes", bLargestFace?"":" (ON)", bLargestFace?" (ON)":"");
|
||||||
|
imagePrintf(frameDisp, 2, CV_RGB(255, 0,0), "U - Toggle unfiltered hypotheses visualization in FullSearch %s", bShowAllHypotheses?"(ON)":"(OFF)");
|
||||||
|
imagePrintf(frameDisp, 3, CV_RGB(118,185,0), " Running at %f FPS on %s", 1000.0f / avgTime, bUseOpenCV?"CPU":"GPU");
|
||||||
|
|
||||||
|
cv::imshow(wndTitle, frameDisp);
|
||||||
|
|
||||||
|
switch (cvWaitKey(1))
|
||||||
|
{
|
||||||
|
case ' ':
|
||||||
|
bUseOpenCV = !bUseOpenCV;
|
||||||
|
break;
|
||||||
|
case 'L':case 'l':
|
||||||
|
bLargestFace = !bLargestFace;
|
||||||
|
break;
|
||||||
|
case 'U':case 'u':
|
||||||
|
bShowAllHypotheses = !bShowAllHypotheses;
|
||||||
|
break;
|
||||||
|
case 27:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
571
modules/gpu/src/nvidia/NCV.cpp
Normal file
571
modules/gpu/src/nvidia/NCV.cpp
Normal file
@ -0,0 +1,571 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <precomp.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
#if !defined (HAVE_CUDA)
|
||||||
|
|
||||||
|
|
||||||
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include "NCV.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Error handling helpers
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
static void stdioDebugOutput(const char *msg)
|
||||||
|
{
|
||||||
|
printf("%s", msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static NCVDebugOutputHandler *debugOutputHandler = stdioDebugOutput;
|
||||||
|
|
||||||
|
|
||||||
|
void ncvDebugOutput(const char *msg, ...)
|
||||||
|
{
|
||||||
|
const int K_DEBUG_STRING_MAXLEN = 1024;
|
||||||
|
char buffer[K_DEBUG_STRING_MAXLEN];
|
||||||
|
va_list args;
|
||||||
|
va_start(args, msg);
|
||||||
|
vsnprintf_s(buffer, K_DEBUG_STRING_MAXLEN, K_DEBUG_STRING_MAXLEN-1, msg, args);
|
||||||
|
va_end (args);
|
||||||
|
debugOutputHandler(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
|
||||||
|
{
|
||||||
|
debugOutputHandler = func;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Memory wrappers and helpers
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus GPUAlignmentValue(Ncv32u &alignment)
|
||||||
|
{
|
||||||
|
int curDev;
|
||||||
|
cudaDeviceProp curProp;
|
||||||
|
ncvAssertCUDAReturn(cudaGetDevice(&curDev), NCV_CUDA_ERROR);
|
||||||
|
ncvAssertCUDAReturn(cudaGetDeviceProperties(&curProp, curDev), NCV_CUDA_ERROR);
|
||||||
|
alignment = curProp.textureAlignment; //GPUAlignmentValue(curProp.major);
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Ncv32u alignUp(Ncv32u what, Ncv32u alignment)
|
||||||
|
{
|
||||||
|
Ncv32u alignMask = alignment-1;
|
||||||
|
Ncv32u inverseAlignMask = ~alignMask;
|
||||||
|
Ncv32u res = (what + alignMask) & inverseAlignMask;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void NCVMemPtr::clear()
|
||||||
|
{
|
||||||
|
ptr = NULL;
|
||||||
|
memtype = NCVMemoryTypeNone;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void NCVMemSegment::clear()
|
||||||
|
{
|
||||||
|
begin.clear();
|
||||||
|
size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, const void *src, NCVMemoryType srcType, size_t sz, cudaStream_t cuStream)
|
||||||
|
{
|
||||||
|
NCVStatus ncvStat;
|
||||||
|
switch (dstType)
|
||||||
|
{
|
||||||
|
case NCVMemoryTypeHostPageable:
|
||||||
|
case NCVMemoryTypeHostPinned:
|
||||||
|
switch (srcType)
|
||||||
|
{
|
||||||
|
case NCVMemoryTypeHostPageable:
|
||||||
|
case NCVMemoryTypeHostPinned:
|
||||||
|
memcpy(dst, src, sz);
|
||||||
|
ncvStat = NCV_SUCCESS;
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeDevice:
|
||||||
|
if (cuStream != 0)
|
||||||
|
{
|
||||||
|
ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToHost, cuStream), NCV_CUDA_ERROR);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToHost), NCV_CUDA_ERROR);
|
||||||
|
}
|
||||||
|
ncvStat = NCV_SUCCESS;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ncvStat = NCV_MEM_RESIDENCE_ERROR;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeDevice:
|
||||||
|
switch (srcType)
|
||||||
|
{
|
||||||
|
case NCVMemoryTypeHostPageable:
|
||||||
|
case NCVMemoryTypeHostPinned:
|
||||||
|
if (cuStream != 0)
|
||||||
|
{
|
||||||
|
ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyHostToDevice, cuStream), NCV_CUDA_ERROR);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyHostToDevice), NCV_CUDA_ERROR);
|
||||||
|
}
|
||||||
|
ncvStat = NCV_SUCCESS;
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeDevice:
|
||||||
|
if (cuStream != 0)
|
||||||
|
{
|
||||||
|
ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToDevice, cuStream), NCV_CUDA_ERROR);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToDevice), NCV_CUDA_ERROR);
|
||||||
|
}
|
||||||
|
ncvStat = NCV_SUCCESS;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ncvStat = NCV_MEM_RESIDENCE_ERROR;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ncvStat = NCV_MEM_RESIDENCE_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ncvStat;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//===================================================================
|
||||||
|
//
|
||||||
|
// NCVMemStackAllocator class members implementation
|
||||||
|
//
|
||||||
|
//===================================================================
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
|
||||||
|
:
|
||||||
|
currentSize(0),
|
||||||
|
_maxSize(0),
|
||||||
|
allocBegin(NULL),
|
||||||
|
begin(NULL),
|
||||||
|
_memType(NCVMemoryTypeNone),
|
||||||
|
_alignment(alignment)
|
||||||
|
{
|
||||||
|
NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;
|
||||||
|
ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: alignment not power of 2");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment)
|
||||||
|
:
|
||||||
|
currentSize(0),
|
||||||
|
_maxSize(0),
|
||||||
|
allocBegin(NULL),
|
||||||
|
_memType(memT),
|
||||||
|
_alignment(alignment)
|
||||||
|
{
|
||||||
|
NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;
|
||||||
|
ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: _alignment not power of 2");
|
||||||
|
|
||||||
|
allocBegin = NULL;
|
||||||
|
|
||||||
|
switch (memT)
|
||||||
|
{
|
||||||
|
case NCVMemoryTypeDevice:
|
||||||
|
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeHostPinned:
|
||||||
|
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeHostPageable:
|
||||||
|
allocBegin = (Ncv8u *)malloc(capacity);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (capacity == 0)
|
||||||
|
{
|
||||||
|
allocBegin = (Ncv8u *)(0x1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isCounting())
|
||||||
|
{
|
||||||
|
begin = allocBegin;
|
||||||
|
end = begin + capacity;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemStackAllocator::~NCVMemStackAllocator()
|
||||||
|
{
|
||||||
|
if (allocBegin != NULL)
|
||||||
|
{
|
||||||
|
ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");
|
||||||
|
switch (_memType)
|
||||||
|
{
|
||||||
|
case NCVMemoryTypeDevice:
|
||||||
|
ncvAssertCUDAReturn(cudaFree(allocBegin), );
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeHostPinned:
|
||||||
|
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeHostPageable:
|
||||||
|
free(allocBegin);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
allocBegin = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size)
|
||||||
|
{
|
||||||
|
seg.clear();
|
||||||
|
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
|
||||||
|
size = alignUp(size, this->_alignment);
|
||||||
|
this->currentSize += size;
|
||||||
|
this->_maxSize = std::max(this->_maxSize, this->currentSize);
|
||||||
|
|
||||||
|
if (!isCounting())
|
||||||
|
{
|
||||||
|
size_t availSize = end - begin;
|
||||||
|
ncvAssertReturn(size <= availSize, NCV_ALLOCATOR_INSUFFICIENT_CAPACITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
seg.begin.ptr = begin;
|
||||||
|
seg.begin.memtype = this->_memType;
|
||||||
|
seg.size = size;
|
||||||
|
begin += size;
|
||||||
|
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus NCVMemStackAllocator::dealloc(NCVMemSegment &seg)
|
||||||
|
{
|
||||||
|
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);
|
||||||
|
ncvAssertReturn(seg.begin.ptr != NULL || isCounting(), NCV_ALLOCATOR_BAD_DEALLOC);
|
||||||
|
ncvAssertReturn(seg.begin.ptr == begin - seg.size, NCV_ALLOCATOR_DEALLOC_ORDER);
|
||||||
|
|
||||||
|
currentSize -= seg.size;
|
||||||
|
begin -= seg.size;
|
||||||
|
|
||||||
|
seg.clear();
|
||||||
|
|
||||||
|
ncvAssertReturn(allocBegin <= begin, NCV_ALLOCATOR_BAD_DEALLOC);
|
||||||
|
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NcvBool NCVMemStackAllocator::isInitialized(void) const
|
||||||
|
{
|
||||||
|
return ((this->_alignment & (this->_alignment-1)) == 0) && isCounting() || this->allocBegin != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NcvBool NCVMemStackAllocator::isCounting(void) const
|
||||||
|
{
|
||||||
|
return this->_memType == NCVMemoryTypeNone;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemoryType NCVMemStackAllocator::memType(void) const
|
||||||
|
{
|
||||||
|
return this->_memType;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Ncv32u NCVMemStackAllocator::alignment(void) const
|
||||||
|
{
|
||||||
|
return this->_alignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t NCVMemStackAllocator::maxSize(void) const
|
||||||
|
{
|
||||||
|
return this->_maxSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//===================================================================
|
||||||
|
//
|
||||||
|
// NCVMemNativeAllocator class members implementation
|
||||||
|
//
|
||||||
|
//===================================================================
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT)
|
||||||
|
:
|
||||||
|
currentSize(0),
|
||||||
|
_maxSize(0),
|
||||||
|
_memType(memT)
|
||||||
|
{
|
||||||
|
ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
|
||||||
|
ncvAssertPrintReturn(NCV_SUCCESS == GPUAlignmentValue(this->_alignment), "NCVMemNativeAllocator ctor:: couldn't get device _alignment", );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemNativeAllocator::~NCVMemNativeAllocator()
|
||||||
|
{
|
||||||
|
ncvAssertPrintCheck(currentSize == 0, "NCVMemNativeAllocator dtor:: detected memory leak");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size)
|
||||||
|
{
|
||||||
|
seg.clear();
|
||||||
|
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
|
||||||
|
switch (this->_memType)
|
||||||
|
{
|
||||||
|
case NCVMemoryTypeDevice:
|
||||||
|
ncvAssertCUDAReturn(cudaMalloc(&seg.begin.ptr, size), NCV_CUDA_ERROR);
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeHostPinned:
|
||||||
|
ncvAssertCUDAReturn(cudaMallocHost(&seg.begin.ptr, size), NCV_CUDA_ERROR);
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeHostPageable:
|
||||||
|
seg.begin.ptr = (Ncv8u *)malloc(size);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->currentSize += alignUp(size, this->_alignment);
|
||||||
|
this->_maxSize = std::max(this->_maxSize, this->currentSize);
|
||||||
|
|
||||||
|
seg.begin.memtype = this->_memType;
|
||||||
|
seg.size = size;
|
||||||
|
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg)
|
||||||
|
{
|
||||||
|
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||||
|
ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);
|
||||||
|
ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC);
|
||||||
|
|
||||||
|
ncvAssertReturn(currentSize >= alignUp(seg.size, this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC);
|
||||||
|
currentSize -= alignUp(seg.size, this->_alignment);
|
||||||
|
|
||||||
|
switch (this->_memType)
|
||||||
|
{
|
||||||
|
case NCVMemoryTypeDevice:
|
||||||
|
ncvAssertCUDAReturn(cudaFree(seg.begin.ptr), NCV_CUDA_ERROR);
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeHostPinned:
|
||||||
|
ncvAssertCUDAReturn(cudaFreeHost(seg.begin.ptr), NCV_CUDA_ERROR);
|
||||||
|
break;
|
||||||
|
case NCVMemoryTypeHostPageable:
|
||||||
|
free(seg.begin.ptr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
seg.clear();
|
||||||
|
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NcvBool NCVMemNativeAllocator::isInitialized(void) const
|
||||||
|
{
|
||||||
|
return (this->_alignment != 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NcvBool NCVMemNativeAllocator::isCounting(void) const
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemoryType NCVMemNativeAllocator::memType(void) const
|
||||||
|
{
|
||||||
|
return this->_memType;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Ncv32u NCVMemNativeAllocator::alignment(void) const
|
||||||
|
{
|
||||||
|
return this->_alignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t NCVMemNativeAllocator::maxSize(void) const
|
||||||
|
{
|
||||||
|
return this->_maxSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//===================================================================
|
||||||
|
//
|
||||||
|
// Time and timer routines
|
||||||
|
//
|
||||||
|
//===================================================================
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct _NcvTimeMoment NcvTimeMoment;
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
|
||||||
|
#include <Windows.h>
|
||||||
|
|
||||||
|
typedef struct _NcvTimeMoment
|
||||||
|
{
|
||||||
|
LONGLONG moment, freq;
|
||||||
|
} NcvTimeMoment;
|
||||||
|
|
||||||
|
|
||||||
|
static void _ncvQueryMoment(NcvTimeMoment *t)
|
||||||
|
{
|
||||||
|
QueryPerformanceFrequency((LARGE_INTEGER *)&(t->freq));
|
||||||
|
QueryPerformanceCounter((LARGE_INTEGER *)&(t->moment));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentToMicroseconds(NcvTimeMoment *t)
|
||||||
|
{
|
||||||
|
return 1000000.0 * t->moment / t->freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
|
||||||
|
{
|
||||||
|
return 1000000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
|
||||||
|
{
|
||||||
|
return 1000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq);
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined(__unix__)
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
|
||||||
|
typedef struct _NcvTimeMoment
|
||||||
|
{
|
||||||
|
struct timeval tv;
|
||||||
|
struct timezone tz;
|
||||||
|
} NcvTimeMoment;
|
||||||
|
|
||||||
|
|
||||||
|
void _ncvQueryMoment(NcvTimeMoment *t)
|
||||||
|
{
|
||||||
|
gettimeofday(& t->tv, & t->tz);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentToMicroseconds(NcvTimeMoment *t)
|
||||||
|
{
|
||||||
|
return 1000000.0 * t->tv.tv_sec + (double)t->tv.tv_usec;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)
|
||||||
|
{
|
||||||
|
return (((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000000 + (double)t2->tv.tv_usec - (double)t1->tv.tv_usec);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif //#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
|
||||||
|
|
||||||
|
struct _NcvTimer
|
||||||
|
{
|
||||||
|
NcvTimeMoment t1, t2;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
NcvTimer ncvStartTimer(void)
|
||||||
|
{
|
||||||
|
struct _NcvTimer *t;
|
||||||
|
t = (struct _NcvTimer *)malloc(sizeof(struct _NcvTimer));
|
||||||
|
_ncvQueryMoment(&t->t1);
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double ncvEndQueryTimerUs(NcvTimer t)
|
||||||
|
{
|
||||||
|
double res;
|
||||||
|
_ncvQueryMoment(&t->t2);
|
||||||
|
res = _ncvMomentsDiffToMicroseconds(&t->t1, &t->t2);
|
||||||
|
free(t);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double ncvEndQueryTimerMs(NcvTimer t)
|
||||||
|
{
|
||||||
|
double res;
|
||||||
|
_ncvQueryMoment(&t->t2);
|
||||||
|
res = _ncvMomentsDiffToMilliseconds(&t->t1, &t->t2);
|
||||||
|
free(t);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* !defined (HAVE_CUDA) */
|
837
modules/gpu/src/nvidia/NCV.hpp
Normal file
837
modules/gpu/src/nvidia/NCV.hpp
Normal file
@ -0,0 +1,837 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef _ncv_hpp_
|
||||||
|
#define _ncv_hpp_
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include "npp_staging.h"
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Alignment macros
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
#if !defined(__align__) && !defined(__CUDACC__)
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define __align__(n) __declspec(align(n))
|
||||||
|
#elif defined(__unix__)
|
||||||
|
#define __align__(n) __attribute__((__aligned__(n)))
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Integral and compound types of guaranteed size
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
typedef bool NcvBool;
|
||||||
|
typedef long long Ncv64s;
|
||||||
|
typedef unsigned long long Ncv64u;
|
||||||
|
typedef int Ncv32s;
|
||||||
|
typedef unsigned int Ncv32u;
|
||||||
|
typedef short Ncv16s;
|
||||||
|
typedef unsigned short Ncv16u;
|
||||||
|
typedef char Ncv8s;
|
||||||
|
typedef unsigned char Ncv8u;
|
||||||
|
typedef float Ncv32f;
|
||||||
|
typedef double Ncv64f;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
Ncv8u x;
|
||||||
|
Ncv8u y;
|
||||||
|
Ncv8u width;
|
||||||
|
Ncv8u height;
|
||||||
|
} NcvRect8u;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
Ncv32s x; ///< x-coordinate of upper left corner.
|
||||||
|
Ncv32s y; ///< y-coordinate of upper left corner.
|
||||||
|
Ncv32s width; ///< Rectangle width.
|
||||||
|
Ncv32s height; ///< Rectangle height.
|
||||||
|
} NcvRect32s;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
Ncv32u x; ///< x-coordinate of upper left corner.
|
||||||
|
Ncv32u y; ///< y-coordinate of upper left corner.
|
||||||
|
Ncv32u width; ///< Rectangle width.
|
||||||
|
Ncv32u height; ///< Rectangle height.
|
||||||
|
} NcvRect32u;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
Ncv32s width; ///< Rectangle width.
|
||||||
|
Ncv32s height; ///< Rectangle height.
|
||||||
|
} NcvSize32s;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
Ncv32u width; ///< Rectangle width.
|
||||||
|
Ncv32u height; ///< Rectangle height.
|
||||||
|
} NcvSize32u;
|
||||||
|
|
||||||
|
|
||||||
|
NPPST_CT_ASSERT(sizeof(NcvBool) <= 4);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv64s) == 8);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv64u) == 8);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv32s) == 4);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv32u) == 4);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv16s) == 2);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv16u) == 2);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv8s) == 1);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv8u) == 1);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv32f) == 4);
|
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv64f) == 8);
|
||||||
|
NPPST_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
|
||||||
|
NPPST_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
|
||||||
|
NPPST_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
|
||||||
|
NPPST_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Persistent constants
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
const Ncv32u K_WARP_SIZE = 32;
|
||||||
|
const Ncv32u K_LOG2_WARP_SIZE = 5;
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Error handling
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
#define NCV_CT_PREP_STRINGIZE_AUX(x) #x
|
||||||
|
#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x)
|
||||||
|
|
||||||
|
|
||||||
|
void ncvDebugOutput(const char *msg, ...);
|
||||||
|
|
||||||
|
|
||||||
|
typedef void NCVDebugOutputHandler(const char* msg);
|
||||||
|
|
||||||
|
|
||||||
|
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
|
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertPrintCheck(pred, msg) \
|
||||||
|
((pred) ? true : (ncvDebugOutput("\n%s\n", \
|
||||||
|
"NCV Assertion Failed: " msg ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__) \
|
||||||
|
), false))
|
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertPrintReturn(pred, msg, err) \
|
||||||
|
if (ncvAssertPrintCheck(pred, msg)) ; else return err
|
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertReturn(pred, err) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
if (!(pred)) \
|
||||||
|
{ \
|
||||||
|
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: retcode=", (int)err, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
|
||||||
|
return err; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertReturnNcvStat(ncvOp) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
NCVStatus _ncvStat = ncvOp; \
|
||||||
|
if (NCV_SUCCESS != _ncvStat) \
|
||||||
|
{ \
|
||||||
|
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: NcvStat=", (int)_ncvStat, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
|
||||||
|
return _ncvStat; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertCUDAReturn(cudacall, errCode) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
cudaError_t resCall = cudacall; \
|
||||||
|
cudaError_t resGLE = cudaGetLastError(); \
|
||||||
|
if (cudaSuccess != resCall || cudaSuccess != resGLE) \
|
||||||
|
{ \
|
||||||
|
ncvDebugOutput("\n%s%d%s\n", "NCV CUDA Assertion Failed: cudaError_t=", (int)(resCall | resGLE), ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
|
||||||
|
return errCode; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return-codes for status notification, errors and warnings
|
||||||
|
*/
|
||||||
|
enum NCVStatus
|
||||||
|
{
|
||||||
|
NCV_SUCCESS,
|
||||||
|
|
||||||
|
NCV_CUDA_ERROR,
|
||||||
|
NCV_NPP_ERROR,
|
||||||
|
NCV_FILE_ERROR,
|
||||||
|
|
||||||
|
NCV_NULL_PTR,
|
||||||
|
NCV_INCONSISTENT_INPUT,
|
||||||
|
NCV_TEXTURE_BIND_ERROR,
|
||||||
|
NCV_DIMENSIONS_INVALID,
|
||||||
|
|
||||||
|
NCV_INVALID_ROI,
|
||||||
|
NCV_INVALID_STEP,
|
||||||
|
NCV_INVALID_SCALE,
|
||||||
|
|
||||||
|
NCV_ALLOCATOR_NOT_INITIALIZED,
|
||||||
|
NCV_ALLOCATOR_BAD_ALLOC,
|
||||||
|
NCV_ALLOCATOR_BAD_DEALLOC,
|
||||||
|
NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
|
||||||
|
NCV_ALLOCATOR_DEALLOC_ORDER,
|
||||||
|
NCV_ALLOCATOR_BAD_REUSE,
|
||||||
|
|
||||||
|
NCV_MEM_COPY_ERROR,
|
||||||
|
NCV_MEM_RESIDENCE_ERROR,
|
||||||
|
NCV_MEM_INSUFFICIENT_CAPACITY,
|
||||||
|
|
||||||
|
NCV_HAAR_INVALID_PIXEL_STEP,
|
||||||
|
NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
|
||||||
|
NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
|
||||||
|
NCV_HAAR_TOO_LARGE_FEATURES,
|
||||||
|
NCV_HAAR_XML_LOADING_EXCEPTION,
|
||||||
|
|
||||||
|
NCV_NOIMPL_HAAR_TILTED_FEATURES,
|
||||||
|
|
||||||
|
NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#define NCV_SET_SKIP_COND(x) \
|
||||||
|
bool __ncv_skip_cond = x
|
||||||
|
|
||||||
|
|
||||||
|
#define NCV_RESET_SKIP_COND(x) \
|
||||||
|
__ncv_skip_cond = x
|
||||||
|
|
||||||
|
|
||||||
|
#define NCV_SKIP_COND_BEGIN \
|
||||||
|
if (!__ncv_skip_cond) {
|
||||||
|
|
||||||
|
|
||||||
|
#define NCV_SKIP_COND_END \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Timer
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct _NcvTimer *NcvTimer;
|
||||||
|
|
||||||
|
NcvTimer ncvStartTimer(void);
|
||||||
|
|
||||||
|
double ncvEndQueryTimerUs(NcvTimer t);
|
||||||
|
|
||||||
|
double ncvEndQueryTimerMs(NcvTimer t);
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Memory management classes template compound types
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Alignment of GPU memory chunks in bytes
|
||||||
|
*/
|
||||||
|
NCVStatus GPUAlignmentValue(Ncv32u &alignment);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the aligned top bound value
|
||||||
|
*/
|
||||||
|
Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemoryType
|
||||||
|
*/
|
||||||
|
enum NCVMemoryType
|
||||||
|
{
|
||||||
|
NCVMemoryTypeNone,
|
||||||
|
NCVMemoryTypeHostPageable,
|
||||||
|
NCVMemoryTypeHostPinned,
|
||||||
|
NCVMemoryTypeDevice
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemPtr
|
||||||
|
*/
|
||||||
|
struct NCVMemPtr
|
||||||
|
{
|
||||||
|
void *ptr;
|
||||||
|
NCVMemoryType memtype;
|
||||||
|
void clear();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemSegment
|
||||||
|
*/
|
||||||
|
struct NCVMemSegment
|
||||||
|
{
|
||||||
|
NCVMemPtr begin;
|
||||||
|
size_t size;
|
||||||
|
void clear();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* INCVMemAllocator (Interface)
|
||||||
|
*/
|
||||||
|
class INCVMemAllocator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~INCVMemAllocator() = 0;
|
||||||
|
|
||||||
|
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size) = 0;
|
||||||
|
virtual NCVStatus dealloc(NCVMemSegment &seg) = 0;
|
||||||
|
|
||||||
|
virtual NcvBool isInitialized(void) const = 0;
|
||||||
|
virtual NcvBool isCounting(void) const = 0;
|
||||||
|
|
||||||
|
virtual NCVMemoryType memType(void) const = 0;
|
||||||
|
virtual Ncv32u alignment(void) const = 0;
|
||||||
|
virtual size_t maxSize(void) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline INCVMemAllocator::~INCVMemAllocator() {}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemStackAllocator
|
||||||
|
*/
|
||||||
|
class NCVMemStackAllocator : public INCVMemAllocator
|
||||||
|
{
|
||||||
|
NCVMemStackAllocator();
|
||||||
|
NCVMemStackAllocator(const NCVMemStackAllocator &);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
explicit NCVMemStackAllocator(Ncv32u alignment);
|
||||||
|
NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment);
|
||||||
|
virtual ~NCVMemStackAllocator();
|
||||||
|
|
||||||
|
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
|
||||||
|
virtual NCVStatus dealloc(NCVMemSegment &seg);
|
||||||
|
|
||||||
|
virtual NcvBool isInitialized(void) const;
|
||||||
|
virtual NcvBool isCounting(void) const;
|
||||||
|
|
||||||
|
virtual NCVMemoryType memType(void) const;
|
||||||
|
virtual Ncv32u alignment(void) const;
|
||||||
|
virtual size_t maxSize(void) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
NCVMemoryType _memType;
|
||||||
|
Ncv32u _alignment;
|
||||||
|
Ncv8u *allocBegin;
|
||||||
|
Ncv8u *begin;
|
||||||
|
Ncv8u *end;
|
||||||
|
size_t currentSize;
|
||||||
|
size_t _maxSize;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemNativeAllocator
|
||||||
|
*/
|
||||||
|
class NCVMemNativeAllocator : public INCVMemAllocator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
NCVMemNativeAllocator(NCVMemoryType memT);
|
||||||
|
virtual ~NCVMemNativeAllocator();
|
||||||
|
|
||||||
|
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
|
||||||
|
virtual NCVStatus dealloc(NCVMemSegment &seg);
|
||||||
|
|
||||||
|
virtual NcvBool isInitialized(void) const;
|
||||||
|
virtual NcvBool isCounting(void) const;
|
||||||
|
|
||||||
|
virtual NCVMemoryType memType(void) const;
|
||||||
|
virtual Ncv32u alignment(void) const;
|
||||||
|
virtual size_t maxSize(void) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
NCVMemNativeAllocator();
|
||||||
|
NCVMemNativeAllocator(const NCVMemNativeAllocator &);
|
||||||
|
|
||||||
|
NCVMemoryType _memType;
|
||||||
|
Ncv32u _alignment;
|
||||||
|
size_t currentSize;
|
||||||
|
size_t _maxSize;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy dispatcher
|
||||||
|
*/
|
||||||
|
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
|
||||||
|
const void *src, NCVMemoryType srcType,
|
||||||
|
size_t sz, cudaStream_t cuStream);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVVector (1D)
|
||||||
|
*/
|
||||||
|
template <class T>
|
||||||
|
class NCVVector
|
||||||
|
{
|
||||||
|
NCVVector(const NCVVector &);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
NCVVector()
|
||||||
|
{
|
||||||
|
clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~NCVVector() {}
|
||||||
|
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
_ptr = NULL;
|
||||||
|
_length = 0;
|
||||||
|
_memtype = NCVMemoryTypeNone;
|
||||||
|
}
|
||||||
|
|
||||||
|
NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0)
|
||||||
|
{
|
||||||
|
if (howMuch == 0)
|
||||||
|
{
|
||||||
|
ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR);
|
||||||
|
howMuch = this->_length * sizeof(T);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ncvAssertReturn(dst._length * sizeof(T) >= howMuch &&
|
||||||
|
this->_length * sizeof(T) >= howMuch &&
|
||||||
|
howMuch > 0, NCV_MEM_COPY_ERROR);
|
||||||
|
}
|
||||||
|
ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
|
||||||
|
(dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
|
||||||
|
|
||||||
|
NCVStatus ncvStat = NCV_SUCCESS;
|
||||||
|
if (this->_memtype != NCVMemoryTypeNone)
|
||||||
|
{
|
||||||
|
ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
|
||||||
|
this->_ptr, this->_memtype,
|
||||||
|
howMuch, cuStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ncvStat;
|
||||||
|
}
|
||||||
|
|
||||||
|
T *ptr() const {return this->_ptr;}
|
||||||
|
size_t length() const {return this->_length;}
|
||||||
|
NCVMemoryType memType() const {return this->_memtype;}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
T *_ptr;
|
||||||
|
size_t _length;
|
||||||
|
NCVMemoryType _memtype;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVVectorAlloc
|
||||||
|
*/
|
||||||
|
template <class T>
|
||||||
|
class NCVVectorAlloc : public NCVVector<T>
|
||||||
|
{
|
||||||
|
NCVVectorAlloc();
|
||||||
|
NCVVectorAlloc(const NCVVectorAlloc &);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length)
|
||||||
|
:
|
||||||
|
allocator(allocator)
|
||||||
|
{
|
||||||
|
NCVStatus ncvStat;
|
||||||
|
|
||||||
|
this->clear();
|
||||||
|
this->allocatedMem.clear();
|
||||||
|
|
||||||
|
ncvStat = allocator.alloc(this->allocatedMem, length * sizeof(T));
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", );
|
||||||
|
|
||||||
|
this->_ptr = (T *)this->allocatedMem.begin.ptr;
|
||||||
|
this->_length = length;
|
||||||
|
this->_memtype = this->allocatedMem.begin.memtype;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
~NCVVectorAlloc()
|
||||||
|
{
|
||||||
|
NCVStatus ncvStat;
|
||||||
|
|
||||||
|
ncvStat = allocator.dealloc(this->allocatedMem);
|
||||||
|
ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed");
|
||||||
|
|
||||||
|
this->clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NcvBool isMemAllocated() const
|
||||||
|
{
|
||||||
|
return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Ncv32u getAllocatorsAlignment() const
|
||||||
|
{
|
||||||
|
return allocator.alignment();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemSegment getSegment() const
|
||||||
|
{
|
||||||
|
return allocatedMem;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
INCVMemAllocator &allocator;
|
||||||
|
NCVMemSegment allocatedMem;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVVectorReuse
|
||||||
|
*/
|
||||||
|
template <class T>
|
||||||
|
class NCVVectorReuse : public NCVVector<T>
|
||||||
|
{
|
||||||
|
NCVVectorReuse();
|
||||||
|
NCVVectorReuse(const NCVVectorReuse &);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
explicit NCVVectorReuse(const NCVMemSegment &memSegment)
|
||||||
|
{
|
||||||
|
this->bReused = false;
|
||||||
|
this->clear();
|
||||||
|
|
||||||
|
this->_length = memSegment.size / sizeof(T);
|
||||||
|
this->_ptr = (T *)memSegment.begin.ptr;
|
||||||
|
this->_memtype = memSegment.begin.memtype;
|
||||||
|
|
||||||
|
this->bReused = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)
|
||||||
|
{
|
||||||
|
this->bReused = false;
|
||||||
|
this->clear();
|
||||||
|
|
||||||
|
ncvAssertPrintReturn(length * sizeof(T) <= memSegment.size, \
|
||||||
|
"NCVVectorReuse ctor:: memory binding failed due to size mismatch", );
|
||||||
|
|
||||||
|
this->_length = length;
|
||||||
|
this->_ptr = (T *)memSegment.begin.ptr;
|
||||||
|
this->_memtype = memSegment.begin.memtype;
|
||||||
|
|
||||||
|
this->bReused = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NcvBool isMemReused() const
|
||||||
|
{
|
||||||
|
return this->bReused;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
NcvBool bReused;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMatrix (2D)
|
||||||
|
*/
|
||||||
|
template <class T>
|
||||||
|
class NCVMatrix
|
||||||
|
{
|
||||||
|
NCVMatrix(const NCVMatrix &);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
NCVMatrix()
|
||||||
|
{
|
||||||
|
clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~NCVMatrix() {}
|
||||||
|
|
||||||
|
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
_ptr = NULL;
|
||||||
|
_pitch = 0;
|
||||||
|
_width = 0;
|
||||||
|
_height = 0;
|
||||||
|
_memtype = NCVMemoryTypeNone;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Ncv32u stride() const
|
||||||
|
{
|
||||||
|
return _pitch / sizeof(T);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0)
|
||||||
|
{
|
||||||
|
if (howMuch == 0)
|
||||||
|
{
|
||||||
|
ncvAssertReturn(dst._pitch == this->_pitch &&
|
||||||
|
dst._height == this->_height, NCV_MEM_COPY_ERROR);
|
||||||
|
howMuch = this->_pitch * this->_height;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ncvAssertReturn(dst._pitch * dst._height >= howMuch &&
|
||||||
|
this->_pitch * this->_height >= howMuch &&
|
||||||
|
howMuch > 0, NCV_MEM_COPY_ERROR);
|
||||||
|
}
|
||||||
|
ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
|
||||||
|
(dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
|
||||||
|
|
||||||
|
NCVStatus ncvStat = NCV_SUCCESS;
|
||||||
|
if (this->_memtype != NCVMemoryTypeNone)
|
||||||
|
{
|
||||||
|
ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
|
||||||
|
this->_ptr, this->_memtype,
|
||||||
|
howMuch, cuStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ncvStat;
|
||||||
|
}
|
||||||
|
|
||||||
|
T *ptr() const {return this->_ptr;}
|
||||||
|
Ncv32u width() const {return this->_width;}
|
||||||
|
Ncv32u height() const {return this->_height;}
|
||||||
|
Ncv32u pitch() const {return this->_pitch;}
|
||||||
|
NCVMemoryType memType() const {return this->_memtype;}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
T *_ptr;
|
||||||
|
Ncv32u _width;
|
||||||
|
Ncv32u _height;
|
||||||
|
Ncv32u _pitch;
|
||||||
|
NCVMemoryType _memtype;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMatrixAlloc
|
||||||
|
*/
|
||||||
|
template <class T>
|
||||||
|
class NCVMatrixAlloc : public NCVMatrix<T>
|
||||||
|
{
|
||||||
|
NCVMatrixAlloc();
|
||||||
|
NCVMatrixAlloc(const NCVMatrixAlloc &);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
|
||||||
|
:
|
||||||
|
allocator(allocator)
|
||||||
|
{
|
||||||
|
NCVStatus ncvStat;
|
||||||
|
|
||||||
|
this->clear();
|
||||||
|
this->allocatedMem.clear();
|
||||||
|
|
||||||
|
Ncv32u widthBytes = width * sizeof(T);
|
||||||
|
Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment());
|
||||||
|
|
||||||
|
if (pitch != 0)
|
||||||
|
{
|
||||||
|
ncvAssertPrintReturn(pitch >= pitchBytes &&
|
||||||
|
(pitch & (allocator.alignment() - 1)) == 0,
|
||||||
|
"NCVMatrixAlloc ctor:: incorrect pitch passed", );
|
||||||
|
pitchBytes = pitch;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ncv32u requiredAllocSize = pitchBytes * height;
|
||||||
|
|
||||||
|
ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize);
|
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", );
|
||||||
|
|
||||||
|
this->_ptr = (T *)this->allocatedMem.begin.ptr;
|
||||||
|
this->_width = width;
|
||||||
|
this->_height = height;
|
||||||
|
this->_pitch = pitchBytes;
|
||||||
|
this->_memtype = this->allocatedMem.begin.memtype;
|
||||||
|
}
|
||||||
|
|
||||||
|
~NCVMatrixAlloc()
|
||||||
|
{
|
||||||
|
NCVStatus ncvStat;
|
||||||
|
|
||||||
|
ncvStat = allocator.dealloc(this->allocatedMem);
|
||||||
|
ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed");
|
||||||
|
|
||||||
|
this->clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NcvBool isMemAllocated() const
|
||||||
|
{
|
||||||
|
return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Ncv32u getAllocatorsAlignment() const
|
||||||
|
{
|
||||||
|
return allocator.alignment();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemSegment getSegment() const
|
||||||
|
{
|
||||||
|
return allocatedMem;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
INCVMemAllocator &allocator;
|
||||||
|
NCVMemSegment allocatedMem;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMatrixReuse
|
||||||
|
*/
|
||||||
|
template <class T>
|
||||||
|
class NCVMatrixReuse : public NCVMatrix<T>
|
||||||
|
{
|
||||||
|
NCVMatrixReuse();
|
||||||
|
NCVMatrixReuse(const NCVMatrixReuse &);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false)
|
||||||
|
{
|
||||||
|
this->bReused = false;
|
||||||
|
this->clear();
|
||||||
|
|
||||||
|
Ncv32u widthBytes = width * sizeof(T);
|
||||||
|
Ncv32u pitchBytes = alignUp(widthBytes, alignment);
|
||||||
|
|
||||||
|
if (pitch != 0)
|
||||||
|
{
|
||||||
|
if (!bSkipPitchCheck)
|
||||||
|
{
|
||||||
|
ncvAssertPrintReturn(pitch >= pitchBytes &&
|
||||||
|
(pitch & (alignment - 1)) == 0,
|
||||||
|
"NCVMatrixReuse ctor:: incorrect pitch passed", );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ncvAssertPrintReturn(pitch >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", );
|
||||||
|
}
|
||||||
|
pitchBytes = pitch;
|
||||||
|
}
|
||||||
|
|
||||||
|
ncvAssertPrintReturn(pitchBytes * height <= memSegment.size, \
|
||||||
|
"NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );
|
||||||
|
|
||||||
|
this->_width = width;
|
||||||
|
this->_height = height;
|
||||||
|
this->_pitch = pitchBytes;
|
||||||
|
this->_ptr = (T *)memSegment.begin.ptr;
|
||||||
|
this->_memtype = memSegment.begin.memtype;
|
||||||
|
|
||||||
|
this->bReused = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NcvBool isMemReused() const
|
||||||
|
{
|
||||||
|
return this->bReused;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
NcvBool bReused;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // _ncv_hpp_
|
2603
modules/gpu/src/nvidia/NCVHaarObjectDetection.cu
Normal file
2603
modules/gpu/src/nvidia/NCVHaarObjectDetection.cu
Normal file
File diff suppressed because it is too large
Load Diff
501
modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp
Normal file
501
modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp
Normal file
@ -0,0 +1,501 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// NVIDIA CUDA implementation of Viola-Jones Object Detection Framework
|
||||||
|
//
|
||||||
|
// The algorithm and code are explained in the upcoming GPU Computing Gems
|
||||||
|
// chapter in detail:
|
||||||
|
//
|
||||||
|
// Anton Obukhov, "Haar Classifiers for Object Detection with CUDA"
|
||||||
|
// PDF URL placeholder
|
||||||
|
// email: aobukhov@nvidia.com, devsupport@nvidia.com
|
||||||
|
//
|
||||||
|
// Credits for help with the code to:
|
||||||
|
// Alexey Mendelenko, Cyril Crassin, and Mikhail Smirnov.
|
||||||
|
//
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifndef _ncvhaarobjectdetection_hpp_
|
||||||
|
#define _ncvhaarobjectdetection_hpp_
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include "NCV.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Guaranteed size cross-platform classifier structures
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
struct HaarFeature64
|
||||||
|
{
|
||||||
|
uint2 _ui2;
|
||||||
|
|
||||||
|
#define HaarFeature64_CreateCheck_MaxRectField 0xFF
|
||||||
|
|
||||||
|
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u clsWidth, Ncv32u clsHeight)
|
||||||
|
{
|
||||||
|
ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
|
||||||
|
((NcvRect8u*)&(this->_ui2.x))->x = rectX;
|
||||||
|
((NcvRect8u*)&(this->_ui2.x))->y = rectY;
|
||||||
|
((NcvRect8u*)&(this->_ui2.x))->width = rectWidth;
|
||||||
|
((NcvRect8u*)&(this->_ui2.x))->height = rectHeight;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ NCVStatus setWeight(Ncv32f weight)
|
||||||
|
{
|
||||||
|
((Ncv32f*)&(this->_ui2.y))[0] = weight;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight)
|
||||||
|
{
|
||||||
|
NcvRect8u tmpRect = *(NcvRect8u*)(&this->_ui2.x);
|
||||||
|
*rectX = tmpRect.x;
|
||||||
|
*rectY = tmpRect.y;
|
||||||
|
*rectWidth = tmpRect.width;
|
||||||
|
*rectHeight = tmpRect.height;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __host__ Ncv32f getWeight(void)
|
||||||
|
{
|
||||||
|
return *(Ncv32f*)(&this->_ui2.y);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct HaarFeatureDescriptor32
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
#define HaarFeatureDescriptor32_Interpret_MaskFlagTilted 0x80000000
|
||||||
|
#define HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures 0x7F
|
||||||
|
#define HaarFeatureDescriptor32_NumFeatures_Shift 24
|
||||||
|
#define HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset 0x00FFFFFF
|
||||||
|
|
||||||
|
Ncv32u desc;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
__host__ NCVStatus create(NcvBool bTilted, Ncv32u numFeatures, Ncv32u offsetFeatures)
|
||||||
|
{
|
||||||
|
if (numFeatures > HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures)
|
||||||
|
{
|
||||||
|
return NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER;
|
||||||
|
}
|
||||||
|
if (offsetFeatures > HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset)
|
||||||
|
{
|
||||||
|
return NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE;
|
||||||
|
}
|
||||||
|
this->desc = 0;
|
||||||
|
this->desc |= (bTilted ? HaarFeatureDescriptor32_Interpret_MaskFlagTilted : 0);
|
||||||
|
this->desc |= (numFeatures << HaarFeatureDescriptor32_NumFeatures_Shift);
|
||||||
|
this->desc |= offsetFeatures;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __host__ NcvBool isTilted(void)
|
||||||
|
{
|
||||||
|
return (this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagTilted) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __host__ Ncv32u getNumFeatures(void)
|
||||||
|
{
|
||||||
|
return (this->desc & ~HaarFeatureDescriptor32_Interpret_MaskFlagTilted) >> HaarFeatureDescriptor32_NumFeatures_Shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __host__ Ncv32u getFeaturesOffset(void)
|
||||||
|
{
|
||||||
|
return this->desc & HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct HaarClassifierNodeDescriptor32
|
||||||
|
{
|
||||||
|
uint1 _ui1;
|
||||||
|
|
||||||
|
#define HaarClassifierNodeDescriptor32_Interpret_MaskSwitch (1 << 30)
|
||||||
|
|
||||||
|
__host__ NCVStatus create(Ncv32f leafValue)
|
||||||
|
{
|
||||||
|
if ((*(Ncv32u *)&leafValue) & HaarClassifierNodeDescriptor32_Interpret_MaskSwitch)
|
||||||
|
{
|
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION;
|
||||||
|
}
|
||||||
|
*(Ncv32f *)&this->_ui1 = leafValue;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ NCVStatus create(Ncv32u offsetHaarClassifierNode)
|
||||||
|
{
|
||||||
|
if (offsetHaarClassifierNode >= HaarClassifierNodeDescriptor32_Interpret_MaskSwitch)
|
||||||
|
{
|
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION;
|
||||||
|
}
|
||||||
|
this->_ui1.x = offsetHaarClassifierNode;
|
||||||
|
this->_ui1.x |= HaarClassifierNodeDescriptor32_Interpret_MaskSwitch;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __host__ NcvBool isLeaf(void)
|
||||||
|
{
|
||||||
|
return !(this->_ui1.x & HaarClassifierNodeDescriptor32_Interpret_MaskSwitch);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ Ncv32f getLeafValueHost(void)
|
||||||
|
{
|
||||||
|
return *(Ncv32f *)&this->_ui1.x;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
__device__ Ncv32f getLeafValue(void)
|
||||||
|
{
|
||||||
|
return __int_as_float(this->_ui1.x);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__device__ __host__ Ncv32u getNextNodeOffset(void)
|
||||||
|
{
|
||||||
|
return (this->_ui1.x & ~HaarClassifierNodeDescriptor32_Interpret_MaskSwitch);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct HaarClassifierNode128
|
||||||
|
{
|
||||||
|
uint4 _ui4;
|
||||||
|
|
||||||
|
__host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f)
|
||||||
|
{
|
||||||
|
this->_ui4.x = *(Ncv32u *)&f;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ NCVStatus setThreshold(Ncv32f t)
|
||||||
|
{
|
||||||
|
this->_ui4.y = *(Ncv32u *)&t;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl)
|
||||||
|
{
|
||||||
|
this->_ui4.z = *(Ncv32u *)&nl;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr)
|
||||||
|
{
|
||||||
|
this->_ui4.w = *(Ncv32u *)&nr;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void)
|
||||||
|
{
|
||||||
|
return *(HaarFeatureDescriptor32 *)&this->_ui4.x;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ Ncv32f getThreshold(void)
|
||||||
|
{
|
||||||
|
return *(Ncv32f*)&this->_ui4.y;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void)
|
||||||
|
{
|
||||||
|
return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void)
|
||||||
|
{
|
||||||
|
return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.w;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct HaarStage64
|
||||||
|
{
|
||||||
|
#define HaarStage64_Interpret_MaskRootNodes 0x0000FFFF
|
||||||
|
#define HaarStage64_Interpret_MaskRootNodeOffset 0xFFFF0000
|
||||||
|
#define HaarStage64_Interpret_ShiftRootNodeOffset 16
|
||||||
|
|
||||||
|
uint2 _ui2;
|
||||||
|
|
||||||
|
__host__ NCVStatus setStageThreshold(Ncv32f t)
|
||||||
|
{
|
||||||
|
this->_ui2.x = *(Ncv32u *)&t;
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ NCVStatus setStartClassifierRootNodeOffset(Ncv32u val)
|
||||||
|
{
|
||||||
|
if (val > (HaarStage64_Interpret_MaskRootNodeOffset >> HaarStage64_Interpret_ShiftRootNodeOffset))
|
||||||
|
{
|
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION;
|
||||||
|
}
|
||||||
|
this->_ui2.y = (val << HaarStage64_Interpret_ShiftRootNodeOffset) | (this->_ui2.y & HaarStage64_Interpret_MaskRootNodes);
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ NCVStatus setNumClassifierRootNodes(Ncv32u val)
|
||||||
|
{
|
||||||
|
if (val > HaarStage64_Interpret_MaskRootNodes)
|
||||||
|
{
|
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION;
|
||||||
|
}
|
||||||
|
this->_ui2.y = val | (this->_ui2.y & HaarStage64_Interpret_MaskRootNodeOffset);
|
||||||
|
return NCV_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ Ncv32f getStageThreshold(void)
|
||||||
|
{
|
||||||
|
return *(Ncv32f*)&this->_ui2.x;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void)
|
||||||
|
{
|
||||||
|
return (this->_ui2.y >> HaarStage64_Interpret_ShiftRootNodeOffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ Ncv32u getNumClassifierRootNodes(void)
|
||||||
|
{
|
||||||
|
return (this->_ui2.y & HaarStage64_Interpret_MaskRootNodes);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
NPPST_CT_ASSERT(sizeof(HaarFeature64) == 8);
|
||||||
|
NPPST_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
|
||||||
|
NPPST_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
|
||||||
|
NPPST_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
|
||||||
|
NPPST_CT_ASSERT(sizeof(HaarStage64) == 8);
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Classifier cascade descriptor
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
struct HaarClassifierCascadeDescriptor
|
||||||
|
{
|
||||||
|
Ncv32u NumStages;
|
||||||
|
Ncv32u NumClassifierRootNodes;
|
||||||
|
Ncv32u NumClassifierTotalNodes;
|
||||||
|
Ncv32u NumFeatures;
|
||||||
|
NcvSize32u ClassifierSize;
|
||||||
|
NcvBool bNeedsTiltedII;
|
||||||
|
NcvBool bHasStumpsOnly;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Functional interface
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
NCVPipeObjDet_Default = 0x000,
|
||||||
|
NCVPipeObjDet_UseFairImageScaling = 0x001,
|
||||||
|
NCVPipeObjDet_FindLargestObject = 0x002,
|
||||||
|
NCVPipeObjDet_VisualizeInPlace = 0x004,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
|
||||||
|
NcvSize32u srcRoi,
|
||||||
|
NCVVector<NcvRect32u> &d_dstRects,
|
||||||
|
Ncv32u &dstNumRects,
|
||||||
|
|
||||||
|
HaarClassifierCascadeDescriptor &haar,
|
||||||
|
NCVVector<HaarStage64> &h_HaarStages,
|
||||||
|
NCVVector<HaarStage64> &d_HaarStages,
|
||||||
|
NCVVector<HaarClassifierNode128> &d_HaarNodes,
|
||||||
|
NCVVector<HaarFeature64> &d_HaarFeatures,
|
||||||
|
|
||||||
|
NcvSize32u minObjSize,
|
||||||
|
Ncv32u minNeighbors, //default 4
|
||||||
|
Ncv32f scaleStep, //default 1.2f
|
||||||
|
Ncv32u pixelStep, //default 1
|
||||||
|
Ncv32u flags, //default NCVPipeObjDet_Default
|
||||||
|
|
||||||
|
INCVMemAllocator &gpuAllocator,
|
||||||
|
INCVMemAllocator &cpuAllocator,
|
||||||
|
Ncv32u devPropMajor,
|
||||||
|
Ncv32u devPropMinor,
|
||||||
|
cudaStream_t cuStream);
|
||||||
|
|
||||||
|
|
||||||
|
#define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF
|
||||||
|
#define HAAR_STDDEV_BORDER 1
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
|
||||||
|
NCVMatrix<Ncv32f> &d_weights,
|
||||||
|
NCVMatrixAlloc<Ncv32u> &d_pixelMask,
|
||||||
|
Ncv32u &numDetections,
|
||||||
|
HaarClassifierCascadeDescriptor &haar,
|
||||||
|
NCVVector<HaarStage64> &h_HaarStages,
|
||||||
|
NCVVector<HaarStage64> &d_HaarStages,
|
||||||
|
NCVVector<HaarClassifierNode128> &d_HaarNodes,
|
||||||
|
NCVVector<HaarFeature64> &d_HaarFeatures,
|
||||||
|
NcvBool bMaskElements,
|
||||||
|
NcvSize32u anchorsRoi,
|
||||||
|
Ncv32u pixelStep,
|
||||||
|
Ncv32f scaleArea,
|
||||||
|
INCVMemAllocator &gpuAllocator,
|
||||||
|
INCVMemAllocator &cpuAllocator,
|
||||||
|
Ncv32u devPropMajor,
|
||||||
|
Ncv32u devPropMinor,
|
||||||
|
cudaStream_t cuStream);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
|
||||||
|
NCVMatrix<Ncv32f> &h_weights,
|
||||||
|
NCVMatrixAlloc<Ncv32u> &h_pixelMask,
|
||||||
|
Ncv32u &numDetections,
|
||||||
|
HaarClassifierCascadeDescriptor &haar,
|
||||||
|
NCVVector<HaarStage64> &h_HaarStages,
|
||||||
|
NCVVector<HaarClassifierNode128> &h_HaarNodes,
|
||||||
|
NCVVector<HaarFeature64> &h_HaarFeatures,
|
||||||
|
NcvBool bMaskElements,
|
||||||
|
NcvSize32u anchorsRoi,
|
||||||
|
Ncv32u pixelStep,
|
||||||
|
Ncv32f scaleArea);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
|
||||||
|
Ncv32u dstStride,
|
||||||
|
Ncv32u dstWidth,
|
||||||
|
Ncv32u dstHeight,
|
||||||
|
NcvRect32u *d_rects,
|
||||||
|
Ncv32u numRects,
|
||||||
|
Ncv8u color,
|
||||||
|
cudaStream_t cuStream);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
|
||||||
|
Ncv32u dstStride,
|
||||||
|
Ncv32u dstWidth,
|
||||||
|
Ncv32u dstHeight,
|
||||||
|
NcvRect32u *d_rects,
|
||||||
|
Ncv32u numRects,
|
||||||
|
Ncv32u color,
|
||||||
|
cudaStream_t cuStream);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
|
||||||
|
Ncv32u dstStride,
|
||||||
|
Ncv32u dstWidth,
|
||||||
|
Ncv32u dstHeight,
|
||||||
|
NcvRect32u *h_rects,
|
||||||
|
Ncv32u numRects,
|
||||||
|
Ncv8u color);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
|
||||||
|
Ncv32u dstStride,
|
||||||
|
Ncv32u dstWidth,
|
||||||
|
Ncv32u dstHeight,
|
||||||
|
NcvRect32u *h_rects,
|
||||||
|
Ncv32u numRects,
|
||||||
|
Ncv32u color);
|
||||||
|
|
||||||
|
|
||||||
|
#define RECT_SIMILARITY_PROPORTION 0.2f
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
|
||||||
|
Ncv32u numPixelMaskDetections,
|
||||||
|
NCVVector<NcvRect32u> &hypotheses,
|
||||||
|
Ncv32u &totalDetections,
|
||||||
|
Ncv32u totalMaxDetections,
|
||||||
|
Ncv32u rectWidth,
|
||||||
|
Ncv32u rectHeight,
|
||||||
|
Ncv32f curScale,
|
||||||
|
cudaStream_t cuStream);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
|
||||||
|
Ncv32u numPixelMaskDetections,
|
||||||
|
NCVVector<NcvRect32u> &hypotheses,
|
||||||
|
Ncv32u &totalDetections,
|
||||||
|
Ncv32u totalMaxDetections,
|
||||||
|
Ncv32u rectWidth,
|
||||||
|
Ncv32u rectHeight,
|
||||||
|
Ncv32f curScale);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
|
||||||
|
Ncv32u &numHypotheses,
|
||||||
|
Ncv32u minNeighbors,
|
||||||
|
Ncv32f intersectEps,
|
||||||
|
NCVVector<Ncv32u> *hypothesesWeights);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
|
||||||
|
Ncv32u &numNodes, Ncv32u &numFeatures);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
|
||||||
|
HaarClassifierCascadeDescriptor &haar,
|
||||||
|
NCVVector<HaarStage64> &h_HaarStages,
|
||||||
|
NCVVector<HaarClassifierNode128> &h_HaarNodes,
|
||||||
|
NCVVector<HaarFeature64> &h_HaarFeatures);
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
|
||||||
|
HaarClassifierCascadeDescriptor haar,
|
||||||
|
NCVVector<HaarStage64> &h_HaarStages,
|
||||||
|
NCVVector<HaarClassifierNode128> &h_HaarNodes,
|
||||||
|
NCVVector<HaarFeature64> &h_HaarFeatures);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif // _ncvhaarobjectdetection_hpp_
|
174
modules/gpu/src/nvidia/NCVRuntimeTemplates.hpp
Normal file
174
modules/gpu/src/nvidia/NCVRuntimeTemplates.hpp
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// The Loki Library
|
||||||
|
// Copyright (c) 2001 by Andrei Alexandrescu
|
||||||
|
// This code accompanies the book:
|
||||||
|
// Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design
|
||||||
|
// Patterns Applied". Copyright (c) 2001. Addison-Wesley.
|
||||||
|
// Permission to use, copy, modify, distribute and sell this software for any
|
||||||
|
// purpose is hereby granted without fee, provided that the above copyright
|
||||||
|
// notice appear in all copies and that both that copyright notice and this
|
||||||
|
// permission notice appear in supporting documentation.
|
||||||
|
// The author or Addison-Welsey Longman make no representations about the
|
||||||
|
// suitability of this software for any purpose. It is provided "as is"
|
||||||
|
// without express or implied warranty.
|
||||||
|
// http://loki-lib.sourceforge.net/index.php?n=Main.License
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifndef _ncvruntimetemplates_hpp_
|
||||||
|
#define _ncvruntimetemplates_hpp_
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Loki
|
||||||
|
{
|
||||||
|
//==============================================================================
|
||||||
|
// class NullType
|
||||||
|
// Used as a placeholder for "no type here"
|
||||||
|
// Useful as an end marker in typelists
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
class NullType {};
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
// class template Typelist
|
||||||
|
// The building block of typelists of any length
|
||||||
|
// Use it through the LOKI_TYPELIST_NN macros
|
||||||
|
// Defines nested types:
|
||||||
|
// Head (first element, a non-typelist type by convention)
|
||||||
|
// Tail (second element, can be another typelist)
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
template <class T, class U>
|
||||||
|
struct Typelist
|
||||||
|
{
|
||||||
|
typedef T Head;
|
||||||
|
typedef U Tail;
|
||||||
|
};
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
// class template Int2Type
|
||||||
|
// Converts each integral constant into a unique type
|
||||||
|
// Invocation: Int2Type<v> where v is a compile-time constant integral
|
||||||
|
// Defines 'value', an enum that evaluates to v
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
template <int v>
|
||||||
|
struct Int2Type
|
||||||
|
{
|
||||||
|
enum { value = v };
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace TL
|
||||||
|
{
|
||||||
|
//==============================================================================
|
||||||
|
// class template TypeAt
|
||||||
|
// Finds the type at a given index in a typelist
|
||||||
|
// Invocation (TList is a typelist and index is a compile-time integral
|
||||||
|
// constant):
|
||||||
|
// TypeAt<TList, index>::Result
|
||||||
|
// returns the type in position 'index' in TList
|
||||||
|
// If you pass an out-of-bounds index, the result is a compile-time error
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
template <class TList, unsigned int index> struct TypeAt;
|
||||||
|
|
||||||
|
template <class Head, class Tail>
|
||||||
|
struct TypeAt<Typelist<Head, Tail>, 0>
|
||||||
|
{
|
||||||
|
typedef Head Result;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Head, class Tail, unsigned int i>
|
||||||
|
struct TypeAt<Typelist<Head, Tail>, i>
|
||||||
|
{
|
||||||
|
typedef typename TypeAt<Tail, i - 1>::Result Result;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Runtime boolean template instance dispatcher
|
||||||
|
// Cyril Crassin <cyril.crassin@icare3d.org>
|
||||||
|
// NVIDIA, 2010
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
namespace NCVRuntimeTemplateBool
|
||||||
|
{
|
||||||
|
//This struct is used to transform a list of parameters into template arguments
|
||||||
|
//The idea is to build a typelist containing the arguments
|
||||||
|
//and to pass this typelist to a user defined functor
|
||||||
|
template<typename TList, int NumArguments, class Func>
|
||||||
|
struct KernelCaller
|
||||||
|
{
|
||||||
|
//Convenience function used by the user
|
||||||
|
//Takes a variable argument list, transforms it into a list
|
||||||
|
static void call(Func &functor, int dummy, ...)
|
||||||
|
{
|
||||||
|
//Vector used to collect arguments
|
||||||
|
std::vector<int> templateParamList;
|
||||||
|
|
||||||
|
//Variable argument list manipulation
|
||||||
|
va_list listPointer;
|
||||||
|
va_start(listPointer, dummy);
|
||||||
|
//Collect parameters into the list
|
||||||
|
for(int i=0; i<NumArguments; i++)
|
||||||
|
{
|
||||||
|
int val = va_arg(listPointer, int);
|
||||||
|
templateParamList.push_back(val);
|
||||||
|
}
|
||||||
|
va_end(listPointer);
|
||||||
|
|
||||||
|
//Call the actual typelist building function
|
||||||
|
call(functor, templateParamList);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Actual function called recursively to build a typelist based
|
||||||
|
//on a list of values
|
||||||
|
static void call( Func &functor, std::vector<int> &templateParamList)
|
||||||
|
{
|
||||||
|
//Get current parameter value in the list
|
||||||
|
int val = templateParamList[templateParamList.size() - 1];
|
||||||
|
templateParamList.pop_back();
|
||||||
|
|
||||||
|
//Select the compile time value to add into the typelist
|
||||||
|
//depending on the runtime variable and make recursive call.
|
||||||
|
//Both versions are really instantiated
|
||||||
|
if(val)
|
||||||
|
{
|
||||||
|
KernelCaller<
|
||||||
|
Loki::Typelist<typename Loki::Int2Type<true>, TList >,
|
||||||
|
NumArguments-1, Func >
|
||||||
|
::call(functor, templateParamList);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
KernelCaller<
|
||||||
|
Loki::Typelist<typename Loki::Int2Type<false>, TList >,
|
||||||
|
NumArguments-1, Func >
|
||||||
|
::call(functor, templateParamList);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//Specialization for 0 value left in the list
|
||||||
|
//-> actual kernel functor call
|
||||||
|
template<class TList, class Func>
|
||||||
|
struct KernelCaller<TList, 0, Func>
|
||||||
|
{
|
||||||
|
static void call(Func &functor)
|
||||||
|
{
|
||||||
|
//Call to the functor's kernel call method
|
||||||
|
functor.call(TList()); //TList instantiated to get the method template parameter resolved
|
||||||
|
}
|
||||||
|
|
||||||
|
static void call(Func &functor, std::vector<int> &templateParams)
|
||||||
|
{
|
||||||
|
functor.call(TList());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif //_ncvruntimetemplates_hpp_
|
@ -71,6 +71,9 @@
|
|||||||
#include "npp_staging.h"
|
#include "npp_staging.h"
|
||||||
#include "surf_key_point.h"
|
#include "surf_key_point.h"
|
||||||
|
|
||||||
|
#include "nvidia/NCV.hpp"
|
||||||
|
#include "nvidia/NCVHaarObjectDetection.hpp"
|
||||||
|
|
||||||
#define CUDART_MINIMUM_REQUIRED_VERSION 3020
|
#define CUDART_MINIMUM_REQUIRED_VERSION 3020
|
||||||
#define NPP_MINIMUM_REQUIRED_VERSION 3216
|
#define NPP_MINIMUM_REQUIRED_VERSION 3216
|
||||||
|
|
||||||
|
193
samples/gpu/cascadeclassifier.cpp
Normal file
193
samples/gpu/cascadeclassifier.cpp
Normal file
@ -0,0 +1,193 @@
|
|||||||
|
// WARNING: this sample is under construction! Use it on your own risk.
|
||||||
|
|
||||||
|
#include <opencv2/contrib/contrib.hpp>
|
||||||
|
#include <opencv2/objdetect/objdetect.hpp>
|
||||||
|
#include <opencv2/highgui/highgui.hpp>
|
||||||
|
#include <opencv2/imgproc/imgproc.hpp>
|
||||||
|
#include <opencv2/gpu/gpu.hpp>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::gpu;
|
||||||
|
|
||||||
|
void help()
|
||||||
|
{
|
||||||
|
cout << "Usage: ./cascadeclassifier <cascade_file> <image_or_video_or_cameraid>\n"
|
||||||
|
"Using OpenCV version " << CV_VERSION << endl << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DetectAndDraw(Mat& img, CascadeClassifier_GPU& cascade);
|
||||||
|
|
||||||
|
String cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
||||||
|
String nestedCascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<class T> void convertAndReseize(const T& src, T& gray, T& resized, double scale = 2.0)
|
||||||
|
{
|
||||||
|
if (src.channels() == 3)
|
||||||
|
cvtColor( src, gray, CV_BGR2GRAY );
|
||||||
|
else
|
||||||
|
gray = src;
|
||||||
|
|
||||||
|
Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));
|
||||||
|
if (scale != 1)
|
||||||
|
resize(gray, resized, sz);
|
||||||
|
else
|
||||||
|
resized = gray;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int main( int argc, const char** argv )
|
||||||
|
{
|
||||||
|
if (argc != 3)
|
||||||
|
return help(), -1;
|
||||||
|
|
||||||
|
if (cv::gpu::getCudaEnabledDeviceCount() == 0)
|
||||||
|
return cerr << "No GPU found or the library is compiled without GPU support" << endl, -1;
|
||||||
|
|
||||||
|
VideoCapture capture;
|
||||||
|
|
||||||
|
string cascadeName = argv[1];
|
||||||
|
string inputName = argv[2];
|
||||||
|
|
||||||
|
cv::gpu::CascadeClassifier_GPU cascade_gpu;
|
||||||
|
if( !cascade_gpu.load( cascadeName ) )
|
||||||
|
return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
|
||||||
|
|
||||||
|
cv::CascadeClassifier cascade_cpu;
|
||||||
|
if( !cascade_cpu.load( cascadeName ) )
|
||||||
|
return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
|
||||||
|
|
||||||
|
Mat image = imread( inputName);
|
||||||
|
if( image.empty() )
|
||||||
|
if (!capture.open(inputName))
|
||||||
|
{
|
||||||
|
int camid = 0;
|
||||||
|
sscanf(inputName.c_str(), "%d", &camid);
|
||||||
|
if(!capture.open(camid))
|
||||||
|
cout << "Can't open source" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
namedWindow( "result", 1 );
|
||||||
|
Size fontSz = cv::getTextSize("T[]", FONT_HERSHEY_SIMPLEX, 1.0, 2, 0);
|
||||||
|
|
||||||
|
Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp;
|
||||||
|
vector<Rect> facesBuf_cpu;
|
||||||
|
|
||||||
|
GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;
|
||||||
|
|
||||||
|
/* parameters */
|
||||||
|
bool useGPU = true;
|
||||||
|
double scale_factor = 2;
|
||||||
|
|
||||||
|
bool visualizeInPlace = false;
|
||||||
|
bool findLargestObject = false;
|
||||||
|
|
||||||
|
printf("\t<space> - toggle GPU/CPU\n");
|
||||||
|
printf("\tL - toggle lagest faces\n");
|
||||||
|
printf("\tV - toggle visualisation in-place (for GPU only)\n");
|
||||||
|
printf("\t1/q - inc/dec scale\n");
|
||||||
|
|
||||||
|
int detections_num;
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
if( capture.isOpened() )
|
||||||
|
{
|
||||||
|
capture >> frame;
|
||||||
|
if( frame.empty())
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
(image.empty() ? frame : image).copyTo(frame_cpu);
|
||||||
|
frame_gpu.upload( image.empty() ? frame : image);
|
||||||
|
|
||||||
|
convertAndReseize(frame_gpu, gray_gpu, resized_gpu, scale_factor);
|
||||||
|
convertAndReseize(frame_cpu, gray_cpu, resized_cpu, scale_factor);
|
||||||
|
|
||||||
|
cv::TickMeter tm;
|
||||||
|
tm.start();
|
||||||
|
|
||||||
|
if (useGPU)
|
||||||
|
{
|
||||||
|
cascade_gpu.visualizeInPlace = visualizeInPlace;
|
||||||
|
cascade_gpu.findLargestObject = findLargestObject;
|
||||||
|
|
||||||
|
detections_num = cascade_gpu.detectMultiScale( resized_gpu, facesBuf_gpu );
|
||||||
|
facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
|
||||||
|
|
||||||
|
}
|
||||||
|
else /* so use CPU */
|
||||||
|
{
|
||||||
|
Size minSize = cascade_gpu.getClassifierSize();
|
||||||
|
if (findLargestObject)
|
||||||
|
{
|
||||||
|
float ratio = (float)std::min(frame.cols / minSize.width, frame.rows / minSize.height);
|
||||||
|
ratio = std::max(ratio / 2.5f, 1.f);
|
||||||
|
minSize = Size(cvRound(minSize.width * ratio), cvRound(minSize.height * ratio));
|
||||||
|
}
|
||||||
|
|
||||||
|
cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2, 4, (findLargestObject ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE, minSize);
|
||||||
|
detections_num = (int)facesBuf_cpu.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
tm.stop();
|
||||||
|
printf( "detection time = %g ms\n", tm.getTimeMilli() );
|
||||||
|
|
||||||
|
if (useGPU)
|
||||||
|
resized_gpu.download(resized_cpu);
|
||||||
|
|
||||||
|
if (!visualizeInPlace || !useGPU)
|
||||||
|
if (detections_num)
|
||||||
|
{
|
||||||
|
Rect* faces = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
|
||||||
|
for(int i = 0; i < detections_num; ++i)
|
||||||
|
cv::rectangle(resized_cpu, faces[i], Scalar(255));
|
||||||
|
}
|
||||||
|
|
||||||
|
Point text_pos(5, 25);
|
||||||
|
int offs = fontSz.height + 5;
|
||||||
|
Scalar color = CV_RGB(255, 0, 0);
|
||||||
|
|
||||||
|
|
||||||
|
cv::cvtColor(resized_cpu, frameDisp, CV_GRAY2BGR);
|
||||||
|
|
||||||
|
char buf[4096];
|
||||||
|
sprintf(buf, "%s, FPS = %0.3g", useGPU ? "GPU" : "CPU", 1.0/tm.getTimeSec());
|
||||||
|
putText(frameDisp, buf, text_pos, FONT_HERSHEY_SIMPLEX, 1.0, color, 2);
|
||||||
|
sprintf(buf, "scale = %0.3g, [%d*scale x %d*scale]", scale_factor, frame.cols, frame.rows);
|
||||||
|
putText(frameDisp, buf, text_pos+=Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2);
|
||||||
|
putText(frameDisp, "Hotkeys: space, 1, Q, L, V, Esc", text_pos+=Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2);
|
||||||
|
|
||||||
|
if (findLargestObject)
|
||||||
|
putText(frameDisp, "FindLargestObject", text_pos+=Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2);
|
||||||
|
|
||||||
|
if (visualizeInPlace && useGPU)
|
||||||
|
putText(frameDisp, "VisualizeInPlace", text_pos+Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2);
|
||||||
|
|
||||||
|
cv::imshow( "result", frameDisp);
|
||||||
|
|
||||||
|
int key = waitKey( 5 );
|
||||||
|
if( key == 27)
|
||||||
|
break;
|
||||||
|
|
||||||
|
switch (key)
|
||||||
|
{
|
||||||
|
case (int)' ': useGPU = !useGPU; printf("Using %s\n", useGPU ? "GPU" : "CPU");break;
|
||||||
|
case (int)'v': case (int)'V': visualizeInPlace = !visualizeInPlace; printf("VisualizeInPlace = %d\n", visualizeInPlace); break;
|
||||||
|
case (int)'l': case (int)'L': findLargestObject = !findLargestObject; printf("FindLargestObject = %d\n", findLargestObject); break;
|
||||||
|
case (int)'1': scale_factor*=1.05; printf("Scale factor = %g\n", scale_factor); break;
|
||||||
|
case (int)'q': case (int)'Q':scale_factor/=1.05; printf("Scale factor = %g\n", scale_factor); break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user