initial support of GPU LBP classifier: added new style xml format loading
commit 1365e28a54
parent 02170a0a58
@@ -88,6 +88,7 @@ if(CUDA_FOUND)
   if(APPLE)
     set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
   endif()
+  string(REPLACE "-Wsign-promo" "" CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")

   # we remove -ggdb3 flag as it leads to preprocessor errors when compiling CUDA files (CUDA 4.1)
   set(CMAKE_CXX_FLAGS_DEBUG_ ${CMAKE_CXX_FLAGS_DEBUG})
@@ -1422,6 +1422,44 @@ private:
     CascadeClassifierImpl* impl;
 };

+// The cascade classifier class for object detection.
+class CV_EXPORTS CascadeClassifier_GPU_LBP
+{
+public:
+    enum stage { BOOST = 0 };
+    enum feature { LBP = 0 };
+    CascadeClassifier_GPU_LBP();
+    ~CascadeClassifier_GPU_LBP();
+
+    bool empty() const;
+    bool load(const std::string& filename);
+    void release();
+
+    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
+
+    bool findLargestObject;
+    bool visualizeInPlace;
+
+    Size getClassifierSize() const;
+private:
+    bool read(const FileNode &root);
+
+    static const stage stageType = BOOST;
+    static const feature feature = LBP;
+
+    cv::Size NxM;
+    bool isStumps;
+    int ncategories;
+    struct Stage;
+    Stage* stages;
+
+    struct DTree;
+    // DTree* classifiers;
+
+    struct DTreeNode;
+    // DTreeNode* nodes;
+};
+
 ////////////////////////////////// SURF //////////////////////////////////////////

 class CV_EXPORTS SURF_GPU
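For context, a minimal usage sketch of the interface declared above, assuming a cascade trained in the new-style XML format; the file and image names are hypothetical, and note that in this commit detectMultiScale is still a stub, so this only exercises the declared API:

    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/highgui/highgui.hpp>

    int main()
    {
        cv::gpu::CascadeClassifier_GPU_LBP classifier;
        // hypothetical path to a new-style LBP cascade
        if (!classifier.load("lbpcascade_frontalface.xml") || classifier.empty())
            return -1;

        cv::Mat host = cv::imread("lena.jpg", 0);   // grayscale input, name hypothetical
        cv::gpu::GpuMat image(host), objectsBuf;

        // scaleFactor = 1.2, minNeighbors = 4, minSize = Size() by default
        int ndetections = classifier.detectMultiScale(image, objectsBuf);
        return ndetections > 0 ? 0 : 1;
    }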
@@ -272,14 +272,14 @@ void cv::gpu::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& distan
     const float* distance_ptr = distance.ptr<float>();
     for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr)
     {
-        int trainIdx = *trainIdx_ptr;
+        int train_idx = *trainIdx_ptr;

-        if (trainIdx == -1)
+        if (train_idx == -1)
             continue;

-        float distance = *distance_ptr;
+        float distance_local = *distance_ptr;

-        DMatch m(queryIdx, trainIdx, 0, distance);
+        DMatch m(queryIdx, train_idx, 0, distance_local);

         matches.push_back(m);
     }
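The renames in this hunk remove locals that shadowed the parameters visible in the hunk header (const Mat& trainIdx, const Mat& distance). A minimal reproduction of the warning being silenced, with hypothetical names:

    void matchConvert(const cv::Mat& trainIdx)
    {
        int trainIdx = 0;   // -Wshadow: declaration shadows the parameter above
        (void)trainIdx;
    }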
@@ -41,13 +41,37 @@
 //M*/

 #include "precomp.hpp"
+#include <vector>

 using namespace cv;
 using namespace cv::gpu;
 using namespace std;

-#if !defined (HAVE_CUDA)
+struct cv::gpu::CascadeClassifier_GPU_LBP::Stage
+{
+    int first;
+    int ntrees;
+    float threshold;
+    Stage(int f = 0, int n = 0, float t = 0.f) : first(f), ntrees(n), threshold(t) {}
+};
+
+struct cv::gpu::CascadeClassifier_GPU_LBP::DTree
+{
+    int nodeCount;
+    DTree(int n = 0) : nodeCount(n) {}
+};
+
+struct cv::gpu::CascadeClassifier_GPU_LBP::DTreeNode
+{
+    int featureIdx;
+    //float threshold; // for ordered features only
+    int left;
+    int right;
+    DTreeNode(int f = 0, int l = 0, int r = 0) : featureIdx(f), left(l), right(r) {}
+};
+
+#if !defined (HAVE_CUDA)
+
+// ============ old fashioned haar cascade ==============================================//
 cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); }
 cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string&) { throw_nogpu(); }
 cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { throw_nogpu(); }
@@ -58,8 +82,174 @@ Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const { throw_nogpu();

 int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& , GpuMat& , double , int , Size) { throw_nogpu(); return 0; }

+// ============ LBP cascade ==============================================//
+cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifier_GPU_LBP() { throw_nogpu(); }
+cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP() { throw_nogpu(); }
+
+bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const { throw_nogpu(); return true; }
+bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string&) { throw_nogpu(); return true; }
+Size cv::gpu::CascadeClassifier_GPU_LBP::getClassifierSize() const { throw_nogpu(); return Size(); }
+
+int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale( const GpuMat& , GpuMat& , double , int , Size) { throw_nogpu(); return 0; }
+
 #else

+cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifier_GPU_LBP()
+{
+}
+
+cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP()
+{
+}
+
+bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const { throw_nogpu(); return true; }
+
+bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml)
+{
+    FileStorage fs(classifierAsXml, FileStorage::READ);
+    if (!fs.isOpened())
+        return false;
+    if (read(fs.getFirstTopLevelNode()))
+        return true;
+    return false;
+}
+
+#define GPU_CC_STAGE_TYPE "stageType"
+#define GPU_CC_FEATURE_TYPE "featureType"
+#define GPU_CC_BOOST "BOOST"
+#define GPU_CC_LBP "LBP"
+#define GPU_CC_MAX_CAT_COUNT "maxCatCount"
+#define GPU_CC_HEIGHT "height"
+#define GPU_CC_WIDTH "width"
+#define GPU_CC_STAGE_PARAMS "stageParams"
+#define GPU_CC_MAX_DEPTH "maxDepth"
+#define GPU_CC_FEATURE_PARAMS "featureParams"
+#define GPU_CC_STAGES "stages"
+#define GPU_CC_STAGE_THRESHOLD "stageThreshold"
+#define GPU_THRESHOLD_EPS 1e-5f
+#define GPU_CC_WEAK_CLASSIFIERS "weakClassifiers"
+#define GPU_CC_INTERNAL_NODES "internalNodes"
+#define GPU_CC_LEAF_VALUES "leafValues"
+
+bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
+{
+    string stageTypeStr = (string)root[GPU_CC_STAGE_TYPE];
+    CV_Assert(stageTypeStr == GPU_CC_BOOST);
+
+    string featureTypeStr = (string)root[GPU_CC_FEATURE_TYPE];
+    CV_Assert(featureTypeStr == GPU_CC_LBP);
+
+    NxM.width = (int)root[GPU_CC_WIDTH];
+    NxM.height = (int)root[GPU_CC_HEIGHT];
+    CV_Assert( NxM.height > 0 && NxM.width > 0 );
+
+    isStumps = ((int)(root[GPU_CC_STAGE_PARAMS][GPU_CC_MAX_DEPTH]) == 1) ? true : false;
+
+    // features
+    FileNode fn = root[GPU_CC_FEATURE_PARAMS];
+    if (fn.empty())
+        return false;
+
+    ncategories = fn[GPU_CC_MAX_CAT_COUNT];
+    int subsetSize = (ncategories + 31)/32, nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 );// ?
+
+    fn = root[GPU_CC_STAGES];
+    if (fn.empty())
+        return false;
+
+    delete[] stages;
+    // delete[] classifiers;
+    // delete[] nodes;
+
+    stages = new Stage[fn.size()];
+
+    std::vector<DTree> cl_trees;
+    std::vector<DTreeNode> cl_nodes;
+    std::vector<float> cl_leaves;
+    std::vector<int> subsets;
+
+    FileNodeIterator it = fn.begin(), it_end = fn.end();
+    size_t s_it = 0;
+
+    for (size_t si = 0; it != it_end; si++, ++it )
+    {
+        FileNode fns = *it;
+
+        fns = fns[GPU_CC_WEAK_CLASSIFIERS];
+        if (fns.empty())
+            return false;
+
+        stages[s_it++] = Stage((float)fns[GPU_CC_STAGE_THRESHOLD] - GPU_THRESHOLD_EPS,
+                               (int)cl_trees.size(), (int)fns.size());
+
+        cl_trees.reserve(stages[si].first + stages[si].ntrees);
+
+        // weak trees
+        FileNodeIterator it1 = fns.begin(), it1_end = fns.end();
+        for ( ; it1 != it1_end; ++it1 )
+        {
+            FileNode fnw = *it1;
+
+            FileNode internalNodes = fnw[GPU_CC_INTERNAL_NODES];
+            FileNode leafValues = fnw[GPU_CC_LEAF_VALUES];
+            if ( internalNodes.empty() || leafValues.empty() )
+                return false;
+            DTree tree((int)internalNodes.size()/nodeStep );
+            cl_trees.push_back(tree);
+
+            cl_nodes.reserve(cl_nodes.size() + tree.nodeCount);
+            cl_leaves.reserve(cl_leaves.size() + leafValues.size());
+
+            if( subsetSize > 0 )
+                subsets.reserve(subsets.size() + tree.nodeCount * subsetSize);
+
+            // nodes
+            FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end();
+
+            for( ; iIt != iEnd; )
+            {
+                DTreeNode node((int)*(iIt++), (int)*(iIt++), (int)*(iIt++));
+                cl_nodes.push_back(node);
+
+                if ( subsetSize > 0 )
+                {
+                    for( int j = 0; j < subsetSize; j++, ++iIt )
+                        subsets.push_back((int)*iIt); //????
+                }
+            }
+
+            iIt = leafValues.begin(), iEnd = leafValues.end();
+            // leaves
+            for( ; iIt != iEnd; ++iIt )
+                cl_leaves.push_back((float)*iIt);
+        }
+    }
+
+    return true;
+}
+
+#undef GPU_CC_STAGE_TYPE
+#undef GPU_CC_BOOST
+#undef GPU_CC_FEATURE_TYPE
+#undef GPU_CC_LBP
+#undef GPU_CC_MAX_CAT_COUNT
+#undef GPU_CC_HEIGHT
+#undef GPU_CC_WIDTH
+#undef GPU_CC_STAGE_PARAMS
+#undef GPU_CC_MAX_DEPTH
+#undef GPU_CC_FEATURE_PARAMS
+#undef GPU_CC_STAGES
+#undef GPU_CC_STAGE_THRESHOLD
+#undef GPU_THRESHOLD_EPS
+#undef GPU_CC_WEAK_CLASSIFIERS
+#undef GPU_CC_INTERNAL_NODES
+#undef GPU_CC_LEAF_VALUES
+
+Size cv::gpu::CascadeClassifier_GPU_LBP::getClassifierSize() const { throw_nogpu(); return Size(); }
+
+int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale( const GpuMat& , GpuMat& , double , int , Size) { throw_nogpu(); return 0; }
+
+// ============ old fashioned haar cascade ==============================================//
 struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
 {
     CascadeClassifierImpl(const string& filename) : lastAllocatedFrameSize(-1, -1)
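The string constants consumed by read() above correspond to the new-style cascade layout written by opencv_traincascade. A schematic fragment under that assumption, with illustrative values and elisions rather than data from a real cascade:

    <?xml version="1.0"?>
    <opencv_storage>
    <cascade type_id="opencv-cascade-classifier">
      <stageType>BOOST</stageType>
      <featureType>LBP</featureType>
      <height>24</height>
      <width>24</width>
      <stageParams>
        <maxDepth>1</maxDepth></stageParams>            <!-- maxDepth == 1 => stumps -->
      <featureParams>
        <maxCatCount>256</maxCatCount></featureParams>
      <stages>
        <_>
          <stageThreshold>-0.75</stageThreshold>
          <weakClassifiers>
            <_>
              <!-- 3 node ints plus (maxCatCount + 31)/32 subset words per internal node -->
              <internalNodes>0 -1 46 ...</internalNodes>
              <leafValues>-0.6 0.8</leafValues></_>
          </weakClassifiers></_>
      </stages>
    </cascade>
    </opencv_storage>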
@@ -357,6 +357,7 @@ namespace cv { namespace gpu { namespace device
 void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T,
                                  float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)
 {
+    (void)src_size;
     using namespace ::cv::gpu::device::imgproc;

     CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
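The added (void)src_size; (and the matching casts in the hunks below) is the usual idiom for a parameter deliberately kept in the signature but unused in the body, so unused-parameter warnings stay quiet. A minimal, self-contained illustration with hypothetical names:

    static int buildSomething(int src_size, int dst_size)
    {
        (void)src_size;   // silences -Wunused-parameter without changing the API
        return dst_size * 2;
    }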
@@ -390,6 +391,7 @@ namespace cv { namespace gpu { namespace device
 void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
                                        GpuMat& map_x, GpuMat& map_y, Stream& stream)
 {
+    (void)src_size;
     using namespace ::cv::gpu::device::imgproc;

     CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
@@ -422,6 +424,7 @@ namespace cv { namespace gpu { namespace device
 void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
                                      GpuMat& map_x, GpuMat& map_y, Stream& stream)
 {
+    (void)src_size;
     using namespace ::cv::gpu::device::imgproc;

     CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
@@ -466,6 +469,7 @@ namespace

     static void call(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream)
     {
+        (void)dsize;
         static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};

         NppStreamHandler h(stream);
@@ -1139,6 +1143,7 @@ namespace cv { namespace gpu { namespace device

 void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream)
 {
+    (void)flags;
     using namespace ::cv::gpu::device::imgproc;

     typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, DevMem2D_<cufftComplex>, cudaStream_t stream);
@@ -1169,6 +1174,7 @@ namespace cv { namespace gpu { namespace device

 void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream)
 {
+    (void)flags;
     using namespace ::cv::gpu::device::imgproc;

     typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, DevMem2D_<cufftComplex>, cudaStream_t stream);
@@ -927,7 +927,7 @@ Ncv32u getStageNumWithNotLessThanNclassifiers(Ncv32u N, HaarClassifierCascadeDes
 }


-NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
+NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &integral,
                                                NCVMatrix<Ncv32f> &d_weights,
                                                NCVMatrixAlloc<Ncv32u> &d_pixelMask,
                                                Ncv32u &numDetections,
@@ -945,32 +945,41 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
                                                cudaDeviceProp &devProp,
                                                cudaStream_t cuStream)
 {
-    ncvAssertReturn(d_integralImage.memType() == d_weights.memType() &&
-                    d_integralImage.memType() == d_pixelMask.memType() &&
-                    d_integralImage.memType() == gpuAllocator.memType() &&
-                    (d_integralImage.memType() == NCVMemoryTypeDevice ||
-                     d_integralImage.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);
+    ncvAssertReturn(integral.memType() == d_weights.memType()&&
+                    integral.memType() == d_pixelMask.memType() &&
+                    integral.memType() == gpuAllocator.memType() &&
+                    (integral.memType() == NCVMemoryTypeDevice ||
+                     integral.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);

     ncvAssertReturn(d_HaarStages.memType() == d_HaarNodes.memType() &&
                     d_HaarStages.memType() == d_HaarFeatures.memType() &&
                     (d_HaarStages.memType() == NCVMemoryTypeDevice ||
                      d_HaarStages.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);

     ncvAssertReturn(h_HaarStages.memType() != NCVMemoryTypeDevice, NCV_MEM_RESIDENCE_ERROR);

     ncvAssertReturn(gpuAllocator.isInitialized() && cpuAllocator.isInitialized(), NCV_ALLOCATOR_NOT_INITIALIZED);
-    ncvAssertReturn((d_integralImage.ptr() != NULL && d_weights.ptr() != NULL && d_pixelMask.ptr() != NULL &&
+
+    ncvAssertReturn((integral.ptr() != NULL && d_weights.ptr() != NULL && d_pixelMask.ptr() != NULL &&
                      h_HaarStages.ptr() != NULL && d_HaarStages.ptr() != NULL && d_HaarNodes.ptr() != NULL &&
                      d_HaarFeatures.ptr() != NULL) || gpuAllocator.isCounting(), NCV_NULL_PTR);
+
     ncvAssertReturn(anchorsRoi.width > 0 && anchorsRoi.height > 0 &&
                     d_pixelMask.width() >= anchorsRoi.width && d_pixelMask.height() >= anchorsRoi.height &&
                     d_weights.width() >= anchorsRoi.width && d_weights.height() >= anchorsRoi.height &&
-                    d_integralImage.width() >= anchorsRoi.width + haar.ClassifierSize.width &&
-                    d_integralImage.height() >= anchorsRoi.height + haar.ClassifierSize.height, NCV_DIMENSIONS_INVALID);
+                    integral.width() >= anchorsRoi.width + haar.ClassifierSize.width &&
+                    integral.height() >= anchorsRoi.height + haar.ClassifierSize.height, NCV_DIMENSIONS_INVALID);
+
     ncvAssertReturn(scaleArea > 0, NCV_INVALID_SCALE);
+
     ncvAssertReturn(d_HaarStages.length() >= haar.NumStages &&
                     d_HaarNodes.length() >= haar.NumClassifierTotalNodes &&
                     d_HaarFeatures.length() >= haar.NumFeatures &&
                     d_HaarStages.length() == h_HaarStages.length() &&
                     haar.NumClassifierRootNodes <= haar.NumClassifierTotalNodes, NCV_DIMENSIONS_INVALID);
+
     ncvAssertReturn(haar.bNeedsTiltedII == false || gpuAllocator.isCounting(), NCV_NOIMPL_HAAR_TILTED_FEATURES);
+
     ncvAssertReturn(pixelStep == 1 || pixelStep == 2, NCV_HAAR_INVALID_PIXEL_STEP);

     NCV_SET_SKIP_COND(gpuAllocator.isCounting());
@@ -979,7 +988,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag

     NCVStatus ncvStat;

-    NCVMatrixAlloc<Ncv32u> h_integralImage(cpuAllocator, d_integralImage.width, d_integralImage.height, d_integralImage.pitch);
+    NCVMatrixAlloc<Ncv32u> h_integralImage(cpuAllocator, integral.width, integral.height, integral.pitch);
     ncvAssertReturn(h_integralImage.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
     NCVMatrixAlloc<Ncv32f> h_weights(cpuAllocator, d_weights.width, d_weights.height, d_weights.pitch);
     ncvAssertReturn(h_weights.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
@@ -997,7 +1006,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag

     ncvStat = d_pixelMask.copySolid(h_pixelMask, 0);
     ncvAssertReturnNcvStat(ncvStat);
-    ncvStat = d_integralImage.copySolid(h_integralImage, 0);
+    ncvStat = integral.copySolid(h_integralImage, 0);
     ncvAssertReturnNcvStat(ncvStat);
     ncvStat = d_weights.copySolid(h_weights, 0);
     ncvAssertReturnNcvStat(ncvStat);
@@ -1071,8 +1080,8 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
         cfdTexIImage = cudaCreateChannelDesc<Ncv32u>();

         size_t alignmentOffset;
-        ncvAssertCUDAReturn(cudaBindTexture(&alignmentOffset, texIImage, d_integralImage.ptr(), cfdTexIImage,
-                                            (anchorsRoi.height + haar.ClassifierSize.height) * d_integralImage.pitch()), NCV_CUDA_ERROR);
+        ncvAssertCUDAReturn(cudaBindTexture(&alignmentOffset, texIImage, integral.ptr(), cfdTexIImage,
+                                            (anchorsRoi.height + haar.ClassifierSize.height) * integral.pitch()), NCV_CUDA_ERROR);
         ncvAssertReturn(alignmentOffset==0, NCV_TEXTURE_BIND_ERROR);
     }

@@ -1189,7 +1198,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
             grid1,
             block1,
             cuStream,
-            d_integralImage.ptr(), d_integralImage.stride(),
+            integral.ptr(), integral.stride(),
             d_weights.ptr(), d_weights.stride(),
             d_HaarFeatures.ptr(), d_HaarNodes.ptr(), d_HaarStages.ptr(),
             d_ptrNowData->ptr(),
@@ -1259,7 +1268,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
             grid2,
             block2,
             cuStream,
-            d_integralImage.ptr(), d_integralImage.stride(),
+            integral.ptr(), integral.stride(),
             d_weights.ptr(), d_weights.stride(),
             d_HaarFeatures.ptr(), d_HaarNodes.ptr(), d_HaarStages.ptr(),
             d_ptrNowData->ptr(),
@@ -1320,7 +1329,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
             grid3,
             block3,
             cuStream,
-            d_integralImage.ptr(), d_integralImage.stride(),
+            integral.ptr(), integral.stride(),
             d_weights.ptr(), d_weights.stride(),
             d_HaarFeatures.ptr(), d_HaarNodes.ptr(), d_HaarStages.ptr(),
             d_ptrNowData->ptr(),
@@ -1455,10 +1464,14 @@ NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
                                          cudaStream_t cuStream)
 {
     ncvAssertReturn(pixelMask.ptr() != NULL && hypotheses.ptr() != NULL, NCV_NULL_PTR);
+
     ncvAssertReturn(pixelMask.memType() == hypotheses.memType() &&
                     pixelMask.memType() == NCVMemoryTypeDevice, NCV_MEM_RESIDENCE_ERROR);
+
     ncvAssertReturn(rectWidth > 0 && rectHeight > 0 && curScale > 0, NCV_INVALID_ROI);
+
     ncvAssertReturn(curScale > 0, NCV_INVALID_SCALE);
+
     ncvAssertReturn(totalMaxDetections <= hypotheses.length() &&
                     numPixelMaskDetections <= pixelMask.length() &&
                     totalMaxDetections <= totalMaxDetections, NCV_INCONSISTENT_INPUT);
@@ -1527,12 +1540,16 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
                     d_srcImg.memType() == gpuAllocator.memType() &&
                     (d_srcImg.memType() == NCVMemoryTypeDevice ||
                      d_srcImg.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);
+
     ncvAssertReturn(d_HaarStages.memType() == d_HaarNodes.memType() &&
                     d_HaarStages.memType() == d_HaarFeatures.memType() &&
                     (d_HaarStages.memType() == NCVMemoryTypeDevice ||
                      d_HaarStages.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);
+
     ncvAssertReturn(h_HaarStages.memType() != NCVMemoryTypeDevice, NCV_MEM_RESIDENCE_ERROR);
+
     ncvAssertReturn(gpuAllocator.isInitialized() && cpuAllocator.isInitialized(), NCV_ALLOCATOR_NOT_INITIALIZED);
+
     ncvAssertReturn((d_srcImg.ptr() != NULL && d_dstRects.ptr() != NULL &&
                      h_HaarStages.ptr() != NULL && d_HaarStages.ptr() != NULL && d_HaarNodes.ptr() != NULL &&
                      d_HaarFeatures.ptr() != NULL) || gpuAllocator.isCounting(), NCV_NULL_PTR);
@@ -1540,13 +1557,17 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
                     d_srcImg.width() >= srcRoi.width && d_srcImg.height() >= srcRoi.height &&
                     srcRoi.width >= minObjSize.width && srcRoi.height >= minObjSize.height &&
                     d_dstRects.length() >= 1, NCV_DIMENSIONS_INVALID);
+
     ncvAssertReturn(scaleStep > 1.0f, NCV_INVALID_SCALE);
+
     ncvAssertReturn(d_HaarStages.length() >= haar.NumStages &&
                     d_HaarNodes.length() >= haar.NumClassifierTotalNodes &&
                     d_HaarFeatures.length() >= haar.NumFeatures &&
                     d_HaarStages.length() == h_HaarStages.length() &&
                     haar.NumClassifierRootNodes <= haar.NumClassifierTotalNodes, NCV_DIMENSIONS_INVALID);
+
     ncvAssertReturn(haar.bNeedsTiltedII == false, NCV_NOIMPL_HAAR_TILTED_FEATURES);
+
     ncvAssertReturn(pixelStep == 1 || pixelStep == 2, NCV_HAAR_INVALID_PIXEL_STEP);

     //TODO: set NPP active stream to cuStream
@@ -1557,8 +1578,8 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
     Ncv32u integralWidth = d_srcImg.width() + 1;
     Ncv32u integralHeight = d_srcImg.height() + 1;

-    NCVMatrixAlloc<Ncv32u> d_integralImage(gpuAllocator, integralWidth, integralHeight);
-    ncvAssertReturn(d_integralImage.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
+    NCVMatrixAlloc<Ncv32u> integral(gpuAllocator, integralWidth, integralHeight);
+    ncvAssertReturn(integral.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
     NCVMatrixAlloc<Ncv64u> d_sqIntegralImage(gpuAllocator, integralWidth, integralHeight);
     ncvAssertReturn(d_sqIntegralImage.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

@@ -1589,7 +1610,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
     NCV_SKIP_COND_BEGIN

     nppStat = nppiStIntegral_8u32u_C1R(d_srcImg.ptr(), d_srcImg.pitch(),
-                                       d_integralImage.ptr(), d_integralImage.pitch(),
+                                       integral.ptr(), integral.pitch(),
                                        NcvSize32u(d_srcImg.width(), d_srcImg.height()),
                                        d_tmpIIbuf.ptr(), szTmpBufIntegral, devProp);
     ncvAssertReturnNcvStat(nppStat);
@@ -1676,7 +1697,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
         NCV_SKIP_COND_BEGIN

         nppStat = nppiStDecimate_32u_C1R(
-            d_integralImage.ptr(), d_integralImage.pitch(),
+            integral.ptr(), integral.pitch(),
             d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),
             srcIIRoi, scale, true);
         ncvAssertReturnNcvStat(nppStat);
@@ -95,11 +95,6 @@ inline __device__ T warpScanInclusive(T idata, volatile T *s_Data)
     pos += K_WARP_SIZE;
     s_Data[pos] = idata;

-    //for(Ncv32u offset = 1; offset < K_WARP_SIZE; offset <<= 1)
-    //{
-    //    s_Data[pos] += s_Data[pos - offset];
-    //}
-
     s_Data[pos] += s_Data[pos - 1];
     s_Data[pos] += s_Data[pos - 2];
     s_Data[pos] += s_Data[pos - 4];
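The dead comment deleted here was the loop form of the unrolled statements that remain: for K_WARP_SIZE == 32, a Kogge-Stone inclusive scan needs exactly the offsets 1, 2, 4, 8, 16, so the two forms compute the same result. A sketch of that loop form, assuming (as the pos += K_WARP_SIZE offset above suggests) that the low K_WARP_SIZE slots of s_Data are zero padding:

    template <typename T>
    inline __device__ T warpScanInclusiveLoop(T idata, volatile T *s_Data, Ncv32u pos)
    {
        s_Data[pos] = idata;
        for (Ncv32u offset = 1; offset < K_WARP_SIZE; offset <<= 1)
            s_Data[pos] += s_Data[pos - offset];   // offsets 1, 2, 4, 8, 16
        return s_Data[pos];
    }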
@@ -1447,14 +1442,14 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
         //adjust hierarchical partial sums
         for (Ncv32s i=(Ncv32s)partSumNums.size()-3; i>=0; i--)
         {
-            dim3 grid(partSumNums[i+1]);
-            if (grid.x > 65535)
+            dim3 grid_local(partSumNums[i+1]);
+            if (grid_local.x > 65535)
             {
-                grid.y = (grid.x + 65534) / 65535;
-                grid.x = 65535;
+                grid_local.y = (grid_local.x + 65534) / 65535;
+                grid_local.x = 65535;
             }
             removePass2Adjust
-                <<<grid, block, 0, nppStGetActiveCUDAstream()>>>
+                <<<grid_local, block, 0, nppStGetActiveCUDAstream()>>>
                 (d_hierSums.ptr() + partSumOffsets[i], partSumNums[i],
                  d_hierSums.ptr() + partSumOffsets[i+1]);

@@ -1463,10 +1458,10 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
     }
     else
     {
-        dim3 grid(partSumNums[1]);
+        dim3 grid_local(partSumNums[1]);
         removePass1Scan
             <true, false>
-            <<<grid, block, 0, nppStGetActiveCUDAstream()>>>
+            <<<grid_local, block, 0, nppStGetActiveCUDAstream()>>>
             (d_src, srcLen,
              d_hierSums.ptr(),
              NULL, elemRemove);
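Beyond the grid-to-grid_local rename, the surrounding logic is the standard workaround for the 65535 limit on a single grid dimension on compute-capability 1.x/2.x devices: fold the excess block count into grid.y and recover a flat block index in the kernel. A sketch under the assumption that the kernels index this way (the kernel and names are hypothetical):

    #include <cuda_runtime.h>

    // device side: recover the flat block index from the 2-D launch
    __global__ void processBlocks(unsigned int numBlocks)
    {
        unsigned int block = blockIdx.y * gridDim.x + blockIdx.x;
        if (block >= numBlocks)
            return;                          // guard the ragged last row
        // ... per-block work ...
    }

    // host side: fold an oversized 1-D launch into two grid dimensions,
    // mirroring the arithmetic in the hunk above
    void launch(unsigned int numBlocks, dim3 block)
    {
        dim3 grid(numBlocks);
        if (grid.x > 65535)
        {
            grid.y = (grid.x + 65534) / 65535;   // ceil(numBlocks / 65535)
            grid.x = 65535;
        }
        processBlocks<<<grid, block>>>(numBlocks);
    }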
@@ -39,11 +39,14 @@
 //
 //M*/

+// this file does not contain any used code.
+
 #ifndef _ncv_color_conversion_hpp_
 #define _ncv_color_conversion_hpp_

 #include "NCVPixelOperations.hpp"

+#if 0
 enum NCVColorSpace
 {
     NCVColorSpaceGray,
@@ -71,8 +74,7 @@ static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
 }};

 template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout>
-static
-NCVStatus _ncvColorConv_host(const NCVMatrix<Tin> &h_imgIn,
+static NCVStatus _ncvColorConv_host(const NCVMatrix<Tin> &h_imgIn,
                              const NCVMatrix<Tout> &h_imgOut)
 {
     ncvAssertReturn(h_imgIn.size() == h_imgOut.size(), NCV_DIMENSIONS_INVALID);
@@ -92,5 +94,6 @@ NCVStatus _ncvColorConv_host(const NCVMatrix<Tin> &h_imgIn,
     NCV_SKIP_COND_END
     return NCV_SUCCESS;
 }
+#endif

 #endif //_ncv_color_conversion_hpp_
@@ -13,10 +13,10 @@
 #include "NCVHaarObjectDetection.hpp"


-TestHypothesesFilter::TestHypothesesFilter(std::string testName, NCVTestSourceProvider<Ncv32u> &src_,
+TestHypothesesFilter::TestHypothesesFilter(std::string testName_, NCVTestSourceProvider<Ncv32u> &src_,
                                            Ncv32u numDstRects_, Ncv32u minNeighbors_, Ncv32f eps_)
     :
-    NCVTestProvider(testName),
+    NCVTestProvider(testName_),
     src(src_),
     numDstRects(numDstRects_),
     minNeighbors(minNeighbors_),
@@ -15,10 +15,10 @@


 template <class T>
-TestResize<T>::TestResize(std::string testName, NCVTestSourceProvider<T> &src_,
+TestResize<T>::TestResize(std::string testName_, NCVTestSourceProvider<T> &src_,
                           Ncv32u width_, Ncv32u height_, Ncv32u scaleFactor_, NcvBool bTextureCache_)
     :
-    NCVTestProvider(testName),
+    NCVTestProvider(testName_),
     src(src_),
     width(width_),
     height(height_),
@@ -34,7 +34,7 @@ PERF_TEST_P(ImageName_MinSize, CascadeClassifierLBPFrontalFace,
     if (cc.empty())
         FAIL() << "Can't load cascade file";

-    Mat img=imread(getDataPath(filename), 0);
+    Mat img = imread(getDataPath(filename), 0);
     if (img.empty())
         FAIL() << "Can't load source image";
