Merge pull request #3600 from jet47:cuda-objdetect-module

Vadim Pisarevsky
2015-01-20 13:29:32 +00:00
23 changed files with 2809 additions and 2397 deletions

@@ -0,0 +1,9 @@
if(IOS OR (NOT HAVE_CUDA AND NOT BUILD_CUDA_STUBS))
ocv_module_disable(cudaobjdetect)
endif()
set(the_description "CUDA-accelerated Object Detection")
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)
ocv_define_module(cudaobjdetect opencv_objdetect opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)

@@ -0,0 +1,288 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDAOBJDETECT_HPP__
#define __OPENCV_CUDAOBJDETECT_HPP__
#ifndef __cplusplus
# error cudaobjdetect.hpp header must be compiled as C++
#endif
#include "opencv2/core/cuda.hpp"
/**
@addtogroup cuda
@{
@defgroup cudaobjdetect Object Detection
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cudaobjdetect
//! @{
//
// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector
//
/** @brief The class implements the Histogram of Oriented Gradients (@cite Dalal2005) object detector.
@note
- An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/cpp/peopledetect.cpp
- A CUDA example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/gpu/hog.cpp
- (Python) An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/python2/peopledetect.py
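
A minimal usage sketch (the image path and the default parameters below are illustrative only):
@code
cv::Mat frame = cv::imread("people.png", cv::IMREAD_GRAYSCALE); // CV_8UC1 input
cv::cuda::GpuMat d_frame(frame);
cv::Ptr<cv::cuda::HOG> hog = cv::cuda::HOG::create(); // default 64x128 detection window
hog->setSVMDetector(hog->getDefaultPeopleDetector());
std::vector<cv::Rect> people;
hog->detectMultiScale(d_frame, people);
@endcode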
*/
class CV_EXPORTS HOG : public Algorithm
{
public:
enum
{
DESCR_FORMAT_ROW_BY_ROW,
DESCR_FORMAT_COL_BY_COL
};
/** @brief Creates the HOG descriptor and detector.
@param win_size Detection window size. Align to block size and block stride.
@param block_size Block size in pixels. Align to cell size. Only (16,16) is supported for now.
@param block_stride Block stride. It must be a multiple of cell size.
@param cell_size Cell size. Only (8, 8) is supported for now.
@param nbins Number of bins. Only 9 bins per cell are supported for now.
*/
static Ptr<HOG> create(Size win_size = Size(64, 128),
Size block_size = Size(16, 16),
Size block_stride = Size(8, 8),
Size cell_size = Size(8, 8),
int nbins = 9);
//! Gaussian smoothing window parameter.
virtual void setWinSigma(double win_sigma) = 0;
virtual double getWinSigma() const = 0;
//! L2-Hys normalization method shrinkage.
virtual void setL2HysThreshold(double threshold_L2hys) = 0;
virtual double getL2HysThreshold() const = 0;
//! Flag to specify whether the gamma correction preprocessing is required or not.
virtual void setGammaCorrection(bool gamma_correction) = 0;
virtual bool getGammaCorrection() const = 0;
//! Maximum number of detection window increases.
virtual void setNumLevels(int nlevels) = 0;
virtual int getNumLevels() const = 0;
//! Threshold for the distance between features and SVM classifying plane.
//! Usually it is 0 and should be specified in the detector coefficients (as the last free
//! coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
//! manually here.
virtual void setHitThreshold(double hit_threshold) = 0;
virtual double getHitThreshold() const = 0;
//! Window stride. It must be a multiple of block stride.
virtual void setWinStride(Size win_stride) = 0;
virtual Size getWinStride() const = 0;
//! Coefficient of the detection window increase.
virtual void setScaleFactor(double scale0) = 0;
virtual double getScaleFactor() const = 0;
//! Coefficient to regulate the similarity threshold. When detected, some
//! objects can be covered by many rectangles. 0 means not to perform grouping.
//! See groupRectangles.
virtual void setGroupThreshold(int group_threshold) = 0;
virtual int getGroupThreshold() const = 0;
//! Descriptor storage format:
//! - **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
//! - **DESCR_FORMAT_COL_BY_COL** - Column-major order.
virtual void setDescriptorFormat(int descr_format) = 0;
virtual int getDescriptorFormat() const = 0;
/** @brief Returns the number of coefficients required for the classification.
*/
virtual size_t getDescriptorSize() const = 0;
/** @brief Returns the block histogram size.
*/
virtual size_t getBlockHistogramSize() const = 0;
/** @brief Sets coefficients for the linear SVM classifier.
*/
virtual void setSVMDetector(InputArray detector) = 0;
/** @brief Returns coefficients of the classifier trained for people detection.
*/
virtual Mat getDefaultPeopleDetector() const = 0;
/** @brief Performs object detection without a multi-scale window.
@param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
@param found_locations Left-top corner points of detected objects boundaries.
@param confidences Optional output array for confidences.
*/
virtual void detect(InputArray img,
std::vector<Point>& found_locations,
std::vector<double>* confidences = NULL) = 0;
/** @brief Performs object detection with a multi-scale window.
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
@param found_locations Detected objects boundaries.
@param confidences Optional output array for confidences.
*/
virtual void detectMultiScale(InputArray img,
std::vector<Rect>& found_locations,
std::vector<double>* confidences = NULL) = 0;
/** @brief Returns block descriptors computed for the whole image.
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
@param descriptors 2D array of descriptors.
@param stream CUDA stream.
*/
virtual void compute(InputArray img,
OutputArray descriptors,
Stream& stream = Stream::Null()) = 0;
};
//
// CascadeClassifier
//
/** @brief Cascade classifier class used for object detection. Supports HAAR and LBP cascades.
@note
- A cascade classifier example can be found at
opencv_source_code/samples/gpu/cascadeclassifier.cpp
- An NVIDIA API specific cascade classifier example can be found at
opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
*/
class CV_EXPORTS CascadeClassifier : public Algorithm
{
public:
/** @brief Loads the classifier from a file. The cascade type is detected automatically from the file.
@param filename Name of the file from which the classifier is loaded. Only the old HAAR classifier
(trained by the haar training application) and NVIDIA's nvbin format are supported for HAAR, and only
the new type of OpenCV XML cascade is supported for LBP.
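For example (the cascade file names below are illustrative):
@code
// new-style XML cascade with LBP features
Ptr<cuda::CascadeClassifier> lbp = cuda::CascadeClassifier::create("lbpcascade_frontalface.xml");
// old-style HAAR cascade (or an NVIDIA .nvbin file)
Ptr<cuda::CascadeClassifier> haar = cuda::CascadeClassifier::create("haarcascade_frontalface_alt.xml");
@endcode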
*/
static Ptr<CascadeClassifier> create(const String& filename);
/** @overload
*/
static Ptr<CascadeClassifier> create(const FileStorage& file);
//! Maximum possible object size. Objects larger than that are ignored. Used for
//! second signature and supported only for LBP cascades.
virtual void setMaxObjectSize(Size maxObjectSize) = 0;
virtual Size getMaxObjectSize() const = 0;
//! Minimum possible object size. Objects smaller than that are ignored.
virtual void setMinObjectSize(Size minSize) = 0;
virtual Size getMinObjectSize() const = 0;
//! Parameter specifying how much the image size is reduced at each image scale.
virtual void setScaleFactor(double scaleFactor) = 0;
virtual double getScaleFactor() const = 0;
//! Parameter specifying how many neighbors each candidate rectangle should have
//! to retain it.
virtual void setMinNeighbors(int minNeighbors) = 0;
virtual int getMinNeighbors() const = 0;
virtual void setFindLargestObject(bool findLargestObject) = 0;
virtual bool getFindLargestObject() = 0;
virtual void setMaxNumObjects(int maxNumObjects) = 0;
virtual int getMaxNumObjects() const = 0;
virtual Size getClassifierSize() const = 0;
/** @brief Detects objects of different sizes in the input image.
@param image Matrix of type CV_8U containing an image where objects should be detected.
@param objects Buffer to store detected objects (rectangles).
@param stream CUDA stream.
To get final array of detected objects use CascadeClassifier::convert method.
@code
Ptr<cuda::CascadeClassifier> cascade_gpu = cuda::CascadeClassifier::create(...);
Mat image_cpu = imread(...);
GpuMat image_gpu(image_cpu);
GpuMat objbuf;
cascade_gpu->detectMultiScale(image_gpu, objbuf);
std::vector<Rect> faces;
cascade_gpu->convert(objbuf, faces);
for(size_t i = 0; i < faces.size(); ++i)
cv::rectangle(image_cpu, faces[i], Scalar(255));
imshow("Faces", image_cpu);
@endcode
@sa CascadeClassifier::detectMultiScale
*/
virtual void detectMultiScale(InputArray image,
OutputArray objects,
Stream& stream = Stream::Null()) = 0;
/** @brief Converts objects array from internal representation to standard vector.
@param gpu_objects Objects array in internal representation.
@param objects Resulting array.
*/
virtual void convert(OutputArray gpu_objects,
std::vector<Rect>& objects) = 0;
};
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDAOBJDETECT_HPP__ */

@@ -0,0 +1,47 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
using namespace perf;
CV_PERF_TEST_CUDA_MAIN(cudaobjdetect)

@@ -0,0 +1,173 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
using namespace perf;
///////////////////////////////////////////////////////////////
// HOG
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, ObjDetect_HOG,
Values<string>("gpu/hog/road.png",
"gpu/caltech/image_00000009_0.png",
"gpu/caltech/image_00000032_0.png",
"gpu/caltech/image_00000165_0.png",
"gpu/caltech/image_00000261_0.png",
"gpu/caltech/image_00000469_0.png",
"gpu/caltech/image_00000527_0.png",
"gpu/caltech/image_00000574_0.png"))
{
declare.time(300.0);
const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_img(img);
std::vector<cv::Rect> gpu_found_locations;
cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
TEST_CYCLE() d_hog->detectMultiScale(d_img, gpu_found_locations);
SANITY_CHECK(gpu_found_locations);
}
else
{
std::vector<cv::Rect> cpu_found_locations;
cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
cv::HOGDescriptor hog;
hog.setSVMDetector(d_hog->getDefaultPeopleDetector());
TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
SANITY_CHECK(cpu_found_locations);
}
}
///////////////////////////////////////////////////////////////
// HaarClassifier
typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImageAndCascade, pair_string);
PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
{
const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_CUDA())
{
cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
const cv::cuda::GpuMat d_img(img);
cv::cuda::GpuMat objects_buffer;
TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
std::vector<cv::Rect> gpu_rects;
d_cascade->convert(objects_buffer, gpu_rects);
cv::groupRectangles(gpu_rects, 3, 0.2);
SANITY_CHECK(gpu_rects);
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
std::vector<cv::Rect> cpu_rects;
TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
SANITY_CHECK(cpu_rects);
}
}
///////////////////////////////////////////////////////////////
// LBP cascade
PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
{
const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_CUDA())
{
cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
const cv::cuda::GpuMat d_img(img);
cv::cuda::GpuMat objects_buffer;
TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
std::vector<cv::Rect> gpu_rects;
d_cascade->convert(objects_buffer, gpu_rects);
cv::groupRectangles(gpu_rects, 3, 0.2);
SANITY_CHECK(gpu_rects);
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
std::vector<cv::Rect> cpu_rects;
TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
SANITY_CHECK(cpu_rects);
}
}

@@ -0,0 +1,64 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include "opencv2/ts.hpp"
#include "opencv2/ts/cuda_perf.hpp"
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/objdetect.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif
#endif

@@ -0,0 +1,861 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "opencv2/objdetect/objdetect_c.h"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
#else
//
// CascadeClassifierBase
//
namespace
{
class CascadeClassifierBase : public cuda::CascadeClassifier
{
public:
CascadeClassifierBase();
virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
virtual Size getMaxObjectSize() const { return maxObjectSize_; }
virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
virtual Size getMinObjectSize() const { return minObjectSize_; }
virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
virtual double getScaleFactor() const { return scaleFactor_; }
virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
virtual int getMinNeighbors() const { return minNeighbors_; }
virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
virtual bool getFindLargestObject() { return findLargestObject_; }
virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
virtual int getMaxNumObjects() const { return maxNumObjects_; }
protected:
Size maxObjectSize_;
Size minObjectSize_;
double scaleFactor_;
int minNeighbors_;
bool findLargestObject_;
int maxNumObjects_;
};
CascadeClassifierBase::CascadeClassifierBase() :
maxObjectSize_(),
minObjectSize_(),
scaleFactor_(1.2),
minNeighbors_(4),
findLargestObject_(false),
maxNumObjects_(100)
{
}
}
//
// HaarCascade
//
#ifdef HAVE_OPENCV_CUDALEGACY
namespace
{
class HaarCascade_Impl : public CascadeClassifierBase
{
public:
explicit HaarCascade_Impl(const String& filename);
virtual Size getClassifierSize() const;
virtual void detectMultiScale(InputArray image,
OutputArray objects,
Stream& stream);
virtual void convert(OutputArray gpu_objects,
std::vector<Rect>& objects);
private:
NCVStatus load(const String& classifierFile);
NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);
Size lastAllocatedFrameSize;
Ptr<NCVMemStackAllocator> gpuAllocator;
Ptr<NCVMemStackAllocator> cpuAllocator;
cudaDeviceProp devProp;
NCVStatus ncvStat;
Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;
HaarClassifierCascadeDescriptor haar;
Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
};
static void NCVDebugOutputHandler(const String &msg)
{
CV_Error(Error::GpuApiCallError, msg.c_str());
}
HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
lastAllocatedFrameSize(-1, -1)
{
ncvSetDebugOutputHandler(NCVDebugOutputHandler);
ncvSafeCall( load(filename) );
}
Size HaarCascade_Impl::getClassifierSize() const
{
return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
}
void HaarCascade_Impl::detectMultiScale(InputArray _image,
OutputArray _objects,
Stream& stream)
{
const GpuMat image = _image.getGpuMat();
CV_Assert( image.depth() == CV_8U);
CV_Assert( scaleFactor_ > 1 );
CV_Assert( !stream );
Size ncvMinSize = getClassifierSize();
if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height)
{
ncvMinSize.width = minObjectSize_.width;
ncvMinSize.height = minObjectSize_.height;
}
BufferPool pool(stream);
GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);
unsigned int numDetections;
ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );
if (numDetections > 0)
{
objectsBuf.colRange(0, numDetections).copyTo(_objects);
}
else
{
_objects.release();
}
}
void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
{
if (_gpu_objects.empty())
{
objects.clear();
return;
}
Mat gpu_objects;
if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
{
_gpu_objects.getGpuMat().download(gpu_objects);
}
else
{
gpu_objects = _gpu_objects.getMat();
}
CV_Assert( gpu_objects.rows == 1 );
CV_Assert( gpu_objects.type() == DataType<Rect>::type );
Rect* ptr = gpu_objects.ptr<Rect>();
objects.assign(ptr, ptr + gpu_objects.cols);
}
NCVStatus HaarCascade_Impl::load(const String& classifierFile)
{
int devId = cv::cuda::getDevice();
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
// Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
gpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeDevice, static_cast<int>(devProp.textureAlignment));
cpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeHostPinned, static_cast<int>(devProp.textureAlignment));
ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);
Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR);
h_haarStages.reset (new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages));
h_haarNodes.reset (new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes));
h_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures));
ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(h_haarFeatures->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures);
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", NCV_FILE_ERROR);
d_haarStages.reset (new NCVVectorAlloc<HaarStage64>(*gpuCascadeAllocator, haarNumStages));
d_haarNodes.reset (new NCVVectorAlloc<HaarClassifierNode128>(*gpuCascadeAllocator, haarNumNodes));
d_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures));
ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(d_haarFeatures->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
ncvStat = h_haarStages->copySolid(*d_haarStages, 0);
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
ncvStat = h_haarNodes->copySolid(*d_haarNodes, 0);
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
ncvStat = h_haarFeatures->copySolid(*d_haarFeatures, 0);
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
return NCV_SUCCESS;
}
NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
{
if (lastAllocatedFrameSize == frameSize)
{
return NCV_SUCCESS;
}
// Calculate memory requirements and create real allocators
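// (The stack allocators below are constructed without a backing buffer, so they only
// count how much device / pinned-host memory a dry run of the detection pipeline would
// request for this frame size; the real allocators are then created with exactly that
// capacity via gpuCounter.maxSize() / cpuCounter.maxSize().)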
NCVMemStackAllocator gpuCounter(static_cast<int>(devProp.textureAlignment));
NCVMemStackAllocator cpuCounter(static_cast<int>(devProp.textureAlignment));
ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR);
NCVMatrixAlloc<Ncv8u> d_src(gpuCounter, frameSize.width, frameSize.height);
NCVMatrixAlloc<Ncv8u> h_src(cpuCounter, frameSize.width, frameSize.height);
ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NCVVectorAlloc<NcvRect32u> d_rects(gpuCounter, 100);
ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NcvSize32u roi;
roi.width = d_src.width();
roi.height = d_src.height();
Ncv32u numDetections;
ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
*d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
gpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
cpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);
lastAllocatedFrameSize = frameSize;
return NCV_SUCCESS;
}
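// process() wraps the caller's GpuMat buffers into NCV matrix/vector views (reusing the
// existing device memory, no extra copies) and runs ncvDetectObjectsMultiScale_device
// on them with the configured parameters.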
NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
{
calculateMemReqsAndAllocate(src.size());
NCVMemPtr src_beg;
src_beg.ptr = (void*)src.ptr<Ncv8u>();
src_beg.memtype = NCVMemoryTypeDevice;
NCVMemSegment src_seg;
src_seg.begin = src_beg;
src_seg.size = src.step * src.rows;
NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
CV_Assert(objects.rows == 1);
NCVMemPtr objects_beg;
objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
objects_beg.memtype = NCVMemoryTypeDevice;
NCVMemSegment objects_seg;
objects_seg.begin = objects_beg;
objects_seg.size = objects.step * objects.rows;
NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
NcvSize32u roi;
roi.width = d_src.width();
roi.height = d_src.height();
NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
Ncv32u flags = 0;
flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;
ncvStat = ncvDetectObjectsMultiScale_device(
d_src, roi, d_rects, numDetections, haar, *h_haarStages,
*d_haarStages, *d_haarNodes, *d_haarFeatures,
winMinSize,
minNeighbors_,
scaleFactor_, 1,
flags,
*gpuAllocator, *cpuAllocator, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
return NCV_SUCCESS;
}
}
#endif
//
// LbpCascade
//
namespace cv { namespace cuda { namespace device
{
namespace lbp
{
void classifyPyramid(int frameW,
int frameH,
int windowW,
int windowH,
float initalScale,
float factor,
int total,
const PtrStepSzb& mstages,
const int nstages,
const PtrStepSzi& mnodes,
const PtrStepSzf& mleaves,
const PtrStepSzi& msubsets,
const PtrStepSzb& mfeatures,
const int subsetSize,
PtrStepSz<int4> objects,
unsigned int* classified,
PtrStepSzi integral);
void connectedConmonents(PtrStepSz<int4> candidates,
int ncandidates,
PtrStepSz<int4> objects,
int groupThreshold,
float grouping_eps,
unsigned int* nclasses);
}
}}}
namespace
{
cv::Size operator -(const cv::Size& a, const cv::Size& b)
{
return cv::Size(a.width - b.width, a.height - b.height);
}
cv::Size operator +(const cv::Size& a, const int& i)
{
return cv::Size(a.width + i, a.height + i);
}
cv::Size operator *(const cv::Size& a, const float& f)
{
return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
}
cv::Size operator /(const cv::Size& a, const float& f)
{
return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
}
bool operator <=(const cv::Size& a, const cv::Size& b)
{
return a.width <= b.width && a.height <= b.height;
}
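// One level of the scale pyramid used by the LBP cascade:
//   order    - level index; scale == scaleFactor ^ order
//   sFrame   - frame size downscaled by 'scale'
//   workArea - number of detection window positions available at this level
//   sWindow  - the detection window mapped back to original image coordinates
// The constructor skips levels whose window (in original coordinates) does not yet
// exceed minObjectSize.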
struct PyrLavel
{
PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
{
do
{
order = _order;
scale = pow(_scale, order);
sFrame = frame / scale;
workArea = sFrame - window + 1;
sWindow = window * scale;
_order++;
} while (sWindow <= minObjectSize);
}
bool isFeasible(cv::Size maxObj)
{
return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
}
PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
{
return PyrLavel(order + 1, factor, frame, window, minObjectSize);
}
int order;
float scale;
cv::Size sFrame;
cv::Size workArea;
cv::Size sWindow;
};
class LbpCascade_Impl : public CascadeClassifierBase
{
public:
explicit LbpCascade_Impl(const FileStorage& file);
virtual Size getClassifierSize() const { return NxM; }
virtual void detectMultiScale(InputArray image,
OutputArray objects,
Stream& stream);
virtual void convert(OutputArray gpu_objects,
std::vector<Rect>& objects);
private:
bool load(const FileNode &root);
void allocateBuffers(cv::Size frame);
private:
struct Stage
{
int first;
int ntrees;
float threshold;
};
enum stage { BOOST = 0 };
enum feature { LBP = 1, HAAR = 2 };
static const stage stageType = BOOST;
static const feature featureType = LBP;
cv::Size NxM;
bool isStumps;
int ncategories;
int subsetSize;
int nodeStep;
// gpu representation of classifier
GpuMat stage_mat;
GpuMat trees_mat;
GpuMat nodes_mat;
GpuMat leaves_mat;
GpuMat subsets_mat;
GpuMat features_mat;
GpuMat integral;
GpuMat integralBuffer;
GpuMat resuzeBuffer;
GpuMat candidates;
static const int integralFactor = 4;
};
LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file)
{
load(file.getFirstTopLevelNode());
}
void LbpCascade_Impl::detectMultiScale(InputArray _image,
OutputArray _objects,
Stream& stream)
{
const GpuMat image = _image.getGpuMat();
CV_Assert( image.depth() == CV_8U);
CV_Assert( scaleFactor_ > 1 );
CV_Assert( !stream );
const float grouping_eps = 0.2f;
BufferPool pool(stream);
GpuMat objects = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);
// used for debug
// candidates.setTo(cv::Scalar::all(0));
// objects.setTo(cv::Scalar::all(0));
if (maxObjectSize_ == cv::Size())
maxObjectSize_ = image.size();
allocateBuffers(image.size());
unsigned int classified = 0;
GpuMat dclassified(1, 1, CV_32S);
cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
PyrLavel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);
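// Integral images of several consecutive pyramid levels are packed side by side into
// the single wide 'integral' buffer (it is integralFactor frames wide). The inner loop
// below fills that buffer scale by scale; classifyPyramid then evaluates all of the
// packed scales in a single kernel launch.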
while (level.isFeasible(maxObjectSize_))
{
int acc = level.sFrame.width + 1;
float iniScale = level.scale;
cv::Size area = level.workArea;
int step = 1 + (level.scale <= 2.f);
int total = 0, prev = 0;
while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_))
{
// create suitable matrix headers
GpuMat src = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));
// generate integral for scale
cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
cuda::integral(src, sint);
// calculate job
int totalWidth = level.workArea.width / step;
total += totalWidth * (level.workArea.height / step);
// go to the next pyramid level
level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);
area = level.workArea;
step = (1 + (level.scale <= 2.f));
prev = acc;
acc += level.sFrame.width + 1;
}
device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
}
if (minNeighbors_ <= 0 || objects.empty())
return;
cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());
cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaDeviceSynchronize() );
if (classified > 0)
{
objects.colRange(0, classified).copyTo(_objects);
}
else
{
_objects.release();
}
}
void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
{
if (_gpu_objects.empty())
{
objects.clear();
return;
}
Mat gpu_objects;
if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
{
_gpu_objects.getGpuMat().download(gpu_objects);
}
else
{
gpu_objects = _gpu_objects.getMat();
}
CV_Assert( gpu_objects.rows == 1 );
CV_Assert( gpu_objects.type() == DataType<Rect>::type );
Rect* ptr = gpu_objects.ptr<Rect>();
objects.assign(ptr, ptr + gpu_objects.cols);
}
bool LbpCascade_Impl::load(const FileNode &root)
{
const char *CUDA_CC_STAGE_TYPE = "stageType";
const char *CUDA_CC_FEATURE_TYPE = "featureType";
const char *CUDA_CC_BOOST = "BOOST";
const char *CUDA_CC_LBP = "LBP";
const char *CUDA_CC_MAX_CAT_COUNT = "maxCatCount";
const char *CUDA_CC_HEIGHT = "height";
const char *CUDA_CC_WIDTH = "width";
const char *CUDA_CC_STAGE_PARAMS = "stageParams";
const char *CUDA_CC_MAX_DEPTH = "maxDepth";
const char *CUDA_CC_FEATURE_PARAMS = "featureParams";
const char *CUDA_CC_STAGES = "stages";
const char *CUDA_CC_STAGE_THRESHOLD = "stageThreshold";
const float CUDA_THRESHOLD_EPS = 1e-5f;
const char *CUDA_CC_WEAK_CLASSIFIERS = "weakClassifiers";
const char *CUDA_CC_INTERNAL_NODES = "internalNodes";
const char *CUDA_CC_LEAF_VALUES = "leafValues";
const char *CUDA_CC_FEATURES = "features";
const char *CUDA_CC_RECT = "rect";
String stageTypeStr = (String)root[CUDA_CC_STAGE_TYPE];
CV_Assert(stageTypeStr == CUDA_CC_BOOST);
String featureTypeStr = (String)root[CUDA_CC_FEATURE_TYPE];
CV_Assert(featureTypeStr == CUDA_CC_LBP);
NxM.width = (int)root[CUDA_CC_WIDTH];
NxM.height = (int)root[CUDA_CC_HEIGHT];
CV_Assert( NxM.height > 0 && NxM.width > 0 );
isStumps = ((int)(root[CUDA_CC_STAGE_PARAMS][CUDA_CC_MAX_DEPTH]) == 1) ? true : false;
CV_Assert(isStumps);
FileNode fn = root[CUDA_CC_FEATURE_PARAMS];
if (fn.empty())
return false;
ncategories = fn[CUDA_CC_MAX_CAT_COUNT];
subsetSize = (ncategories + 31) / 32;
nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 );
fn = root[CUDA_CC_STAGES];
if (fn.empty())
return false;
std::vector<Stage> stages;
stages.reserve(fn.size());
std::vector<int> cl_trees;
std::vector<int> cl_nodes;
std::vector<float> cl_leaves;
std::vector<int> subsets;
FileNodeIterator it = fn.begin(), it_end = fn.end();
for (size_t si = 0; it != it_end; si++, ++it )
{
FileNode fns = *it;
Stage st;
st.threshold = (float)fns[CUDA_CC_STAGE_THRESHOLD] - CUDA_THRESHOLD_EPS;
fns = fns[CUDA_CC_WEAK_CLASSIFIERS];
if (fns.empty())
return false;
st.ntrees = (int)fns.size();
st.first = (int)cl_trees.size();
stages.push_back(st);// (int, int, float)
cl_trees.reserve(stages[si].first + stages[si].ntrees);
// weak trees
FileNodeIterator it1 = fns.begin(), it1_end = fns.end();
for ( ; it1 != it1_end; ++it1 )
{
FileNode fnw = *it1;
FileNode internalNodes = fnw[CUDA_CC_INTERNAL_NODES];
FileNode leafValues = fnw[CUDA_CC_LEAF_VALUES];
if ( internalNodes.empty() || leafValues.empty() )
return false;
int nodeCount = (int)internalNodes.size()/nodeStep;
cl_trees.push_back(nodeCount);
cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3);
cl_leaves.reserve(cl_leaves.size() + leafValues.size());
if( subsetSize > 0 )
subsets.reserve(subsets.size() + nodeCount * subsetSize);
// nodes
FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end();
for( ; iIt != iEnd; )
{
cl_nodes.push_back((int)*(iIt++));
cl_nodes.push_back((int)*(iIt++));
cl_nodes.push_back((int)*(iIt++));
if( subsetSize > 0 )
for( int j = 0; j < subsetSize; j++, ++iIt )
subsets.push_back((int)*iIt);
}
// leaves
iIt = leafValues.begin(), iEnd = leafValues.end();
for( ; iIt != iEnd; ++iIt )
cl_leaves.push_back((float)*iIt);
}
}
fn = root[CUDA_CC_FEATURES];
if( fn.empty() )
return false;
std::vector<uchar> features;
features.reserve(fn.size() * 4);
FileNodeIterator f_it = fn.begin(), f_end = fn.end();
for (; f_it != f_end; ++f_it)
{
FileNode rect = (*f_it)[CUDA_CC_RECT];
FileNodeIterator r_it = rect.begin();
features.push_back(saturate_cast<uchar>((int)*(r_it++)));
features.push_back(saturate_cast<uchar>((int)*(r_it++)));
features.push_back(saturate_cast<uchar>((int)*(r_it++)));
features.push_back(saturate_cast<uchar>((int)*(r_it++)));
}
// copy data structures to the GPU
stage_mat.upload(cv::Mat(1, (int) (stages.size() * sizeof(Stage)), CV_8UC1, (uchar*)&(stages[0]) ));
trees_mat.upload(cv::Mat(cl_trees).reshape(1,1));
nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1));
leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1));
subsets_mat.upload(cv::Mat(subsets).reshape(1,1));
features_mat.upload(cv::Mat(features).reshape(4,1));
return true;
}
void LbpCascade_Impl::allocateBuffers(cv::Size frame)
{
if (frame == cv::Size())
return;
if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
{
resuzeBuffer.create(frame, CV_8UC1);
integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
#ifdef HAVE_OPENCV_CUDALEGACY
NcvSize32u roiSize;
roiSize.width = frame.width;
roiSize.height = frame.height;
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
Ncv32u bufSize;
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
integralBuffer.create(1, bufSize, CV_8UC1);
#endif
candidates.create(1 , frame.width >> 1, CV_32SC4);
}
}
}
//
// create
//
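// Dispatch on the input file: ".nvbin" files and cascades that cannot be opened as
// new-style XML/YAML storages go to the NCV-based HaarCascade_Impl (available only
// when opencv_cudalegacy is built); new-style cascades with featureType "LBP" go to
// LbpCascade_Impl; anything else is treated as an old HAAR cascade.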
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
{
String fext = filename.substr(filename.find_last_of(".") + 1);
fext = fext.toLowerCase();
if (fext == "nvbin")
{
#ifndef HAVE_OPENCV_CUDALEGACY
CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
return Ptr<cuda::CascadeClassifier>();
#else
return makePtr<HaarCascade_Impl>(filename);
#endif
}
FileStorage fs(filename, FileStorage::READ);
if (!fs.isOpened())
{
#ifndef HAVE_OPENCV_CUDALEGACY
CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
return Ptr<cuda::CascadeClassifier>();
#else
return makePtr<HaarCascade_Impl>(filename);
#endif
}
const char *CUDA_CC_LBP = "LBP";
String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
if (featureTypeStr == CUDA_CC_LBP)
{
return makePtr<LbpCascade_Impl>(fs);
}
else
{
#ifndef HAVE_OPENCV_CUDALEGACY
CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
return Ptr<cuda::CascadeClassifier>();
#else
return makePtr<HaarCascade_Impl>(filename);
#endif
}
CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier");
return Ptr<cuda::CascadeClassifier>();
}
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
{
return makePtr<LbpCascade_Impl>(file);
}
#endif

@@ -0,0 +1,814 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/warp_shuffle.hpp"
namespace cv { namespace cuda { namespace device
{
// Other values are not supported
#define CELL_WIDTH 8
#define CELL_HEIGHT 8
#define CELLS_PER_BLOCK_X 2
#define CELLS_PER_BLOCK_Y 2
namespace hog
{
__constant__ int cnbins;
__constant__ int cblock_stride_x;
__constant__ int cblock_stride_y;
__constant__ int cnblocks_win_x;
__constant__ int cnblocks_win_y;
__constant__ int cblock_hist_size;
__constant__ int cblock_hist_size_2up;
__constant__ int cdescr_size;
__constant__ int cdescr_width;
/* Returns the nearest upper power of two, works only for
the typical GPU thread count (per block) values */
int power_2up(unsigned int n)
{
if (n < 1) return 1;
else if (n < 2) return 2;
else if (n < 4) return 4;
else if (n < 8) return 8;
else if (n < 16) return 16;
else if (n < 32) return 32;
else if (n < 64) return 64;
else if (n < 128) return 128;
else if (n < 256) return 256;
else if (n < 512) return 512;
else if (n < 1024) return 1024;
return -1; // Input is too big
}
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y)
{
cudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x)) );
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y)) );
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size, &block_hist_size, sizeof(block_hist_size)) );
int block_hist_size_2up = power_2up(block_hist_size);
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size_2up, &block_hist_size_2up, sizeof(block_hist_size_2up)) );
int descr_width = nblocks_win_x * block_hist_size;
cudaSafeCall( cudaMemcpyToSymbol(cdescr_width, &descr_width, sizeof(descr_width)) );
int descr_size = descr_width * nblocks_win_y;
cudaSafeCall( cudaMemcpyToSymbol(cdescr_size, &descr_size, sizeof(descr_size)) );
}
//----------------------------------------------------------------------------
// Histogram computation
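// compute_hists_kernel_many_blocks: each CUDA block processes 'nblocks' HOG blocks,
// a HOG block being CELLS_PER_BLOCK_X x CELLS_PER_BLOCK_Y cells of 8x8 pixels.
// Every pixel votes into the two orientation bins given by 'qangle' with the gradient
// magnitudes from 'grad', weighted by a Gaussian window and bilinear interpolation;
// the per-cell partial histograms kept in shared memory are then reduced into the
// final per-block histogram stored in 'block_hists'.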
template <int nblocks> // Number of histogram blocks processed by single GPU thread block
__global__ void compute_hists_kernel_many_blocks(const int img_block_width, const PtrStepf grad,
const PtrStepb qangle, float scale, float* block_hists)
{
const int block_x = threadIdx.z;
const int cell_x = threadIdx.x / 16;
const int cell_y = threadIdx.y;
const int cell_thread_x = threadIdx.x & 0xF;
if (blockIdx.x * blockDim.z + block_x >= img_block_width)
return;
extern __shared__ float smem[];
float* hists = smem;
float* final_hist = smem + cnbins * 48 * nblocks;
const int offset_x = (blockIdx.x * blockDim.z + block_x) * cblock_stride_x +
4 * cell_x + cell_thread_x;
const int offset_y = blockIdx.y * cblock_stride_y + 4 * cell_y;
const float* grad_ptr = grad.ptr(offset_y) + offset_x * 2;
const unsigned char* qangle_ptr = qangle.ptr(offset_y) + offset_x * 2;
// 12 means that 12 pixels affect a block's cell (in one row)
if (cell_thread_x < 12)
{
float* hist = hists + 12 * (cell_y * blockDim.z * CELLS_PER_BLOCK_Y +
cell_x + block_x * CELLS_PER_BLOCK_X) +
cell_thread_x;
for (int bin_id = 0; bin_id < cnbins; ++bin_id)
hist[bin_id * 48 * nblocks] = 0.f;
const int dist_x = -4 + (int)cell_thread_x - 4 * cell_x;
const int dist_y_begin = -4 - 4 * (int)threadIdx.y;
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
{
float2 vote = *(const float2*)grad_ptr;
uchar2 bin = *(const uchar2*)qangle_ptr;
grad_ptr += grad.step/sizeof(float);
qangle_ptr += qangle.step;
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
float gaussian = ::expf(-(dist_center_y * dist_center_y +
dist_center_x * dist_center_x) * scale);
float interp_weight = (8.f - ::fabs(dist_y + 0.5f)) *
(8.f - ::fabs(dist_x + 0.5f)) / 64.f;
hist[bin.x * 48 * nblocks] += gaussian * interp_weight * vote.x;
hist[bin.y * 48 * nblocks] += gaussian * interp_weight * vote.y;
}
volatile float* hist_ = hist;
for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48 * nblocks)
{
if (cell_thread_x < 6) hist_[0] += hist_[6];
if (cell_thread_x < 3) hist_[0] += hist_[3];
if (cell_thread_x == 0)
final_hist[((cell_x + block_x * 2) * 2 + cell_y) * cnbins + bin_id]
= hist_[0] + hist_[1] + hist_[2];
}
}
__syncthreads();
float* block_hist = block_hists + (blockIdx.y * img_block_width +
blockIdx.x * blockDim.z + block_x) *
cblock_hist_size;
int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 16 + cell_thread_x;
if (tid < cblock_hist_size)
block_hist[tid] = final_hist[block_x * cblock_hist_size + tid];
}
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, const PtrStepSzf& grad,
const PtrStepSzb& qangle, float sigma, float* block_hists)
{
const int nblocks = 1;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) /
block_stride_y;
dim3 grid(divUp(img_block_width, nblocks), img_block_height);
dim3 threads(32, 2, nblocks);
cudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>,
cudaFuncCachePreferL1));
// Precompute gaussian spatial window parameter
float scale = 1.f / (2.f * sigma * sigma);
int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12 * nblocks) * sizeof(float);
int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * nblocks) * sizeof(float);
int smem = hists_size + final_hists_size;
compute_hists_kernel_many_blocks<nblocks><<<grid, threads, smem>>>(
img_block_width, grad, qangle, scale, block_hists);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
//-------------------------------------------------------------
// Normalization of histograms via L2Hys_norm
//
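// L2-Hys: each block histogram is L2-normalized (with a small bias term), its entries
// are clipped at 'threshold', and the clipped vector is L2-normalized again;
// reduce_smem() provides the sums of squares needed for both normalization passes.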
template<int size>
__device__ float reduce_smem(float* smem, float val)
{
unsigned int tid = threadIdx.x;
float sum = val;
reduce<size>(smem, sum, tid, plus<float>());
if (size == 32)
{
#if __CUDA_ARCH__ >= 300
return shfl(sum, 0);
#else
return smem[0];
#endif
}
else
{
#if __CUDA_ARCH__ >= 300
if (threadIdx.x == 0)
smem[0] = sum;
#endif
__syncthreads();
return smem[0];
}
}
template <int nthreads, // Number of threads which process one block histogram
int nblocks> // Number of block histograms processed by one GPU thread block
__global__ void normalize_hists_kernel_many_blocks(const int block_hist_size,
const int img_block_width,
float* block_hists, float threshold)
{
if (blockIdx.x * blockDim.z + threadIdx.z >= img_block_width)
return;
float* hist = block_hists + (blockIdx.y * img_block_width +
blockIdx.x * blockDim.z + threadIdx.z) *
block_hist_size + threadIdx.x;
__shared__ float sh_squares[nthreads * nblocks];
float* squares = sh_squares + threadIdx.z * nthreads;
float elem = 0.f;
if (threadIdx.x < block_hist_size)
elem = hist[0];
float sum = reduce_smem<nthreads>(squares, elem * elem);
float scale = 1.0f / (::sqrtf(sum) + 0.1f * block_hist_size);
elem = ::min(elem * scale, threshold);
sum = reduce_smem<nthreads>(squares, elem * elem);
scale = 1.0f / (::sqrtf(sum) + 1e-3f);
if (threadIdx.x < block_hist_size)
hist[0] = elem * scale;
}
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, float* block_hists, float threshold)
{
const int nblocks = 1;
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
int nthreads = power_2up(block_hist_size);
dim3 threads(nthreads, 1, nblocks);
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
dim3 grid(divUp(img_block_width, nblocks), img_block_height);
if (nthreads == 32)
normalize_hists_kernel_many_blocks<32, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
else if (nthreads == 64)
normalize_hists_kernel_many_blocks<64, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
else if (nthreads == 128)
normalize_hists_kernel_many_blocks<128, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
else if (nthreads == 256)
normalize_hists_kernel_many_blocks<256, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
else if (nthreads == 512)
normalize_hists_kernel_many_blocks<512, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
else
CV_Error(cv::Error::StsBadArg, "normalize_hists: histogram's size is too big, try to decrease number of bins");
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
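// The kernel above is an L2-Hys normalization; roughly, for each block histogram v:
//   v <- v / (sqrt(sum(v^2)) + 0.1 * block_hist_size)   // initial L2 normalization
//   v <- min(v, threshold)                              // clip large bins
//   v <- v / (sqrt(sum(v^2)) + 1e-3)                    // renormalize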
//---------------------------------------------------------------------
// Linear SVM based classification
//
// return confidence values, not just positive locations
template <int nthreads, // Number of threads per one histogram block
int nblocks> // Number of histogram blocks processed by a single GPU thread block
__global__ void compute_confidence_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists, const float* coefs,
float free_coef, float threshold, float* confidences)
{
const int win_x = threadIdx.z;
if (blockIdx.x * blockDim.z + win_x >= img_win_width)
return;
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
blockIdx.x * win_block_stride_x * blockDim.z + win_x) *
cblock_hist_size;
float product = 0.f;
for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
{
int offset_y = i / cdescr_width;
int offset_x = i - offset_y * cdescr_width;
product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
}
__shared__ float products[nthreads * nblocks];
const int tid = threadIdx.z * nthreads + threadIdx.x;
reduce<nthreads>(products, product, tid, plus<float>());
if (threadIdx.x == 0)
confidences[blockIdx.y * img_win_width + blockIdx.x * blockDim.z + win_x] = product + free_coef;
}
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
float* coefs, float free_coef, float threshold, float *confidences)
{
const int nthreads = 256;
const int nblocks = 1;
int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
dim3 threads(nthreads, 1, nblocks);
dim3 grid(divUp(img_win_width, nblocks), img_win_height);
cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
cudaFuncCachePreferL1));
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
block_hists, coefs, free_coef, threshold, confidences);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int nthreads, // Number of threads per one histogram block
int nblocks> // Number of histogram blocks processed by a single GPU thread block
__global__ void classify_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists, const float* coefs,
float free_coef, float threshold, unsigned char* labels)
{
const int win_x = threadIdx.z;
if (blockIdx.x * blockDim.z + win_x >= img_win_width)
return;
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
blockIdx.x * win_block_stride_x * blockDim.z + win_x) *
cblock_hist_size;
float product = 0.f;
for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
{
int offset_y = i / cdescr_width;
int offset_x = i - offset_y * cdescr_width;
product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
}
__shared__ float products[nthreads * nblocks];
const int tid = threadIdx.z * nthreads + threadIdx.x;
reduce<nthreads>(products, product, tid, plus<float>());
if (threadIdx.x == 0)
labels[blockIdx.y * img_win_width + blockIdx.x * blockDim.z + win_x] = (product + free_coef >= threshold);
}
void classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
float* coefs, float free_coef, float threshold, unsigned char* labels)
{
const int nthreads = 256;
const int nblocks = 1;
int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
dim3 threads(nthreads, 1, nblocks);
dim3 grid(divUp(img_win_width, nblocks), img_win_height);
cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
block_hists, coefs, free_coef, threshold, labels);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
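// Both kernels above evaluate the same linear SVM over a window descriptor:
//   score = sum_i coefs[i] * hist[i] + free_coef
// compute_confidence_hists stores the raw score per window, while classify_hists
// stores only the binary label (score >= threshold).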
//----------------------------------------------------------------------------
// Extract descriptors
template <int nthreads>
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists, PtrStepf descriptors)
{
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
blockIdx.x * win_block_stride_x) * cblock_hist_size;
// Get left top corner of the window in dst
float* descriptor = descriptors.ptr(blockIdx.y * gridDim.x + blockIdx.x);
// Copy elements from src to dst
for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
{
int offset_y = i / cdescr_width;
int offset_x = i - offset_y * cdescr_width;
descriptor[i] = hist[offset_y * img_block_width * cblock_hist_size + offset_x];
}
}
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
int height, int width, float* block_hists, PtrStepSzf descriptors)
{
const int nthreads = 256;
int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
dim3 threads(nthreads, 1);
dim3 grid(img_win_width, img_win_height);
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
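// Row-by-row layout sketch: element i of a window descriptor is read from block
// row i / cdescr_width of the window (cdescr_width presumably being
// blocks-per-row * cblock_hist_size), so block histograms appear left-to-right,
// top-to-bottom within the window.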
template <int nthreads>
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
const int win_block_stride_y, const float* block_hists,
PtrStepf descriptors)
{
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
blockIdx.x * win_block_stride_x) * cblock_hist_size;
// Get left top corner of the window in dst
float* descriptor = descriptors.ptr(blockIdx.y * gridDim.x + blockIdx.x);
// Copy elements from src to dst
for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
{
int block_idx = i / cblock_hist_size;
int idx_in_block = i - block_idx * cblock_hist_size;
int y = block_idx / cnblocks_win_x;
int x = block_idx - y * cnblocks_win_x;
descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block]
= hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block];
}
}
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
PtrStepSzf descriptors)
{
const int nthreads = 256;
int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
dim3 threads(nthreads, 1);
dim3 grid(img_win_width, img_win_height);
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
extract_descrs_by_cols_kernel<nthreads><<<grid, threads>>>(
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
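// Column-by-column layout sketch: block (x, y) of the window is written at offset
// (x * cnblocks_win_y + y) * cblock_hist_size, i.e. block histograms appear
// top-to-bottom, left-to-right; this is the re-indexing the GetDescriptors test
// checks against the row-by-row format.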
//----------------------------------------------------------------------------
// Gradients computation
template <int nthreads, int correct_gamma>
__global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrStepb img,
float angle_scale, PtrStepf grad, PtrStepb qangle)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const uchar4* row = (const uchar4*)img.ptr(blockIdx.y);
__shared__ float sh_row[(nthreads + 2) * 3];
uchar4 val;
if (x < width)
val = row[x];
else
val = row[width - 2];
sh_row[threadIdx.x + 1] = val.x;
sh_row[threadIdx.x + 1 + (nthreads + 2)] = val.y;
sh_row[threadIdx.x + 1 + 2 * (nthreads + 2)] = val.z;
if (threadIdx.x == 0)
{
val = row[::max(x - 1, 1)];
sh_row[0] = val.x;
sh_row[(nthreads + 2)] = val.y;
sh_row[2 * (nthreads + 2)] = val.z;
}
if (threadIdx.x == blockDim.x - 1)
{
val = row[::min(x + 1, width - 2)];
sh_row[blockDim.x + 1] = val.x;
sh_row[blockDim.x + 1 + (nthreads + 2)] = val.y;
sh_row[blockDim.x + 1 + 2 * (nthreads + 2)] = val.z;
}
__syncthreads();
if (x < width)
{
float3 a, b;
b.x = sh_row[threadIdx.x + 2];
b.y = sh_row[threadIdx.x + 2 + (nthreads + 2)];
b.z = sh_row[threadIdx.x + 2 + 2 * (nthreads + 2)];
a.x = sh_row[threadIdx.x];
a.y = sh_row[threadIdx.x + (nthreads + 2)];
a.z = sh_row[threadIdx.x + 2 * (nthreads + 2)];
float3 dx;
if (correct_gamma)
dx = make_float3(::sqrtf(b.x) - ::sqrtf(a.x), ::sqrtf(b.y) - ::sqrtf(a.y), ::sqrtf(b.z) - ::sqrtf(a.z));
else
dx = make_float3(b.x - a.x, b.y - a.y, b.z - a.z);
float3 dy = make_float3(0.f, 0.f, 0.f);
if (blockIdx.y > 0 && blockIdx.y < height - 1)
{
val = ((const uchar4*)img.ptr(blockIdx.y - 1))[x];
a = make_float3(val.x, val.y, val.z);
val = ((const uchar4*)img.ptr(blockIdx.y + 1))[x];
b = make_float3(val.x, val.y, val.z);
if (correct_gamma)
dy = make_float3(::sqrtf(b.x) - ::sqrtf(a.x), ::sqrtf(b.y) - ::sqrtf(a.y), ::sqrtf(b.z) - ::sqrtf(a.z));
else
dy = make_float3(b.x - a.x, b.y - a.y, b.z - a.z);
}
float best_dx = dx.x;
float best_dy = dy.x;
float mag0 = dx.x * dx.x + dy.x * dy.x;
float mag1 = dx.y * dx.y + dy.y * dy.y;
if (mag0 < mag1)
{
best_dx = dx.y;
best_dy = dy.y;
mag0 = mag1;
}
mag1 = dx.z * dx.z + dy.z * dy.z;
if (mag0 < mag1)
{
best_dx = dx.z;
best_dy = dy.z;
mag0 = mag1;
}
mag0 = ::sqrtf(mag0);
float ang = (::atan2f(best_dy, best_dx) + CV_PI_F) * angle_scale - 0.5f;
int hidx = (int)::floorf(ang);
ang -= hidx;
hidx = (hidx + cnbins) % cnbins;
((uchar2*)qangle.ptr(blockIdx.y))[x] = make_uchar2(hidx, (hidx + 1) % cnbins);
((float2*)grad.ptr(blockIdx.y))[x] = make_float2(mag0 * (1.f - ang), mag0 * ang);
}
}
void compute_gradients_8UC4(int nbins, int height, int width, const PtrStepSzb& img,
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
{
(void)nbins;
const int nthreads = 256;
dim3 bdim(nthreads, 1);
dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y));
if (correct_gamma)
compute_gradients_8UC4_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
else
compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
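// Orientation binning used above (a sketch): for the channel with the largest
// gradient magnitude,
//   ang  = (atan2f(dy, dx) + pi) * angle_scale - 0.5f
//   hidx = floorf(ang), frac = ang - hidx
// and the magnitude is split linearly between bins hidx and hidx + 1 (both
// wrapped modulo cnbins) as mag * (1 - frac) and mag * frac.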
template <int nthreads, int correct_gamma>
__global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrStepb img,
float angle_scale, PtrStepf grad, PtrStepb qangle)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned char* row = (const unsigned char*)img.ptr(blockIdx.y);
__shared__ float sh_row[nthreads + 2];
if (x < width)
sh_row[threadIdx.x + 1] = row[x];
else
sh_row[threadIdx.x + 1] = row[width - 2];
if (threadIdx.x == 0)
sh_row[0] = row[::max(x - 1, 1)];
if (threadIdx.x == blockDim.x - 1)
sh_row[blockDim.x + 1] = row[::min(x + 1, width - 2)];
__syncthreads();
if (x < width)
{
float dx;
if (correct_gamma)
dx = ::sqrtf(sh_row[threadIdx.x + 2]) - ::sqrtf(sh_row[threadIdx.x]);
else
dx = sh_row[threadIdx.x + 2] - sh_row[threadIdx.x];
float dy = 0.f;
if (blockIdx.y > 0 && blockIdx.y < height - 1)
{
float a = ((const unsigned char*)img.ptr(blockIdx.y + 1))[x];
float b = ((const unsigned char*)img.ptr(blockIdx.y - 1))[x];
if (correct_gamma)
dy = ::sqrtf(a) - ::sqrtf(b);
else
dy = a - b;
}
float mag = ::sqrtf(dx * dx + dy * dy);
float ang = (::atan2f(dy, dx) + CV_PI_F) * angle_scale - 0.5f;
int hidx = (int)::floorf(ang);
ang -= hidx;
hidx = (hidx + cnbins) % cnbins;
((uchar2*)qangle.ptr(blockIdx.y))[x] = make_uchar2(hidx, (hidx + 1) % cnbins);
((float2*) grad.ptr(blockIdx.y))[x] = make_float2(mag * (1.f - ang), mag * ang);
}
}
void compute_gradients_8UC1(int nbins, int height, int width, const PtrStepSzb& img,
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
{
(void)nbins;
const int nthreads = 256;
dim3 bdim(nthreads, 1);
dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y));
if (correct_gamma)
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
else
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
//-------------------------------------------------------------------
// Resize
texture<uchar4, 2, cudaReadModeNormalizedFloat> resize8UC4_tex;
texture<uchar, 2, cudaReadModeNormalizedFloat> resize8UC1_tex;
__global__ void resize_for_hog_kernel(float sx, float sy, PtrStepSz<uchar> dst, int colOfs)
{
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
dst.ptr(y)[x] = tex2D(resize8UC1_tex, x * sx + colOfs, y * sy) * 255;
}
__global__ void resize_for_hog_kernel(float sx, float sy, PtrStepSz<uchar4> dst, int colOfs)
{
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
float4 val = tex2D(resize8UC4_tex, x * sx + colOfs, y * sy);
dst.ptr(y)[x] = make_uchar4(val.x * 255, val.y * 255, val.z * 255, val.w * 255);
}
}
template<class T, class TEX>
static void resize_for_hog(const PtrStepSzb& src, PtrStepSzb dst, TEX& tex)
{
tex.filterMode = cudaFilterModeLinear;
size_t texOfs = 0;
int colOfs = 0;
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
if (texOfs != 0)
{
colOfs = static_cast<int>( texOfs/sizeof(T) );
cudaSafeCall( cudaUnbindTexture(tex) );
cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
}
dim3 threads(32, 8);
dim3 grid(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));
float sx = static_cast<float>(src.cols) / dst.cols;
float sy = static_cast<float>(src.rows) / dst.rows;
resize_for_hog_kernel<<<grid, threads>>>(sx, sy, (PtrStepSz<T>)dst, colOfs);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaUnbindTexture(tex) );
}
void resize_8UC1(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
void resize_8UC4(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
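// The textures are bound with cudaReadModeNormalizedFloat, so tex2D returns values
// in [0, 1] and the kernels scale by 255 when writing uchar output; colOfs
// compensates for a non-zero texture binding offset reported by cudaBindTexture2D.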
} // namespace hog
}}} // namespace cv { namespace cuda { namespace device
#endif /* CUDA_DISABLER */


@@ -0,0 +1,303 @@
#if !defined CUDA_DISABLER
#include "lbp.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
namespace cv { namespace cuda { namespace device
{
namespace lbp
{
struct LBP
{
__host__ __device__ __forceinline__ LBP() {}
__device__ __forceinline__ int operator() (const int* integral, int ty, int fh, int fw, int& shift) const
{
int anchors[9];
anchors[0] = integral[ty];
anchors[1] = integral[ty + fw];
anchors[0] -= anchors[1];
anchors[2] = integral[ty + fw * 2];
anchors[1] -= anchors[2];
anchors[2] -= integral[ty + fw * 3];
ty += fh;
anchors[3] = integral[ty];
anchors[4] = integral[ty + fw];
anchors[3] -= anchors[4];
anchors[5] = integral[ty + fw * 2];
anchors[4] -= anchors[5];
anchors[5] -= integral[ty + fw * 3];
anchors[0] -= anchors[3];
anchors[1] -= anchors[4];
anchors[2] -= anchors[5];
// 0 - 2 contains s0 - s2
ty += fh;
anchors[6] = integral[ty];
anchors[7] = integral[ty + fw];
anchors[6] -= anchors[7];
anchors[8] = integral[ty + fw * 2];
anchors[7] -= anchors[8];
anchors[8] -= integral[ty + fw * 3];
anchors[3] -= anchors[6];
anchors[4] -= anchors[7];
anchors[5] -= anchors[8];
// 3 - 5 contains s3 - s5
anchors[0] -= anchors[4];
anchors[1] -= anchors[4];
anchors[2] -= anchors[4];
anchors[3] -= anchors[4];
anchors[5] -= anchors[4];
int response = (~(anchors[0] >> 31)) & 4;
response |= (~(anchors[1] >> 31)) & 2;
response |= (~(anchors[2] >> 31)) & 1;
shift = (~(anchors[5] >> 31)) & 16;
shift |= (~(anchors[3] >> 31)) & 1;
ty += fh;
anchors[0] = integral[ty];
anchors[1] = integral[ty + fw];
anchors[0] -= anchors[1];
anchors[2] = integral[ty + fw * 2];
anchors[1] -= anchors[2];
anchors[2] -= integral[ty + fw * 3];
anchors[6] -= anchors[0];
anchors[7] -= anchors[1];
anchors[8] -= anchors[2];
// 6 - 8 contains s6 - s8
anchors[6] -= anchors[4];
anchors[7] -= anchors[4];
anchors[8] -= anchors[4];
shift |= (~(anchors[6] >> 31)) & 2;
shift |= (~(anchors[7] >> 31)) & 4;
shift |= (~(anchors[8] >> 31)) & 8;
return response;
}
};
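// Sketch of the operator above: the 4x4 grid of integral-image samples yields the
// sums s0..s8 of the 3x3 feature cells; every border cell is compared against the
// center sum s4 (anchors[4]), and the 8 comparison bits are returned split into a
// 3-bit `response` (s0..s2) and a 5-bit `shift` (s3, s5..s8), which the cascade
// uses to pick a subset word and a bit inside it.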
template<typename Pr>
__global__ void disjoin(int4* candidates, int4* objects, unsigned int n, int groupThreshold, float grouping_eps, unsigned int* nclasses)
{
unsigned int tid = threadIdx.x;
extern __shared__ int sbuff[];
int* labels = sbuff;
int* rrects = sbuff + n;
Pr predicate(grouping_eps);
partition(candidates, n, labels, predicate);
rrects[tid * 4 + 0] = 0;
rrects[tid * 4 + 1] = 0;
rrects[tid * 4 + 2] = 0;
rrects[tid * 4 + 3] = 0;
__syncthreads();
int cls = labels[tid];
Emulation::smem::atomicAdd((rrects + cls * 4 + 0), candidates[tid].x);
Emulation::smem::atomicAdd((rrects + cls * 4 + 1), candidates[tid].y);
Emulation::smem::atomicAdd((rrects + cls * 4 + 2), candidates[tid].z);
Emulation::smem::atomicAdd((rrects + cls * 4 + 3), candidates[tid].w);
__syncthreads();
labels[tid] = 0;
__syncthreads();
Emulation::smem::atomicInc((unsigned int*)labels + cls, n);
__syncthreads();
*nclasses = 0;
int active = labels[tid];
if (active)
{
int* r1 = rrects + tid * 4;
float s = 1.f / active;
r1[0] = saturate_cast<int>(r1[0] * s);
r1[1] = saturate_cast<int>(r1[1] * s);
r1[2] = saturate_cast<int>(r1[2] * s);
r1[3] = saturate_cast<int>(r1[3] * s);
}
__syncthreads();
if (active && active >= groupThreshold)
{
int* r1 = rrects + tid * 4;
int4 r_out = make_int4(r1[0], r1[1], r1[2], r1[3]);
int aidx = Emulation::smem::atomicInc(nclasses, n);
objects[aidx] = r_out;
}
}
void connectedConmonents(PtrStepSz<int4> candidates, int ncandidates, PtrStepSz<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses)
{
if (!ncandidates) return;
int block = ncandidates;
int smem = block * ( sizeof(int) + sizeof(int4) );
disjoin<InSameComponint><<<1, block, smem>>>(candidates, objects, ncandidates, groupThreshold, grouping_eps, nclasses);
cudaSafeCall( cudaGetLastError() );
}
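// Grouping sketch: one thread per candidate rectangle. partition() assigns an
// equivalence-class label to every candidate, each class then atomically
// accumulates and averages its members' coordinates in shared memory, and only
// classes with at least groupThreshold members emit one averaged rectangle.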
struct Cascade
{
__host__ __device__ __forceinline__ Cascade(const Stage* _stages, int _nstages, const ClNode* _nodes, const float* _leaves,
const int* _subsets, const uchar4* _features, int _subsetSize)
: stages(_stages), nstages(_nstages), nodes(_nodes), leaves(_leaves), subsets(_subsets), features(_features), subsetSize(_subsetSize){}
__device__ __forceinline__ bool operator() (int y, int x, int* integral, const int pitch) const
{
int current_node = 0;
int current_leave = 0;
for (int s = 0; s < nstages; ++s)
{
float sum = 0;
Stage stage = stages[s];
for (int t = 0; t < stage.ntrees; t++)
{
ClNode node = nodes[current_node];
uchar4 feature = features[node.featureIdx];
int shift;
int c = evaluator(integral, (y + feature.y) * pitch + x + feature.x, feature.w * pitch, feature.z, shift);
int idx = (subsets[ current_node * subsetSize + c] & ( 1 << shift)) ? current_leave : current_leave + 1;
sum += leaves[idx];
current_node += 1;
current_leave += 2;
}
if (sum < stage.threshold)
return false;
}
return true;
}
const Stage* stages;
const int nstages;
const ClNode* nodes;
const float* leaves;
const int* subsets;
const uchar4* features;
const int subsetSize;
const LBP evaluator;
};
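// Evaluation sketch: for every stage the selected leaf value of each of its trees
// is accumulated; the window is rejected as soon as a stage sum falls below that
// stage's threshold, and accepted only if all stages pass.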
// Per scale k: y = sum_prev + tid_k / width_k, x = tid_k - (tid_k / width_k) * width_k
__global__ void lbp_cascade(const Cascade cascade, int frameW, int frameH, int windowW, int windowH, float scale, const float factor,
const int total, int* integral, const int pitch, PtrStepSz<int4> objects, unsigned int* classified)
{
int ftid = blockIdx.x * blockDim.x + threadIdx.x;
if (ftid >= total) return;
int step = (scale <= 2.f);
int windowsForLine = (__float2int_rn( __fdividef(frameW, scale)) - windowW) >> step;
int stotal = windowsForLine * ( (__float2int_rn( __fdividef(frameH, scale)) - windowH) >> step);
int wshift = 0;
int scaleTid = ftid;
while (scaleTid >= stotal)
{
scaleTid -= stotal;
wshift += __float2int_rn(__fdividef(frameW, scale)) + 1;
scale *= factor;
step = (scale <= 2.f);
windowsForLine = ( ((__float2int_rn(__fdividef(frameW, scale)) - windowW) >> step));
stotal = windowsForLine * ( (__float2int_rn(__fdividef(frameH, scale)) - windowH) >> step);
}
int y = __fdividef(scaleTid, windowsForLine);
int x = scaleTid - y * windowsForLine;
x <<= step;
y <<= step;
if (cascade(y, x + wshift, integral, pitch))
{
if(x >= __float2int_rn(__fdividef(frameW, scale)) - windowW) return;
int4 rect;
rect.x = __float2int_rn(x * scale);
rect.y = __float2int_rn(y * scale);
rect.z = __float2int_rn(windowW * scale);
rect.w = __float2int_rn(windowH * scale);
int res = atomicInc(classified, (unsigned int)objects.cols);
objects(0, res) = rect;
}
}
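// Work distribution sketch: the whole scale pyramid is flattened into `total`
// work items; each thread peels whole scales off its global id (ftid) until it
// lands inside one, then decodes the remainder into an (x, y) window position at
// that scale (window stride 2 while scale <= 2, stride 1 at coarser scales).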
void classifyPyramid(int frameW, int frameH, int windowW, int windowH, float initialScale, float factor, int workAmount,
const PtrStepSzb& mstages, const int nstages, const PtrStepSzi& mnodes, const PtrStepSzf& mleaves, const PtrStepSzi& msubsets, const PtrStepSzb& mfeatures,
const int subsetSize, PtrStepSz<int4> objects, unsigned int* classified, PtrStepSzi integral)
{
const int block = 128;
int grid = divUp(workAmount, block);
cudaFuncSetCacheConfig(lbp_cascade, cudaFuncCachePreferL1);
Cascade cascade((Stage*)mstages.ptr(), nstages, (ClNode*)mnodes.ptr(), mleaves.ptr(), msubsets.ptr(), (uchar4*)mfeatures.ptr(), subsetSize);
lbp_cascade<<<grid, block>>>(cascade, frameW, frameH, windowW, windowH, initialScale, factor, workAmount, integral.ptr(), (int)integral.step / sizeof(int), objects, classified);
}
}
}}}
#endif /* CUDA_DISABLER */


@@ -0,0 +1,112 @@
#ifndef __OPENCV_CUDA_DEVICE_LBP_HPP_
#define __OPENCV_CUDA_DEVICE_LBP_HPP_
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
namespace cv { namespace cuda { namespace device {
namespace lbp {
struct Stage
{
int first;
int ntrees;
float threshold;
};
struct ClNode
{
int left;
int right;
int featureIdx;
};
struct InSameComponint
{
public:
__device__ __forceinline__ InSameComponint(float _eps) : eps(_eps) {}
__device__ __forceinline__ InSameComponint(const InSameComponint& other) : eps(other.eps) {}
__device__ __forceinline__ bool operator()(const int4& r1, const int4& r2) const
{
float delta = eps * (::min(r1.z, r2.z) + ::min(r1.w, r2.w)) * 0.5f;
return ::abs(r1.x - r2.x) <= delta && ::abs(r1.y - r2.y) <= delta
&& ::abs(r1.x + r1.z - r2.x - r2.z) <= delta && ::abs(r1.y + r1.w - r2.y - r2.w) <= delta;
}
float eps;
};
template<typename Pr>
__device__ __forceinline__ void partition(int4* vec, unsigned int n, int* labels, Pr predicate)
{
unsigned tid = threadIdx.x;
labels[tid] = tid;
__syncthreads();
for (unsigned int id = 0; id < n; id++)
{
if (tid != id && predicate(vec[tid], vec[id]))
{
int p = labels[tid];
int q = labels[id];
if (p < q)
{
Emulation::smem::atomicMin(labels + id, p);
}
else if (p > q)
{
Emulation::smem::atomicMin(labels + tid, q);
}
}
}
__syncthreads();
}
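// partition() above is a simple shared-memory labeling pass: every thread owns one
// element, starts with its own index as label, and atomically propagates the
// minimum label across every pair the predicate joins, so mutually similar
// rectangles end up sharing one class label.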
} // lbp
} } }// namespaces
#endif

File diff suppressed because it is too large.


@@ -0,0 +1,62 @@
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__
#include <limits>
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/core/private.cuda.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDALEGACY
# include "opencv2/cudalegacy/private.hpp"
#endif
#endif /* __OPENCV_PRECOMP_H__ */


@@ -0,0 +1,45 @@
#include "test_precomp.hpp"
CV_CUDA_TEST_MAIN("gpu")


@@ -0,0 +1,365 @@
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
using namespace cvtest;
//#define DUMP
struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>
{
cv::cuda::DeviceInfo devInfo;
cv::Ptr<cv::cuda::HOG> hog;
#ifdef DUMP
std::ofstream f;
#else
std::ifstream f;
#endif
int wins_per_img_x;
int wins_per_img_y;
int blocks_per_win_x;
int blocks_per_win_y;
int block_hist_size;
virtual void SetUp()
{
devInfo = GetParam();
cv::cuda::setDevice(devInfo.deviceID());
hog = cv::cuda::HOG::create();
}
#ifdef DUMP
void dump(const std::vector<cv::Point>& locations)
{
int nlocations = static_cast<int>(locations.size());
f.write((char*)&nlocations, sizeof(nlocations));
for (int i = 0; i < nlocations; ++i)
f.write((char*)&locations[i], sizeof(locations[i]));
}
#else
void compare(const std::vector<cv::Point>& locations)
{
// skip block_hists check
int rows, cols;
f.read((char*)&rows, sizeof(rows));
f.read((char*)&cols, sizeof(cols));
for (int i = 0; i < rows; ++i)
{
for (int j = 0; j < cols; ++j)
{
float val;
f.read((char*)&val, sizeof(val));
}
}
int nlocations;
f.read((char*)&nlocations, sizeof(nlocations));
ASSERT_EQ(nlocations, static_cast<int>(locations.size()));
for (int i = 0; i < nlocations; ++i)
{
cv::Point location;
f.read((char*)&location, sizeof(location));
ASSERT_EQ(location, locations[i]);
}
}
#endif
void testDetect(const cv::Mat& img)
{
hog->setGammaCorrection(false);
hog->setSVMDetector(hog->getDefaultPeopleDetector());
std::vector<cv::Point> locations;
// Test detect
hog->detect(loadMat(img), locations);
#ifdef DUMP
dump(locations);
#else
compare(locations);
#endif
// Test detect on smaller image
cv::Mat img2;
cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
hog->detect(loadMat(img2), locations);
#ifdef DUMP
dump(locations);
#else
compare(locations);
#endif
// Test detect on greater image
cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
hog->detect(loadMat(img2), locations);
#ifdef DUMP
dump(locations);
#else
compare(locations);
#endif
}
};
// disabled until resize is fixed
CUDA_TEST_P(HOG, DISABLED_Detect)
{
cv::Mat img_rgb = readImage("hog/road.png");
ASSERT_FALSE(img_rgb.empty());
f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
ASSERT_TRUE(f.is_open());
// Test on color image
cv::Mat img;
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
testDetect(img);
// Test on gray image
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
testDetect(img);
}
CUDA_TEST_P(HOG, GetDescriptors)
{
// Load image (e.g. train data, composed from windows)
cv::Mat img_rgb = readImage("hog/train_data.png");
ASSERT_FALSE(img_rgb.empty());
// Convert to C4
cv::Mat img;
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
cv::cuda::GpuMat d_img(img);
// Convert train images into feature vectors (train table)
cv::cuda::GpuMat descriptors, descriptors_by_cols;
hog->setWinStride(Size(64, 128));
hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_ROW_BY_ROW);
hog->compute(d_img, descriptors);
hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_COL_BY_COL);
hog->compute(d_img, descriptors_by_cols);
// Check size of the result train table
wins_per_img_x = 3;
wins_per_img_y = 2;
blocks_per_win_x = 7;
blocks_per_win_y = 15;
block_hist_size = 36;
cv::Size descr_size_expected = cv::Size(blocks_per_win_x * blocks_per_win_y * block_hist_size,
wins_per_img_x * wins_per_img_y);
ASSERT_EQ(descr_size_expected, descriptors.size());
// Check both formats of output descriptors are handled correctly
cv::Mat dr(descriptors);
cv::Mat dc(descriptors_by_cols);
for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i)
{
const float* l = dr.rowRange(i, i + 1).ptr<float>();
const float* r = dc.rowRange(i, i + 1).ptr<float>();
for (int y = 0; y < blocks_per_win_y; ++y)
for (int x = 0; x < blocks_per_win_x; ++x)
for (int k = 0; k < block_hist_size; ++k)
ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
r[(x * blocks_per_win_y + y) * block_hist_size + k]);
}
}
INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, HOG, ALL_DEVICES);
//============== caltech hog tests =====================//
struct CalTech : public ::testing::TestWithParam<std::tr1::tuple<cv::cuda::DeviceInfo, std::string> >
{
cv::cuda::DeviceInfo devInfo;
cv::Mat img;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
cv::cuda::setDevice(devInfo.deviceID());
img = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
}
};
CUDA_TEST_P(CalTech, HOG)
{
cv::cuda::GpuMat d_img(img);
cv::Mat markedImage(img.clone());
cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
d_hog->setNumLevels(d_hog->getNumLevels() + 32);
std::vector<cv::Rect> found_locations;
d_hog->detectMultiScale(d_img, found_locations);
#if defined (LOG_CASCADE_STATISTIC)
for (int i = 0; i < (int)found_locations.size(); i++)
{
cv::Rect r = found_locations[i];
std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl;
cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
}
cv::imshow("Res", markedImage);
cv::waitKey();
#endif
}
INSTANTIATE_TEST_CASE_P(detect, CalTech, testing::Combine(ALL_DEVICES,
::testing::Values<std::string>("caltech/image_00000009_0.png", "caltech/image_00000032_0.png",
"caltech/image_00000165_0.png", "caltech/image_00000261_0.png", "caltech/image_00000469_0.png",
"caltech/image_00000527_0.png", "caltech/image_00000574_0.png")));
//////////////////////////////////////////////////////////////////////////////////////////
/// LBP classifier
PARAM_TEST_CASE(LBP_Read_classifier, cv::cuda::DeviceInfo, int)
{
cv::cuda::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
cv::cuda::setDevice(devInfo.deviceID());
}
};
CUDA_TEST_P(LBP_Read_classifier, Accuracy)
{
std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml";
cv::Ptr<cv::cuda::CascadeClassifier> d_cascade;
ASSERT_NO_THROW(
d_cascade = cv::cuda::CascadeClassifier::create(classifierXmlPath);
);
ASSERT_FALSE(d_cascade.empty());
}
INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_Read_classifier,
testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
PARAM_TEST_CASE(LBP_classify, cv::cuda::DeviceInfo, int)
{
cv::cuda::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
cv::cuda::setDevice(devInfo.deviceID());
}
};
CUDA_TEST_P(LBP_classify, Accuracy)
{
std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml";
std::string imagePath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/er.png";
cv::CascadeClassifier cpuClassifier(classifierXmlPath);
ASSERT_FALSE(cpuClassifier.empty());
cv::Mat image = cv::imread(imagePath);
image = image.colRange(0, image.cols/2);
cv::Mat grey;
cvtColor(image, grey, cv::COLOR_BGR2GRAY);
ASSERT_FALSE(image.empty());
std::vector<cv::Rect> rects;
cpuClassifier.detectMultiScale(grey, rects);
cv::Mat markedImage = image.clone();
std::vector<cv::Rect>::iterator it = rects.begin();
for (; it != rects.end(); ++it)
cv::rectangle(markedImage, *it, cv::Scalar(255, 0, 0));
cv::Ptr<cv::cuda::CascadeClassifier> gpuClassifier =
cv::cuda::CascadeClassifier::create(classifierXmlPath);
cv::cuda::GpuMat tested(grey);
cv::cuda::GpuMat gpu_rects_buf;
gpuClassifier->detectMultiScale(tested, gpu_rects_buf);
std::vector<cv::Rect> gpu_rects;
gpuClassifier->convert(gpu_rects_buf, gpu_rects);
#if defined (LOG_CASCADE_STATISTIC)
for (size_t i = 0; i < gpu_rects.size(); i++)
{
cv::Rect r = gpu_rects[i];
std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl;
cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
}
cv::imshow("Res", markedImage);
cv::waitKey();
#endif
}
INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_classify,
testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
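// A minimal, self-contained usage sketch (not a test) of the
// cv::cuda::CascadeClassifier API exercised above; both file names are hypothetical.
inline void lbpCascadeUsageSketch()
{
    cv::Ptr<cv::cuda::CascadeClassifier> cascade =
        cv::cuda::CascadeClassifier::create("lbpcascade_frontalface.xml"); // hypothetical cascade path
    cv::Mat gray = cv::imread("faces.png", cv::IMREAD_GRAYSCALE); // hypothetical input image
    cv::cuda::GpuMat d_gray(gray), d_objects;
    cascade->detectMultiScale(d_gray, d_objects); // raw detections stay in a GpuMat buffer
    std::vector<cv::Rect> faces;
    cascade->convert(d_objects, faces); // unpack detections to host-side rectangles
}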
#endif // HAVE_CUDA


@@ -0,0 +1,64 @@
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
#include <fstream>
#include "opencv2/ts.hpp"
#include "opencv2/ts/cuda_test.hpp"
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/objdetect.hpp"
#include "cvconfig.h"
#endif