remove softcascade host dependencies on gpu module
parent 5120322cea
commit 6daf17f974

@@ -50,6 +50,141 @@
namespace cv { namespace gpu
{
    //////////////////////////////// CudaMem ////////////////////////////////
    // CudaMem is limited cv::Mat with page locked memory allocation.
    // Page locked memory is only needed for async and faster coping to GPU.
    // It is convertable to cv::Mat header without reference counting
    // so you can use it with other opencv functions.

    // Page-locks the matrix m memory and maps it for the device(s)
    CV_EXPORTS void registerPageLocked(Mat& m);
    // Unmaps the memory of matrix m, and makes it pageable again.
    CV_EXPORTS void unregisterPageLocked(Mat& m);

    class CV_EXPORTS CudaMem
    {
    public:
        enum { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2, ALLOC_WRITE_COMBINED = 4 };

        CudaMem();
        CudaMem(const CudaMem& m);

        CudaMem(int rows, int cols, int type, int _alloc_type = ALLOC_PAGE_LOCKED);
        CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);


        //! creates from cv::Mat with coping data
        explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED);

        ~CudaMem();

        CudaMem& operator = (const CudaMem& m);

        //! returns deep copy of the matrix, i.e. the data is copied
        CudaMem clone() const;

        //! allocates new matrix data unless the matrix already has specified size and type.
        void create(int rows, int cols, int type, int alloc_type = ALLOC_PAGE_LOCKED);
        void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);

        //! decrements reference counter and released memory if needed.
        void release();

        //! returns matrix header with disabled reference counting for CudaMem data.
        Mat createMatHeader() const;
        operator Mat() const;

        //! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
        GpuMat createGpuMatHeader() const;
        operator GpuMat() const;

        //returns if host memory can be mapperd to gpu address space;
        static bool canMapHostMemory();

        // Please see cv::Mat for descriptions
        bool isContinuous() const;
        size_t elemSize() const;
        size_t elemSize1() const;
        int type() const;
        int depth() const;
        int channels() const;
        size_t step1() const;
        Size size() const;
        bool empty() const;


        // Please see cv::Mat for descriptions
        int flags;
        int rows, cols;
        size_t step;

        uchar* data;
        int* refcount;

        uchar* datastart;
        uchar* dataend;

        int alloc_type;
    };
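A minimal host-side sketch of how this allocation API is typically used (illustrative only; it assumes the declarations above are reachable through a single core header and that a CUDA-capable build is available):

    // Sketch, not part of the commit: page-locked allocation and in-place pinning.
    void pinnedMemoryExample()
    {
        // Page-locked buffer; faster and asynchronous copies to the GPU start from here.
        cv::gpu::CudaMem pinned(480, 640, CV_8UC1, cv::gpu::CudaMem::ALLOC_PAGE_LOCKED);

        // Mat header over the same memory (no reference counting), usable with ordinary OpenCV functions.
        cv::Mat frame = pinned.createMatHeader();
        frame.setTo(cv::Scalar::all(0));

        // Alternatively, an existing cv::Mat can be page-locked in place and released later.
        cv::Mat img(480, 640, CV_8UC1);
        cv::gpu::registerPageLocked(img);
        // ... enqueue asynchronous uploads from img ...
        cv::gpu::unregisterPageLocked(img);
    }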

    //////////////////////////////// CudaStream ////////////////////////////////
    // Encapculates Cuda Stream. Provides interface for async coping.
    // Passed to each function that supports async kernel execution.
    // Reference counting is enabled

    class CV_EXPORTS Stream
    {
    public:
        Stream();
        ~Stream();

        Stream(const Stream&);
        Stream& operator =(const Stream&);

        bool queryIfComplete();
        void waitForCompletion();

        //! downloads asynchronously
        // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat)
        void enqueueDownload(const GpuMat& src, CudaMem& dst);
        void enqueueDownload(const GpuMat& src, Mat& dst);

        //! uploads asynchronously
        // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI)
        void enqueueUpload(const CudaMem& src, GpuMat& dst);
        void enqueueUpload(const Mat& src, GpuMat& dst);

        //! copy asynchronously
        void enqueueCopy(const GpuMat& src, GpuMat& dst);

        //! memory set asynchronously
        void enqueueMemSet(GpuMat& src, Scalar val);
        void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask);

        //! converts matrix type, ex from float to uchar depending on type
        void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0);

        //! adds a callback to be called on the host after all currently enqueued items in the stream have completed
        typedef void (*StreamCallback)(Stream& stream, int status, void* userData);
        void enqueueHostCallback(StreamCallback callback, void* userData);

        static Stream& Null();

        operator bool() const;

    private:
        struct Impl;

        explicit Stream(Impl* impl);
        void create();
        void release();

        Impl *impl;

        friend struct StreamAccessor;
    };
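The warnings above matter in practice: truly asynchronous transfers require page-locked host memory. A minimal sketch of the intended pattern, assuming the CudaMem and Stream declarations above (names are illustrative):

    // Sketch, not part of the commit: an asynchronous upload/process/download round trip.
    void asyncRoundTrip(const cv::Mat& input)
    {
        cv::gpu::Stream stream;

        // Page-locked staging buffers so the copies below really are asynchronous.
        cv::gpu::CudaMem h_src(input.size(), input.type(), cv::gpu::CudaMem::ALLOC_PAGE_LOCKED);
        cv::gpu::CudaMem h_dst(input.size(), input.type(), cv::gpu::CudaMem::ALLOC_PAGE_LOCKED);
        cv::Mat srcHeader = h_src.createMatHeader();
        input.copyTo(srcHeader);

        cv::gpu::GpuMat d_src(input.size(), input.type());
        cv::gpu::GpuMat d_dst(input.size(), input.type());

        stream.enqueueUpload(h_src, d_src);     // host -> device
        stream.enqueueCopy(d_src, d_dst);       // device-side work would normally go here
        stream.enqueueDownload(d_dst, h_dst);   // device -> host

        stream.waitForCompletion();             // block until everything enqueued has finished
    }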

    //////////////////////////////// Initialization & Info ////////////////////////

    //! This is the only function that do not throw exceptions if the library is compiled without Cuda.
modules/core/include/opencv2/core/stream_accessor.hpp (new file, 64 lines)
@@ -0,0 +1,64 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other GpuMaterials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__
#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__

#include "opencv2/gpu/gpu.hpp"
#include "cuda_runtime_api.h"

namespace cv
{
    namespace gpu
    {
        // This is only header file that depends on Cuda. All other headers are independent.
        // So if you use OpenCV binaries you do noot need to install Cuda Toolkit.
        // But of you wanna use GPU by yourself, may get cuda stream instance using the class below.
        // In this case you have to install Cuda Toolkit.
        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
        };
    }
}

#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */
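As the header comment says, this is the one place where user code can reach the underlying CUDA stream. A minimal sketch, assuming the header is included from the path introduced by this commit and that the CUDA Toolkit is installed (the commented-out kernel name is hypothetical):

    // Sketch, not part of the commit: interleaving user CUDA work with OpenCV's stream.
    #include "opencv2/core/stream_accessor.hpp"

    void enqueueOwnWork(cv::gpu::Stream& stream, float* d_buffer, size_t bytes)
    {
        cudaStream_t raw = cv::gpu::StreamAccessor::getStream(stream);

        // Work enqueued on `raw` is ordered with whatever OpenCV has put on `stream`.
        cudaMemsetAsync(d_buffer, 0, bytes, raw);
        // myKernel<<<grid, block, 0, raw>>>(d_buffer);   // user kernels launch on the same stream
    }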

@@ -72,6 +72,33 @@ void cv::gpu::Stream::release() { throw_nogpu(); }

#include "opencv2/gpu/stream_accessor.hpp"

namespace
{
#if defined(__GNUC__)
    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__)
#endif

    inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
    {
        if (cudaSuccess != err)
            cv::gpu::error(cudaGetErrorString(err), file, line, func);
    }

    inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
    {
        if (err < 0)
        {
            std::ostringstream msg;
            msg << "NPP API Call Error: " << err;
            cv::gpu::error(msg.str().c_str(), file, line, func);
        }
    }
}
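A short sketch of how these helpers are meant to wrap raw CUDA calls from the same translation unit, so that failures surface as cv::gpu::error with file and line information (the function itself is illustrative; the CUDA runtime calls are standard):

    // Sketch, not part of the commit: checked pinning of an existing host buffer.
    void pinHostBuffer(void* ptr, size_t bytes)
    {
        cudaSafeCall( cudaHostRegister(ptr, bytes, cudaHostRegisterPortable) );
        // ... use the page-locked buffer ...
        cudaSafeCall( cudaHostUnregister(ptr) );
    }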

namespace cv { namespace gpu
{
    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);

@@ -41,6 +41,7 @@
//M*/

#include "precomp.hpp"
#include "opencv2/core/gpumat.hpp"

using namespace cv;
using namespace cv::gpu;

@@ -178,7 +179,7 @@ bool cv::gpu::CudaMem::empty() const
    return data == 0;
}

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
#if !defined (HAVE_CUDA)

void cv::gpu::registerPageLocked(Mat&) { throw_nogpu(); }
void cv::gpu::unregisterPageLocked(Mat&) { throw_nogpu(); }

@@ -188,6 +189,33 @@ void cv::gpu::CudaMem::release() { throw_nogpu(); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_nogpu(); return GpuMat(); }

#else /* !defined (HAVE_CUDA) */
#include <cuda_runtime_api.h>
namespace
{
#if defined(__GNUC__)
    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__)
#endif

    inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
    {
        if (cudaSuccess != err)
            cv::gpu::error(cudaGetErrorString(err), file, line, func);
    }

    inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
    {
        if (err < 0)
        {
            std::ostringstream msg;
            msg << "NPP API Call Error: " << err;
            cv::gpu::error(msg.str().c_str(), file, line, func);
        }
    }
}

void cv::gpu::registerPageLocked(Mat& m)
{
@@ -56,7 +56,7 @@ namespace cv
        // In this case you have to install Cuda Toolkit.
        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
            CV_EXPORTS static cudaStream_t getStream(const cv::gpu::Stream& stream);
        };
    }
}

@@ -9,7 +9,7 @@ macro(ocv_glob_cuda_powered_module_sources)
  set(lib_device_hdrs "")

  if (HAVE_CUDA AND lib_device_srcs)
    ocv_include_directories(${CUDA_INCLUDE_DIRS})
    ocv_include_directories(${CUDA_INCLUDE_DIRS} "${OpenCV_SOURCE_DIR}/modules/gpu/include")
    file(GLOB_RECURSE lib_device_hdrs "src/cuda/*.hpp")

    ocv_cuda_compile(device_objs ${lib_device_srcs})

@@ -35,7 +35,7 @@ set(cuda_include "")
if (NAVE_CUDA)
  set(cuda_deps ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
endif()
ocv_add_module(softcascade opencv_core opencv_imgproc opencv_ml OPTIONAL opencv_gpu ${cuda_deps})
ocv_add_module(softcascade opencv_core opencv_imgproc opencv_ml OPTIONAL ${cuda_deps})

if(HAVE_CUDA)
  ocv_module_include_directories(${CUDA_INCLUDE_DIRS})

@@ -49,6 +49,8 @@

namespace cv { namespace softcascade { namespace device {

typedef unsigned char uchar;

template <int FACTOR>
__device__ __forceinline__ uchar shrink(const uchar* ptr, const int pitch, const int y, const int x)
{

@@ -480,8 +480,8 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp

    flds.mask.create( rois.cols / shr, rois.rows / shr, rois.type());

    cv::gpu::resize(rois, flds.genRoiTmp, cv::Size(), 1.f / shr, 1.f / shr, CV_INTER_AREA, s);
    cv::gpu::transpose(flds.genRoiTmp, flds.mask, s);
    //cv::gpu::resize(rois, flds.genRoiTmp, cv::Size(), 1.f / shr, 1.f / shr, CV_INTER_AREA, s);
    //cv::gpu::transpose(flds.genRoiTmp, flds.mask, s);

    if (type == CV_8UC3)
    {

@@ -491,7 +491,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
        flds.createLevels(image.rows, image.cols);

        flds.preprocessor->apply(image, flds.shrunk);
        cv::gpu::integralBuffered(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
        //cv::gpu::integralBuffered(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
    }
    else
    {

@@ -546,12 +546,12 @@ struct GenricPreprocessor : public cv::softcascade::ChannelsProcessor
        channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
        setZero(channels, s);

        cv::gpu::cvtColor(frame, gray, CV_BGR2GRAY, s);
        //cv::gpu::cvtColor(frame, gray, CV_BGR2GRAY, s);
        createHogBins(s);

        createLuvBins(frame, s);

        cv::gpu::resize(channels, shrunk, cv::Size(), 1.f / shrinkage, 1.f / shrinkage, CV_INTER_AREA, s);
        //cv::gpu::resize(channels, shrunk, cv::Size(), 1.f / shrinkage, 1.f / shrinkage, CV_INTER_AREA, s);
    }

private:

@@ -566,20 +566,20 @@ private:
        cv::gpu::GpuMat dfdx(fplane, cv::Rect(0, 0, fw, fh));
        cv::gpu::GpuMat dfdy(fplane, cv::Rect(0, fh, fw, fh));

        cv::gpu::Sobel(gray, dfdx, CV_32F, 1, 0, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
        cv::gpu::Sobel(gray, dfdy, CV_32F, 0, 1, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
        //cv::gpu::Sobel(gray, dfdx, CV_32F, 1, 0, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
        //cv::gpu::Sobel(gray, dfdy, CV_32F, 0, 1, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);

        cv::gpu::GpuMat mag(fplane, cv::Rect(0, 2 * fh, fw, fh));
        cv::gpu::GpuMat ang(fplane, cv::Rect(0, 3 * fh, fw, fh));

        cv::gpu::cartToPolar(dfdx, dfdy, mag, ang, true, s);
        //cv::gpu::cartToPolar(dfdx, dfdy, mag, ang, true, s);

        // normalize magnitude to uchar interval and angles to 6 bins
        cv::gpu::GpuMat nmag(fplane, cv::Rect(0, 4 * fh, fw, fh));
        cv::gpu::GpuMat nang(fplane, cv::Rect(0, 5 * fh, fw, fh));

        cv::gpu::multiply(mag, cv::Scalar::all(1.f / (8 *::log(2.0f))), nmag, 1, -1, s);
        cv::gpu::multiply(ang, cv::Scalar::all(1.f / 60.f), nang, 1, -1, s);
        //cv::gpu::multiply(mag, cv::Scalar::all(1.f / (8 *::log(2.0f))), nmag, 1, -1, s);
        //cv::gpu::multiply(ang, cv::Scalar::all(1.f / 60.f), nang, 1, -1, s);

        //create uchar magnitude
        cv::gpu::GpuMat cmag(channels, cv::Rect(0, fh * HOG_BINS, fw, fh));

@@ -597,7 +597,7 @@ private:
        static const int fw = colored.cols;
        static const int fh = colored.rows;

        cv::gpu::cvtColor(colored, luv, CV_BGR2Luv, s);
        //cv::gpu::cvtColor(colored, luv, CV_BGR2Luv, s);

        std::vector<cv::gpu::GpuMat> splited;
        for(int i = 0; i < LUV_BINS; ++i)

@@ -605,7 +605,7 @@ private:
            splited.push_back(cv::gpu::GpuMat(channels, cv::Rect(0, fh * (7 + i), fw, fh)));
        }

        cv::gpu::split(luv, splited, s);
        //cv::gpu::split(luv, splited, s);
    }

    enum {HOG_BINS = 6, LUV_BINS = 3};

@@ -631,7 +631,7 @@ struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor
    virtual void apply(InputArray _frame, OutputArray _shrunk, cv::gpu::Stream& s = cv::gpu::Stream::Null())
    {
        const cv::gpu::GpuMat frame = _frame.getGpuMat();
        cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);
        //cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);

        _shrunk.create(frame.rows * (4 + bins) / shrinkage, frame.cols / shrinkage, CV_8UC1);
        cv::gpu::GpuMat shrunk = _shrunk.getGpuMat();

@@ -639,7 +639,7 @@ struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor
        channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
        setZero(channels, s);

        cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
        //cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
        cv::softcascade::device::gray2hog(gray, channels(cv::Rect(0, 0, bgr.cols, bgr.rows * (bins + 1))), bins);

        cv::gpu::GpuMat luv(channels, cv::Rect(0, bgr.rows * (bins + 1), bgr.cols, bgr.rows * 3));