remove softcascade host dependencies on gpu module

marina.kolpakova 2013-03-03 13:01:17 +04:00
parent 5120322cea
commit 6daf17f974
8 changed files with 274 additions and 18 deletions

View File

@ -50,6 +50,141 @@
namespace cv { namespace gpu
{
//////////////////////////////// CudaMem ////////////////////////////////
// CudaMem is a limited cv::Mat with page-locked memory allocation.
// Page-locked memory is only needed for async and faster copying to the GPU.
// It is convertible to a cv::Mat header without reference counting,
// so you can use it with other OpenCV functions.
// Page-locks the matrix m memory and maps it for the device(s)
CV_EXPORTS void registerPageLocked(Mat& m);
// Unmaps the memory of matrix m, and makes it pageable again.
CV_EXPORTS void unregisterPageLocked(Mat& m);
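A hedged illustration of how these two functions are used (the buffer name and size below are made up, not from the diff):

// Illustrative only: pin an existing cv::Mat allocation for fast/async transfers.
cv::Mat frame(480, 640, CV_8UC3);
cv::gpu::registerPageLocked(frame);    // page-lock the existing host allocation
// ... enqueue async uploads that read from 'frame' ...
cv::gpu::unregisterPageLocked(frame);  // make the memory pageable again before it is destroyed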
class CV_EXPORTS CudaMem
{
public:
enum { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2, ALLOC_WRITE_COMBINED = 4 };
CudaMem();
CudaMem(const CudaMem& m);
CudaMem(int rows, int cols, int type, int _alloc_type = ALLOC_PAGE_LOCKED);
CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
//! creates from cv::Mat, copying the data
explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED);
~CudaMem();
CudaMem& operator = (const CudaMem& m);
//! returns deep copy of the matrix, i.e. the data is copied
CudaMem clone() const;
//! allocates new matrix data unless the matrix already has specified size and type.
void create(int rows, int cols, int type, int alloc_type = ALLOC_PAGE_LOCKED);
void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
//! decrements reference counter and releases memory if needed.
void release();
//! returns matrix header with disabled reference counting for CudaMem data.
Mat createMatHeader() const;
operator Mat() const;
//! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
GpuMat createGpuMatHeader() const;
operator GpuMat() const;
// returns true if host memory can be mapped to the gpu address space
static bool canMapHostMemory();
// Please see cv::Mat for descriptions
bool isContinuous() const;
size_t elemSize() const;
size_t elemSize1() const;
int type() const;
int depth() const;
int channels() const;
size_t step1() const;
Size size() const;
bool empty() const;
// Please see cv::Mat for descriptions
int flags;
int rows, cols;
size_t step;
uchar* data;
int* refcount;
uchar* datastart;
uchar* dataend;
int alloc_type;
};
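To make the host-side role of CudaMem concrete, a minimal usage sketch (illustrative names and sizes, not part of this commit) could look like:

// Allocate page-locked host memory, view it through a cv::Mat header, upload it to the device.
cv::gpu::CudaMem h_buf(480, 640, CV_8UC1, cv::gpu::CudaMem::ALLOC_PAGE_LOCKED);
cv::Mat header = h_buf.createMatHeader();  // no copy, reference counting disabled
header.setTo(cv::Scalar::all(0));          // fill through the ordinary cv::Mat API
cv::gpu::GpuMat d_img;
d_img.upload(header);                      // the page-locked source keeps the transfer fast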
//////////////////////////////// CudaStream ////////////////////////////////
// Encapsulates a Cuda Stream. Provides an interface for async copying.
// Passed to each function that supports async kernel execution.
// Reference counting is enabled
class CV_EXPORTS Stream
{
public:
Stream();
~Stream();
Stream(const Stream&);
Stream& operator =(const Stream&);
bool queryIfComplete();
void waitForCompletion();
//! downloads asynchronously
// Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat)
void enqueueDownload(const GpuMat& src, CudaMem& dst);
void enqueueDownload(const GpuMat& src, Mat& dst);
//! uploads asynchronously
// Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI)
void enqueueUpload(const CudaMem& src, GpuMat& dst);
void enqueueUpload(const Mat& src, GpuMat& dst);
//! copy asynchronously
void enqueueCopy(const GpuMat& src, GpuMat& dst);
//! memory set asynchronously
void enqueueMemSet(GpuMat& src, Scalar val);
void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask);
//! converts matrix type, e.g. from float to uchar, depending on dtype
void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0);
//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
typedef void (*StreamCallback)(Stream& stream, int status, void* userData);
void enqueueHostCallback(StreamCallback callback, void* userData);
static Stream& Null();
operator bool() const;
private:
struct Impl;
explicit Stream(Impl* impl);
void create();
void release();
Impl *impl;
friend struct StreamAccessor;
};
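A sketch of the intended asynchronous pipeline, assuming the enqueue* calls allocate their destinations as in the 2.4 API (buffer names are illustrative):

cv::gpu::Stream stream;
cv::gpu::CudaMem h_src(1080, 1920, CV_8UC1);      // page-locked host input
cv::gpu::CudaMem h_dst;                           // filled by the download below
cv::gpu::GpuMat  d_img;
stream.enqueueUpload(h_src, d_img);               // async host-to-device copy
stream.enqueueMemSet(d_img, cv::Scalar::all(0));  // async device-side fill
stream.enqueueDownload(d_img, h_dst);             // async device-to-host copy
stream.waitForCompletion();                       // host blocks until the stream drains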
//////////////////////////////// Initialization & Info ////////////////////////
//! This is the only function that does not throw exceptions if the library is compiled without Cuda.

View File

@ -0,0 +1,64 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__
#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__
#include "opencv2/gpu/gpu.hpp"
#include "cuda_runtime_api.h"
namespace cv
{
namespace gpu
{
// This is the only header file that depends on Cuda. All other headers are independent.
// So if you use OpenCV binaries you do not need to install the Cuda Toolkit.
// But if you want to use the GPU yourself, you may get a cuda stream instance using the class below.
// In this case you have to install the Cuda Toolkit.
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
};
}
}
#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */
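As the comment above notes, StreamAccessor is the one CUDA-dependent entry point; a hedged sketch of mixing an OpenCV Stream with a raw runtime call (the helper name below is invented for illustration) might be:

#include "opencv2/gpu/stream_accessor.hpp"

// Hypothetical helper: issue a raw CUDA runtime call on the same queue OpenCV uses.
// Assumes 'm' owns a contiguous allocation; error checking omitted for brevity.
void zeroAsyncOnOpencvStream(cv::gpu::GpuMat& m, cv::gpu::Stream& s)
{
    cudaStream_t raw = cv::gpu::StreamAccessor::getStream(s);
    cudaMemsetAsync(m.data, 0, m.step * m.rows, raw);  // ordered with OpenCV's async ops on 's'
}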

View File

@ -72,6 +72,33 @@ void cv::gpu::Stream::release() { throw_nogpu(); }
#include "opencv2/gpu/stream_accessor.hpp"
namespace
{
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#endif
inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (err < 0)
{
std::ostringstream msg;
msg << "NPP API Call Error: " << err;
cv::gpu::error(msg.str().c_str(), file, line, func);
}
}
}
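These locally duplicated helpers are what let the file drop the gpu module's internal error utilities; their use is simply wrapping each runtime status (illustrative call):

// Illustrative: every CUDA runtime status is routed through cv::gpu::error
// with file/line context (and the function name when built with GCC).
void syncChecked(cudaStream_t stream)
{
    cudaSafeCall( cudaStreamSynchronize(stream) );  // throws on any non-cudaSuccess status
}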
namespace cv { namespace gpu
{
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);

View File

@ -41,6 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/gpumat.hpp"
using namespace cv;
using namespace cv::gpu;
@ -178,7 +179,7 @@ bool cv::gpu::CudaMem::empty() const
return data == 0;
}
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
#if !defined (HAVE_CUDA)
void cv::gpu::registerPageLocked(Mat&) { throw_nogpu(); }
void cv::gpu::unregisterPageLocked(Mat&) { throw_nogpu(); }
@ -188,6 +189,33 @@ void cv::gpu::CudaMem::release() { throw_nogpu(); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_nogpu(); return GpuMat(); }
#else /* !defined (HAVE_CUDA) */
#include <cuda_runtime_api.h>
namespace
{
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#endif
inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (err < 0)
{
std::ostringstream msg;
msg << "NPP API Call Error: " << err;
cv::gpu::error(msg.str().c_str(), file, line, func);
}
}
}
void cv::gpu::registerPageLocked(Mat& m)
{

View File

@ -56,7 +56,7 @@ namespace cv
// In this case you have to install Cuda Toolkit.
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
CV_EXPORTS static cudaStream_t getStream(const cv::gpu::Stream& stream);
};
}
}

View File

@ -9,7 +9,7 @@ macro(ocv_glob_cuda_powered_module_sources)
set(lib_device_hdrs "")
if (HAVE_CUDA AND lib_device_srcs)
ocv_include_directories(${CUDA_INCLUDE_DIRS})
ocv_include_directories(${CUDA_INCLUDE_DIRS} "${OpenCV_SOURCE_DIR}/modules/gpu/include")
file(GLOB_RECURSE lib_device_hdrs "src/cuda/*.hpp")
ocv_cuda_compile(device_objs ${lib_device_srcs})
@ -35,7 +35,7 @@ set(cuda_include "")
if (HAVE_CUDA)
set(cuda_deps ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
endif()
ocv_add_module(softcascade opencv_core opencv_imgproc opencv_ml OPTIONAL opencv_gpu ${cuda_deps})
ocv_add_module(softcascade opencv_core opencv_imgproc opencv_ml OPTIONAL ${cuda_deps})
if(HAVE_CUDA)
ocv_module_include_directories(${CUDA_INCLUDE_DIRS})

View File

@ -49,6 +49,8 @@
namespace cv { namespace softcascade { namespace device {
typedef unsigned char uchar;
template <int FACTOR>
__device__ __forceinline__ uchar shrink(const uchar* ptr, const int pitch, const int y, const int x)
{

View File

@ -480,8 +480,8 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
flds.mask.create( rois.cols / shr, rois.rows / shr, rois.type());
cv::gpu::resize(rois, flds.genRoiTmp, cv::Size(), 1.f / shr, 1.f / shr, CV_INTER_AREA, s);
cv::gpu::transpose(flds.genRoiTmp, flds.mask, s);
//cv::gpu::resize(rois, flds.genRoiTmp, cv::Size(), 1.f / shr, 1.f / shr, CV_INTER_AREA, s);
//cv::gpu::transpose(flds.genRoiTmp, flds.mask, s);
if (type == CV_8UC3)
{
@ -491,7 +491,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
flds.createLevels(image.rows, image.cols);
flds.preprocessor->apply(image, flds.shrunk);
cv::gpu::integralBuffered(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
//cv::gpu::integralBuffered(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
}
else
{
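For orientation, the disabled calls above can be approximated on the host roughly as follows (a CPU sketch with standard cv:: functions; names and the function itself are illustrative, not from the commit):

// CPU approximation of the disabled steps: shrink the ROI mask, transpose it,
// and build the integral image of the shrunk channels (needs opencv2/imgproc).
void cpuEquivalent(const cv::Mat& rois, const cv::Mat& shrunk, int shr,
                   cv::Mat& mask, cv::Mat& hogluv)
{
    cv::Mat tmp;
    cv::resize(rois, tmp, cv::Size(), 1.0 / shr, 1.0 / shr, cv::INTER_AREA);
    cv::transpose(tmp, mask);
    cv::integral(shrunk, hogluv, CV_32S);
}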
@ -546,12 +546,12 @@ struct GenricPreprocessor : public cv::softcascade::ChannelsProcessor
channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
setZero(channels, s);
cv::gpu::cvtColor(frame, gray, CV_BGR2GRAY, s);
//cv::gpu::cvtColor(frame, gray, CV_BGR2GRAY, s);
createHogBins(s);
createLuvBins(frame, s);
cv::gpu::resize(channels, shrunk, cv::Size(), 1.f / shrinkage, 1.f / shrinkage, CV_INTER_AREA, s);
//cv::gpu::resize(channels, shrunk, cv::Size(), 1.f / shrinkage, 1.f / shrinkage, CV_INTER_AREA, s);
}
private:
@ -566,20 +566,20 @@ private:
cv::gpu::GpuMat dfdx(fplane, cv::Rect(0, 0, fw, fh));
cv::gpu::GpuMat dfdy(fplane, cv::Rect(0, fh, fw, fh));
cv::gpu::Sobel(gray, dfdx, CV_32F, 1, 0, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
cv::gpu::Sobel(gray, dfdy, CV_32F, 0, 1, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
//cv::gpu::Sobel(gray, dfdx, CV_32F, 1, 0, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
//cv::gpu::Sobel(gray, dfdy, CV_32F, 0, 1, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
cv::gpu::GpuMat mag(fplane, cv::Rect(0, 2 * fh, fw, fh));
cv::gpu::GpuMat ang(fplane, cv::Rect(0, 3 * fh, fw, fh));
cv::gpu::cartToPolar(dfdx, dfdy, mag, ang, true, s);
//cv::gpu::cartToPolar(dfdx, dfdy, mag, ang, true, s);
// normalize magnitude to uchar interval and angles to 6 bins
cv::gpu::GpuMat nmag(fplane, cv::Rect(0, 4 * fh, fw, fh));
cv::gpu::GpuMat nang(fplane, cv::Rect(0, 5 * fh, fw, fh));
cv::gpu::multiply(mag, cv::Scalar::all(1.f / (8 *::log(2.0f))), nmag, 1, -1, s);
cv::gpu::multiply(ang, cv::Scalar::all(1.f / 60.f), nang, 1, -1, s);
//cv::gpu::multiply(mag, cv::Scalar::all(1.f / (8 *::log(2.0f))), nmag, 1, -1, s);
//cv::gpu::multiply(ang, cv::Scalar::all(1.f / 60.f), nang, 1, -1, s);
//create uchar magnitude
cv::gpu::GpuMat cmag(channels, cv::Rect(0, fh * HOG_BINS, fw, fh));
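The commented-out sequence above is the HOG channel math (derivatives, magnitude and angle, normalization into 6 bins); a host-side sketch of the same computation, using standard CPU functions and illustrative names, might be:

// CPU sketch of the gradient-channel computation the disabled gpu calls performed
// (needs opencv2/imgproc and <cmath>; 'gray' is a CV_8UC1 frame).
void hogChannelsCpu(const cv::Mat& gray, cv::Mat& nmag, cv::Mat& nang)
{
    cv::Mat dfdx, dfdy, mag, ang;
    cv::Sobel(gray, dfdx, CV_32F, 1, 0, 3);
    cv::Sobel(gray, dfdy, CV_32F, 0, 1, 3);
    cv::cartToPolar(dfdx, dfdy, mag, ang, true);              // angle in degrees, 0..360
    mag.convertTo(nmag, CV_32F, 1.0 / (8 * std::log(2.0)));   // normalize magnitude to uchar range
    ang.convertTo(nang, CV_32F, 1.0 / 60.0);                  // 360 / 60 -> 6 orientation bins
}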
@ -597,7 +597,7 @@ private:
static const int fw = colored.cols;
static const int fh = colored.rows;
cv::gpu::cvtColor(colored, luv, CV_BGR2Luv, s);
//cv::gpu::cvtColor(colored, luv, CV_BGR2Luv, s);
std::vector<cv::gpu::GpuMat> splited;
for(int i = 0; i < LUV_BINS; ++i)
@ -605,7 +605,7 @@ private:
splited.push_back(cv::gpu::GpuMat(channels, cv::Rect(0, fh * (7 + i), fw, fh)));
}
cv::gpu::split(luv, splited, s);
//cv::gpu::split(luv, splited, s);
}
enum {HOG_BINS = 6, LUV_BINS = 3};
@ -631,7 +631,7 @@ struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor
virtual void apply(InputArray _frame, OutputArray _shrunk, cv::gpu::Stream& s = cv::gpu::Stream::Null())
{
const cv::gpu::GpuMat frame = _frame.getGpuMat();
cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);
//cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);
_shrunk.create(frame.rows * (4 + bins) / shrinkage, frame.cols / shrinkage, CV_8UC1);
cv::gpu::GpuMat shrunk = _shrunk.getGpuMat();
@ -639,7 +639,7 @@ struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor
channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
setZero(channels, s);
cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
//cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
cv::softcascade::device::gray2hog(gray, channels(cv::Rect(0, 0, bgr.cols, bgr.rows * (bins + 1))), bins);
cv::gpu::GpuMat luv(channels, cv::Rect(0, bgr.rows * (bins + 1), bgr.cols, bgr.rows * 3));