diff --git a/modules/core/include/opencv2/core/gpumat.hpp b/modules/core/include/opencv2/core/gpumat.hpp index be757a935..bb436ec55 100644 --- a/modules/core/include/opencv2/core/gpumat.hpp +++ b/modules/core/include/opencv2/core/gpumat.hpp @@ -50,6 +50,141 @@ namespace cv { namespace gpu { + //////////////////////////////// CudaMem //////////////////////////////// + // CudaMem is limited cv::Mat with page locked memory allocation. + // Page locked memory is only needed for async and faster coping to GPU. + // It is convertable to cv::Mat header without reference counting + // so you can use it with other opencv functions. + + // Page-locks the matrix m memory and maps it for the device(s) + CV_EXPORTS void registerPageLocked(Mat& m); + // Unmaps the memory of matrix m, and makes it pageable again. + CV_EXPORTS void unregisterPageLocked(Mat& m); + + class CV_EXPORTS CudaMem + { + public: + enum { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2, ALLOC_WRITE_COMBINED = 4 }; + + CudaMem(); + CudaMem(const CudaMem& m); + + CudaMem(int rows, int cols, int type, int _alloc_type = ALLOC_PAGE_LOCKED); + CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED); + + + //! creates from cv::Mat with coping data + explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED); + + ~CudaMem(); + + CudaMem& operator = (const CudaMem& m); + + //! returns deep copy of the matrix, i.e. the data is copied + CudaMem clone() const; + + //! allocates new matrix data unless the matrix already has specified size and type. + void create(int rows, int cols, int type, int alloc_type = ALLOC_PAGE_LOCKED); + void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED); + + //! decrements reference counter and released memory if needed. + void release(); + + //! returns matrix header with disabled reference counting for CudaMem data. + Mat createMatHeader() const; + operator Mat() const; + + //! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware. + GpuMat createGpuMatHeader() const; + operator GpuMat() const; + + //returns if host memory can be mapperd to gpu address space; + static bool canMapHostMemory(); + + // Please see cv::Mat for descriptions + bool isContinuous() const; + size_t elemSize() const; + size_t elemSize1() const; + int type() const; + int depth() const; + int channels() const; + size_t step1() const; + Size size() const; + bool empty() const; + + + // Please see cv::Mat for descriptions + int flags; + int rows, cols; + size_t step; + + uchar* data; + int* refcount; + + uchar* datastart; + uchar* dataend; + + int alloc_type; + }; + + + //////////////////////////////// CudaStream //////////////////////////////// + // Encapculates Cuda Stream. Provides interface for async coping. + // Passed to each function that supports async kernel execution. + // Reference counting is enabled + + class CV_EXPORTS Stream + { + public: + Stream(); + ~Stream(); + + Stream(const Stream&); + Stream& operator =(const Stream&); + + bool queryIfComplete(); + void waitForCompletion(); + + //! downloads asynchronously + // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat) + void enqueueDownload(const GpuMat& src, CudaMem& dst); + void enqueueDownload(const GpuMat& src, Mat& dst); + + //! uploads asynchronously + // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI) + void enqueueUpload(const CudaMem& src, GpuMat& dst); + void enqueueUpload(const Mat& src, GpuMat& dst); + + //! copy asynchronously + void enqueueCopy(const GpuMat& src, GpuMat& dst); + + //! memory set asynchronously + void enqueueMemSet(GpuMat& src, Scalar val); + void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask); + + //! converts matrix type, ex from float to uchar depending on type + void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0); + + //! adds a callback to be called on the host after all currently enqueued items in the stream have completed + typedef void (*StreamCallback)(Stream& stream, int status, void* userData); + void enqueueHostCallback(StreamCallback callback, void* userData); + + static Stream& Null(); + + operator bool() const; + + private: + struct Impl; + + explicit Stream(Impl* impl); + void create(); + void release(); + + Impl *impl; + + friend struct StreamAccessor; + }; + //////////////////////////////// Initialization & Info //////////////////////// //! This is the only function that do not throw exceptions if the library is compiled without Cuda. diff --git a/modules/core/include/opencv2/core/stream_accessor.hpp b/modules/core/include/opencv2/core/stream_accessor.hpp new file mode 100644 index 000000000..6a1a0bddd --- /dev/null +++ b/modules/core/include/opencv2/core/stream_accessor.hpp @@ -0,0 +1,64 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__ +#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__ + +#include "opencv2/gpu/gpu.hpp" +#include "cuda_runtime_api.h" + +namespace cv +{ + namespace gpu + { + // This is only header file that depends on Cuda. All other headers are independent. + // So if you use OpenCV binaries you do noot need to install Cuda Toolkit. + // But of you wanna use GPU by yourself, may get cuda stream instance using the class below. + // In this case you have to install Cuda Toolkit. + struct StreamAccessor + { + CV_EXPORTS static cudaStream_t getStream(const Stream& stream); + }; + } +} + +#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */ \ No newline at end of file diff --git a/modules/gpu/src/cudastream.cpp b/modules/core/src/cudastream.cpp similarity index 92% rename from modules/gpu/src/cudastream.cpp rename to modules/core/src/cudastream.cpp index e302fd1fc..a10807cf2 100644 --- a/modules/gpu/src/cudastream.cpp +++ b/modules/core/src/cudastream.cpp @@ -72,6 +72,33 @@ void cv::gpu::Stream::release() { throw_nogpu(); } #include "opencv2/gpu/stream_accessor.hpp" +namespace +{ +#if defined(__GNUC__) + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__) + #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__) +#else /* defined(__CUDACC__) || defined(__MSVC__) */ + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) + #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__) +#endif + + inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") + { + if (cudaSuccess != err) + cv::gpu::error(cudaGetErrorString(err), file, line, func); + } + + inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") + { + if (err < 0) + { + std::ostringstream msg; + msg << "NPP API Call Error: " << err; + cv::gpu::error(msg.str().c_str(), file, line, func); + } + } +} + namespace cv { namespace gpu { void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream); diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/core/src/matrix_operations.cpp similarity index 89% rename from modules/gpu/src/matrix_operations.cpp rename to modules/core/src/matrix_operations.cpp index b19524842..3b82df572 100644 --- a/modules/gpu/src/matrix_operations.cpp +++ b/modules/core/src/matrix_operations.cpp @@ -41,6 +41,7 @@ //M*/ #include "precomp.hpp" +#include "opencv2/core/gpumat.hpp" using namespace cv; using namespace cv::gpu; @@ -178,7 +179,7 @@ bool cv::gpu::CudaMem::empty() const return data == 0; } -#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) +#if !defined (HAVE_CUDA) void cv::gpu::registerPageLocked(Mat&) { throw_nogpu(); } void cv::gpu::unregisterPageLocked(Mat&) { throw_nogpu(); } @@ -188,6 +189,33 @@ void cv::gpu::CudaMem::release() { throw_nogpu(); } GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_nogpu(); return GpuMat(); } #else /* !defined (HAVE_CUDA) */ +#include +namespace +{ +#if defined(__GNUC__) + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__) + #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__) +#else /* defined(__CUDACC__) || defined(__MSVC__) */ + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) + #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__) +#endif + + inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") + { + if (cudaSuccess != err) + cv::gpu::error(cudaGetErrorString(err), file, line, func); + } + + inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") + { + if (err < 0) + { + std::ostringstream msg; + msg << "NPP API Call Error: " << err; + cv::gpu::error(msg.str().c_str(), file, line, func); + } + } +} void cv::gpu::registerPageLocked(Mat& m) { diff --git a/modules/gpu/include/opencv2/gpu/stream_accessor.hpp b/modules/gpu/include/opencv2/gpu/stream_accessor.hpp index abafc613d..1797749b6 100644 --- a/modules/gpu/include/opencv2/gpu/stream_accessor.hpp +++ b/modules/gpu/include/opencv2/gpu/stream_accessor.hpp @@ -56,7 +56,7 @@ namespace cv // In this case you have to install Cuda Toolkit. struct StreamAccessor { - CV_EXPORTS static cudaStream_t getStream(const Stream& stream); + CV_EXPORTS static cudaStream_t getStream(const cv::gpu::Stream& stream); }; } } diff --git a/modules/softcascade/CMakeLists.txt b/modules/softcascade/CMakeLists.txt index f19241a4a..ee6e89299 100644 --- a/modules/softcascade/CMakeLists.txt +++ b/modules/softcascade/CMakeLists.txt @@ -9,7 +9,7 @@ macro(ocv_glob_cuda_powered_module_sources) set(lib_device_hdrs "") if (HAVE_CUDA AND lib_device_srcs) - ocv_include_directories(${CUDA_INCLUDE_DIRS}) + ocv_include_directories(${CUDA_INCLUDE_DIRS} "${OpenCV_SOURCE_DIR}/modules/gpu/include") file(GLOB_RECURSE lib_device_hdrs "src/cuda/*.hpp") ocv_cuda_compile(device_objs ${lib_device_srcs}) @@ -35,7 +35,7 @@ set(cuda_include "") if (NAVE_CUDA) set(cuda_deps ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) endif() -ocv_add_module(softcascade opencv_core opencv_imgproc opencv_ml OPTIONAL opencv_gpu ${cuda_deps}) +ocv_add_module(softcascade opencv_core opencv_imgproc opencv_ml OPTIONAL ${cuda_deps}) if(HAVE_CUDA) ocv_module_include_directories(${CUDA_INCLUDE_DIRS}) diff --git a/modules/softcascade/src/cuda/icf-sc.cu b/modules/softcascade/src/cuda/icf-sc.cu index d339ef0d3..19b20db9c 100644 --- a/modules/softcascade/src/cuda/icf-sc.cu +++ b/modules/softcascade/src/cuda/icf-sc.cu @@ -49,6 +49,8 @@ namespace cv { namespace softcascade { namespace device { +typedef unsigned char uchar; + template __device__ __forceinline__ uchar shrink(const uchar* ptr, const int pitch, const int y, const int x) { diff --git a/modules/softcascade/src/detector_cuda.cpp b/modules/softcascade/src/detector_cuda.cpp index 6c920332f..a013a16fe 100644 --- a/modules/softcascade/src/detector_cuda.cpp +++ b/modules/softcascade/src/detector_cuda.cpp @@ -480,8 +480,8 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp flds.mask.create( rois.cols / shr, rois.rows / shr, rois.type()); - cv::gpu::resize(rois, flds.genRoiTmp, cv::Size(), 1.f / shr, 1.f / shr, CV_INTER_AREA, s); - cv::gpu::transpose(flds.genRoiTmp, flds.mask, s); + //cv::gpu::resize(rois, flds.genRoiTmp, cv::Size(), 1.f / shr, 1.f / shr, CV_INTER_AREA, s); + //cv::gpu::transpose(flds.genRoiTmp, flds.mask, s); if (type == CV_8UC3) { @@ -491,7 +491,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp flds.createLevels(image.rows, image.cols); flds.preprocessor->apply(image, flds.shrunk); - cv::gpu::integralBuffered(flds.shrunk, flds.hogluv, flds.integralBuffer, s); + //cv::gpu::integralBuffered(flds.shrunk, flds.hogluv, flds.integralBuffer, s); } else { @@ -546,12 +546,12 @@ struct GenricPreprocessor : public cv::softcascade::ChannelsProcessor channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1); setZero(channels, s); - cv::gpu::cvtColor(frame, gray, CV_BGR2GRAY, s); + //cv::gpu::cvtColor(frame, gray, CV_BGR2GRAY, s); createHogBins(s); createLuvBins(frame, s); - cv::gpu::resize(channels, shrunk, cv::Size(), 1.f / shrinkage, 1.f / shrinkage, CV_INTER_AREA, s); + //cv::gpu::resize(channels, shrunk, cv::Size(), 1.f / shrinkage, 1.f / shrinkage, CV_INTER_AREA, s); } private: @@ -566,20 +566,20 @@ private: cv::gpu::GpuMat dfdx(fplane, cv::Rect(0, 0, fw, fh)); cv::gpu::GpuMat dfdy(fplane, cv::Rect(0, fh, fw, fh)); - cv::gpu::Sobel(gray, dfdx, CV_32F, 1, 0, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s); - cv::gpu::Sobel(gray, dfdy, CV_32F, 0, 1, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s); + //cv::gpu::Sobel(gray, dfdx, CV_32F, 1, 0, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s); + //cv::gpu::Sobel(gray, dfdy, CV_32F, 0, 1, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s); cv::gpu::GpuMat mag(fplane, cv::Rect(0, 2 * fh, fw, fh)); cv::gpu::GpuMat ang(fplane, cv::Rect(0, 3 * fh, fw, fh)); - cv::gpu::cartToPolar(dfdx, dfdy, mag, ang, true, s); + //cv::gpu::cartToPolar(dfdx, dfdy, mag, ang, true, s); // normalize magnitude to uchar interval and angles to 6 bins cv::gpu::GpuMat nmag(fplane, cv::Rect(0, 4 * fh, fw, fh)); cv::gpu::GpuMat nang(fplane, cv::Rect(0, 5 * fh, fw, fh)); - cv::gpu::multiply(mag, cv::Scalar::all(1.f / (8 *::log(2.0f))), nmag, 1, -1, s); - cv::gpu::multiply(ang, cv::Scalar::all(1.f / 60.f), nang, 1, -1, s); + //cv::gpu::multiply(mag, cv::Scalar::all(1.f / (8 *::log(2.0f))), nmag, 1, -1, s); + //cv::gpu::multiply(ang, cv::Scalar::all(1.f / 60.f), nang, 1, -1, s); //create uchar magnitude cv::gpu::GpuMat cmag(channels, cv::Rect(0, fh * HOG_BINS, fw, fh)); @@ -597,7 +597,7 @@ private: static const int fw = colored.cols; static const int fh = colored.rows; - cv::gpu::cvtColor(colored, luv, CV_BGR2Luv, s); + //cv::gpu::cvtColor(colored, luv, CV_BGR2Luv, s); std::vector splited; for(int i = 0; i < LUV_BINS; ++i) @@ -605,7 +605,7 @@ private: splited.push_back(cv::gpu::GpuMat(channels, cv::Rect(0, fh * (7 + i), fw, fh))); } - cv::gpu::split(luv, splited, s); + //cv::gpu::split(luv, splited, s); } enum {HOG_BINS = 6, LUV_BINS = 3}; @@ -631,7 +631,7 @@ struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor virtual void apply(InputArray _frame, OutputArray _shrunk, cv::gpu::Stream& s = cv::gpu::Stream::Null()) { const cv::gpu::GpuMat frame = _frame.getGpuMat(); - cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0); + //cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0); _shrunk.create(frame.rows * (4 + bins) / shrinkage, frame.cols / shrinkage, CV_8UC1); cv::gpu::GpuMat shrunk = _shrunk.getGpuMat(); @@ -639,7 +639,7 @@ struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1); setZero(channels, s); - cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY); + //cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY); cv::softcascade::device::gray2hog(gray, channels(cv::Rect(0, 0, bgr.cols, bgr.rows * (bins + 1))), bins); cv::gpu::GpuMat luv(channels, cv::Rect(0, bgr.rows * (bins + 1), bgr.cols, bgr.rows * 3));