Moved common GPU utility functionality to gpu_private.hpp
@@ -45,10 +45,8 @@

#include <cuda_runtime.h>
#include "opencv2/core/cuda_devptrs.hpp"

#ifndef CV_PI
#define CV_PI 3.1415926535897932384626433832795
#endif
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"

#ifndef CV_PI_F
#ifndef CV_PI
@@ -58,16 +56,22 @@
#endif
#endif

namespace cv { namespace gpu { namespace cuda {
static inline void checkError(cudaError_t err, const char* file, const int line, const char* func)
{
    if (cudaSuccess != err)
        cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
}
}}}

#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#define cvCudaSafeCall(expr) cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#define cvCudaSafeCall(expr) cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, "")
#endif
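
// Illustration (not part of this commit): either macro wraps a raw CUDA
// runtime call so a failure is routed through cv::error with file/line
// context. safeCallSketch and the allocation size are illustrative only.
static inline void safeCallSketch()
{
    void* devPtr = 0;
    cvCudaSafeCall( cudaMalloc(&devPtr, 1024) );  // throws cv::Exception(GpuApiCallError) on failure
    cvCudaSafeCall( cudaFree(devPtr) );
}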

namespace cv { namespace gpu
{
    void error(const char *error_string, const char *file, const int line, const char *func);

    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
    {
        return reinterpret_cast<size_t>(ptr) % size == 0;
@@ -79,38 +83,32 @@ namespace cv { namespace gpu
    }
}}

static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
    if (cudaSuccess != err)
        cv::gpu::error(cudaGetErrorString(err), file, line, func);
}

namespace cv { namespace gpu
{
    __host__ __device__ __forceinline__ int divUp(int total, int grain)
    enum
    {
        return (total + grain - 1) / grain;
    }

    namespace cuda
    {
        using cv::gpu::divUp;
        BORDER_REFLECT101_GPU = 0,
        BORDER_REPLICATE_GPU,
        BORDER_CONSTANT_GPU,
        BORDER_REFLECT_GPU,
        BORDER_WRAP_GPU
    };

#ifdef __CUDACC__
    typedef unsigned char uchar;
    typedef unsigned short ushort;
    typedef signed char schar;
#if defined (_WIN32) || defined (__APPLE__)
    typedef unsigned int uint;
#endif
    namespace cuda
    {
        __host__ __device__ __forceinline__ int divUp(int total, int grain)
        {
            return (total + grain - 1) / grain;
        }

        template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
        {
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
            cvCudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
        }
#endif // __CUDACC__
    }
#endif // __CUDACC__
}}
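
// Illustration (not part of this commit): divUp is the usual round-up
// division for sizing a launch grid. launchSketch and myKernel are
// hypothetical names used only for this sketch.
template <typename T, typename D> __global__ void myKernel(PtrStepSz<T> src, PtrStepSz<D> dst);
template <typename T, typename D>
void launchSketch(const PtrStepSz<T>& src, const PtrStepSz<D>& dst)
{
    const dim3 threads(16, 16);
    const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));
    // the grid is rounded up so every pixel is covered even when
    // cols/rows are not multiples of the block size
    myKernel<<<grid, threads>>>(src, dst);
}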

@@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );

if (stream == 0)
    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}

template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -332,10 +332,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);

transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );

if (stream == 0)
    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<> struct TransformDispatcher<true>
@@ -345,7 +345,7 @@ namespace cv { namespace gpu { namespace cuda
{
typedef TransformFunctorTraits<UnOp> ft;

StaticAssert<ft::smart_shift != 1>::check();
CV_StaticAssert(ft::smart_shift != 1, "");

if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
    !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
@@ -358,10 +358,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);

transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );

if (stream == 0)
    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}

template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -369,7 +369,7 @@ namespace cv { namespace gpu { namespace cuda
{
typedef TransformFunctorTraits<BinOp> ft;

StaticAssert<ft::smart_shift != 1>::check();
CV_StaticAssert(ft::smart_shift != 1, "");

if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
    !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
@@ -383,10 +383,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);

transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );

if (stream == 0)
    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
} // namespace transform_detail

@@ -58,9 +58,6 @@ namespace cv
// Simple lightweight structures that encapsulate information about an image on device.
// They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile

template <bool expr> struct StaticAssert;
template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};

template<typename T> struct DevPtr
{
    typedef T elem_type;
modules/core/include/opencv2/core/gpu_private.hpp (new file, 134 additions)
@@ -0,0 +1,134 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CORE_GPU_PRIVATE_HPP__
#define __OPENCV_CORE_GPU_PRIVATE_HPP__

#ifndef __OPENCV_BUILD
#  error this is a private header which should not be used from outside of the OpenCV library
#endif

#include "cvconfig.h"

#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"

#ifdef HAVE_CUDA
#  include <cuda.h>
#  include <cuda_runtime.h>
#  include <npp.h>
#  include "opencv2/core/stream_accessor.hpp"
#  include "opencv2/core/cuda/common.hpp"

#  define CUDART_MINIMUM_REQUIRED_VERSION 4020

#  if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#    error "Insufficient Cuda Runtime library version, please update it."
#  endif

#  if defined(CUDA_ARCH_BIN_OR_PTX_10)
#    error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
#  endif
#endif

namespace cv { namespace gpu {
    CV_EXPORTS cv::String getNppErrorMessage(int code);

    static inline void checkNppError(int code, const char* file, const int line, const char* func)
    {
        if (code < 0)
            cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line);
    }

    // Converts CPU border extrapolation mode into GPU internal analogue.
    // Returns true if the GPU analogue exists, false otherwise.
    CV_EXPORTS bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
}}
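
// Caller-side sketch (not part of the header): convert a CPU border flag and
// fail loudly when no GPU analogue exists. toGpuBorderSketch is a
// hypothetical helper name.
static inline int toGpuBorderSketch(int cpuBorderType)
{
    int gpuBorderType = 0;
    if (!cv::gpu::tryConvertToGpuBorderType(cpuBorderType, gpuBorderType))
        CV_Error(cv::Error::StsBadArg, "unsupported border extrapolation mode");
    return gpuBorderType; // e.g. BORDER_REFLECT101_GPU for IPL_BORDER_REFLECT_101
}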

#ifndef HAVE_CUDA

static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without GPU support"); }

#else // HAVE_CUDA

static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }

#if defined(__GNUC__)
#define nppSafeCall(expr) cv::gpu::checkNppError(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define nppSafeCall(expr) cv::gpu::checkNppError(expr, __FILE__, __LINE__, "")
#endif

namespace cv { namespace gpu
{
    template<int n> struct NPPTypeTraits;
    template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
    template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
    template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
    template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
    template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
    template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
    template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
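
    // For illustration (not in the diff): a template parameterized by an
    // OpenCV depth constant can recover the matching NPP pointer type at
    // compile time, e.g.
    //   typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t; // Npp32f when SDEPTH == CV_32F
    //   const src_t* ptr = src.ptr<src_t>();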

    class NppStreamHandler
    {
    public:
        inline explicit NppStreamHandler(cudaStream_t newStream)
        {
            oldStream = nppGetStream();
            nppSetStream(newStream);
        }

        inline ~NppStreamHandler()
        {
            nppSetStream(oldStream);
        }

    private:
        cudaStream_t oldStream;
    };
}}
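
// Usage sketch (not in the diff): the handler is a scope guard; any NPP call
// inside the block runs on the supplied stream, and the destructor restores
// the previous NPP stream. nppOnStreamSketch is a hypothetical helper, and
// the NPP primitive is left as a placeholder.
inline void nppOnStreamSketch(cudaStream_t s)
{
    cv::gpu::NppStreamHandler h(s);
    // nppSafeCall( nppi...(...) );  // any NPP primitive enqueued here uses s
} // previous stream restored by ~NppStreamHandler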

#endif // HAVE_CUDA

#endif // __OPENCV_CORE_GPU_PRIVATE_HPP__

@@ -454,11 +454,6 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);

CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat);

////////////////////////////////////////////////////////////////////////
// Error handling

CV_EXPORTS void error(const char* error_string, const char* file, const int line, const char* func = "");

////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////

@@ -43,17 +43,20 @@
#ifndef __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
#define __OPENCV_CUDA_STREAM_ACCESSOR_HPP__

#include "opencv2/core/gpumat.hpp"
#include "cuda_runtime_api.h"
#include <cuda_runtime.h>
#include "opencv2/core/cvdef.h"

// This is the only header file that depends on CUDA. All other headers are independent.
// So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
// But if you want to use the GPU yourself, you may get a CUDA stream instance using the class below.
// In this case you have to install the CUDA Toolkit.

namespace cv
{
    namespace gpu
    {
        // This is the only header file that depends on CUDA. All other headers are independent.
        // So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
        // But if you want to use the GPU yourself, you may get a CUDA stream instance using the class below.
        // In this case you have to install the CUDA Toolkit.
        class Stream;

        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
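
// Caller-side sketch (not part of this header): user code that owns a
// cv::gpu::Stream can extract the raw cudaStream_t to enqueue its own work;
// myUserKernel is a hypothetical kernel name.
//
//     cv::gpu::Stream stream;
//     cudaStream_t raw = cv::gpu::StreamAccessor::getStream(stream);
//     myUserKernel<<<grid, block, 0, raw>>>(args); // interleaves with OpenCV's async calls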
@@ -124,31 +124,31 @@ namespace cv { namespace gpu { namespace cuda

void writeScalar(const uchar* vals)
{
    cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
    cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
}
void writeScalar(const schar* vals)
{
    cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
    cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
}
void writeScalar(const ushort* vals)
{
    cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
    cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
}
void writeScalar(const short* vals)
{
    cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
    cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
}
void writeScalar(const int* vals)
{
    cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
    cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
}
void writeScalar(const float* vals)
{
    cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
    cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
}
void writeScalar(const double* vals)
{
    cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
    cvCudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
}

template<typename T>
@@ -186,10 +186,10 @@ namespace cv { namespace gpu { namespace cuda
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );

if (stream == 0)
    cudaSafeCall ( cudaDeviceSynchronize() );
    cvCudaSafeCall ( cudaDeviceSynchronize() );
}

template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
@@ -209,10 +209,10 @@ namespace cv { namespace gpu { namespace cuda
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );

if (stream == 0)
    cudaSafeCall ( cudaDeviceSynchronize() );
    cvCudaSafeCall ( cudaDeviceSynchronize() );
}

template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
@@ -290,8 +290,8 @@ namespace cv { namespace gpu { namespace cuda
template<typename T, typename D, typename S>
void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
{
    cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
    cudaSafeCall( cudaSetDoubleForDevice(&beta) );
    cvCudaSafeCall( cudaSetDoubleForDevice(&alpha) );
    cvCudaSafeCall( cudaSetDoubleForDevice(&beta) );
    Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
    cv::gpu::cuda::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
}
@@ -46,33 +46,30 @@ using namespace cv;
using namespace cv::gpu;

#if !defined (HAVE_CUDA)
#define throw_nogpu() CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")

cv::gpu::Stream::Stream() { throw_nogpu(); }
cv::gpu::Stream::Stream() { throw_no_cuda(); }
cv::gpu::Stream::~Stream() {}
cv::gpu::Stream::Stream(const Stream&) { throw_nogpu(); }
Stream& cv::gpu::Stream::operator=(const Stream&) { throw_nogpu(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return false; }
void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_nogpu(); }
void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_nogpu(); }
Stream& cv::gpu::Stream::Null() { throw_nogpu(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
cv::gpu::Stream::Stream(Impl*) { throw_nogpu(); }
void cv::gpu::Stream::create() { throw_nogpu(); }
void cv::gpu::Stream::release() { throw_nogpu(); }
cv::gpu::Stream::Stream(const Stream&) { throw_no_cuda(); }
Stream& cv::gpu::Stream::operator=(const Stream&) { throw_no_cuda(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_no_cuda(); return false; }
void cv::gpu::Stream::waitForCompletion() { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_no_cuda(); }
Stream& cv::gpu::Stream::Null() { throw_no_cuda(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_no_cuda(); return false; }
cv::gpu::Stream::Stream(Impl*) { throw_no_cuda(); }
void cv::gpu::Stream::create() { throw_no_cuda(); }
void cv::gpu::Stream::release() { throw_no_cuda(); }

#else /* !defined (HAVE_CUDA) */

#include "opencv2/core/stream_accessor.hpp"

namespace cv { namespace gpu
{
    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
@@ -134,14 +131,14 @@ bool cv::gpu::Stream::queryIfComplete()
    if (err == cudaErrorNotReady || err == cudaSuccess)
        return err == cudaSuccess;

    cudaSafeCall(err);
    cvCudaSafeCall(err);
    return false;
}

void cv::gpu::Stream::waitForCompletion()
{
    cudaStream_t stream = Impl::getStream(impl);
    cudaSafeCall( cudaStreamSynchronize(stream) );
    cvCudaSafeCall( cudaStreamSynchronize(stream) );
}

void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
@@ -151,7 +148,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}

void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
@@ -160,7 +157,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}

void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
@@ -169,7 +166,7 @@ void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}

void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
@@ -178,7 +175,7 @@ void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}

void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
@@ -187,7 +184,7 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
}

void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
@@ -204,7 +201,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)

    if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
        cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
        return;
    }

@@ -215,7 +212,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
    if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
    {
        int ival = saturate_cast<uchar>(val[0]);
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
        cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
        return;
    }
}
@@ -302,7 +299,7 @@ void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userDat

    cudaStream_t stream = Impl::getStream(impl);

    cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
    cvCudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
#else
    (void) callback;
    (void) userData;
@@ -331,7 +328,7 @@ void cv::gpu::Stream::create()
    release();

    cudaStream_t stream;
    cudaSafeCall( cudaStreamCreate( &stream ) );
    cvCudaSafeCall( cudaStreamCreate( &stream ) );

    impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));

@@ -343,7 +340,7 @@ void cv::gpu::Stream::release()
{
    if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
    {
        cudaSafeCall( cudaStreamDestroy(impl->stream) );
        cvCudaSafeCall( cudaStreamDestroy(impl->stream) );
        cv::fastFree(impl);
    }
}
@@ -45,64 +45,38 @@
using namespace cv;
using namespace cv::gpu;

#ifndef HAVE_CUDA

#define throw_nogpu CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")

#else // HAVE_CUDA

namespace
{
#if defined(__GNUC__)
    #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
    #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#endif

    inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
    {
        if (err < 0)
        {
            String msg = cv::format("NPP API Call Error: %d", err);
            cv::gpu::error(msg.c_str(), file, line, func);
        }
    }
}

#endif // HAVE_CUDA

//////////////////////////////// Initialization & Info ////////////////////////

#ifndef HAVE_CUDA

int cv::gpu::getCudaEnabledDeviceCount() { return 0; }

void cv::gpu::setDevice(int) { throw_nogpu; }
int cv::gpu::getDevice() { throw_nogpu; return 0; }
void cv::gpu::setDevice(int) { throw_no_cuda(); }
int cv::gpu::getDevice() { throw_no_cuda(); return 0; }

void cv::gpu::resetDevice() { throw_nogpu; }
void cv::gpu::resetDevice() { throw_no_cuda(); }

bool cv::gpu::deviceSupports(FeatureSet) { throw_nogpu; return false; }
bool cv::gpu::deviceSupports(FeatureSet) { throw_no_cuda(); return false; }

bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::has(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasBin(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::has(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasBin(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_no_cuda(); return false; }

size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_nogpu; return 0; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu; }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_nogpu; return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_nogpu; return 0; }
bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_nogpu; return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu; return false; }
void cv::gpu::DeviceInfo::query() { throw_nogpu; }
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_no_cuda(); return 0; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_no_cuda(); }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_no_cuda(); return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_no_cuda(); return 0; }
bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_no_cuda(); return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_no_cuda(); return false; }
void cv::gpu::DeviceInfo::query() { throw_no_cuda(); }

void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu; }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu; }
void cv::gpu::printCudaDeviceInfo(int) { throw_no_cuda(); }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_no_cuda(); }

#else // HAVE_CUDA

@@ -117,25 +91,25 @@ int cv::gpu::getCudaEnabledDeviceCount()
    if (error == cudaErrorNoDevice)
        return 0;

    cudaSafeCall( error );
    cvCudaSafeCall( error );
    return count;
}

void cv::gpu::setDevice(int device)
{
    cudaSafeCall( cudaSetDevice( device ) );
    cvCudaSafeCall( cudaSetDevice( device ) );
}

int cv::gpu::getDevice()
{
    int device;
    cudaSafeCall( cudaGetDevice( &device ) );
    cvCudaSafeCall( cudaGetDevice( &device ) );
    return device;
}

void cv::gpu::resetDevice()
{
    cudaSafeCall( cudaDeviceReset() );
    cvCudaSafeCall( cudaDeviceReset() );
}

namespace
@@ -328,7 +302,7 @@ namespace
    if (!props_[devID])
    {
        props_[devID] = new cudaDeviceProp;
        cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
        cvCudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
    }

    return props_[devID];
@@ -348,7 +322,7 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory)
    if (prevDeviceID != device_id_)
        setDevice(device_id_);

    cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
    cvCudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );

    if (prevDeviceID != device_id_)
        setDevice(prevDeviceID);
@@ -434,8 +408,8 @@ void cv::gpu::printCudaDeviceInfo(int device)
    printf("Device count: %d\n", count);

    int driverVersion = 0, runtimeVersion = 0;
    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
    cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
    cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

    const char *computeMode[] = {
        "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
@@ -449,7 +423,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
    for(int dev = beg; dev < end; ++dev)
    {
        cudaDeviceProp prop;
        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
        cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

        printf("\nDevice %d: \"%s\"\n", dev, prop.name);
        printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
@@ -511,13 +485,13 @@ void cv::gpu::printShortCudaDeviceInfo(int device)
    int end = valid ? device+1 : count;

    int driverVersion = 0, runtimeVersion = 0;
    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
    cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
    cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

    for(int dev = beg; dev < end; ++dev)
    {
        cudaDeviceProp prop;
        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
        cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

        const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
        printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
@@ -846,18 +820,18 @@ namespace
class EmptyFuncTable : public GpuFuncTable
{
public:
    void copy(const Mat&, GpuMat&) const { throw_nogpu; }
    void copy(const GpuMat&, Mat&) const { throw_nogpu; }
    void copy(const GpuMat&, GpuMat&) const { throw_nogpu; }
    void copy(const Mat&, GpuMat&) const { throw_no_cuda(); }
    void copy(const GpuMat&, Mat&) const { throw_no_cuda(); }
    void copy(const GpuMat&, GpuMat&) const { throw_no_cuda(); }

    void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_nogpu; }
    void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_no_cuda(); }

    void convert(const GpuMat&, GpuMat&) const { throw_nogpu; }
    void convert(const GpuMat&, GpuMat&, double, double) const { throw_nogpu; }
    void convert(const GpuMat&, GpuMat&) const { throw_no_cuda(); }
    void convert(const GpuMat&, GpuMat&, double, double) const { throw_no_cuda(); }

    void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_nogpu; }
    void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_no_cuda(); }

    void mallocPitch(void**, size_t*, size_t, size_t) const { throw_nogpu; }
    void mallocPitch(void**, size_t*, size_t, size_t) const { throw_no_cuda(); }
    void free(void*) const {}
};

@@ -1009,7 +983,7 @@ namespace

    nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );

    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
@@ -1024,7 +998,7 @@ namespace

    nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );

    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}
};

@@ -1066,7 +1040,7 @@ namespace

    nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );

    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
@@ -1083,7 +1057,7 @@ namespace

    nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );

    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}
};

@@ -1114,7 +1088,7 @@ namespace

    nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
@@ -1131,7 +1105,7 @@ namespace

    nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}
};

@@ -1157,7 +1131,7 @@ namespace

    nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

    cudaSafeCall( cudaDeviceSynchronize() );
    cvCudaSafeCall( cudaDeviceSynchronize() );
}
};

@@ -1174,15 +1148,15 @@ namespace
public:
    void copy(const Mat& src, GpuMat& dst) const
    {
        cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
        cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
    }
    void copy(const GpuMat& src, Mat& dst) const
    {
        cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
        cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
    }
    void copy(const GpuMat& src, GpuMat& dst) const
    {
        cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
        cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
    }

    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
@@ -1327,7 +1301,7 @@ namespace
{
    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
        cvCudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
        return;
    }

@@ -1338,7 +1312,7 @@ namespace
    if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
    {
        int val = saturate_cast<uchar>(s[0]);
        cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
        cvCudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
        return;
    }
}
@@ -1393,7 +1367,7 @@ namespace

    void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
    {
        cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
        cvCudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
    }

    void free(void* devPtr) const
@@ -1551,18 +1525,117 @@ void cv::gpu::GpuMat::release()
////////////////////////////////////////////////////////////////////////
// Error handling

void cv::gpu::error(const char *error_string, const char *file, const int line, const char *func)
#ifdef HAVE_CUDA

namespace
{
    int code = CV_GpuApiCallError;
#define error_entry(entry) { entry, #entry }

    if (std::uncaught_exception())
    struct ErrorEntry
    {
        const char* errorStr = cvErrorStr(code);
        const char* function = func ? func : "unknown function";
        int code;
        const char* str;
    };

        fprintf(stderr, "OpenCV Error: %s(%s) in %s, file %s, line %d", errorStr, error_string, function, file, line);
        fflush(stderr);
    }
    else
        cv::error( cv::Exception(code, error_string, func, file, line) );
    struct ErrorEntryComparer
    {
        int code;
        ErrorEntryComparer(int code_) : code(code_) {}
        bool operator()(const ErrorEntry& e) const { return e.code == code; }
    };

    const ErrorEntry npp_errors [] =
    {
        error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
        error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
        error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),

#if defined (_MSC_VER)
        error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif

        error_entry( NPP_BAD_ARG_ERROR ),
        error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_TEXTURE_BIND_ERROR ),
        error_entry( NPP_COEFF_ERROR ),
        error_entry( NPP_RECT_ERROR ),
        error_entry( NPP_QUAD_ERROR ),
        error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
        error_entry( NPP_NOT_EVEN_STEP_ERROR ),
        error_entry( NPP_INTERPOLATION_ERROR ),
        error_entry( NPP_RESIZE_FACTOR_ERROR ),
        error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
        error_entry( NPP_MEMFREE_ERR ),
        error_entry( NPP_MEMSET_ERR ),
        error_entry( NPP_MEMCPY_ERROR ),
        error_entry( NPP_MEM_ALLOC_ERR ),
        error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_MIRROR_FLIP_ERR ),
        error_entry( NPP_INVALID_INPUT ),
        error_entry( NPP_ALIGNMENT_ERROR ),
        error_entry( NPP_STEP_ERROR ),
        error_entry( NPP_SIZE_ERROR ),
        error_entry( NPP_POINTER_ERROR ),
        error_entry( NPP_NULL_POINTER_ERROR ),
        error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
        error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
        error_entry( NPP_ERROR ),
        error_entry( NPP_NO_ERROR ),
        error_entry( NPP_SUCCESS ),
        error_entry( NPP_WARNING ),
        error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
        error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
        error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
        error_entry( NPP_DOUBLE_SIZE_WARNING ),
        error_entry( NPP_ODD_ROI_WARNING )
    };

    const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
}

#endif

String cv::gpu::getNppErrorMessage(int code)
{
#ifndef HAVE_CUDA
    (void) code;
    return String();
#else
    size_t idx = std::find_if(npp_errors, npp_errors + npp_error_num, ErrorEntryComparer(code)) - npp_errors;

    const char* msg = (idx != npp_error_num) ? npp_errors[idx].str : "Unknown error code";
    String str = cv::format("%s [Code = %d]", msg, code);

    return str;
#endif
}
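
// Illustration (not in the diff): a raw NPP status can be formatted directly.
// For a known status the result is "<status name> [Code = <numeric value>]",
// otherwise "Unknown error code [Code = <numeric value>]", where the numeric
// value comes from the NPP headers.
//   cv::String msg = cv::gpu::getNppErrorMessage(NPP_STEP_ERROR);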

bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
{
#ifndef HAVE_CUDA
    (void) cpuBorderType;
    (void) gpuBorderType;
    return false;
#else
    switch (cpuBorderType)
    {
    case IPL_BORDER_REFLECT_101:
        gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;
        return true;
    case IPL_BORDER_REPLICATE:
        gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
        return true;
    case IPL_BORDER_CONSTANT:
        gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
        return true;
    case IPL_BORDER_REFLECT:
        gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;
        return true;
    case IPL_BORDER_WRAP:
        gpuBorderType = cv::gpu::BORDER_WRAP_GPU;
        return true;
    default:
        return false;
    };
#endif
}
@@ -41,7 +41,6 @@
//M*/

#include "precomp.hpp"
#include "opencv2/core/gpumat.hpp"

using namespace cv;
using namespace cv::gpu;
@@ -181,30 +180,29 @@ bool cv::gpu::CudaMem::empty() const

#if !defined (HAVE_CUDA)

void cv::gpu::registerPageLocked(Mat&) { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
void cv::gpu::unregisterPageLocked(Mat&) { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
void cv::gpu::CudaMem::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/)
{ CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
bool cv::gpu::CudaMem::canMapHostMemory() { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); return false; }
void cv::gpu::CudaMem::release() { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); return GpuMat(); }
void cv::gpu::registerPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::unregisterPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::CudaMem::create(int, int, int, int) { throw_no_cuda(); }
bool cv::gpu::CudaMem::canMapHostMemory() { throw_no_cuda(); return false; }
void cv::gpu::CudaMem::release() { throw_no_cuda(); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_no_cuda(); return GpuMat(); }

#else /* !defined (HAVE_CUDA) */

void cv::gpu::registerPageLocked(Mat& m)
{
    cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
    cvCudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
}

void cv::gpu::unregisterPageLocked(Mat& m)
{
    cudaSafeCall( cudaHostUnregister(m.ptr()) );
    cvCudaSafeCall( cudaHostUnregister(m.ptr()) );
}

bool cv::gpu::CudaMem::canMapHostMemory()
{
    cudaDeviceProp prop;
    cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
    cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
    return (prop.canMapHostMemory != 0) ? true : false;
}

@@ -222,7 +220,7 @@ namespace
void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
{
    if (_alloc_type == ALLOC_ZEROCOPY && !canMapHostMemory())
        cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
        CV_Error(cv::Error::GpuApiCallError, "ZeroCopy is not supported by current device");

    _type &= Mat::TYPE_MASK;
    if( rows == _rows && cols == _cols && type() == _type && data )
@@ -239,7 +237,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
    if (_alloc_type == ALLOC_ZEROCOPY)
    {
        cudaDeviceProp prop;
        cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
        cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
        step = alignUpStep(step, prop.textureAlignment);
    }
    int64 _nettosize = (int64)step*rows;
@@ -254,10 +252,10 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)

    switch (alloc_type)
    {
    case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
    case ALLOC_ZEROCOPY: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
    case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
    default: cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
    case ALLOC_PAGE_LOCKED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
    case ALLOC_ZEROCOPY: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
    case ALLOC_WRITE_COMBINED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
    default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
    }

    datastart = data = (uchar*)ptr;
@@ -270,15 +268,13 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)

GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
{
    CV_Assert( alloc_type == ALLOC_ZEROCOPY );

    GpuMat res;
    if (alloc_type == ALLOC_ZEROCOPY)
    {
        void *pdev;
        cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
        res = GpuMat(rows, cols, type(), pdev, step);
    }
    else
        cv::gpu::error("Zero-copy is not supported or memory was allocated without zero-copy flag", __FILE__, __LINE__);

    void *pdev;
    cvCudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
    res = GpuMat(rows, cols, type(), pdev, step);

    return res;
}
@@ -287,7 +283,7 @@ void cv::gpu::CudaMem::release()
{
    if( refcount && CV_XADD(refcount, -1) == 1 )
    {
        cudaSafeCall( cudaFreeHost(datastart ) );
        cvCudaSafeCall( cudaFreeHost(datastart ) );
        fastFree(refcount);
    }
    data = datastart = dataend = 0;
@@ -41,16 +41,12 @@
//M*/

#include "precomp.hpp"
#include "opencv2/core/opengl.hpp"
#include "opencv2/core/gpumat.hpp"

#ifdef HAVE_OPENGL
#include "gl_core_3_1.hpp"

#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#endif
# include "gl_core_3_1.hpp"
# ifdef HAVE_CUDA
#   include <cuda_gl_interop.h>
# endif
#endif

using namespace cv;
@@ -59,15 +55,9 @@ using namespace cv::gpu;
namespace
{
#ifndef HAVE_OPENGL
    void throw_nogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); }
    void throw_no_ogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); }
#else
    void throw_nogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); }

#ifndef HAVE_CUDA
    void throw_nocuda() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); }
#else
    void throw_nocuda() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }
#endif
    void throw_no_ogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); }
#endif

    bool checkError(const char* file, const int line, const char* func = 0)
@@ -137,13 +127,13 @@ void cv::gpu::setGlDevice(int device)
{
#ifndef HAVE_OPENGL
    (void) device;
    throw_nogl();
    throw_no_ogl();
#else
#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER)
    (void) device;
    throw_nocuda();
    throw_no_cuda();
#else
    cudaSafeCall( cudaGLSetGLDevice(device) );
    cvCudaSafeCall( cudaGLSetGLDevice(device) );
#endif
#endif
}

@@ -194,7 +184,7 @@ namespace
        return;

    cudaGraphicsResource_t resource;
    cudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
    cvCudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );

    release();

@@ -227,7 +217,7 @@ namespace
CudaResource::GraphicsMapHolder::GraphicsMapHolder(cudaGraphicsResource_t* resource, cudaStream_t stream) : resource_(resource), stream_(stream)
{
    if (resource_)
        cudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
        cvCudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
}

CudaResource::GraphicsMapHolder::~GraphicsMapHolder()
@@ -250,14 +240,14 @@ namespace

    void* dst;
    size_t size;
    cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
    cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );

    CV_DbgAssert( width * height == size );

    if (stream == 0)
        cudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
        cvCudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
    else
        cudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
        cvCudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
}

void CudaResource::copyTo(void* dst, size_t dpitch, size_t width, size_t height, cudaStream_t stream)
@@ -269,14 +259,14 @@ namespace

    void* src;
    size_t size;
    cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
    cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );

    CV_DbgAssert( width * height == size );

    if (stream == 0)
        cudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
        cvCudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
    else
        cudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
        cvCudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
}

void* CudaResource::map(cudaStream_t stream)
@@ -287,7 +277,7 @@ namespace

    void* ptr;
    size_t size;
    cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
    cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );

    h.reset();

@@ -476,7 +466,7 @@ void cv::ogl::Buffer::Impl::unmapHost()
|
||||
cv::ogl::Buffer::Buffer() : rows_(0), cols_(0), type_(0)
|
||||
{
|
||||
#ifndef HAVE_OPENGL
|
||||
throw_nogl();
|
||||
throw_no_ogl();
|
||||
#else
|
||||
impl_ = Impl::empty();
|
||||
#endif
|
||||
@@ -490,7 +480,7 @@ cv::ogl::Buffer::Buffer(int arows, int acols, int atype, unsigned int abufId, bo
|
||||
(void) atype;
|
||||
(void) abufId;
|
||||
(void) autoRelease;
|
||||
throw_nogl();
|
||||
throw_no_ogl();
|
||||
#else
|
||||
impl_ = new Impl(abufId, autoRelease);
|
||||
rows_ = arows;
|
||||
@@ -506,7 +496,7 @@ cv::ogl::Buffer::Buffer(Size asize, int atype, unsigned int abufId, bool autoRel
|
||||
(void) atype;
|
||||
(void) abufId;
|
||||
(void) autoRelease;
|
||||
throw_nogl();
|
||||
throw_no_ogl();
|
||||
#else
|
||||
impl_ = new Impl(abufId, autoRelease);
|
||||
rows_ = asize.height;
|
||||
@@ -531,7 +521,7 @@ cv::ogl::Buffer::Buffer(InputArray arr, Target target, bool autoRelease) : rows_
     (void) arr;
     (void) target;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     const int kind = arr.kind();

@@ -578,7 +568,7 @@ void cv::ogl::Buffer::create(int arows, int acols, int atype, Target target, boo
     (void) atype;
     (void) target;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     if (rows_ != arows || cols_ != acols || type_ != atype)
     {
@@ -607,7 +597,7 @@ void cv::ogl::Buffer::setAutoRelease(bool flag)
 {
 #ifndef HAVE_OPENGL
     (void) flag;
-    throw_nogl();
+    throw_no_ogl();
 #else
     impl_->setAutoRelease(flag);
 #endif
@@ -619,7 +609,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
     (void) arr;
     (void) target;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     const int kind = arr.kind();

@@ -647,7 +637,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
     case _InputArray::GPU_MAT:
         {
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-            throw_nocuda();
+            throw_no_cuda();
 #else
             GpuMat dmat = arr.getGpuMat();
             impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows);
@@ -672,7 +662,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr, Target target, bool autoRelease) c
     (void) arr;
     (void) target;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     const int kind = arr.kind();

@@ -693,7 +683,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr, Target target, bool autoRelease) c
     case _InputArray::GPU_MAT:
         {
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-            throw_nocuda();
+            throw_no_cuda();
 #else
             GpuMat& dmat = arr.getGpuMatRef();
             dmat.create(rows_, cols_, type_);
@@ -719,7 +709,7 @@ cv::ogl::Buffer cv::ogl::Buffer::clone(Target target, bool autoRelease) const
 #ifndef HAVE_OPENGL
     (void) target;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
     return cv::ogl::Buffer();
 #else
     ogl::Buffer buf;
@@ -732,7 +722,7 @@ void cv::ogl::Buffer::bind(Target target) const
 {
 #ifndef HAVE_OPENGL
     (void) target;
-    throw_nogl();
+    throw_no_ogl();
 #else
     impl_->bind(target);
 #endif
@@ -742,7 +732,7 @@ void cv::ogl::Buffer::unbind(Target target)
 {
 #ifndef HAVE_OPENGL
     (void) target;
-    throw_nogl();
+    throw_no_ogl();
 #else
     gl::BindBuffer(target, 0);
     CV_CheckGlError();
@@ -753,7 +743,7 @@ Mat cv::ogl::Buffer::mapHost(Access access)
 {
 #ifndef HAVE_OPENGL
     (void) access;
-    throw_nogl();
+    throw_no_ogl();
     return Mat();
 #else
     return Mat(rows_, cols_, type_, impl_->mapHost(access));
@@ -763,7 +753,7 @@ Mat cv::ogl::Buffer::mapHost(Access access)
 void cv::ogl::Buffer::unmapHost()
 {
 #ifndef HAVE_OPENGL
-    throw_nogl();
+    throw_no_ogl();
 #else
     return impl_->unmapHost();
 #endif
@@ -772,11 +762,11 @@ void cv::ogl::Buffer::unmapHost()
 GpuMat cv::ogl::Buffer::mapDevice()
 {
 #ifndef HAVE_OPENGL
-    throw_nogl();
+    throw_no_ogl();
     return GpuMat();
 #else
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-    throw_nocuda();
+    throw_no_cuda();
     return GpuMat();
 #else
     return GpuMat(rows_, cols_, type_, impl_->mapDevice());
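The mapDevice/unmapDevice pair above brackets CUDA work on an OpenGL buffer object. A hedged usage sketch (assumes a build with both OpenGL and CUDA enabled, and that create() has the default arguments its declaration suggests; not code from this commit):

    cv::ogl::Buffer buf;
    buf.create(480, 640, CV_8UC4);         // create() is shown earlier in this diff
    cv::gpu::GpuMat d = buf.mapDevice();   // expose the GL buffer as device memory
    // ... launch CUDA work against d ...
    buf.unmapDevice();                     // release before using the buffer in GL again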
@@ -787,10 +777,10 @@ GpuMat cv::ogl::Buffer::mapDevice()
 void cv::ogl::Buffer::unmapDevice()
 {
 #ifndef HAVE_OPENGL
-    throw_nogl();
+    throw_no_ogl();
 #else
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-    throw_nocuda();
+    throw_no_cuda();
 #else
     impl_->unmapDevice();
 #endif
@@ -800,7 +790,7 @@ void cv::ogl::Buffer::unmapDevice()
 unsigned int cv::ogl::Buffer::bufId() const
 {
 #ifndef HAVE_OPENGL
-    throw_nogl();
+    throw_no_ogl();
     return 0;
 #else
     return impl_->bufId();
@@ -926,7 +916,7 @@ void cv::ogl::Texture2D::Impl::bind() const
 cv::ogl::Texture2D::Texture2D() : rows_(0), cols_(0), format_(NONE)
 {
 #ifndef HAVE_OPENGL
-    throw_nogl();
+    throw_no_ogl();
 #else
     impl_ = Impl::empty();
 #endif
@@ -940,7 +930,7 @@ cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, unsigned int
     (void) aformat;
     (void) atexId;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     impl_ = new Impl(atexId, autoRelease);
     rows_ = arows;
@@ -956,7 +946,7 @@ cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, unsigned int atexId, b
     (void) aformat;
     (void) atexId;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     impl_ = new Impl(atexId, autoRelease);
     rows_ = asize.height;
@@ -980,7 +970,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
 #ifndef HAVE_OPENGL
     (void) arr;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     const int kind = arr.kind();

@@ -1016,7 +1006,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
     case _InputArray::GPU_MAT:
         {
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-            throw_nocuda();
+            throw_no_cuda();
 #else
             GpuMat dmat = arr.getGpuMat();
             ogl::Buffer buf(dmat, ogl::Buffer::PIXEL_UNPACK_BUFFER);
@@ -1051,7 +1041,7 @@ void cv::ogl::Texture2D::create(int arows, int acols, Format aformat, bool autoR
     (void) acols;
     (void) aformat;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     if (rows_ != arows || cols_ != acols || format_ != aformat)
     {
@@ -1080,7 +1070,7 @@ void cv::ogl::Texture2D::setAutoRelease(bool flag)
 {
 #ifndef HAVE_OPENGL
     (void) flag;
-    throw_nogl();
+    throw_no_ogl();
 #else
     impl_->setAutoRelease(flag);
 #endif
@@ -1091,7 +1081,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
 #ifndef HAVE_OPENGL
     (void) arr;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     const int kind = arr.kind();

@@ -1129,7 +1119,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
     case _InputArray::GPU_MAT:
         {
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-            throw_nocuda();
+            throw_no_cuda();
 #else
             GpuMat dmat = arr.getGpuMat();
             ogl::Buffer buf(dmat, ogl::Buffer::PIXEL_UNPACK_BUFFER);
@@ -1158,7 +1148,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
     (void) arr;
     (void) ddepth;
     (void) autoRelease;
-    throw_nogl();
+    throw_no_ogl();
 #else
     const int kind = arr.kind();

@@ -1180,7 +1170,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
     case _InputArray::GPU_MAT:
         {
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-            throw_nocuda();
+            throw_no_cuda();
 #else
             ogl::Buffer buf(rows_, cols_, CV_MAKE_TYPE(ddepth, cn), ogl::Buffer::PIXEL_PACK_BUFFER);
             buf.bind(ogl::Buffer::PIXEL_PACK_BUFFER);
@@ -1207,7 +1197,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
 void cv::ogl::Texture2D::bind() const
 {
 #ifndef HAVE_OPENGL
-    throw_nogl();
+    throw_no_ogl();
 #else
     impl_->bind();
 #endif
@@ -1216,7 +1206,7 @@ void cv::ogl::Texture2D::bind() const
 unsigned int cv::ogl::Texture2D::texId() const
 {
 #ifndef HAVE_OPENGL
-    throw_nogl();
+    throw_no_ogl();
     return 0;
 #else
     return impl_->texId();
@@ -1331,7 +1321,7 @@ void cv::ogl::Arrays::setAutoRelease(bool flag)
 void cv::ogl::Arrays::bind() const
 {
 #ifndef HAVE_OPENGL
-    throw_nogl();
+    throw_no_ogl();
 #else
     CV_Assert( texCoord_.empty() || texCoord_.size().area() == size_ );
     CV_Assert( normal_.empty() || normal_.size().area() == size_ );
@@ -1416,7 +1406,7 @@ void cv::ogl::render(const ogl::Texture2D& tex, Rect_<double> wndRect, Rect_<dou
     (void) tex;
     (void) wndRect;
     (void) texRect;
-    throw_nogl();
+    throw_no_ogl();
 #else
     if (!tex.empty())
     {
@@ -1488,7 +1478,7 @@ void cv::ogl::render(const ogl::Arrays& arr, int mode, Scalar color)
     (void) arr;
     (void) mode;
     (void) color;
-    throw_nogl();
+    throw_no_ogl();
 #else
     if (!arr.empty())
     {
@@ -1508,7 +1498,7 @@ void cv::ogl::render(const ogl::Arrays& arr, InputArray indices, int mode, Scala
     (void) indices;
     (void) mode;
     (void) color;
-    throw_nogl();
+    throw_no_ogl();
 #else
     if (!arr.empty() && !indices.empty())
     {
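Every CUDA runtime call in this file now goes through cvCudaSafeCall, and the stubs for builds without OpenGL or CUDA throw via throw_no_ogl()/throw_no_cuda(). A minimal call-site sketch (illustrative only; it assumes, as the usages above imply, that cvCudaSafeCall raises an OpenCV error whenever the wrapped call does not return cudaSuccess):

    const size_t bytes = 1024;
    void* devPtr = 0;
    cvCudaSafeCall( cudaMalloc(&devPtr, bytes) );    // throws on failure instead of returning an error code
    cvCudaSafeCall( cudaMemset(devPtr, 0, bytes) );
    cvCudaSafeCall( cudaFree(devPtr) );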
@@ -46,8 +46,10 @@
 #include "opencv2/core/utility.hpp"
 #include "opencv2/core/core_c.h"
 #include "opencv2/core/gpumat.hpp"
 #include "opencv2/core/opengl.hpp"

+#include "opencv2/core/private.hpp"
+#include "opencv2/core/gpu_private.hpp"

 #include <assert.h>
 #include <ctype.h>
@@ -64,37 +66,6 @@
 #define GET_OPTIMIZED(func) (func)
 #endif

-#ifdef HAVE_CUDA
-
-# include <cuda_runtime.h>
-# include <npp.h>
-
-# define CUDART_MINIMUM_REQUIRED_VERSION 4020
-# define NPP_MINIMUM_REQUIRED_VERSION 4200
-
-# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
-#   error "Insufficient Cuda Runtime library version, please update it."
-# endif
-
-# if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
-#   error "Insufficient NPP version, please update it."
-# endif
-
-# if defined(__GNUC__)
-#   define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
-# else
-#   define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
-# endif
-
-static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
-{
-    if (cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), file, line, func);
-}
-
-#else
-# define cudaSafeCall(expr)
-#endif //HAVE_CUDA

 namespace cv
 {
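With the file-local ___cudaSafeCall helper and its cudaSafeCall macro removed, translation units are expected to pick up the checked wrapper from the shared private headers included above. A sketch of a call site after the move (hedged: uploadZeros is a made-up helper for illustration, and gpu_private.hpp is assumed to supply cvCudaSafeCall):

    #include "opencv2/core/gpu_private.hpp"  // assumed to supply the cvCudaSafeCall wrapper

    static void uploadZeros(void* devPtr, size_t bytes)
    {
        cvCudaSafeCall( cudaMemset(devPtr, 0, bytes) );  // was: cudaSafeCall( ... )
    }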