moved common gpu utility functionality to gpu_private.hpp

Vladislav Vinogradov
2013-04-03 17:09:31 +04:00
parent 28b1e81883
commit 204a19b431
117 changed files with 1670 additions and 1721 deletions

View File

@@ -45,10 +45,8 @@
#include <cuda_runtime.h>
#include "opencv2/core/cuda_devptrs.hpp"
#ifndef CV_PI
#define CV_PI 3.1415926535897932384626433832795
#endif
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
#ifndef CV_PI_F
#ifndef CV_PI
@@ -58,16 +56,22 @@
#endif
#endif
namespace cv { namespace gpu { namespace cuda {
static inline void checkError(cudaError_t err, const char* file, const int line, const char* func)
{
if (cudaSuccess != err)
cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
}
}}}
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#define cvCudaSafeCall(expr) cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#define cvCudaSafeCall(expr) cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, "")
#endif
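For context, a minimal host-side sketch of how the new cvCudaSafeCall macro is meant to be used; the helper function and buffer below are illustrative and not part of this commit:

// hypothetical helper, assumes a CUDA-enabled build and that this header is included
static void uploadExample(const unsigned char* hostData, size_t bytes)
{
    void* devPtr = 0;
    // each runtime call is wrapped: on failure checkError() forwards the
    // cudaGetErrorString() text to cv::error() with cv::Error::GpuApiCallError
    cvCudaSafeCall( cudaMalloc(&devPtr, bytes) );
    cvCudaSafeCall( cudaMemcpy(devPtr, hostData, bytes, cudaMemcpyHostToDevice) );
    cvCudaSafeCall( cudaFree(devPtr) );
}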
namespace cv { namespace gpu
{
void error(const char *error_string, const char *file, const int line, const char *func);
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
{
return reinterpret_cast<size_t>(ptr) % size == 0;
@@ -79,38 +83,32 @@ namespace cv { namespace gpu
}
}}
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
namespace cv { namespace gpu
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)
enum
{
return (total + grain - 1) / grain;
}
namespace cuda
{
using cv::gpu::divUp;
BORDER_REFLECT101_GPU = 0,
BORDER_REPLICATE_GPU,
BORDER_CONSTANT_GPU,
BORDER_REFLECT_GPU,
BORDER_WRAP_GPU
};
#ifdef __CUDACC__
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef signed char schar;
#if defined (_WIN32) || defined (__APPLE__)
typedef unsigned int uint;
#endif
namespace cuda
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
}
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
cvCudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
}
#endif // __CUDACC__
}
#endif // __CUDACC__
}}
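As a rough illustration of how divUp and cvCudaSafeCall fit together in a kernel launch, here is a hedged sketch following the same pattern as the transform dispatchers below; the kernel name and block size are hypothetical:

// illustrative .cu code, compiled with nvcc so that the __CUDACC__ branch above is active
__global__ void fillKernel(cv::gpu::PtrStepSzb dst, unsigned char value)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < dst.cols && y < dst.rows)
        dst(y, x) = value;
}

void fillCaller(cv::gpu::PtrStepSzb dst, unsigned char value, cudaStream_t stream)
{
    const dim3 block(32, 8);
    // divUp rounds the grid size up so that every row and column is covered
    const dim3 grid(cv::gpu::cuda::divUp(dst.cols, block.x), cv::gpu::cuda::divUp(dst.rows, block.y));
    fillKernel<<<grid, block, 0, stream>>>(dst, value);
    cvCudaSafeCall( cudaGetLastError() );
    if (stream == 0)
        cvCudaSafeCall( cudaDeviceSynchronize() );
}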

View File

@@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -332,10 +332,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<> struct TransformDispatcher<true>
@@ -345,7 +345,7 @@ namespace cv { namespace gpu { namespace cuda
{
typedef TransformFunctorTraits<UnOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
@@ -358,10 +358,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -369,7 +369,7 @@ namespace cv { namespace gpu { namespace cuda
{
typedef TransformFunctorTraits<BinOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
@@ -383,10 +383,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
} // namespace transform_detail
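The old StaticAssert<...>::check() helper is replaced by the core CV_StaticAssert macro throughout; a tiny hedged illustration of the equivalent compile-time check (the condition is illustrative):

// fails at compile time if the condition is false
CV_StaticAssert(sizeof(float) == 4, "float is expected to be 32-bit");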

View File

@@ -58,9 +58,6 @@ namespace cv
// Simple lightweight structures that encapsulates information about an image on device.
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
template <bool expr> struct StaticAssert;
template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
template<typename T> struct DevPtr
{
typedef T elem_type;

View File

@@ -0,0 +1,134 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_GPU_PRIVATE_HPP__
#define __OPENCV_CORE_GPU_PRIVATE_HPP__
#ifndef __OPENCV_BUILD
# error this is a private header which should not be used from outside of the OpenCV library
#endif
#include "cvconfig.h"
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
#ifdef HAVE_CUDA
# include <cuda.h>
# include <cuda_runtime.h>
# include <npp.h>
# include "opencv2/core/stream_accessor.hpp"
# include "opencv2/core/cuda/common.hpp"
# define CUDART_MINIMUM_REQUIRED_VERSION 4020
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
# error "Insufficient Cuda Runtime library version, please update it."
# endif
# if defined(CUDA_ARCH_BIN_OR_PTX_10)
# error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
# endif
#endif
namespace cv { namespace gpu {
CV_EXPORTS cv::String getNppErrorMessage(int code);
static inline void checkNppError(int code, const char* file, const int line, const char* func)
{
if (code < 0)
cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line);
}
// Converts CPU border extrapolation mode into GPU internal analogue.
// Returns true if the GPU analogue exists, false otherwise.
CV_EXPORTS bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
}}
#ifndef HAVE_CUDA
static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without GPU support"); }
#else // HAVE_CUDA
static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }
#if defined(__GNUC__)
#define nppSafeCall(expr) cv::gpu::checkNppError(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define nppSafeCall(expr) cv::gpu::checkNppError(expr, __FILE__, __LINE__, "")
#endif
namespace cv { namespace gpu
{
template<int n> struct NPPTypeTraits;
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
class NppStreamHandler
{
public:
inline explicit NppStreamHandler(cudaStream_t newStream)
{
oldStream = nppGetStream();
nppSetStream(newStream);
}
inline ~NppStreamHandler()
{
nppSetStream(oldStream);
}
private:
cudaStream_t oldStream;
};
}}
#endif // HAVE_CUDA
#endif // __OPENCV_CORE_GPU_PRIVATE_HPP__
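A short sketch of the intended call pattern for the NPP helpers declared above; the wrapper function and the use of nppiSet_8u_C1R are only illustrative (assumes HAVE_CUDA and the GpuMat header):

// hypothetical wrapper built on gpu_private.hpp
void setToZero(cv::gpu::GpuMat& img, cudaStream_t stream)
{
    NppiSize sz;
    sz.width  = img.cols;
    sz.height = img.rows;

    // RAII: redirect NPP to this stream for the scope of the call, restore the old stream on exit
    cv::gpu::NppStreamHandler h(stream);

    // nppSafeCall forwards negative status codes to cv::error() via checkNppError()
    nppSafeCall( nppiSet_8u_C1R(0, img.ptr<Npp8u>(), static_cast<int>(img.step), sz) );

    if (stream == 0)
        cvCudaSafeCall( cudaDeviceSynchronize() );
}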

View File

@@ -454,11 +454,6 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat);
////////////////////////////////////////////////////////////////////////
// Error handling
CV_EXPORTS void error(const char* error_string, const char* file, const int line, const char* func = "");
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////

View File

@@ -43,17 +43,20 @@
#ifndef __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
#define __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
#include "opencv2/core/gpumat.hpp"
#include "cuda_runtime_api.h"
#include <cuda_runtime.h>
#include "opencv2/core/cvdef.h"
// This is the only header file that depends on CUDA. All other headers are independent.
// So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
// But if you want to use the GPU yourself, you may get a CUDA stream instance using the class below.
// In this case you have to install Cuda Toolkit.
namespace cv
{
namespace gpu
{
// This is the only header file that depends on CUDA. All other headers are independent.
// So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
// But if you want to use the GPU yourself, you may get a CUDA stream instance using the class below.
// In this case you have to install Cuda Toolkit.
class Stream;
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
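Since the comment above describes how user code can obtain the underlying stream, here is a minimal hedged sketch of interop with a custom kernel (myCustomKernel is assumed to exist and is not part of OpenCV):

// requires the CUDA Toolkit, as the comment explains
void launchOnOpenCVStream(cv::gpu::Stream& s, cv::gpu::PtrStepSzb img)
{
    cudaStream_t raw = cv::gpu::StreamAccessor::getStream(s);
    // enqueue user work on the same stream OpenCV uses, so ordering is preserved
    myCustomKernel<<<1, 256, 0, raw>>>(img);
    if (cudaGetLastError() != cudaSuccess)
        CV_Error(cv::Error::GpuApiCallError, "kernel launch failed");
}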

View File

@@ -124,31 +124,31 @@ namespace cv { namespace gpu { namespace cuda
void writeScalar(const uchar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
}
void writeScalar(const schar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
}
void writeScalar(const ushort* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
}
void writeScalar(const short* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
}
void writeScalar(const int* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
}
void writeScalar(const float* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
}
void writeScalar(const double* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
}
template<typename T>
@@ -186,10 +186,10 @@ namespace cv { namespace gpu { namespace cuda
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
cvCudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
@@ -209,10 +209,10 @@ namespace cv { namespace gpu { namespace cuda
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
cvCudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
@@ -290,8 +290,8 @@ namespace cv { namespace gpu { namespace cuda
template<typename T, typename D, typename S>
void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
{
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
cvCudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cvCudaSafeCall( cudaSetDoubleForDevice(&beta) );
Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
cv::gpu::cuda::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
}

View File

@@ -46,33 +46,30 @@ using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA)
#define throw_nogpu() CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
cv::gpu::Stream::Stream() { throw_nogpu(); }
cv::gpu::Stream::Stream() { throw_no_cuda(); }
cv::gpu::Stream::~Stream() {}
cv::gpu::Stream::Stream(const Stream&) { throw_nogpu(); }
Stream& cv::gpu::Stream::operator=(const Stream&) { throw_nogpu(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return false; }
void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_nogpu(); }
void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_nogpu(); }
Stream& cv::gpu::Stream::Null() { throw_nogpu(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
cv::gpu::Stream::Stream(Impl*) { throw_nogpu(); }
void cv::gpu::Stream::create() { throw_nogpu(); }
void cv::gpu::Stream::release() { throw_nogpu(); }
cv::gpu::Stream::Stream(const Stream&) { throw_no_cuda(); }
Stream& cv::gpu::Stream::operator=(const Stream&) { throw_no_cuda(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_no_cuda(); return false; }
void cv::gpu::Stream::waitForCompletion() { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_no_cuda(); }
Stream& cv::gpu::Stream::Null() { throw_no_cuda(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_no_cuda(); return false; }
cv::gpu::Stream::Stream(Impl*) { throw_no_cuda(); }
void cv::gpu::Stream::create() { throw_no_cuda(); }
void cv::gpu::Stream::release() { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
#include "opencv2/core/stream_accessor.hpp"
namespace cv { namespace gpu
{
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
@@ -134,14 +131,14 @@ bool cv::gpu::Stream::queryIfComplete()
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
cvCudaSafeCall(err);
return false;
}
void cv::gpu::Stream::waitForCompletion()
{
cudaStream_t stream = Impl::getStream(impl);
cudaSafeCall( cudaStreamSynchronize(stream) );
cvCudaSafeCall( cudaStreamSynchronize(stream) );
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
@@ -151,7 +148,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
@@ -160,7 +157,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}
void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
@@ -169,7 +166,7 @@ void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
@@ -178,7 +175,7 @@ void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
@@ -187,7 +184,7 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
@@ -204,7 +201,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
{
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
return;
}
@@ -215,7 +212,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
{
int ival = saturate_cast<uchar>(val[0]);
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
return;
}
}
@@ -302,7 +299,7 @@ void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userDat
cudaStream_t stream = Impl::getStream(impl);
cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
cvCudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
#else
(void) callback;
(void) userData;
@@ -331,7 +328,7 @@ void cv::gpu::Stream::create()
release();
cudaStream_t stream;
cudaSafeCall( cudaStreamCreate( &stream ) );
cvCudaSafeCall( cudaStreamCreate( &stream ) );
impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));
@@ -343,7 +340,7 @@ void cv::gpu::Stream::release()
{
if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
{
cudaSafeCall( cudaStreamDestroy(impl->stream) );
cvCudaSafeCall( cudaStreamDestroy(impl->stream) );
cv::fastFree(impl);
}
}

View File

@@ -45,64 +45,38 @@
using namespace cv;
using namespace cv::gpu;
#ifndef HAVE_CUDA
#define throw_nogpu CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
#else // HAVE_CUDA
namespace
{
#if defined(__GNUC__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#endif
inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (err < 0)
{
String msg = cv::format("NPP API Call Error: %d", err);
cv::gpu::error(msg.c_str(), file, line, func);
}
}
}
#endif // HAVE_CUDA
//////////////////////////////// Initialization & Info ////////////////////////
#ifndef HAVE_CUDA
int cv::gpu::getCudaEnabledDeviceCount() { return 0; }
void cv::gpu::setDevice(int) { throw_nogpu; }
int cv::gpu::getDevice() { throw_nogpu; return 0; }
void cv::gpu::setDevice(int) { throw_no_cuda(); }
int cv::gpu::getDevice() { throw_no_cuda(); return 0; }
void cv::gpu::resetDevice() { throw_nogpu; }
void cv::gpu::resetDevice() { throw_no_cuda(); }
bool cv::gpu::deviceSupports(FeatureSet) { throw_nogpu; return false; }
bool cv::gpu::deviceSupports(FeatureSet) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::has(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasBin(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::has(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasBin(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_no_cuda(); return false; }
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_nogpu; return 0; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu; }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_nogpu; return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_nogpu; return 0; }
bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_nogpu; return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu; return false; }
void cv::gpu::DeviceInfo::query() { throw_nogpu; }
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_no_cuda(); return 0; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_no_cuda(); }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_no_cuda(); return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_no_cuda(); return 0; }
bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_no_cuda(); return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_no_cuda(); return false; }
void cv::gpu::DeviceInfo::query() { throw_no_cuda(); }
void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu; }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu; }
void cv::gpu::printCudaDeviceInfo(int) { throw_no_cuda(); }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_no_cuda(); }
#else // HAVE_CUDA
@@ -117,25 +91,25 @@ int cv::gpu::getCudaEnabledDeviceCount()
if (error == cudaErrorNoDevice)
return 0;
cudaSafeCall( error );
cvCudaSafeCall( error );
return count;
}
void cv::gpu::setDevice(int device)
{
cudaSafeCall( cudaSetDevice( device ) );
cvCudaSafeCall( cudaSetDevice( device ) );
}
int cv::gpu::getDevice()
{
int device;
cudaSafeCall( cudaGetDevice( &device ) );
cvCudaSafeCall( cudaGetDevice( &device ) );
return device;
}
void cv::gpu::resetDevice()
{
cudaSafeCall( cudaDeviceReset() );
cvCudaSafeCall( cudaDeviceReset() );
}
namespace
@@ -328,7 +302,7 @@ namespace
if (!props_[devID])
{
props_[devID] = new cudaDeviceProp;
cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
cvCudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
}
return props_[devID];
@@ -348,7 +322,7 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory)
if (prevDeviceID != device_id_)
setDevice(device_id_);
cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
cvCudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
if (prevDeviceID != device_id_)
setDevice(prevDeviceID);
@@ -434,8 +408,8 @@ void cv::gpu::printCudaDeviceInfo(int device)
printf("Device count: %d\n", count);
int driverVersion = 0, runtimeVersion = 0;
cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
const char *computeMode[] = {
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
@@ -449,7 +423,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
for(int dev = beg; dev < end; ++dev)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
printf("\nDevice %d: \"%s\"\n", dev, prop.name);
printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
@@ -511,13 +485,13 @@ void cv::gpu::printShortCudaDeviceInfo(int device)
int end = valid ? device+1 : count;
int driverVersion = 0, runtimeVersion = 0;
cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
for(int dev = beg; dev < end; ++dev)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
@@ -846,18 +820,18 @@ namespace
class EmptyFuncTable : public GpuFuncTable
{
public:
void copy(const Mat&, GpuMat&) const { throw_nogpu; }
void copy(const GpuMat&, Mat&) const { throw_nogpu; }
void copy(const GpuMat&, GpuMat&) const { throw_nogpu; }
void copy(const Mat&, GpuMat&) const { throw_no_cuda(); }
void copy(const GpuMat&, Mat&) const { throw_no_cuda(); }
void copy(const GpuMat&, GpuMat&) const { throw_no_cuda(); }
void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_nogpu; }
void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_no_cuda(); }
void convert(const GpuMat&, GpuMat&) const { throw_nogpu; }
void convert(const GpuMat&, GpuMat&, double, double) const { throw_nogpu; }
void convert(const GpuMat&, GpuMat&) const { throw_no_cuda(); }
void convert(const GpuMat&, GpuMat&, double, double) const { throw_no_cuda(); }
void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_nogpu; }
void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_no_cuda(); }
void mallocPitch(void**, size_t*, size_t, size_t) const { throw_nogpu; }
void mallocPitch(void**, size_t*, size_t, size_t) const { throw_no_cuda(); }
void free(void*) const {}
};
@@ -1009,7 +983,7 @@ namespace
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
@@ -1024,7 +998,7 @@ namespace
nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -1066,7 +1040,7 @@ namespace
nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
@@ -1083,7 +1057,7 @@ namespace
nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -1114,7 +1088,7 @@ namespace
nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
@@ -1131,7 +1105,7 @@ namespace
nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -1157,7 +1131,7 @@ namespace
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -1174,15 +1148,15 @@ namespace
public:
void copy(const Mat& src, GpuMat& dst) const
{
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
}
void copy(const GpuMat& src, Mat& dst) const
{
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
}
void copy(const GpuMat& src, GpuMat& dst) const
{
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
}
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
@@ -1327,7 +1301,7 @@ namespace
{
if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
{
cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
cvCudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
return;
}
@@ -1338,7 +1312,7 @@ namespace
if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
{
int val = saturate_cast<uchar>(s[0]);
cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
cvCudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
return;
}
}
@@ -1393,7 +1367,7 @@ namespace
void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
{
cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
cvCudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
}
void free(void* devPtr) const
@@ -1551,18 +1525,117 @@ void cv::gpu::GpuMat::release()
////////////////////////////////////////////////////////////////////////
// Error handling
void cv::gpu::error(const char *error_string, const char *file, const int line, const char *func)
#ifdef HAVE_CUDA
namespace
{
int code = CV_GpuApiCallError;
#define error_entry(entry) { entry, #entry }
if (std::uncaught_exception())
struct ErrorEntry
{
const char* errorStr = cvErrorStr(code);
const char* function = func ? func : "unknown function";
int code;
const char* str;
};
fprintf(stderr, "OpenCV Error: %s(%s) in %s, file %s, line %d", errorStr, error_string, function, file, line);
fflush(stderr);
}
else
cv::error( cv::Exception(code, error_string, func, file, line) );
struct ErrorEntryComparer
{
int code;
ErrorEntryComparer(int code_) : code(code_) {}
bool operator()(const ErrorEntry& e) const { return e.code == code; }
};
const ErrorEntry npp_errors [] =
{
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif
error_entry( NPP_BAD_ARG_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_COEFF_ERROR ),
error_entry( NPP_RECT_ERROR ),
error_entry( NPP_QUAD_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ),
error_entry( NPP_ALIGNMENT_ERROR ),
error_entry( NPP_STEP_ERROR ),
error_entry( NPP_SIZE_ERROR ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_NULL_POINTER_ERROR ),
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
error_entry( NPP_ERROR ),
error_entry( NPP_NO_ERROR ),
error_entry( NPP_SUCCESS ),
error_entry( NPP_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
error_entry( NPP_DOUBLE_SIZE_WARNING ),
error_entry( NPP_ODD_ROI_WARNING )
};
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
}
#endif
String cv::gpu::getNppErrorMessage(int code)
{
#ifndef HAVE_CUDA
(void) code;
return String();
#else
size_t idx = std::find_if(npp_errors, npp_errors + npp_error_num, ErrorEntryComparer(code)) - npp_errors;
const char* msg = (idx != npp_error_num) ? npp_errors[idx].str : "Unknown error code";
String str = cv::format("%s [Code = %d]", msg, code);
return str;
#endif
}
bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
{
#ifndef HAVE_CUDA
(void) cpuBorderType;
(void) gpuBorderType;
return false;
#else
switch (cpuBorderType)
{
case IPL_BORDER_REFLECT_101:
gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;
return true;
case IPL_BORDER_REPLICATE:
gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
return true;
case IPL_BORDER_CONSTANT:
gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
return true;
case IPL_BORDER_REFLECT:
gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;
return true;
case IPL_BORDER_WRAP:
gpuBorderType = cv::gpu::BORDER_WRAP_GPU;
return true;
default:
return false;
};
#endif
}
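For illustration, a hedged sketch of how a GPU code path might consume the two helpers defined above; the function name and error handling are made up for the example:

// hypothetical caller, assumes the cv::gpu declarations from gpu_private.hpp
void checkBorderSupported(int cpuBorderType)
{
    int gpuBorderType = 0;
    if (!cv::gpu::tryConvertToGpuBorderType(cpuBorderType, gpuBorderType))
        CV_Error(cv::Error::StsBadArg, "Unsupported border extrapolation mode for the GPU code path");
    // gpuBorderType now holds one of the BORDER_*_GPU constants from cuda/common.hpp
}

// getNppErrorMessage() formats a readable message for an NPP status code, e.g.
//   cv::String msg = cv::gpu::getNppErrorMessage(NPP_NULL_POINTER_ERROR);
//   // msg looks like "NPP_NULL_POINTER_ERROR [Code = ...]" (the number depends on the NPP version)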

View File

@@ -41,7 +41,6 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/gpumat.hpp"
using namespace cv;
using namespace cv::gpu;
@@ -181,30 +180,29 @@ bool cv::gpu::CudaMem::empty() const
#if !defined (HAVE_CUDA)
void cv::gpu::registerPageLocked(Mat&) { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
void cv::gpu::unregisterPageLocked(Mat&) { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
void cv::gpu::CudaMem::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/)
{ CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
bool cv::gpu::CudaMem::canMapHostMemory() { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); return false; }
void cv::gpu::CudaMem::release() { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); return GpuMat(); }
void cv::gpu::registerPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::unregisterPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::CudaMem::create(int, int, int, int) { throw_no_cuda(); }
bool cv::gpu::CudaMem::canMapHostMemory() { throw_no_cuda(); return false; }
void cv::gpu::CudaMem::release() { throw_no_cuda(); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_no_cuda(); return GpuMat(); }
#else /* !defined (HAVE_CUDA) */
void cv::gpu::registerPageLocked(Mat& m)
{
cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
cvCudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
}
void cv::gpu::unregisterPageLocked(Mat& m)
{
cudaSafeCall( cudaHostUnregister(m.ptr()) );
cvCudaSafeCall( cudaHostUnregister(m.ptr()) );
}
bool cv::gpu::CudaMem::canMapHostMemory()
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
return (prop.canMapHostMemory != 0) ? true : false;
}
@@ -222,7 +220,7 @@ namespace
void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
{
if (_alloc_type == ALLOC_ZEROCOPY && !canMapHostMemory())
cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
CV_Error(cv::Error::GpuApiCallError, "ZeroCopy is not supported by current device");
_type &= Mat::TYPE_MASK;
if( rows == _rows && cols == _cols && type() == _type && data )
@@ -239,7 +237,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
if (_alloc_type == ALLOC_ZEROCOPY)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
step = alignUpStep(step, prop.textureAlignment);
}
int64 _nettosize = (int64)step*rows;
@@ -254,10 +252,10 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
switch (alloc_type)
{
case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
case ALLOC_ZEROCOPY: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
default: cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
case ALLOC_PAGE_LOCKED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
case ALLOC_ZEROCOPY: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
case ALLOC_WRITE_COMBINED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
}
datastart = data = (uchar*)ptr;
@@ -270,15 +268,13 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
{
CV_Assert( alloc_type == ALLOC_ZEROCOPY );
GpuMat res;
if (alloc_type == ALLOC_ZEROCOPY)
{
void *pdev;
cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
res = GpuMat(rows, cols, type(), pdev, step);
}
else
cv::gpu::error("Zero-copy is not supported or memory was allocated without zero-copy flag", __FILE__, __LINE__);
void *pdev;
cvCudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
res = GpuMat(rows, cols, type(), pdev, step);
return res;
}
@@ -287,7 +283,7 @@ void cv::gpu::CudaMem::release()
{
if( refcount && CV_XADD(refcount, -1) == 1 )
{
cudaSafeCall( cudaFreeHost(datastart ) );
cvCudaSafeCall( cudaFreeHost(datastart ) );
fastFree(refcount);
}
data = datastart = dataend = 0;

View File

@@ -41,16 +41,12 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/opengl.hpp"
#include "opencv2/core/gpumat.hpp"
#ifdef HAVE_OPENGL
#include "gl_core_3_1.hpp"
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#endif
# include "gl_core_3_1.hpp"
# ifdef HAVE_CUDA
# include <cuda_gl_interop.h>
# endif
#endif
using namespace cv;
@@ -59,15 +55,9 @@ using namespace cv::gpu;
namespace
{
#ifndef HAVE_OPENGL
void throw_nogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); }
void throw_no_ogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); }
#else
void throw_nogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); }
#ifndef HAVE_CUDA
void throw_nocuda() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); }
#else
void throw_nocuda() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }
#endif
void throw_no_ogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); }
#endif
bool checkError(const char* file, const int line, const char* func = 0)
@@ -137,13 +127,13 @@ void cv::gpu::setGlDevice(int device)
{
#ifndef HAVE_OPENGL
(void) device;
throw_nogl();
throw_no_ogl();
#else
#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER)
(void) device;
throw_nocuda();
throw_no_cuda();
#else
cudaSafeCall( cudaGLSetGLDevice(device) );
cvCudaSafeCall( cudaGLSetGLDevice(device) );
#endif
#endif
}
@@ -194,7 +184,7 @@ namespace
return;
cudaGraphicsResource_t resource;
cudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
cvCudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
release();
@@ -227,7 +217,7 @@ namespace
CudaResource::GraphicsMapHolder::GraphicsMapHolder(cudaGraphicsResource_t* resource, cudaStream_t stream) : resource_(resource), stream_(stream)
{
if (resource_)
cudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
cvCudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
}
CudaResource::GraphicsMapHolder::~GraphicsMapHolder()
@@ -250,14 +240,14 @@ namespace
void* dst;
size_t size;
cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
CV_DbgAssert( width * height == size );
if (stream == 0)
cudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
cvCudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
}
void CudaResource::copyTo(void* dst, size_t dpitch, size_t width, size_t height, cudaStream_t stream)
@@ -269,14 +259,14 @@ namespace
void* src;
size_t size;
cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
CV_DbgAssert( width * height == size );
if (stream == 0)
cudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
cvCudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
}
void* CudaResource::map(cudaStream_t stream)
@@ -287,7 +277,7 @@ namespace
void* ptr;
size_t size;
cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
h.reset();
@@ -476,7 +466,7 @@ void cv::ogl::Buffer::Impl::unmapHost()
cv::ogl::Buffer::Buffer() : rows_(0), cols_(0), type_(0)
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
impl_ = Impl::empty();
#endif
@@ -490,7 +480,7 @@ cv::ogl::Buffer::Buffer(int arows, int acols, int atype, unsigned int abufId, bo
(void) atype;
(void) abufId;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
impl_ = new Impl(abufId, autoRelease);
rows_ = arows;
@@ -506,7 +496,7 @@ cv::ogl::Buffer::Buffer(Size asize, int atype, unsigned int abufId, bool autoRel
(void) atype;
(void) abufId;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
impl_ = new Impl(abufId, autoRelease);
rows_ = asize.height;
@@ -531,7 +521,7 @@ cv::ogl::Buffer::Buffer(InputArray arr, Target target, bool autoRelease) : rows_
(void) arr;
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -578,7 +568,7 @@ void cv::ogl::Buffer::create(int arows, int acols, int atype, Target target, boo
(void) atype;
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
if (rows_ != arows || cols_ != acols || type_ != atype)
{
@@ -607,7 +597,7 @@ void cv::ogl::Buffer::setAutoRelease(bool flag)
{
#ifndef HAVE_OPENGL
(void) flag;
throw_nogl();
throw_no_ogl();
#else
impl_->setAutoRelease(flag);
#endif
@@ -619,7 +609,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
(void) arr;
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -647,7 +637,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows);
@@ -672,7 +662,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr, Target target, bool autoRelease) c
(void) arr;
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -693,7 +683,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr, Target target, bool autoRelease) c
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
GpuMat& dmat = arr.getGpuMatRef();
dmat.create(rows_, cols_, type_);
@@ -719,7 +709,7 @@ cv::ogl::Buffer cv::ogl::Buffer::clone(Target target, bool autoRelease) const
#ifndef HAVE_OPENGL
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
return cv::ogl::Buffer();
#else
ogl::Buffer buf;
@@ -732,7 +722,7 @@ void cv::ogl::Buffer::bind(Target target) const
{
#ifndef HAVE_OPENGL
(void) target;
throw_nogl();
throw_no_ogl();
#else
impl_->bind(target);
#endif
@@ -742,7 +732,7 @@ void cv::ogl::Buffer::unbind(Target target)
{
#ifndef HAVE_OPENGL
(void) target;
throw_nogl();
throw_no_ogl();
#else
gl::BindBuffer(target, 0);
CV_CheckGlError();
@@ -753,7 +743,7 @@ Mat cv::ogl::Buffer::mapHost(Access access)
{
#ifndef HAVE_OPENGL
(void) access;
throw_nogl();
throw_no_ogl();
return Mat();
#else
return Mat(rows_, cols_, type_, impl_->mapHost(access));
@@ -763,7 +753,7 @@ Mat cv::ogl::Buffer::mapHost(Access access)
void cv::ogl::Buffer::unmapHost()
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
return impl_->unmapHost();
#endif
@@ -772,11 +762,11 @@ void cv::ogl::Buffer::unmapHost()
GpuMat cv::ogl::Buffer::mapDevice()
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
return GpuMat();
#else
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
return GpuMat();
#else
return GpuMat(rows_, cols_, type_, impl_->mapDevice());
@@ -787,10 +777,10 @@ GpuMat cv::ogl::Buffer::mapDevice()
void cv::ogl::Buffer::unmapDevice()
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
impl_->unmapDevice();
#endif
@@ -800,7 +790,7 @@ void cv::ogl::Buffer::unmapDevice()
unsigned int cv::ogl::Buffer::bufId() const
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
return 0;
#else
return impl_->bufId();
@@ -926,7 +916,7 @@ void cv::ogl::Texture2D::Impl::bind() const
cv::ogl::Texture2D::Texture2D() : rows_(0), cols_(0), format_(NONE)
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
impl_ = Impl::empty();
#endif
@@ -940,7 +930,7 @@ cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, unsigned int
(void) aformat;
(void) atexId;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
impl_ = new Impl(atexId, autoRelease);
rows_ = arows;
@@ -956,7 +946,7 @@ cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, unsigned int atexId, b
(void) aformat;
(void) atexId;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
impl_ = new Impl(atexId, autoRelease);
rows_ = asize.height;
@@ -980,7 +970,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
#ifndef HAVE_OPENGL
(void) arr;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -1016,7 +1006,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
ogl::Buffer buf(dmat, ogl::Buffer::PIXEL_UNPACK_BUFFER);
@@ -1051,7 +1041,7 @@ void cv::ogl::Texture2D::create(int arows, int acols, Format aformat, bool autoR
(void) acols;
(void) aformat;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
if (rows_ != arows || cols_ != acols || format_ != aformat)
{
@@ -1080,7 +1070,7 @@ void cv::ogl::Texture2D::setAutoRelease(bool flag)
{
#ifndef HAVE_OPENGL
(void) flag;
throw_nogl();
throw_no_ogl();
#else
impl_->setAutoRelease(flag);
#endif
@@ -1091,7 +1081,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
#ifndef HAVE_OPENGL
(void) arr;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -1129,7 +1119,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
ogl::Buffer buf(dmat, ogl::Buffer::PIXEL_UNPACK_BUFFER);
@@ -1158,7 +1148,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
(void) arr;
(void) ddepth;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -1180,7 +1170,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
ogl::Buffer buf(rows_, cols_, CV_MAKE_TYPE(ddepth, cn), ogl::Buffer::PIXEL_PACK_BUFFER);
buf.bind(ogl::Buffer::PIXEL_PACK_BUFFER);
@@ -1207,7 +1197,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
void cv::ogl::Texture2D::bind() const
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
impl_->bind();
#endif
@@ -1216,7 +1206,7 @@ void cv::ogl::Texture2D::bind() const
unsigned int cv::ogl::Texture2D::texId() const
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
return 0;
#else
return impl_->texId();
@@ -1331,7 +1321,7 @@ void cv::ogl::Arrays::setAutoRelease(bool flag)
void cv::ogl::Arrays::bind() const
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
CV_Assert( texCoord_.empty() || texCoord_.size().area() == size_ );
CV_Assert( normal_.empty() || normal_.size().area() == size_ );
@@ -1416,7 +1406,7 @@ void cv::ogl::render(const ogl::Texture2D& tex, Rect_<double> wndRect, Rect_<dou
(void) tex;
(void) wndRect;
(void) texRect;
throw_nogl();
throw_no_ogl();
#else
if (!tex.empty())
{
@@ -1488,7 +1478,7 @@ void cv::ogl::render(const ogl::Arrays& arr, int mode, Scalar color)
(void) arr;
(void) mode;
(void) color;
throw_nogl();
throw_no_ogl();
#else
if (!arr.empty())
{
@@ -1508,7 +1498,7 @@ void cv::ogl::render(const ogl::Arrays& arr, InputArray indices, int mode, Scala
(void) indices;
(void) mode;
(void) color;
throw_nogl();
throw_no_ogl();
#else
if (!arr.empty() && !indices.empty())
{

View File

@@ -46,8 +46,10 @@
#include "opencv2/core/utility.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/opengl.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/gpu_private.hpp"
#include <assert.h>
#include <ctype.h>
@@ -64,37 +66,6 @@
#define GET_OPTIMIZED(func) (func)
#endif
#ifdef HAVE_CUDA
# include <cuda_runtime.h>
# include <npp.h>
# define CUDART_MINIMUM_REQUIRED_VERSION 4020
# define NPP_MINIMUM_REQUIRED_VERSION 4200
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
# error "Insufficient Cuda Runtime library version, please update it."
# endif
# if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
# error "Insufficient NPP version, please update it."
# endif
# if defined(__GNUC__)
# define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
# else
# define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
# endif
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
if (cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
#else
# define cudaSafeCall(expr)
#endif //HAVE_CUDA
namespace cv
{