moved common gpu utility functionality to gpu_private.hpp
This commit is contained in:
@@ -45,10 +45,8 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "opencv2/core/cuda_devptrs.hpp"
|
||||
|
||||
#ifndef CV_PI
|
||||
#define CV_PI 3.1415926535897932384626433832795
|
||||
#endif
|
||||
#include "opencv2/core/cvdef.h"
|
||||
#include "opencv2/core/base.hpp"
|
||||
|
||||
#ifndef CV_PI_F
|
||||
#ifndef CV_PI
|
||||
@@ -58,16 +56,22 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace cv { namespace gpu { namespace cuda {
|
||||
static inline void checkError(cudaError_t err, const char* file, const int line, const char* func)
|
||||
{
|
||||
if (cudaSuccess != err)
|
||||
cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
|
||||
}
|
||||
}}}
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
|
||||
#define cvCudaSafeCall(expr) cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, __func__)
|
||||
#else /* defined(__CUDACC__) || defined(__MSVC__) */
|
||||
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
|
||||
#define cvCudaSafeCall(expr) cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, "")
|
||||
#endif
|
||||
|
||||
namespace cv { namespace gpu
|
||||
{
|
||||
void error(const char *error_string, const char *file, const int line, const char *func);
|
||||
|
||||
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
|
||||
{
|
||||
return reinterpret_cast<size_t>(ptr) % size == 0;
|
||||
@@ -79,38 +83,32 @@ namespace cv { namespace gpu
|
||||
}
|
||||
}}
|
||||
|
||||
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
|
||||
{
|
||||
if (cudaSuccess != err)
|
||||
cv::gpu::error(cudaGetErrorString(err), file, line, func);
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu
|
||||
{
|
||||
__host__ __device__ __forceinline__ int divUp(int total, int grain)
|
||||
enum
|
||||
{
|
||||
return (total + grain - 1) / grain;
|
||||
}
|
||||
|
||||
namespace cuda
|
||||
{
|
||||
using cv::gpu::divUp;
|
||||
BORDER_REFLECT101_GPU = 0,
|
||||
BORDER_REPLICATE_GPU,
|
||||
BORDER_CONSTANT_GPU,
|
||||
BORDER_REFLECT_GPU,
|
||||
BORDER_WRAP_GPU
|
||||
};
|
||||
|
||||
#ifdef __CUDACC__
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned short ushort;
|
||||
typedef signed char schar;
|
||||
#if defined (_WIN32) || defined (__APPLE__)
|
||||
typedef unsigned int uint;
|
||||
#endif
|
||||
namespace cuda
|
||||
{
|
||||
__host__ __device__ __forceinline__ int divUp(int total, int grain)
|
||||
{
|
||||
return (total + grain - 1) / grain;
|
||||
}
|
||||
|
||||
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
|
||||
{
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
||||
cvCudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
||||
}
|
||||
#endif // __CUDACC__
|
||||
}
|
||||
#endif // __CUDACC__
|
||||
}}
|
||||
|
||||
|
||||
|
@@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace cuda
|
||||
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
|
||||
|
||||
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
@@ -332,10 +332,10 @@ namespace cv { namespace gpu { namespace cuda
|
||||
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
|
||||
|
||||
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
template<> struct TransformDispatcher<true>
|
||||
@@ -345,7 +345,7 @@ namespace cv { namespace gpu { namespace cuda
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
|
||||
StaticAssert<ft::smart_shift != 1>::check();
|
||||
CV_StaticAssert(ft::smart_shift != 1, "");
|
||||
|
||||
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
|
||||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
|
||||
@@ -358,10 +358,10 @@ namespace cv { namespace gpu { namespace cuda
|
||||
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
|
||||
|
||||
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
@@ -369,7 +369,7 @@ namespace cv { namespace gpu { namespace cuda
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
|
||||
StaticAssert<ft::smart_shift != 1>::check();
|
||||
CV_StaticAssert(ft::smart_shift != 1, "");
|
||||
|
||||
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
|
||||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
|
||||
@@ -383,10 +383,10 @@ namespace cv { namespace gpu { namespace cuda
|
||||
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
|
||||
|
||||
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
} // namespace transform_detail
|
||||
|
@@ -58,9 +58,6 @@ namespace cv
|
||||
// Simple lightweight structures that encapsulates information about an image on device.
|
||||
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
|
||||
|
||||
template <bool expr> struct StaticAssert;
|
||||
template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
|
||||
|
||||
template<typename T> struct DevPtr
|
||||
{
|
||||
typedef T elem_type;
|
||||
|
134
modules/core/include/opencv2/core/gpu_private.hpp
Normal file
134
modules/core/include/opencv2/core/gpu_private.hpp
Normal file
@@ -0,0 +1,134 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_GPU_PRIVATE_HPP__
|
||||
#define __OPENCV_CORE_GPU_PRIVATE_HPP__
|
||||
|
||||
#ifndef __OPENCV_BUILD
|
||||
# error this is a private header which should not be used from outside of the OpenCV library
|
||||
#endif
|
||||
|
||||
#include "cvconfig.h"
|
||||
|
||||
#include "opencv2/core/cvdef.h"
|
||||
#include "opencv2/core/base.hpp"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
# include <cuda.h>
|
||||
# include <cuda_runtime.h>
|
||||
# include <npp.h>
|
||||
# include "opencv2/core/stream_accessor.hpp"
|
||||
# include "opencv2/core/cuda/common.hpp"
|
||||
|
||||
# define CUDART_MINIMUM_REQUIRED_VERSION 4020
|
||||
|
||||
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
|
||||
# error "Insufficient Cuda Runtime library version, please update it."
|
||||
# endif
|
||||
|
||||
# if defined(CUDA_ARCH_BIN_OR_PTX_10)
|
||||
# error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
namespace cv { namespace gpu {
|
||||
CV_EXPORTS cv::String getNppErrorMessage(int code);
|
||||
|
||||
static inline void checkNppError(int code, const char* file, const int line, const char* func)
|
||||
{
|
||||
if (code < 0)
|
||||
cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line);
|
||||
}
|
||||
|
||||
// Converts CPU border extrapolation mode into GPU internal analogue.
|
||||
// Returns true if the GPU analogue exists, false otherwise.
|
||||
CV_EXPORTS bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
|
||||
}}
|
||||
|
||||
#ifndef HAVE_CUDA
|
||||
|
||||
static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without GPU support"); }
|
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define nppSafeCall(expr) cv::gpu::checkNppError(expr, __FILE__, __LINE__, __func__)
|
||||
#else /* defined(__CUDACC__) || defined(__MSVC__) */
|
||||
#define nppSafeCall(expr) cv::gpu::checkNppError(expr, __FILE__, __LINE__, "")
|
||||
#endif
|
||||
|
||||
namespace cv { namespace gpu
|
||||
{
|
||||
template<int n> struct NPPTypeTraits;
|
||||
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
|
||||
|
||||
class NppStreamHandler
|
||||
{
|
||||
public:
|
||||
inline explicit NppStreamHandler(cudaStream_t newStream)
|
||||
{
|
||||
oldStream = nppGetStream();
|
||||
nppSetStream(newStream);
|
||||
}
|
||||
|
||||
inline ~NppStreamHandler()
|
||||
{
|
||||
nppSetStream(oldStream);
|
||||
}
|
||||
|
||||
private:
|
||||
cudaStream_t oldStream;
|
||||
};
|
||||
}}
|
||||
|
||||
#endif // HAVE_CUDA
|
||||
|
||||
#endif // __OPENCV_CORE_GPU_PRIVATE_HPP__
|
@@ -454,11 +454,6 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
|
||||
|
||||
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Error handling
|
||||
|
||||
CV_EXPORTS void error(const char* error_string, const char* file, const int line, const char* func = "");
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -43,17 +43,20 @@
|
||||
#ifndef __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
|
||||
#define __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
|
||||
|
||||
#include "opencv2/core/gpumat.hpp"
|
||||
#include "cuda_runtime_api.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include "opencv2/core/cvdef.h"
|
||||
|
||||
// This is only header file that depends on Cuda. All other headers are independent.
|
||||
// So if you use OpenCV binaries you do noot need to install Cuda Toolkit.
|
||||
// But of you wanna use GPU by yourself, may get cuda stream instance using the class below.
|
||||
// In this case you have to install Cuda Toolkit.
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace gpu
|
||||
{
|
||||
// This is only header file that depends on Cuda. All other headers are independent.
|
||||
// So if you use OpenCV binaries you do noot need to install Cuda Toolkit.
|
||||
// But of you wanna use GPU by yourself, may get cuda stream instance using the class below.
|
||||
// In this case you have to install Cuda Toolkit.
|
||||
class Stream;
|
||||
|
||||
struct StreamAccessor
|
||||
{
|
||||
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
|
||||
|
Reference in New Issue
Block a user