From e630be3890d91f84d0f2d825c755bd0c1d070918 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 31 Jan 2014 15:52:06 +0400 Subject: [PATCH] disable NPP for GpuMat methods and for copyMakeBorder --- .../include/opencv2/dynamicuda/dynamicuda.hpp | 61 +++++++++++++++++-- modules/gpu/src/imgproc.cpp | 6 ++ 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/modules/dynamicuda/include/opencv2/dynamicuda/dynamicuda.hpp b/modules/dynamicuda/include/opencv2/dynamicuda/dynamicuda.hpp index d4d0220e0..00f087303 100644 --- a/modules/dynamicuda/include/opencv2/dynamicuda/dynamicuda.hpp +++ b/modules/dynamicuda/include/opencv2/dynamicuda/dynamicuda.hpp @@ -129,15 +129,20 @@ public: #if defined(USE_CUDA) -#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func) -#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func) +// Disable NPP for this file +//#define USE_NPP +#undef USE_NPP +#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func) inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") { if (cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), file, line, func); } +#ifdef USE_NPP + +#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func) inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") { if (err < 0) @@ -148,6 +153,8 @@ inline void ___nppSafeCall(int err, const char *file, const int line, const char } } +#endif + namespace cv { namespace gpu { namespace device { void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream); @@ -173,6 +180,8 @@ template void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat& cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream); } +#ifdef USE_NPP + template struct NPPTypeTraits; template<> struct NPPTypeTraits { typedef Npp8u npp_type; }; template<> struct NPPTypeTraits { typedef Npp8s npp_type; }; @@ -182,9 +191,13 @@ template<> struct NPPTypeTraits { typedef Npp32s npp_type; }; template<> struct NPPTypeTraits { typedef Npp32f npp_type; }; template<> struct NPPTypeTraits { typedef Npp64f npp_type; }; +#endif + ////////////////////////////////////////////////////////////////////////// // Convert +#ifdef USE_NPP + template struct NppConvertFunc { typedef typename NPPTypeTraits::npp_type src_t; @@ -232,9 +245,13 @@ template::func_ptr func> str } }; +#endif + ////////////////////////////////////////////////////////////////////////// // Set +#ifdef USE_NPP + template struct NppSetFunc { typedef typename NPPTypeTraits::npp_type src_t; @@ -339,9 +356,13 @@ template::func_ptr func> struct N } }; +#endif + ////////////////////////////////////////////////////////////////////////// // CopyMasked +#ifdef USE_NPP + template struct NppCopyMaskedFunc { typedef typename NPPTypeTraits::npp_type src_t; @@ -365,6 +386,8 @@ template::func_ptr func> struct N } }; +#endif + template static inline bool isAligned(const T* ptr, size_t size) { return reinterpret_cast(ptr) % size == 0; @@ -877,6 +900,8 @@ public: } typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream); + +#ifdef USE_NPP static const func_t funcs[7][4] = { /* 8U */ {NppCopyMasked::call, cv::gpu::device::copyWithMask, NppCopyMasked::call, NppCopyMasked::call}, @@ -889,6 +914,9 @@ public: }; const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cv::gpu::device::copyWithMask; +#else + const func_t func = cv::gpu::device::copyWithMask; +#endif func(src, dst, mask, 0); } @@ -896,6 +924,8 @@ public: void convert(const GpuMat& src, GpuMat& dst) const { typedef void (*func_t)(const GpuMat& src, GpuMat& dst); + +#ifdef USE_NPP static const func_t funcs[7][7][4] = { { @@ -962,6 +992,7 @@ public: /* 64F -> 64F */ {0,0,0,0} } }; +#endif CV_Assert(src.depth() <= CV_64F && src.channels() <= 4); CV_Assert(dst.depth() <= CV_64F); @@ -980,8 +1011,12 @@ public: return; } +#ifdef USE_NPP const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1]; CV_DbgAssert(func != 0); +#else + const func_t func = cv::gpu::device::convertTo; +#endif func(src, dst); } @@ -1023,6 +1058,8 @@ public: } typedef void (*func_t)(GpuMat& src, Scalar s); + +#ifdef USE_NPP static const func_t funcs[7][4] = { {NppSet::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet::call}, @@ -1033,6 +1070,7 @@ public: {NppSet::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet::call}, {cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo } }; +#endif CV_Assert(m.depth() <= CV_64F && m.channels() <= 4); @@ -1042,14 +1080,22 @@ public: CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } +#ifdef USE_NPP + const func_t func = funcs[m.depth()][m.channels() - 1]; +#else + const func_t func = cv::gpu::device::setTo; +#endif + if (stream) cv::gpu::device::setTo(m, s, stream); else - funcs[m.depth()][m.channels() - 1](m, s); + func(m, s); } else { typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask); + +#ifdef USE_NPP static const func_t funcs[7][4] = { {NppSetMask::call, cv::gpu::device::setTo, cv::gpu::device::setTo, NppSetMask::call}, @@ -1060,6 +1106,7 @@ public: {NppSetMask::call, cv::gpu::device::setTo, cv::gpu::device::setTo, NppSetMask::call}, {cv::gpu::device::setTo , cv::gpu::device::setTo, cv::gpu::device::setTo, cv::gpu::device::setTo } }; +#endif CV_Assert(m.depth() <= CV_64F && m.channels() <= 4); @@ -1069,10 +1116,16 @@ public: CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } +#ifdef USE_NPP + const func_t func = funcs[m.depth()][m.channels() - 1]; +#else + const func_t func = cv::gpu::device::setTo; +#endif + if (stream) cv::gpu::device::setTo(m, s, mask, stream); else - funcs[m.depth()][m.channels() - 1](m, s, mask); + func(m, s, mask); } } diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp index 1904b6aad..97adb685f 100644 --- a/modules/gpu/src/imgproc.cpp +++ b/modules/gpu/src/imgproc.cpp @@ -244,6 +244,10 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q, //////////////////////////////////////////////////////////////////////// // copyMakeBorder +// Disable NPP for this file +//#define USE_NPP +#undef USE_NPP + namespace cv { namespace gpu { namespace device { namespace imgproc @@ -279,6 +283,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom cudaStream_t stream = StreamAccessor::getStream(s); +#ifdef USE_NPP if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1)) { NppiSize srcsz; @@ -328,6 +333,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom cudaSafeCall( cudaDeviceSynchronize() ); } else +#endif { typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream); static const caller_t callers[6][4] =