diff --git a/modules/cudalegacy/src/cuda/NCVPixelOperations.hpp b/modules/cudalegacy/src/cuda/NCVPixelOperations.hpp index 2d06cda85..3d570c5fa 100644 --- a/modules/cudalegacy/src/cuda/NCVPixelOperations.hpp +++ b/modules/cudalegacy/src/cuda/NCVPixelOperations.hpp @@ -48,24 +48,24 @@ #include "opencv2/cudalegacy/NCV.hpp" template inline __host__ __device__ TBase _pixMaxVal(); -template<> static inline __host__ __device__ Ncv8u _pixMaxVal() {return UCHAR_MAX;} -template<> static inline __host__ __device__ Ncv16u _pixMaxVal() {return USHRT_MAX;} -template<> static inline __host__ __device__ Ncv32u _pixMaxVal() {return UINT_MAX;} -template<> static inline __host__ __device__ Ncv8s _pixMaxVal() {return SCHAR_MAX;} -template<> static inline __host__ __device__ Ncv16s _pixMaxVal() {return SHRT_MAX;} -template<> static inline __host__ __device__ Ncv32s _pixMaxVal() {return INT_MAX;} -template<> static inline __host__ __device__ Ncv32f _pixMaxVal() {return FLT_MAX;} -template<> static inline __host__ __device__ Ncv64f _pixMaxVal() {return DBL_MAX;} +template<> inline __host__ __device__ Ncv8u _pixMaxVal() {return UCHAR_MAX;} +template<> inline __host__ __device__ Ncv16u _pixMaxVal() {return USHRT_MAX;} +template<> inline __host__ __device__ Ncv32u _pixMaxVal() {return UINT_MAX;} +template<> inline __host__ __device__ Ncv8s _pixMaxVal() {return SCHAR_MAX;} +template<> inline __host__ __device__ Ncv16s _pixMaxVal() {return SHRT_MAX;} +template<> inline __host__ __device__ Ncv32s _pixMaxVal() {return INT_MAX;} +template<> inline __host__ __device__ Ncv32f _pixMaxVal() {return FLT_MAX;} +template<> inline __host__ __device__ Ncv64f _pixMaxVal() {return DBL_MAX;} template inline __host__ __device__ TBase _pixMinVal(); -template<> static inline __host__ __device__ Ncv8u _pixMinVal() {return 0;} -template<> static inline __host__ __device__ Ncv16u _pixMinVal() {return 0;} -template<> static inline __host__ __device__ Ncv32u _pixMinVal() {return 0;} -template<> static inline __host__ __device__ Ncv8s _pixMinVal() {return SCHAR_MIN;} -template<> static inline __host__ __device__ Ncv16s _pixMinVal() {return SHRT_MIN;} -template<> static inline __host__ __device__ Ncv32s _pixMinVal() {return INT_MIN;} -template<> static inline __host__ __device__ Ncv32f _pixMinVal() {return FLT_MIN;} -template<> static inline __host__ __device__ Ncv64f _pixMinVal() {return DBL_MIN;} +template<> inline __host__ __device__ Ncv8u _pixMinVal() {return 0;} +template<> inline __host__ __device__ Ncv16u _pixMinVal() {return 0;} +template<> inline __host__ __device__ Ncv32u _pixMinVal() {return 0;} +template<> inline __host__ __device__ Ncv8s _pixMinVal() {return SCHAR_MIN;} +template<> inline __host__ __device__ Ncv16s _pixMinVal() {return SHRT_MIN;} +template<> inline __host__ __device__ Ncv32s _pixMinVal() {return INT_MIN;} +template<> inline __host__ __device__ Ncv32f _pixMinVal() {return FLT_MIN;} +template<> inline __host__ __device__ Ncv64f _pixMinVal() {return DBL_MIN;} template struct TConvVec2Base; template<> struct TConvVec2Base {typedef Ncv8u TBase;}; @@ -104,33 +104,33 @@ template<> struct TConvBase2Vec {typedef double3 TVec;}; template<> struct TConvBase2Vec {typedef double4 TVec;}; //TODO: consider using CUDA intrinsics to avoid branching -template static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);} -template static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);} -template static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);} -template static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;} +template inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);} +template inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);} +template inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);} +template inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;} //TODO: consider using CUDA intrinsics to avoid branching -template static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);} -template static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);} -template static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);} -template static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;} +template inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);} +template inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);} +template inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);} +template inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;} template inline Tout _pixMakeZero(); -template<> static inline __host__ __device__ uchar1 _pixMakeZero() {return make_uchar1(0);} -template<> static inline __host__ __device__ uchar3 _pixMakeZero() {return make_uchar3(0,0,0);} -template<> static inline __host__ __device__ uchar4 _pixMakeZero() {return make_uchar4(0,0,0,0);} -template<> static inline __host__ __device__ ushort1 _pixMakeZero() {return make_ushort1(0);} -template<> static inline __host__ __device__ ushort3 _pixMakeZero() {return make_ushort3(0,0,0);} -template<> static inline __host__ __device__ ushort4 _pixMakeZero() {return make_ushort4(0,0,0,0);} -template<> static inline __host__ __device__ uint1 _pixMakeZero() {return make_uint1(0);} -template<> static inline __host__ __device__ uint3 _pixMakeZero() {return make_uint3(0,0,0);} -template<> static inline __host__ __device__ uint4 _pixMakeZero() {return make_uint4(0,0,0,0);} -template<> static inline __host__ __device__ float1 _pixMakeZero() {return make_float1(0.f);} -template<> static inline __host__ __device__ float3 _pixMakeZero() {return make_float3(0.f,0.f,0.f);} -template<> static inline __host__ __device__ float4 _pixMakeZero() {return make_float4(0.f,0.f,0.f,0.f);} -template<> static inline __host__ __device__ double1 _pixMakeZero() {return make_double1(0.);} -template<> static inline __host__ __device__ double3 _pixMakeZero() {return make_double3(0.,0.,0.);} -template<> static inline __host__ __device__ double4 _pixMakeZero() {return make_double4(0.,0.,0.,0.);} +template<> inline __host__ __device__ uchar1 _pixMakeZero() {return make_uchar1(0);} +template<> inline __host__ __device__ uchar3 _pixMakeZero() {return make_uchar3(0,0,0);} +template<> inline __host__ __device__ uchar4 _pixMakeZero() {return make_uchar4(0,0,0,0);} +template<> inline __host__ __device__ ushort1 _pixMakeZero() {return make_ushort1(0);} +template<> inline __host__ __device__ ushort3 _pixMakeZero() {return make_ushort3(0,0,0);} +template<> inline __host__ __device__ ushort4 _pixMakeZero() {return make_ushort4(0,0,0,0);} +template<> inline __host__ __device__ uint1 _pixMakeZero() {return make_uint1(0);} +template<> inline __host__ __device__ uint3 _pixMakeZero() {return make_uint3(0,0,0);} +template<> inline __host__ __device__ uint4 _pixMakeZero() {return make_uint4(0,0,0,0);} +template<> inline __host__ __device__ float1 _pixMakeZero() {return make_float1(0.f);} +template<> inline __host__ __device__ float3 _pixMakeZero() {return make_float3(0.f,0.f,0.f);} +template<> inline __host__ __device__ float4 _pixMakeZero() {return make_float4(0.f,0.f,0.f,0.f);} +template<> inline __host__ __device__ double1 _pixMakeZero() {return make_double1(0.);} +template<> inline __host__ __device__ double3 _pixMakeZero() {return make_double3(0.,0.,0.);} +template<> inline __host__ __device__ double4 _pixMakeZero() {return make_double4(0.,0.,0.,0.);} static inline __host__ __device__ uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);} static inline __host__ __device__ uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);} @@ -180,7 +180,7 @@ static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix) return out; }}; -template static inline __host__ __device__ Tout _pixDemoteClampZ(Tin &pix) +template inline __host__ __device__ Tout _pixDemoteClampZ(Tin &pix) { return __pixDemoteClampZ_CN::_pixDemoteClampZ_CN(pix); } @@ -217,7 +217,7 @@ static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix) return out; }}; -template static inline __host__ __device__ Tout _pixDemoteClampNN(Tin &pix) +template inline __host__ __device__ Tout _pixDemoteClampNN(Tin &pix) { return __pixDemoteClampNN_CN::_pixDemoteClampNN_CN(pix); } diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index 394693240..7035a283c 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -59,11 +59,11 @@ namespace cv { namespace cuda { namespace device /////////////////////////////////////////////////////////////// template static float __device__ pixeldiff(const uchar* left, const uchar* right, float max_data_term); - template<> __device__ __forceinline__ static float pixeldiff<1>(const uchar* left, const uchar* right, float max_data_term) + template<> __device__ __forceinline__ float pixeldiff<1>(const uchar* left, const uchar* right, float max_data_term) { return fminf( ::abs((int)*left - *right), max_data_term); } - template<> __device__ __forceinline__ static float pixeldiff<3>(const uchar* left, const uchar* right, float max_data_term) + template<> __device__ __forceinline__ float pixeldiff<3>(const uchar* left, const uchar* right, float max_data_term) { float tb = 0.114f * ::abs((int)left[0] - right[0]); float tg = 0.587f * ::abs((int)left[1] - right[1]); @@ -71,7 +71,7 @@ namespace cv { namespace cuda { namespace device return fminf(tr + tg + tb, max_data_term); } - template<> __device__ __forceinline__ static float pixeldiff<4>(const uchar* left, const uchar* right, float max_data_term) + template<> __device__ __forceinline__ float pixeldiff<4>(const uchar* left, const uchar* right, float max_data_term) { uchar4 l = *((const uchar4*)left); uchar4 r = *((const uchar4*)right);