opencv_core: CUDA: check if __CUDA_ARCH__ is defined before comparing it
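Changed preprocessor conditionals of the form "#if __CUDA_ARCH__ >= 200" (for whichever architecture value is being tested) to "#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200" in order to avoid compiler warnings about __CUDA_ARCH__ being undefined when it is evaluated in an #if expression.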
This commit is contained in:
parent
8d79285d02
commit
029dfbc89d
@ -275,7 +275,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
template <typename Pointer, typename Reference, class Op>
|
template <typename Pointer, typename Reference, class Op>
|
||||||
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
(void) smem;
|
(void) smem;
|
||||||
(void) tid;
|
(void) tid;
|
||||||
|
|
||||||
@ -298,7 +298,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
{
|
{
|
||||||
const unsigned int laneId = Warp::laneId();
|
const unsigned int laneId = Warp::laneId();
|
||||||
|
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
|
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
|
||||||
|
|
||||||
if (laneId == 0)
|
if (laneId == 0)
|
||||||
@ -321,7 +321,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
if (tid < 32)
|
if (tid < 32)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
|
Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
|
||||||
#else
|
#else
|
||||||
Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||||
|
@ -101,7 +101,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
|
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 130
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||||
uint res = 0;
|
uint res = 0;
|
||||||
asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
|
asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
|
||||||
return res;
|
return res;
|
||||||
@ -149,7 +149,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
|
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 130
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||||
uint res = 0;
|
uint res = 0;
|
||||||
asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
|
asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
|
||||||
return res;
|
return res;
|
||||||
@ -191,7 +191,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
|
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 130
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||||
ushort res = 0;
|
ushort res = 0;
|
||||||
asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
|
asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
|
||||||
return res;
|
return res;
|
||||||
@ -226,7 +226,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
|
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 130
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||||
short res = 0;
|
short res = 0;
|
||||||
asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
|
asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
|
||||||
return res;
|
return res;
|
||||||
|
@ -54,7 +54,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
|
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
return __shfl(val, srcLane, width);
|
return __shfl(val, srcLane, width);
|
||||||
#else
|
#else
|
||||||
return T();
|
return T();
|
||||||
@ -62,7 +62,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
|
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
return (unsigned int) __shfl((int) val, srcLane, width);
|
return (unsigned int) __shfl((int) val, srcLane, width);
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
@ -70,7 +70,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
|
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
int lo = __double2loint(val);
|
int lo = __double2loint(val);
|
||||||
int hi = __double2hiint(val);
|
int hi = __double2hiint(val);
|
||||||
|
|
||||||
@ -86,7 +86,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
|
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
return __shfl_down(val, delta, width);
|
return __shfl_down(val, delta, width);
|
||||||
#else
|
#else
|
||||||
return T();
|
return T();
|
||||||
@ -94,7 +94,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
|
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
return (unsigned int) __shfl_down((int) val, delta, width);
|
return (unsigned int) __shfl_down((int) val, delta, width);
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
@ -102,7 +102,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
|
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
int lo = __double2loint(val);
|
int lo = __double2loint(val);
|
||||||
int hi = __double2hiint(val);
|
int hi = __double2hiint(val);
|
||||||
|
|
||||||
@ -118,7 +118,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
|
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
return __shfl_up(val, delta, width);
|
return __shfl_up(val, delta, width);
|
||||||
#else
|
#else
|
||||||
return T();
|
return T();
|
||||||
@ -126,7 +126,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
|
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
return (unsigned int) __shfl_up((int) val, delta, width);
|
return (unsigned int) __shfl_up((int) val, delta, width);
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
@ -134,7 +134,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
|
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
int lo = __double2loint(val);
|
int lo = __double2loint(val);
|
||||||
int hi = __double2hiint(val);
|
int hi = __double2hiint(val);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user