tabs
This commit is contained in:
parent
35d5a671a2
commit
74f1162a41
@ -49,50 +49,50 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 200
|
#if __CUDA_ARCH__ >= 200
|
||||||
|
|
||||||
// for Fermi memory space is detected automatically
|
// for Fermi memory space is detected automatically
|
||||||
template <typename T> struct ForceGlob
|
template <typename T> struct ForceGlob
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
|
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
|
||||||
};
|
};
|
||||||
|
|
||||||
#else // __CUDA_ARCH__ >= 200
|
#else // __CUDA_ARCH__ >= 200
|
||||||
|
|
||||||
#if defined(_WIN64) || defined(__LP64__)
|
#if defined(_WIN64) || defined(__LP64__)
|
||||||
// 64-bit register modifier for inlined asm
|
// 64-bit register modifier for inlined asm
|
||||||
#define _OPENCV_ASM_PTR_ "l"
|
#define _OPENCV_ASM_PTR_ "l"
|
||||||
#else
|
#else
|
||||||
// 32-bit register modifier for inlined asm
|
// 32-bit register modifier for inlined asm
|
||||||
#define _OPENCV_ASM_PTR_ "r"
|
#define _OPENCV_ASM_PTR_ "r"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<class T> struct ForceGlob;
|
template<class T> struct ForceGlob;
|
||||||
|
|
||||||
#define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
|
#define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
|
||||||
template <> struct ForceGlob<base_type> \
|
template <> struct ForceGlob<base_type> \
|
||||||
{ \
|
{ \
|
||||||
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
||||||
{ \
|
{ \
|
||||||
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \
|
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \
|
||||||
} \
|
} \
|
||||||
};
|
};
|
||||||
#define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
|
#define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
|
||||||
template <> struct ForceGlob<base_type> \
|
template <> struct ForceGlob<base_type> \
|
||||||
{ \
|
{ \
|
||||||
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
||||||
{ \
|
{ \
|
||||||
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \
|
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \
|
||||||
} \
|
} \
|
||||||
};
|
};
|
||||||
|
|
||||||
DEFINE_FORCE_GLOB_B(uchar, u8)
|
DEFINE_FORCE_GLOB_B(uchar, u8)
|
||||||
DEFINE_FORCE_GLOB_B(schar, s8)
|
DEFINE_FORCE_GLOB_B(schar, s8)
|
||||||
DEFINE_FORCE_GLOB_B(char, b8)
|
DEFINE_FORCE_GLOB_B(char, b8)
|
||||||
DEFINE_FORCE_GLOB (ushort, u16, h)
|
DEFINE_FORCE_GLOB (ushort, u16, h)
|
||||||
DEFINE_FORCE_GLOB (short, s16, h)
|
DEFINE_FORCE_GLOB (short, s16, h)
|
||||||
DEFINE_FORCE_GLOB (uint, u32, r)
|
DEFINE_FORCE_GLOB (uint, u32, r)
|
||||||
DEFINE_FORCE_GLOB (int, s32, r)
|
DEFINE_FORCE_GLOB (int, s32, r)
|
||||||
DEFINE_FORCE_GLOB (float, f32, f)
|
DEFINE_FORCE_GLOB (float, f32, f)
|
||||||
DEFINE_FORCE_GLOB (double, f64, d)
|
DEFINE_FORCE_GLOB (double, f64, d)
|
||||||
|
|
||||||
|
|
||||||
#undef DEFINE_FORCE_GLOB
|
#undef DEFINE_FORCE_GLOB
|
||||||
|
Loading…
x
Reference in New Issue
Block a user