fixed cudev compilation for old pre-Fermi archs
This commit is contained in:
parent
eff21788a8
commit
20f636fcee
@ -56,6 +56,7 @@ namespace grid_histogram_detail
|
|||||||
template <int BIN_COUNT, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
|
template <int BIN_COUNT, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
|
||||||
__global__ void histogram(const SrcPtr src, ResType* hist, const MaskPtr mask, const int rows, const int cols)
|
__global__ void histogram(const SrcPtr src, ResType* hist, const MaskPtr mask, const int rows, const int cols)
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH >= 120
|
||||||
__shared__ ResType smem[BIN_COUNT];
|
__shared__ ResType smem[BIN_COUNT];
|
||||||
|
|
||||||
const int y = blockIdx.x * blockDim.y + threadIdx.y;
|
const int y = blockIdx.x * blockDim.y + threadIdx.y;
|
||||||
@ -86,6 +87,7 @@ namespace grid_histogram_detail
|
|||||||
if (histVal > 0)
|
if (histVal > 0)
|
||||||
atomicAdd(hist + i, histVal);
|
atomicAdd(hist + i, histVal);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
|
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
|
||||||
|
@ -57,6 +57,8 @@ namespace cv { namespace cudev {
|
|||||||
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
|
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
|
||||||
__host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
|
__host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
|
||||||
{
|
{
|
||||||
|
CV_Assert( deviceSupports(SHARED_ATOMICS) );
|
||||||
|
|
||||||
const int rows = getRows(src);
|
const int rows = getRows(src);
|
||||||
const int cols = getCols(src);
|
const int cols = getCols(src);
|
||||||
|
|
||||||
@ -75,6 +77,8 @@ __host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const Mas
|
|||||||
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType>
|
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType>
|
||||||
__host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
|
__host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
|
||||||
{
|
{
|
||||||
|
CV_Assert( deviceSupports(SHARED_ATOMICS) );
|
||||||
|
|
||||||
const int rows = getRows(src);
|
const int rows = getRows(src);
|
||||||
const int cols = getCols(src);
|
const int cols = getCols(src);
|
||||||
|
|
||||||
|
@ -52,6 +52,40 @@
|
|||||||
#include "gpumat.hpp"
|
#include "gpumat.hpp"
|
||||||
#include "traits.hpp"
|
#include "traits.hpp"
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
template <typename T> struct CvCudevTextureRef
|
||||||
|
{
|
||||||
|
typedef texture<T, cudaTextureType2D, cudaReadModeElementType> TexRef;
|
||||||
|
|
||||||
|
static TexRef ref;
|
||||||
|
|
||||||
|
__host__ static void bind(const cv::cudev::GlobPtrSz<T>& mat,
|
||||||
|
bool normalizedCoords = false,
|
||||||
|
cudaTextureFilterMode filterMode = cudaFilterModePoint,
|
||||||
|
cudaTextureAddressMode addressMode = cudaAddressModeClamp)
|
||||||
|
{
|
||||||
|
ref.normalized = normalizedCoords;
|
||||||
|
ref.filterMode = filterMode;
|
||||||
|
ref.addressMode[0] = addressMode;
|
||||||
|
ref.addressMode[1] = addressMode;
|
||||||
|
ref.addressMode[2] = addressMode;
|
||||||
|
|
||||||
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
||||||
|
|
||||||
|
CV_CUDEV_SAFE_CALL( cudaBindTexture2D(0, &ref, mat.data, &desc, mat.cols, mat.rows, mat.step) );
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ static void unbind()
|
||||||
|
{
|
||||||
|
CV_CUDEV_SAFE_CALL( cudaUnbindTexture(ref) );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
typename CvCudevTextureRef<T>::TexRef CvCudevTextureRef<T>::ref;
|
||||||
|
}
|
||||||
|
|
||||||
namespace cv { namespace cudev {
|
namespace cv { namespace cudev {
|
||||||
|
|
||||||
template <typename T> struct TexturePtr
|
template <typename T> struct TexturePtr
|
||||||
@ -63,79 +97,73 @@ template <typename T> struct TexturePtr
|
|||||||
|
|
||||||
__device__ __forceinline__ T operator ()(float y, float x) const
|
__device__ __forceinline__ T operator ()(float y, float x) const
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH < 300
|
||||||
|
// Use the texture reference
|
||||||
|
return tex2D(CvCudevTextureRef<T>::ref, x, y);
|
||||||
|
#else
|
||||||
|
// Use the texture object
|
||||||
return tex2D<T>(texObj, x, y);
|
return tex2D<T>(texObj, x, y);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T> struct Texture : TexturePtr<T>
|
template <typename T> struct Texture : TexturePtr<T>
|
||||||
{
|
{
|
||||||
int rows, cols;
|
int rows, cols;
|
||||||
|
bool cc30;
|
||||||
|
|
||||||
__host__ explicit Texture(const GlobPtrSz<T>& mat,
|
__host__ explicit Texture(const GlobPtrSz<T>& mat,
|
||||||
bool normalizedCoords = false,
|
bool normalizedCoords = false,
|
||||||
cudaTextureFilterMode filterMode = cudaFilterModePoint,
|
cudaTextureFilterMode filterMode = cudaFilterModePoint,
|
||||||
cudaTextureAddressMode addressMode = cudaAddressModeClamp)
|
cudaTextureAddressMode addressMode = cudaAddressModeClamp)
|
||||||
{
|
{
|
||||||
CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) );
|
cc30 = deviceSupports(FEATURE_SET_COMPUTE_30);
|
||||||
|
|
||||||
rows = mat.rows;
|
rows = mat.rows;
|
||||||
cols = mat.cols;
|
cols = mat.cols;
|
||||||
|
|
||||||
cudaResourceDesc texRes;
|
if (cc30)
|
||||||
std::memset(&texRes, 0, sizeof(texRes));
|
{
|
||||||
texRes.resType = cudaResourceTypePitch2D;
|
// Use the texture object
|
||||||
texRes.res.pitch2D.devPtr = mat.data;
|
cudaResourceDesc texRes;
|
||||||
texRes.res.pitch2D.height = mat.rows;
|
std::memset(&texRes, 0, sizeof(texRes));
|
||||||
texRes.res.pitch2D.width = mat.cols;
|
texRes.resType = cudaResourceTypePitch2D;
|
||||||
texRes.res.pitch2D.pitchInBytes = mat.step;
|
texRes.res.pitch2D.devPtr = mat.data;
|
||||||
texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();
|
texRes.res.pitch2D.height = mat.rows;
|
||||||
|
texRes.res.pitch2D.width = mat.cols;
|
||||||
|
texRes.res.pitch2D.pitchInBytes = mat.step;
|
||||||
|
texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();
|
||||||
|
|
||||||
cudaTextureDesc texDescr;
|
cudaTextureDesc texDescr;
|
||||||
std::memset(&texDescr, 0, sizeof(texDescr));
|
std::memset(&texDescr, 0, sizeof(texDescr));
|
||||||
texDescr.addressMode[0] = addressMode;
|
texDescr.normalizedCoords = normalizedCoords;
|
||||||
texDescr.addressMode[1] = addressMode;
|
texDescr.filterMode = filterMode;
|
||||||
texDescr.addressMode[2] = addressMode;
|
texDescr.addressMode[0] = addressMode;
|
||||||
texDescr.filterMode = filterMode;
|
texDescr.addressMode[1] = addressMode;
|
||||||
texDescr.readMode = cudaReadModeElementType;
|
texDescr.addressMode[2] = addressMode;
|
||||||
texDescr.normalizedCoords = normalizedCoords;
|
texDescr.readMode = cudaReadModeElementType;
|
||||||
|
|
||||||
CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
|
CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
|
||||||
}
|
}
|
||||||
|
else
|
||||||
__host__ explicit Texture(const GpuMat_<T>& mat,
|
{
|
||||||
bool normalizedCoords = false,
|
// Use the texture reference
|
||||||
cudaTextureFilterMode filterMode = cudaFilterModePoint,
|
CvCudevTextureRef<T>::bind(mat, normalizedCoords, filterMode, addressMode);
|
||||||
cudaTextureAddressMode addressMode = cudaAddressModeClamp)
|
}
|
||||||
{
|
|
||||||
CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) );
|
|
||||||
|
|
||||||
rows = mat.rows;
|
|
||||||
cols = mat.cols;
|
|
||||||
|
|
||||||
cudaResourceDesc texRes;
|
|
||||||
std::memset(&texRes, 0, sizeof(texRes));
|
|
||||||
texRes.resType = cudaResourceTypePitch2D;
|
|
||||||
texRes.res.pitch2D.devPtr = mat.data;
|
|
||||||
texRes.res.pitch2D.height = mat.rows;
|
|
||||||
texRes.res.pitch2D.width = mat.cols;
|
|
||||||
texRes.res.pitch2D.pitchInBytes = mat.step;
|
|
||||||
texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();
|
|
||||||
|
|
||||||
cudaTextureDesc texDescr;
|
|
||||||
std::memset(&texDescr, 0, sizeof(texDescr));
|
|
||||||
texDescr.addressMode[0] = addressMode;
|
|
||||||
texDescr.addressMode[1] = addressMode;
|
|
||||||
texDescr.addressMode[2] = addressMode;
|
|
||||||
texDescr.filterMode = filterMode;
|
|
||||||
texDescr.readMode = cudaReadModeElementType;
|
|
||||||
texDescr.normalizedCoords = normalizedCoords;
|
|
||||||
|
|
||||||
CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ ~Texture()
|
__host__ ~Texture()
|
||||||
{
|
{
|
||||||
cudaDestroyTextureObject(this->texObj);
|
if (cc30)
|
||||||
|
{
|
||||||
|
// Use the texture object
|
||||||
|
cudaDestroyTextureObject(this->texObj);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Use the texture reference
|
||||||
|
CvCudevTextureRef<T>::unbind();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -64,11 +64,23 @@ __device__ __forceinline__ uint atomicAdd(uint* address, uint val)
|
|||||||
|
|
||||||
__device__ __forceinline__ float atomicAdd(float* address, float val)
|
__device__ __forceinline__ float atomicAdd(float* address, float val)
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH >= 200
|
||||||
return ::atomicAdd(address, val);
|
return ::atomicAdd(address, val);
|
||||||
|
#else
|
||||||
|
int* address_as_i = (int*) address;
|
||||||
|
int old = *address_as_i, assumed;
|
||||||
|
do {
|
||||||
|
assumed = old;
|
||||||
|
old = ::atomicCAS(address_as_i, assumed,
|
||||||
|
__float_as_int(val + __int_as_float(assumed)));
|
||||||
|
} while (assumed != old);
|
||||||
|
return __int_as_float(old);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ static double atomicAdd(double* address, double val)
|
__device__ static double atomicAdd(double* address, double val)
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH >= 130
|
||||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||||
unsigned long long int old = *address_as_ull, assumed;
|
unsigned long long int old = *address_as_ull, assumed;
|
||||||
do {
|
do {
|
||||||
@ -77,6 +89,11 @@ __device__ static double atomicAdd(double* address, double val)
|
|||||||
__double_as_longlong(val + __longlong_as_double(assumed)));
|
__double_as_longlong(val + __longlong_as_double(assumed)));
|
||||||
} while (assumed != old);
|
} while (assumed != old);
|
||||||
return __longlong_as_double(old);
|
return __longlong_as_double(old);
|
||||||
|
#else
|
||||||
|
(void) address;
|
||||||
|
(void) val;
|
||||||
|
return 0.0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// atomicMin
|
// atomicMin
|
||||||
@ -93,6 +110,7 @@ __device__ __forceinline__ uint atomicMin(uint* address, uint val)
|
|||||||
|
|
||||||
__device__ static float atomicMin(float* address, float val)
|
__device__ static float atomicMin(float* address, float val)
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH >= 120
|
||||||
int* address_as_i = (int*) address;
|
int* address_as_i = (int*) address;
|
||||||
int old = *address_as_i, assumed;
|
int old = *address_as_i, assumed;
|
||||||
do {
|
do {
|
||||||
@ -101,10 +119,16 @@ __device__ static float atomicMin(float* address, float val)
|
|||||||
__float_as_int(::fminf(val, __int_as_float(assumed))));
|
__float_as_int(::fminf(val, __int_as_float(assumed))));
|
||||||
} while (assumed != old);
|
} while (assumed != old);
|
||||||
return __int_as_float(old);
|
return __int_as_float(old);
|
||||||
|
#else
|
||||||
|
(void) address;
|
||||||
|
(void) val;
|
||||||
|
return 0.0f;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ static double atomicMin(double* address, double val)
|
__device__ static double atomicMin(double* address, double val)
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH >= 130
|
||||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||||
unsigned long long int old = *address_as_ull, assumed;
|
unsigned long long int old = *address_as_ull, assumed;
|
||||||
do {
|
do {
|
||||||
@ -113,6 +137,11 @@ __device__ static double atomicMin(double* address, double val)
|
|||||||
__double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
|
__double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
|
||||||
} while (assumed != old);
|
} while (assumed != old);
|
||||||
return __longlong_as_double(old);
|
return __longlong_as_double(old);
|
||||||
|
#else
|
||||||
|
(void) address;
|
||||||
|
(void) val;
|
||||||
|
return 0.0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// atomicMax
|
// atomicMax
|
||||||
@ -129,6 +158,7 @@ __device__ __forceinline__ uint atomicMax(uint* address, uint val)
|
|||||||
|
|
||||||
__device__ static float atomicMax(float* address, float val)
|
__device__ static float atomicMax(float* address, float val)
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH >= 120
|
||||||
int* address_as_i = (int*) address;
|
int* address_as_i = (int*) address;
|
||||||
int old = *address_as_i, assumed;
|
int old = *address_as_i, assumed;
|
||||||
do {
|
do {
|
||||||
@ -137,10 +167,16 @@ __device__ static float atomicMax(float* address, float val)
|
|||||||
__float_as_int(::fmaxf(val, __int_as_float(assumed))));
|
__float_as_int(::fmaxf(val, __int_as_float(assumed))));
|
||||||
} while (assumed != old);
|
} while (assumed != old);
|
||||||
return __int_as_float(old);
|
return __int_as_float(old);
|
||||||
|
#else
|
||||||
|
(void) address;
|
||||||
|
(void) val;
|
||||||
|
return 0.0f;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ static double atomicMax(double* address, double val)
|
__device__ static double atomicMax(double* address, double val)
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH >= 130
|
||||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||||
unsigned long long int old = *address_as_ull, assumed;
|
unsigned long long int old = *address_as_ull, assumed;
|
||||||
do {
|
do {
|
||||||
@ -149,6 +185,11 @@ __device__ static double atomicMax(double* address, double val)
|
|||||||
__double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
|
__double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
|
||||||
} while (assumed != old);
|
} while (assumed != old);
|
||||||
return __longlong_as_double(old);
|
return __longlong_as_double(old);
|
||||||
|
#else
|
||||||
|
(void) address;
|
||||||
|
(void) val;
|
||||||
|
return 0.0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
@ -228,7 +228,11 @@ template <> __device__ __forceinline__ int saturate_cast<int>(float v)
|
|||||||
}
|
}
|
||||||
template <> __device__ __forceinline__ int saturate_cast<int>(double v)
|
template <> __device__ __forceinline__ int saturate_cast<int>(double v)
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH >= 130
|
||||||
return __double2int_rn(v);
|
return __double2int_rn(v);
|
||||||
|
#else
|
||||||
|
return saturate_cast<int>((float) v);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
|
template <> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
|
||||||
@ -256,7 +260,11 @@ template <> __device__ __forceinline__ uint saturate_cast<uint>(float v)
|
|||||||
}
|
}
|
||||||
template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
|
template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
|
||||||
{
|
{
|
||||||
|
#if CV_CUDEV_ARCH >= 130
|
||||||
return __double2uint_rn(v);
|
return __double2uint_rn(v);
|
||||||
|
#else
|
||||||
|
return saturate_cast<uint>((float) v);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
Loading…
Reference in New Issue
Block a user