refactored gpu info structures (TargetArchs and DeviceInfo)
DeviceInfo now provides full information about the device (from cudaDeviceProp)
This commit is contained in:
parent 76f4b02b06
commit 2dab93c2e8
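Below is a minimal usage sketch of the refactored API (editor's illustration, not part of the commit; the include path and the choice of device 0 are assumptions):

    #include <cstdio>
    #include <opencv2/core/gpu.hpp> // assumed header location; adjust for your branch

    int main()
    {
        // getCudaEnabledDeviceCount() never throws, even in a CUDA-less build.
        if (cv::gpu::getCudaEnabledDeviceCount() <= 0)
            return 0;

        // DeviceInfo now exposes the full cudaDeviceProp contents through getters.
        cv::gpu::DeviceInfo info(0); // device 0 chosen arbitrarily

        std::printf("%s: CC %d.%d, %d SMs, %d MB global memory\n",
                    info.name(), info.major(), info.minor(),
                    info.multiProcessorCount(),
                    static_cast<int>(info.totalGlobalMem() / (1024 * 1024)));

        if (!info.isCompatible())
            std::printf("this GPU module build cannot run on the device\n");

        return 0;
    }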
@@ -392,17 +392,17 @@ private:
//////////////////////////////// Initialization & Info ////////////////////////

//! This is the only function that do not throw exceptions if the library is compiled without Cuda.
//! this is the only function that do not throw exceptions if the library is compiled without CUDA
CV_EXPORTS int getCudaEnabledDeviceCount();

//! Functions below throw cv::Expception if the library is compiled without Cuda.

//! set device to be used for GPU executions for the calling host thread
CV_EXPORTS void setDevice(int device);

//! returns which device is currently being used for the calling host thread
CV_EXPORTS int getDevice();

//! Explicitly destroys and cleans up all resources associated with the current device in the current process.
//! Any subsequent API call to this device will reinitialize the device.
//! explicitly destroys and cleans up all resources associated with the current device in the current process
//! any subsequent API call to this device will reinitialize the device
CV_EXPORTS void resetDevice();

enum FeatureSet
@@ -423,75 +423,218 @@ enum FeatureSet
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};

// Checks whether current device supports the given feature
//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);

// Gives information about what GPU archs this OpenCV GPU module was
// compiled for
//! information about what GPU archs this OpenCV GPU module was compiled for
class CV_EXPORTS TargetArchs
{
public:
static bool builtWith(FeatureSet feature_set);

static bool has(int major, int minor);
static bool hasPtx(int major, int minor);
static bool hasBin(int major, int minor);

static bool hasEqualOrLessPtx(int major, int minor);
static bool hasEqualOrGreater(int major, int minor);
static bool hasEqualOrGreaterPtx(int major, int minor);
static bool hasEqualOrGreaterBin(int major, int minor);

private:
TargetArchs();
};

// Gives information about the given GPU
//! information about the given GPU.
class CV_EXPORTS DeviceInfo
{
public:
// Creates DeviceInfo object for the current GPU
DeviceInfo() : device_id_(getDevice()) { query(); }
//! creates DeviceInfo object for the current GPU
DeviceInfo();

// Creates DeviceInfo object for the given GPU
DeviceInfo(int device_id) : device_id_(device_id) { query(); }
//! creates DeviceInfo object for the given GPU
DeviceInfo(int device_id);

String name() const { return name_; }
//! device number.
int deviceID() const;

// Return compute capability versions
int majorVersion() const { return majorVersion_; }
int minorVersion() const { return minorVersion_; }
//! ASCII string identifying device
const char* name() const;

int multiProcessorCount() const { return multi_processor_count_; }
//! global memory available on device in bytes
size_t totalGlobalMem() const;

//! shared memory available per block in bytes
size_t sharedMemPerBlock() const;

//! 32-bit registers available per block
int regsPerBlock() const;

//! warp size in threads
int warpSize() const;

//! maximum pitch in bytes allowed by memory copies
size_t memPitch() const;

//! maximum number of threads per block
int maxThreadsPerBlock() const;

//! maximum size of each dimension of a block
Vec3i maxThreadsDim() const;

//! maximum size of each dimension of a grid
Vec3i maxGridSize() const;

//! clock frequency in kilohertz
int clockRate() const;

//! constant memory available on device in bytes
size_t totalConstMem() const;

//! major compute capability
int major() const;

//! minor compute capability
int minor() const;

//! alignment requirement for textures
size_t textureAlignment() const;

//! pitch alignment requirement for texture references bound to pitched memory
size_t texturePitchAlignment() const;

//! number of multiprocessors on device
int multiProcessorCount() const;

//! specified whether there is a run time limit on kernels
bool kernelExecTimeoutEnabled() const;

//! device is integrated as opposed to discrete
bool integrated() const;

//! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
bool canMapHostMemory() const;

enum ComputeMode
{
ComputeModeDefault,         /**< default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device) */
ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device) */
};

//! compute mode
ComputeMode computeMode() const;

//! maximum 1D texture size
int maxTexture1D() const;

//! maximum 1D mipmapped texture size
int maxTexture1DMipmap() const;

//! maximum size for 1D textures bound to linear memory
int maxTexture1DLinear() const;

//! maximum 2D texture dimensions
Vec2i maxTexture2D() const;

//! maximum 2D mipmapped texture dimensions
Vec2i maxTexture2DMipmap() const;

//! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
Vec3i maxTexture2DLinear() const;

//! maximum 2D texture dimensions if texture gather operations have to be performed
Vec2i maxTexture2DGather() const;

//! maximum 3D texture dimensions
Vec3i maxTexture3D() const;

//! maximum Cubemap texture dimensions
int maxTextureCubemap() const;

//! maximum 1D layered texture dimensions
Vec2i maxTexture1DLayered() const;

//! maximum 2D layered texture dimensions
Vec3i maxTexture2DLayered() const;

//! maximum Cubemap layered texture dimensions
Vec2i maxTextureCubemapLayered() const;

//! maximum 1D surface size
int maxSurface1D() const;

//! maximum 2D surface dimensions
Vec2i maxSurface2D() const;

//! maximum 3D surface dimensions
Vec3i maxSurface3D() const;

//! maximum 1D layered surface dimensions
Vec2i maxSurface1DLayered() const;

//! maximum 2D layered surface dimensions
Vec3i maxSurface2DLayered() const;

//! maximum Cubemap surface dimensions
int maxSurfaceCubemap() const;

//! maximum Cubemap layered surface dimensions
Vec2i maxSurfaceCubemapLayered() const;

//! alignment requirements for surfaces
size_t surfaceAlignment() const;

//! device can possibly execute multiple kernels concurrently
bool concurrentKernels() const;

//! device has ECC support enabled
bool ECCEnabled() const;

//! PCI bus ID of the device
int pciBusID() const;

//! PCI device ID of the device
int pciDeviceID() const;

//! PCI domain ID of the device
int pciDomainID() const;

//! true if device is a Tesla device using TCC driver, false otherwise
bool tccDriver() const;

//! number of asynchronous engines
int asyncEngineCount() const;

//! device shares a unified address space with the host
bool unifiedAddressing() const;

//! peak memory clock frequency in kilohertz
int memoryClockRate() const;

//! global memory bus width in bits
int memoryBusWidth() const;

//! size of L2 cache in bytes
int l2CacheSize() const;

//! maximum resident threads per multiprocessor
int maxThreadsPerMultiProcessor() const;

//! gets free and total device memory
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
size_t freeMemory() const;
size_t totalMemory() const;

// Checks whether device supports the given feature
//! checks whether device supports the given feature
bool supports(FeatureSet feature_set) const;

// Checks whether the GPU module can be run on the given device
//! checks whether the GPU module can be run on the given device
bool isCompatible() const;

bool canMapHostMemory() const;

size_t textureAlignment() const;

int deviceID() const { return device_id_; }

private:
void query();

int device_id_;

String name_;
int multi_processor_count_;
int majorVersion_;
int minorVersion_;
};

CV_EXPORTS void printCudaDeviceInfo(int device);

CV_EXPORTS void printShortCudaDeviceInfo(int device);

}} // namespace cv { namespace gpu {
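A short sketch of how the two layers declared above are meant to be combined: TargetArchs reports what the module binary was compiled for, while DeviceInfo::supports() reports what the runtime device offers (editor's illustration, not part of the diff; NATIVE_DOUBLE is assumed to be one of the FeatureSet values elided from this hunk):

    #include <opencv2/core/gpu.hpp> // assumed header location

    // True only if double-precision kernels were both compiled into the module
    // and are supported by the given device.
    bool canUseNativeDouble(int device_id)
    {
        using namespace cv::gpu;

        if (!TargetArchs::builtWith(NATIVE_DOUBLE)) // build-time check
            return false;

        DeviceInfo info(device_id);
        return info.supports(NATIVE_DOUBLE);        // run-time check
    }
    // For the current device, deviceSupports(NATIVE_DOUBLE) bundles both checks.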
@@ -567,6 +567,62 @@ Stream::Stream(const Ptr<Impl>& impl)
{
}

//////////////////////////////// Initialization & Info ////////////////////////

inline
bool TargetArchs::has(int major, int minor)
{
return hasPtx(major, minor) || hasBin(major, minor);
}

inline
bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
}

inline
DeviceInfo::DeviceInfo()
{
device_id_ = getDevice();
}

inline
DeviceInfo::DeviceInfo(int device_id)
{
CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
device_id_ = device_id;
}

inline
int DeviceInfo::deviceID() const
{
return device_id_;
}

inline
size_t DeviceInfo::freeMemory() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _freeMemory;
}

inline
size_t DeviceInfo::totalMemory() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _totalMemory;
}

inline
bool DeviceInfo::supports(FeatureSet feature_set) const
{
int version = major() * 10 + minor();
return version >= feature_set;
}

}} // namespace cv { namespace gpu {

//////////////////////////////// Mat ////////////////////////////////
@@ -41,50 +41,17 @@
//M*/

#include "precomp.hpp"
#include <limits>

using namespace cv;
using namespace cv::gpu;

//////////////////////////////// Initialization & Info ////////////////////////

#ifndef HAVE_CUDA

int cv::gpu::getCudaEnabledDeviceCount() { return 0; }

void cv::gpu::setDevice(int) { throw_no_cuda(); }
int cv::gpu::getDevice() { throw_no_cuda(); return 0; }

void cv::gpu::resetDevice() { throw_no_cuda(); }

bool cv::gpu::deviceSupports(FeatureSet) { throw_no_cuda(); return false; }

bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::has(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasBin(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_no_cuda(); return false; }

size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_no_cuda(); return 0; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_no_cuda(); }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_no_cuda(); return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_no_cuda(); return 0; }
bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_no_cuda(); return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_no_cuda(); return false; }
void cv::gpu::DeviceInfo::query() { throw_no_cuda(); }

void cv::gpu::printCudaDeviceInfo(int) { throw_no_cuda(); }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_no_cuda(); }

#else // HAVE_CUDA

int cv::gpu::getCudaEnabledDeviceCount()
{
#ifndef HAVE_CUDA
return 0;
#else
int count;
cudaError_t error = cudaGetDeviceCount( &count );
cudaError_t error = cudaGetDeviceCount(&count);

if (error == cudaErrorInsufficientDriver)
return -1;
@@ -94,25 +61,78 @@ int cv::gpu::getCudaEnabledDeviceCount()

cudaSafeCall( error );
return count;
#endif
}

void cv::gpu::setDevice(int device)
{
cudaSafeCall( cudaSetDevice( device ) );
#ifndef HAVE_CUDA
(void) device;
throw_no_cuda();
#else
cudaSafeCall( cudaSetDevice(device) );
#endif
}

int cv::gpu::getDevice()
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
int device;
cudaSafeCall( cudaGetDevice( &device ) );
cudaSafeCall( cudaGetDevice(&device) );
return device;
#endif
}

void cv::gpu::resetDevice()
{
#ifndef HAVE_CUDA
throw_no_cuda();
#else
cudaSafeCall( cudaDeviceReset() );
#endif
}

bool cv::gpu::deviceSupports(FeatureSet feature_set)
{
#ifndef HAVE_CUDA
(void) feature_set;
throw_no_cuda();
return false;
#else
static int versions[] =
{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));

const int devId = getDevice();

int version;

if (devId < cache_size && versions[devId] >= 0)
{
version = versions[devId];
}
else
{
DeviceInfo dev(devId);
version = dev.major() * 10 + dev.minor();
if (devId < cache_size)
versions[devId] = version;
}

return TargetArchs::builtWith(feature_set) && (version >= feature_set);
#endif
}
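The comparison version >= feature_set works because every FeatureSet enumerator stores a compute capability encoded as major*10 + minor (FEATURE_SET_COMPUTE_35 appears above as the value behind DYNAMIC_PARALLELISM). A tiny illustration of the same arithmetic (editor's sketch, not part of the diff):

    #include <opencv2/core/gpu.hpp> // assumed header location

    // Mirrors the check used by deviceSupports() and DeviceInfo::supports():
    // a device of compute capability 3.0 yields version 30, which satisfies
    // FEATURE_SET_COMPUTE_30 but not DYNAMIC_PARALLELISM (35, i.e. CC 3.5).
    bool versionSatisfies(int major, int minor, cv::gpu::FeatureSet feature_set)
    {
        const int version = major * 10 + minor;
        return version >= feature_set;
    }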
////////////////////////////////////////////////////////////////////////
// TargetArchs

#ifdef HAVE_CUDA

namespace
{
class CudaArch
@@ -128,7 +148,7 @@ namespace
bool hasEqualOrGreaterBin(int major, int minor) const;

private:
static void fromStr(const String& set_as_str, std::vector<int>& arr);
static void fromStr(const char* set_as_str, std::vector<int>& arr);

std::vector<int> bin;
std::vector<int> ptx;
@@ -174,12 +194,14 @@ namespace
return !bin.empty() && (bin.back() >= major * 10 + minor);
}

void CudaArch::fromStr(const String& set_as_str, std::vector<int>& arr)
void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
{
arr.clear();

const size_t len = strlen(set_as_str);

size_t pos = 0;
while (pos < set_as_str.size())
while (pos < len)
{
if (isspace(set_as_str[pos]))
{
@@ -189,8 +211,8 @@ namespace
{
int cur_value;
int chars_read;
int args_read = sscanf(set_as_str.c_str() + pos, "%d%n", &cur_value, &chars_read);
CV_Assert(args_read == 1);
int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
CV_Assert( args_read == 1 );

arr.push_back(cur_value);
pos += chars_read;
@@ -201,70 +223,83 @@ namespace
}
}

#endif

bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set)
{
#ifndef HAVE_CUDA
(void) feature_set;
throw_no_cuda();
return false;
#else
return cudaArch.builtWith(feature_set);
}

bool cv::gpu::TargetArchs::has(int major, int minor)
{
return hasPtx(major, minor) || hasBin(major, minor);
#endif
}

bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
{
#ifndef HAVE_CUDA
(void) major;
(void) minor;
throw_no_cuda();
return false;
#else
return cudaArch.hasPtx(major, minor);
#endif
}

bool cv::gpu::TargetArchs::hasBin(int major, int minor)
{
#ifndef HAVE_CUDA
(void) major;
(void) minor;
throw_no_cuda();
return false;
#else
return cudaArch.hasBin(major, minor);
#endif
}

bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor)
{
#ifndef HAVE_CUDA
(void) major;
(void) minor;
throw_no_cuda();
return false;
#else
return cudaArch.hasEqualOrLessPtx(major, minor);
}

bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor)
{
return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
#endif
}

bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
{
#ifndef HAVE_CUDA
(void) major;
(void) minor;
throw_no_cuda();
return false;
#else
return cudaArch.hasEqualOrGreaterPtx(major, minor);
#endif
}

bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
{
#ifndef HAVE_CUDA
(void) major;
(void) minor;
throw_no_cuda();
return false;
#else
return cudaArch.hasEqualOrGreaterBin(major, minor);
#endif
}

bool cv::gpu::deviceSupports(FeatureSet feature_set)
{
static int versions[] =
{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));
////////////////////////////////////////////////////////////////////////
// DeviceInfo

const int devId = getDevice();

int version;

if (devId < cache_size && versions[devId] >= 0)
version = versions[devId];
else
{
DeviceInfo dev(devId);
version = dev.majorVersion() * 10 + dev.minorVersion();
if (devId < cache_size)
versions[devId] = version;
}

return TargetArchs::builtWith(feature_set) && (version >= feature_set);
}
#ifdef HAVE_CUDA

namespace
{
@@ -272,63 +307,579 @@ namespace
{
public:
DeviceProps();
~DeviceProps();

cudaDeviceProp* get(int devID);
const cudaDeviceProp* get(int devID) const;

private:
std::vector<cudaDeviceProp*> props_;
std::vector<cudaDeviceProp> props_;
};

DeviceProps::DeviceProps()
{
props_.resize(10, 0);
}
int count = getCudaEnabledDeviceCount();

DeviceProps::~DeviceProps()
{
for (size_t i = 0; i < props_.size(); ++i)
if (count > 0)
{
if (props_[i])
delete props_[i];
props_.resize(count);

for (int devID = 0; devID < count; ++devID)
{
cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) );
}
}
props_.clear();
}

cudaDeviceProp* DeviceProps::get(int devID)
const cudaDeviceProp* DeviceProps::get(int devID) const
{
if (devID >= (int) props_.size())
props_.resize(devID + 5, 0);
CV_Assert( static_cast<size_t>(devID) < props_.size() );

if (!props_[devID])
{
props_[devID] = new cudaDeviceProp;
cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
}

return props_[devID];
return &props_[devID];
}

DeviceProps deviceProps;
DeviceProps& deviceProps()
{
static DeviceProps props;
return props;
}
}

#endif

const char* cv::gpu::DeviceInfo::name() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return "";
#else
return deviceProps().get(device_id_)->name;
#endif
}

size_t cv::gpu::DeviceInfo::totalGlobalMem() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->totalGlobalMem;
#endif
}

size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const
{
return deviceProps.get(device_id_)->sharedMemPerBlock;
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->sharedMemPerBlock;
#endif
}

bool cv::gpu::DeviceInfo::canMapHostMemory() const
int cv::gpu::DeviceInfo::regsPerBlock() const
{
return deviceProps.get(device_id_)->canMapHostMemory != 0;
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->regsPerBlock;
#endif
}

int cv::gpu::DeviceInfo::warpSize() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->warpSize;
#endif
}

size_t cv::gpu::DeviceInfo::memPitch() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->memPitch;
#endif
}

int cv::gpu::DeviceInfo::maxThreadsPerBlock() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxThreadsPerBlock;
#endif
}

Vec3i cv::gpu::DeviceInfo::maxThreadsDim() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec3i();
#else
return Vec3i(deviceProps().get(device_id_)->maxThreadsDim);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxGridSize() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec3i();
#else
return Vec3i(deviceProps().get(device_id_)->maxGridSize);
#endif
}

int cv::gpu::DeviceInfo::clockRate() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->clockRate;
#endif
}

size_t cv::gpu::DeviceInfo::totalConstMem() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->totalConstMem;
#endif
}

int cv::gpu::DeviceInfo::major() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->major;
#endif
}

int cv::gpu::DeviceInfo::minor() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->minor;
#endif
}

size_t cv::gpu::DeviceInfo::textureAlignment() const
{
return deviceProps.get(device_id_)->textureAlignment;
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->textureAlignment;
#endif
}

size_t cv::gpu::DeviceInfo::texturePitchAlignment() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->texturePitchAlignment;
#endif
}

int cv::gpu::DeviceInfo::multiProcessorCount() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->multiProcessorCount;
#endif
}

bool cv::gpu::DeviceInfo::kernelExecTimeoutEnabled() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
#endif
}

bool cv::gpu::DeviceInfo::integrated() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
return deviceProps().get(device_id_)->integrated != 0;
#endif
}

bool cv::gpu::DeviceInfo::canMapHostMemory() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
return deviceProps().get(device_id_)->canMapHostMemory != 0;
#endif
}

DeviceInfo::ComputeMode cv::gpu::DeviceInfo::computeMode() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return ComputeModeDefault;
#else
static const ComputeMode tbl[] =
{
ComputeModeDefault,
ComputeModeExclusive,
ComputeModeProhibited,
ComputeModeExclusiveProcess
};

return tbl[deviceProps().get(device_id_)->computeMode];
#endif
}

int cv::gpu::DeviceInfo::maxTexture1D() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxTexture1D;
#endif
}

int cv::gpu::DeviceInfo::maxTexture1DMipmap() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxTexture1DMipmap;
#endif
}

int cv::gpu::DeviceInfo::maxTexture1DLinear() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxTexture1DLinear;
#endif
}

Vec2i cv::gpu::DeviceInfo::maxTexture2D() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec2i();
#else
return Vec2i(deviceProps().get(device_id_)->maxTexture2D);
#endif
}

Vec2i cv::gpu::DeviceInfo::maxTexture2DMipmap() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec2i();
#else
return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxTexture2DLinear() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec3i();
#else
return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear);
#endif
}

Vec2i cv::gpu::DeviceInfo::maxTexture2DGather() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec2i();
#else
return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxTexture3D() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec3i();
#else
return Vec3i(deviceProps().get(device_id_)->maxTexture3D);
#endif
}

int cv::gpu::DeviceInfo::maxTextureCubemap() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxTextureCubemap;
#endif
}

Vec2i cv::gpu::DeviceInfo::maxTexture1DLayered() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec2i();
#else
return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxTexture2DLayered() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec3i();
#else
return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered);
#endif
}

Vec2i cv::gpu::DeviceInfo::maxTextureCubemapLayered() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec2i();
#else
return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered);
#endif
}

int cv::gpu::DeviceInfo::maxSurface1D() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxSurface1D;
#endif
}

Vec2i cv::gpu::DeviceInfo::maxSurface2D() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec2i();
#else
return Vec2i(deviceProps().get(device_id_)->maxSurface2D);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxSurface3D() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec3i();
#else
return Vec3i(deviceProps().get(device_id_)->maxSurface3D);
#endif
}

Vec2i cv::gpu::DeviceInfo::maxSurface1DLayered() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec2i();
#else
return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxSurface2DLayered() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec3i();
#else
return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered);
#endif
}

int cv::gpu::DeviceInfo::maxSurfaceCubemap() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxSurfaceCubemap;
#endif
}

Vec2i cv::gpu::DeviceInfo::maxSurfaceCubemapLayered() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec2i();
#else
return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered);
#endif
}

size_t cv::gpu::DeviceInfo::surfaceAlignment() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->surfaceAlignment;
#endif
}

bool cv::gpu::DeviceInfo::concurrentKernels() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
return deviceProps().get(device_id_)->concurrentKernels != 0;
#endif
}

bool cv::gpu::DeviceInfo::ECCEnabled() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
return deviceProps().get(device_id_)->ECCEnabled != 0;
#endif
}

int cv::gpu::DeviceInfo::pciBusID() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->pciBusID;
#endif
}

int cv::gpu::DeviceInfo::pciDeviceID() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->pciDeviceID;
#endif
}

int cv::gpu::DeviceInfo::pciDomainID() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->pciDomainID;
#endif
}

bool cv::gpu::DeviceInfo::tccDriver() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
return deviceProps().get(device_id_)->tccDriver != 0;
#endif
}

int cv::gpu::DeviceInfo::asyncEngineCount() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->asyncEngineCount;
#endif
}

bool cv::gpu::DeviceInfo::unifiedAddressing() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
return deviceProps().get(device_id_)->unifiedAddressing != 0;
#endif
}

int cv::gpu::DeviceInfo::memoryClockRate() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->memoryClockRate;
#endif
}

int cv::gpu::DeviceInfo::memoryBusWidth() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->memoryBusWidth;
#endif
}

int cv::gpu::DeviceInfo::l2CacheSize() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->l2CacheSize;
#endif
}

int cv::gpu::DeviceInfo::maxThreadsPerMultiProcessor() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
#endif
}

void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
{
#ifndef HAVE_CUDA
(void) _totalMemory;
(void) _freeMemory;
throw_no_cuda();
#else
int prevDeviceID = getDevice();
if (prevDeviceID != device_id_)
setDevice(device_id_);
@@ -337,51 +888,32 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory)

if (prevDeviceID != device_id_)
setDevice(prevDeviceID);
}

size_t cv::gpu::DeviceInfo::freeMemory() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _freeMemory;
}

size_t cv::gpu::DeviceInfo::totalMemory() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _totalMemory;
}

bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const
{
int version = majorVersion() * 10 + minorVersion();
return version >= feature_set;
#endif
}

bool cv::gpu::DeviceInfo::isCompatible() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
// Check PTX compatibility
if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
if (TargetArchs::hasEqualOrLessPtx(major(), minor()))
return true;

// Check BIN compatibility
for (int i = minorVersion(); i >= 0; --i)
if (TargetArchs::hasBin(majorVersion(), i))
for (int i = minor(); i >= 0; --i)
if (TargetArchs::hasBin(major(), i))
return true;

return false;
#endif
}
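To make the rule above concrete: PTX for an equal-or-lower architecture can be JIT-compiled forward, while a cubin only serves its own major version, so a module built with 3.0 PTX reports compatible on a CC 3.5 device but not on a CC 1.3 device. Checking it at startup looks like this (editor's sketch, not part of the diff; the include path is an assumption):

    #include <cstdio>
    #include <opencv2/core/gpu.hpp> // assumed header location

    int main()
    {
        cv::gpu::DeviceInfo info; // current device
        std::printf("device CC %d.%d -> %s with this GPU module build\n",
                    info.major(), info.minor(),
                    info.isCompatible() ? "compatible" : "NOT compatible");
        return 0;
    }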
void cv::gpu::DeviceInfo::query()
{
const cudaDeviceProp* prop = deviceProps.get(device_id_);
////////////////////////////////////////////////////////////////////////
// print info

name_ = prop->name;
multi_processor_count_ = prop->multiProcessorCount;
majorVersion_ = prop->major;
minorVersion_ = prop->minor;
}
#ifdef HAVE_CUDA

namespace
{
@@ -407,8 +939,14 @@ namespace
}
}

#endif

void cv::gpu::printCudaDeviceInfo(int device)
{
#ifndef HAVE_CUDA
(void) device;
throw_no_cuda();
#else
int count = getCudaEnabledDeviceCount();
bool valid = (device >= 0) && (device < count);

@@ -484,11 +1022,17 @@ void cv::gpu::printCudaDeviceInfo(int device)
printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100);
printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
printf(", NumDevs = %d\n\n", count);

fflush(stdout);
#endif
}

void cv::gpu::printShortCudaDeviceInfo(int device)
{
#ifndef HAVE_CUDA
(void) device;
throw_no_cuda();
#else
int count = getCudaEnabledDeviceCount();
bool valid = (device >= 0) && (device < count);

@@ -514,10 +1058,10 @@ void cv::gpu::printShortCudaDeviceInfo(int device)

printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
}
fflush(stdout);
}

#endif // HAVE_CUDA
fflush(stdout);
#endif
}

////////////////////////////////////////////////////////////////////////
// Error handling
@@ -878,7 +878,7 @@ namespace
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
{
DeviceInfo devInfo;
int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
int cc = devInfo.major() * 10 + devInfo.minor();
func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
}

@@ -977,7 +977,7 @@ namespace
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
{
DeviceInfo devInfo;
int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
int cc = devInfo.major() * 10 + devInfo.minor();
if (ksize > 16 && cc < 20)
CV_Error(cv::Error::StsNotImplemented, "column linear filter doesn't implemented for kernel size > 16 for device with compute capabilities less than 2.0");

@@ -80,7 +80,7 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
brox(loadMat(frame0), loadMat(frame1), u, v);

std::string fname(cvtest::TS::ptr()->get_data_path());
if (devInfo.majorVersion() >= 2)
if (devInfo.major() >= 2)
fname += "opticalflow/brox_optical_flow_cc20.bin";
else
fname += "opticalflow/brox_optical_flow.bin";

@@ -91,7 +91,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()

DeviceInfo device_info;

if (device_info.majorVersion() > 1 || device_info.multiProcessorCount() > 16)
if (device_info.major() > 1 || device_info.multiProcessorCount() > 16)
return true;

return false;

@@ -287,8 +287,8 @@ namespace perf
cv::gpu::DeviceInfo info(i);

printf("[----------]\n"), fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name()), fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", (int)info.major(), (int)info.minor()), fflush(stdout);
printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()), fflush(stdout);
printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout);
printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)), fflush(stdout);

@@ -682,13 +682,13 @@ void TestBase::Init(int argc, const char* const argv[])
cv::gpu::DeviceInfo info(param_cuda_device);
if (!info.isCompatible())
{
printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name().c_str()), fflush(stdout);
printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name()), fflush(stdout);
exit(-1);
}

cv::gpu::setDevice(param_cuda_device);

printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name().c_str()), fflush(stdout);
printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name()), fflush(stdout);
}
#endif

@@ -82,8 +82,8 @@ int main()
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

@@ -112,8 +112,8 @@ int main(int argc, char** argv)
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

@@ -62,8 +62,8 @@ int main()
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

@@ -191,7 +191,7 @@ int main(int argc, const char* argv[])
DeviceInfo dev_info(device);
if (!dev_info.isCompatible())
{
cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.major() << '.' << dev_info.minor() << endl;
return -1;
}
setDevice(device);

@@ -81,8 +81,8 @@ int main(int argc, char** argv)
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}