CUDA-related func tables refactored to remove unneeded dependencies.
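The diff splits the old combined GpuFuncTable: device discovery, DeviceInfo and TargetArchs queries now dispatch through a separate DeviceInfoFuncTable, so they no longer depend on the GpuMat routines. A minimal self-contained sketch of that dispatch pattern follows; the Empty* stub and the singleton wiring here are illustrative, not the code from the commit.

    #include <cstdio>

    // Device/arch queries live in their own table...
    struct DeviceInfoFuncTable
    {
        virtual int getCudaEnabledDeviceCount() const = 0;
        virtual ~DeviceInfoFuncTable() {}
    };

    // ...while the GpuMat memory routines (copy, setTo, convertTo, ...) stay in GpuFuncTable.
    struct GpuFuncTable
    {
        virtual ~GpuFuncTable() {}
    };

    // Stub used when the library is built without CUDA: report zero devices.
    struct EmptyDeviceInfoFuncTable : DeviceInfoFuncTable
    {
        int getCudaEnabledDeviceCount() const { return 0; }
    };

    static DeviceInfoFuncTable* deviceInfoFuncTable()
    {
        static EmptyDeviceInfoFuncTable table;   // the real code selects a CUDA-backed table here
        return &table;
    }

    // Public entry points forward to whichever table owns the routine.
    int getCudaEnabledDeviceCount() { return deviceInfoFuncTable()->getCudaEnabledDeviceCount(); }

    int main() { std::printf("devices: %d\n", getCudaEnabledDeviceCount()); }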
@@ -239,23 +239,23 @@ static DeviceInfoFuncTable* deviceInfoFuncTable()

 //////////////////////////////// Initialization & Info ////////////////////////

-int cv::gpu::getCudaEnabledDeviceCount() { return gpuFuncTable()->getCudaEnabledDeviceCount(); }
+int cv::gpu::getCudaEnabledDeviceCount() { return deviceInfoFuncTable()->getCudaEnabledDeviceCount(); }

-void cv::gpu::setDevice(int device) { gpuFuncTable()->setDevice(device); }
-int cv::gpu::getDevice() { return gpuFuncTable()->getDevice(); }
+void cv::gpu::setDevice(int device) { deviceInfoFuncTable()->setDevice(device); }
+int cv::gpu::getDevice() { return deviceInfoFuncTable()->getDevice(); }

-void cv::gpu::resetDevice() { gpuFuncTable()->resetDevice(); }
+void cv::gpu::resetDevice() { deviceInfoFuncTable()->resetDevice(); }

-bool cv::gpu::deviceSupports(FeatureSet feature_set) { return gpuFuncTable()->deviceSupports(feature_set); }
+bool cv::gpu::deviceSupports(FeatureSet feature_set) { return deviceInfoFuncTable()->deviceSupports(feature_set); }

-bool cv::gpu::TargetArchs::builtWith(FeatureSet feature_set) { return gpuFuncTable()->builtWith(feature_set); }
-bool cv::gpu::TargetArchs::has(int major, int minor) { return gpuFuncTable()->has(major, minor); }
-bool cv::gpu::TargetArchs::hasPtx(int major, int minor) { return gpuFuncTable()->hasPtx(major, minor); }
-bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return gpuFuncTable()->hasBin(major, minor); }
-bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrLessPtx(major, minor); }
-bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return gpuFuncTable()->hasEqualOrGreater(major, minor); }
-bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterPtx(major, minor); }
-bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterBin(major, minor); }
+bool cv::gpu::TargetArchs::builtWith(FeatureSet feature_set) { return deviceInfoFuncTable()->builtWith(feature_set); }
+bool cv::gpu::TargetArchs::has(int major, int minor) { return deviceInfoFuncTable()->has(major, minor); }
+bool cv::gpu::TargetArchs::hasPtx(int major, int minor) { return deviceInfoFuncTable()->hasPtx(major, minor); }
+bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return deviceInfoFuncTable()->hasBin(major, minor); }
+bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrLessPtx(major, minor); }
+bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreater(major, minor); }
+bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterPtx(major, minor); }
+bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterBin(major, minor); }

 size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); }
 void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); }
@@ -270,8 +270,8 @@ std::string cv::gpu::DeviceInfo::name() const { return deviceInfoFuncTable()->na
 int cv::gpu::DeviceInfo::multiProcessorCount() const { return deviceInfoFuncTable()->multiProcessorCount(); }
 void cv::gpu::DeviceInfo::query() { deviceInfoFuncTable()->query(); }

-void cv::gpu::printCudaDeviceInfo(int device) { gpuFuncTable()->printCudaDeviceInfo(device); }
-void cv::gpu::printShortCudaDeviceInfo(int device) { gpuFuncTable()->printShortCudaDeviceInfo(device); }
+void cv::gpu::printCudaDeviceInfo(int device) { deviceInfoFuncTable()->printCudaDeviceInfo(device); }
+void cv::gpu::printShortCudaDeviceInfo(int device) { deviceInfoFuncTable()->printShortCudaDeviceInfo(device); }

 #ifdef HAVE_CUDA
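For reference, the entry points patched above are the public device-management API of the 2.4-era gpu module. A typical caller looks like the illustrative usage below (assuming the opencv2/gpu/gpu.hpp header; not part of the diff).

    #include <opencv2/gpu/gpu.hpp>
    #include <iostream>

    int main()
    {
        int count = cv::gpu::getCudaEnabledDeviceCount();   // 0 when built without CUDA
        std::cout << "CUDA devices: " << count << std::endl;

        for (int id = 0; id < count; ++id)
        {
            cv::gpu::DeviceInfo info(id);
            std::cout << id << ": " << info.name()
                      << "  SM " << info.majorVersion() << "." << info.minorVersion()
                      << "  compatible: " << (info.isCompatible() ? "yes" : "no") << std::endl;
        }
        return 0;
    }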
@@ -4,6 +4,7 @@
 class DeviceInfoFuncTable
 {
 public:
+    // cv::DeviceInfo
     virtual size_t sharedMemPerBlock() const = 0;
     virtual void queryMemory(size_t&, size_t&) const = 0;
     virtual size_t freeMemory() const = 0;
@@ -16,25 +17,13 @@
     virtual int majorVersion() const = 0;
     virtual int minorVersion() const = 0;
     virtual int multiProcessorCount() const = 0;
-    virtual ~DeviceInfoFuncTable() {};
-};
-
-class GpuFuncTable
-{
-public:
-    virtual ~GpuFuncTable() {}
-
-    // DeviceInfo routines
     virtual int getCudaEnabledDeviceCount() const = 0;
-
     virtual void setDevice(int) const = 0;
     virtual int getDevice() const = 0;
-
     virtual void resetDevice() const = 0;
-
     virtual bool deviceSupports(FeatureSet) const = 0;

-    // TargetArchs
+    // cv::TargetArchs
     virtual bool builtWith(FeatureSet) const = 0;
     virtual bool has(int, int) const = 0;
     virtual bool hasPtx(int, int) const = 0;
@@ -47,6 +36,14 @@
     virtual void printCudaDeviceInfo(int) const = 0;
     virtual void printShortCudaDeviceInfo(int) const = 0;

+    virtual ~DeviceInfoFuncTable() {};
+};
+
+class GpuFuncTable
+{
+public:
+    virtual ~GpuFuncTable() {}
+
     // GpuMat routines
     virtual void copy(const Mat& src, GpuMat& dst) const = 0;
     virtual void copy(const GpuMat& src, Mat& dst) const = 0;
@@ -80,13 +77,7 @@
     int majorVersion() const { throw_nogpu; return -1; }
     int minorVersion() const { throw_nogpu; return -1; }
     int multiProcessorCount() const { throw_nogpu; return -1; }
-};
-
-class EmptyFuncTable : public GpuFuncTable
-{
-public:
-
-    // DeviceInfo routines
     int getCudaEnabledDeviceCount() const { return 0; }

     void setDevice(int) const { throw_nogpu; }
@@ -107,6 +98,11 @@

     void printCudaDeviceInfo(int) const { throw_nogpu; }
     void printShortCudaDeviceInfo(int) const { throw_nogpu; }
+};
+
+class EmptyFuncTable : public GpuFuncTable
+{
+public:

     void copy(const Mat&, GpuMat&) const { throw_nogpu; }
     void copy(const GpuMat&, Mat&) const { throw_nogpu; }
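The no-CUDA stub behaviour visible in the hunks above is: device enumeration reports zero devices, every other entry point fails loudly. A standalone sketch of that pattern; throw_nogpu is a macro in the real sources and is assumed here to expand to a plain exception so the example compiles on its own.

    #include <stdexcept>

    #define throw_nogpu throw std::runtime_error("The library is compiled without CUDA support")

    struct EmptyDeviceInfoStub
    {
        int getCudaEnabledDeviceCount() const { return 0; }    // callers can test for 0
        void setDevice(int) const { throw_nogpu; }             // everything else is an error
        void printCudaDeviceInfo(int) const { throw_nogpu; }
    };

    int main()
    {
        EmptyDeviceInfoStub t;
        if (t.getCudaEnabledDeviceCount() == 0)
            return 0;   // no CUDA: bail out before touching GPU routines
        t.printCudaDeviceInfo(0);
    }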
@@ -568,12 +564,12 @@ namespace cv { namespace gpu { namespace device
        bool isCompatible() const
        {
            // Check PTX compatibility
-           if (TargetArchs::hasEqualOrLessPtx(majorVersion_, minorVersion_))
+           if (hasEqualOrLessPtx(majorVersion_, minorVersion_))
                return true;

            // Check BIN compatibility
            for (int i = minorVersion_; i >= 0; --i)
-               if (TargetArchs::hasBin(majorVersion_, i))
+               if (hasBin(majorVersion_, i))
                    return true;

            return false;
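The isCompatible() logic above boils down to: a device can run the built code if the library carries PTX for an equal-or-lower compute capability (JIT-compiled on newer hardware), or a CUBIN for the same major version with an equal-or-lower minor version. A standalone illustration of that rule; the build configuration constants are made-up stand-ins for the real TargetArchs data.

    #include <cstdio>

    // Hypothetical build configuration: the library ships PTX for 2.0 and a CUBIN for 3.0.
    static const int PTX_MAJOR = 2, PTX_MINOR = 0;
    static const int BIN_MAJOR = 3, BIN_MINOR = 0;

    static bool hasEqualOrLessPtx(int major, int minor)
    {
        return PTX_MAJOR * 10 + PTX_MINOR <= major * 10 + minor;
    }

    static bool hasBin(int major, int minor)
    {
        return major == BIN_MAJOR && minor == BIN_MINOR;
    }

    // Same shape as DeviceInfo::isCompatible() in the hunk above.
    static bool isCompatible(int major, int minor)
    {
        if (hasEqualOrLessPtx(major, minor))
            return true;

        for (int i = minor; i >= 0; --i)
            if (hasBin(major, i))
                return true;

        return false;
    }

    int main()
    {
        std::printf("SM 3.5 compatible: %d\n", isCompatible(3, 5));   // 1: PTX 2.0 <= 3.5
        std::printf("SM 1.1 compatible: %d\n", isCompatible(1, 1));   // 0: no PTX or CUBIN fits
    }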
@@ -614,44 +610,6 @@ namespace cv { namespace gpu { namespace device
            return multi_processor_count_;
        }

-   private:
-       int device_id_;
-
-       std::string name_;
-       int multi_processor_count_;
-       int majorVersion_;
-       int minorVersion_;
-   };
-
-   class CudaFuncTable : public GpuFuncTable
-   {
-   protected:
-
-       const CudaArch cudaArch;
-
-       int convertSMVer2Cores(int major, int minor) const
-       {
-           // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-           typedef struct {
-               int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
-               int Cores;
-           } SMtoCores;
-
-           SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } };
-
-           int index = 0;
-           while (gpuArchCoresPerSM[index].SM != -1)
-           {
-               if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
-                   return gpuArchCoresPerSM[index].Cores;
-               index++;
-           }
-
-           return -1;
-       }
-
-   public:
-
        int getCudaEnabledDeviceCount() const
        {
            int count;
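convertSMVer2Cores (moved rather than rewritten by this commit) packs the compute capability into a 0xMm key and looks the cores-per-SM value up in a small table. A standalone check of the same mapping, reduced to a free function:

    #include <cstdio>

    // Same lookup as convertSMVer2Cores in the diff.
    static int convertSMVer2Cores(int major, int minor)
    {
        struct SMtoCores { int SM; int Cores; };
        static const SMtoCores table[] = {
            { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 },
            { 0x20, 32 }, { 0x21, 48 }, { 0x30, 192 }, { 0x35, 192 },
            { -1, -1 }
        };

        for (int i = 0; table[i].SM != -1; ++i)
            if (table[i].SM == ((major << 4) + minor))   // 0xMm key, e.g. SM 3.5 -> 0x35
                return table[i].Cores;

        return -1;   // unknown architecture
    }

    int main()
    {
        std::printf("SM 2.1: %d cores/SM\n", convertSMVer2Cores(2, 1));   // 48
        std::printf("SM 3.0: %d cores/SM\n", convertSMVer2Cores(3, 0));   // 192
    }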
@@ -859,6 +817,42 @@ namespace cv { namespace gpu { namespace device
            fflush(stdout);
        }

+   private:
+       int device_id_;
+
+       std::string name_;
+       int multi_processor_count_;
+       int majorVersion_;
+       int minorVersion_;
+
+       const CudaArch cudaArch;
+
+       int convertSMVer2Cores(int major, int minor) const
+       {
+           // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
+           typedef struct {
+               int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
+               int Cores;
+           } SMtoCores;
+
+           SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } };
+
+           int index = 0;
+           while (gpuArchCoresPerSM[index].SM != -1)
+           {
+               if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
+                   return gpuArchCoresPerSM[index].Cores;
+               index++;
+           }
+
+           return -1;
+       }
+   };
+
+   class CudaFuncTable : public GpuFuncTable
+   {
+   public:
+
        void copy(const Mat& src, GpuMat& dst) const
        {
            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
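The copy() shown last uploads a host Mat with cudaMemcpy2D, which handles the differing row pitches of source and destination. A minimal standalone upload along the same lines, using raw CUDA runtime calls and no OpenCV types; sizes and the single-check error handling are illustrative only.

    #include <cuda_runtime.h>
    #include <cstdio>

    int main()
    {
        const int rows = 4, cols = 16;                 // 16 one-byte elements per row
        unsigned char host[rows][cols] = { { 0 } };

        unsigned char* dev = 0;
        size_t devPitch = 0;                           // device rows may be padded
        cudaError_t err = cudaMallocPitch((void**)&dev, &devPitch, cols, rows);
        if (err != cudaSuccess) { std::printf("%s\n", cudaGetErrorString(err)); return 1; }

        // Same call shape as the diff: dst, dst pitch, src, src pitch, row width in bytes, rows, direction.
        err = cudaMemcpy2D(dev, devPitch, host, cols, cols, rows, cudaMemcpyHostToDevice);
        if (err != cudaSuccess) { std::printf("%s\n", cudaGetErrorString(err)); return 1; }

        cudaFree(dev);
        return 0;
    }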