Merge pull request #512 from jet47:core-gpu-updates

This commit is contained in:
Andrey Kamaev 2013-02-21 12:22:03 +04:00 committed by OpenCV Buildbot
commit decf04dfec
2 changed files with 219 additions and 165 deletions

View File

@ -73,12 +73,16 @@ namespace cv { namespace gpu
FEATURE_SET_COMPUTE_20 = 20, FEATURE_SET_COMPUTE_20 = 20,
FEATURE_SET_COMPUTE_21 = 21, FEATURE_SET_COMPUTE_21 = 21,
FEATURE_SET_COMPUTE_30 = 30, FEATURE_SET_COMPUTE_30 = 30,
FEATURE_SET_COMPUTE_35 = 35,
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11, GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12, SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13, NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30 WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
}; };
// Checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set); CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
// Gives information about what GPU archs this OpenCV GPU module was // Gives information about what GPU archs this OpenCV GPU module was
@ -116,6 +120,9 @@ namespace cv { namespace gpu
int multiProcessorCount() const { return multi_processor_count_; } int multiProcessorCount() const { return multi_processor_count_; }
size_t sharedMemPerBlock() const;
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
size_t freeMemory() const; size_t freeMemory() const;
size_t totalMemory() const; size_t totalMemory() const;
@ -129,7 +136,6 @@ namespace cv { namespace gpu
private: private:
void query(); void query();
void queryMemory(size_t& free_memory, size_t& total_memory) const;
int device_id_; int device_id_;
@ -549,29 +555,6 @@ namespace cv { namespace gpu
{ {
ensureSizeIsEnough(size.height, size.width, type, m); ensureSizeIsEnough(size.height, size.width, type, m);
} }
inline void createContinuous(int rows, int cols, int type, GpuMat& m)
{
int area = rows * cols;
if (!m.isContinuous() || m.type() != type || m.size().area() != area)
ensureSizeIsEnough(1, area, type, m);
m = m.reshape(0, rows);
}
inline void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
{
if (m.type() == type && m.rows >= rows && m.cols >= cols)
m = m(Rect(0, 0, cols, rows));
else
m.create(rows, cols, type);
}
inline GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat)
{
if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
return mat(Rect(0, 0, cols, rows));
return mat = GpuMat(rows, cols, type);
}
}} }}
#endif // __cplusplus #endif // __cplusplus

View File

@ -48,8 +48,8 @@
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <npp.h> #include <npp.h>
#define CUDART_MINIMUM_REQUIRED_VERSION 4010 #define CUDART_MINIMUM_REQUIRED_VERSION 4020
#define NPP_MINIMUM_REQUIRED_VERSION 4100 #define NPP_MINIMUM_REQUIRED_VERSION 4200
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION) #if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#error "Insufficient Cuda Runtime library version, please update it." #error "Insufficient Cuda Runtime library version, please update it."
@ -64,8 +64,108 @@ using namespace std;
using namespace cv; using namespace cv;
using namespace cv::gpu; using namespace cv::gpu;
#ifndef HAVE_CUDA
#define throw_nogpu CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
#else // HAVE_CUDA
namespace
{
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#endif
inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (err < 0)
{
std::ostringstream msg;
msg << "NPP API Call Error: " << err;
cv::gpu::error(msg.str().c_str(), file, line, func);
}
}
}
#endif // HAVE_CUDA
//////////////////////////////// Initialization & Info //////////////////////// //////////////////////////////// Initialization & Info ////////////////////////
#ifndef HAVE_CUDA
int cv::gpu::getCudaEnabledDeviceCount() { return 0; }
void cv::gpu::setDevice(int) { throw_nogpu; }
int cv::gpu::getDevice() { throw_nogpu; return 0; }
void cv::gpu::resetDevice() { throw_nogpu; }
bool cv::gpu::deviceSupports(FeatureSet) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::has(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasBin(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_nogpu; return false; }
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_nogpu; return 0; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu; }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_nogpu; return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_nogpu; return 0; }
bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_nogpu; return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu; return false; }
void cv::gpu::DeviceInfo::query() { throw_nogpu; }
void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu; }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu; }
#else // HAVE_CUDA
int cv::gpu::getCudaEnabledDeviceCount()
{
int count;
cudaError_t error = cudaGetDeviceCount( &count );
if (error == cudaErrorInsufficientDriver)
return -1;
if (error == cudaErrorNoDevice)
return 0;
cudaSafeCall( error );
return count;
}
void cv::gpu::setDevice(int device)
{
cudaSafeCall( cudaSetDevice( device ) );
}
int cv::gpu::getDevice()
{
int device;
cudaSafeCall( cudaGetDevice( &device ) );
return device;
}
void cv::gpu::resetDevice()
{
cudaSafeCall( cudaDeviceReset() );
}
namespace namespace
{ {
class CudaArch class CudaArch
@ -92,11 +192,9 @@ namespace
CudaArch::CudaArch() CudaArch::CudaArch()
{ {
#ifdef HAVE_CUDA
fromStr(CUDA_ARCH_BIN, bin); fromStr(CUDA_ARCH_BIN, bin);
fromStr(CUDA_ARCH_PTX, ptx); fromStr(CUDA_ARCH_PTX, ptx);
fromStr(CUDA_ARCH_FEATURES, features); fromStr(CUDA_ARCH_FEATURES, features);
#endif
} }
bool CudaArch::builtWith(FeatureSet feature_set) const bool CudaArch::builtWith(FeatureSet feature_set) const
@ -149,12 +247,7 @@ namespace
bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set) bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set)
{ {
#if defined (HAVE_CUDA)
return cudaArch.builtWith(feature_set); return cudaArch.builtWith(feature_set);
#else
(void)feature_set;
return false;
#endif
} }
bool cv::gpu::TargetArchs::has(int major, int minor) bool cv::gpu::TargetArchs::has(int major, int minor)
@ -164,35 +257,17 @@ bool cv::gpu::TargetArchs::has(int major, int minor)
bool cv::gpu::TargetArchs::hasPtx(int major, int minor) bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
{ {
#if defined (HAVE_CUDA)
return cudaArch.hasPtx(major, minor); return cudaArch.hasPtx(major, minor);
#else
(void)major;
(void)minor;
return false;
#endif
} }
bool cv::gpu::TargetArchs::hasBin(int major, int minor) bool cv::gpu::TargetArchs::hasBin(int major, int minor)
{ {
#if defined (HAVE_CUDA)
return cudaArch.hasBin(major, minor); return cudaArch.hasBin(major, minor);
#else
(void)major;
(void)minor;
return false;
#endif
} }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor)
{ {
#if defined (HAVE_CUDA)
return cudaArch.hasEqualOrLessPtx(major, minor); return cudaArch.hasEqualOrLessPtx(major, minor);
#else
(void)major;
(void)minor;
return false;
#endif
} }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor)
@ -202,24 +277,12 @@ bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor)
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
{ {
#if defined (HAVE_CUDA)
return cudaArch.hasEqualOrGreaterPtx(major, minor); return cudaArch.hasEqualOrGreaterPtx(major, minor);
#else
(void)major;
(void)minor;
return false;
#endif
} }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
{ {
#if defined (HAVE_CUDA)
return cudaArch.hasEqualOrGreaterBin(major, minor); return cudaArch.hasEqualOrGreaterBin(major, minor);
#else
(void)major;
(void)minor;
return false;
#endif
} }
bool cv::gpu::deviceSupports(FeatureSet feature_set) bool cv::gpu::deviceSupports(FeatureSet feature_set)
@ -247,108 +310,84 @@ bool cv::gpu::deviceSupports(FeatureSet feature_set)
return TargetArchs::builtWith(feature_set) && (version >= feature_set); return TargetArchs::builtWith(feature_set) && (version >= feature_set);
} }
#if !defined (HAVE_CUDA)
#define throw_nogpu CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
int cv::gpu::getCudaEnabledDeviceCount() { return 0; }
void cv::gpu::setDevice(int) { throw_nogpu; }
int cv::gpu::getDevice() { throw_nogpu; return 0; }
void cv::gpu::resetDevice() { throw_nogpu; }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_nogpu; return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_nogpu; return 0; }
bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet) const { throw_nogpu; return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu; return false; }
void cv::gpu::DeviceInfo::query() { throw_nogpu; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu; }
void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu; }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu; }
#undef throw_nogpu
#else // HAVE_CUDA
namespace namespace
{ {
#if defined(__GNUC__) class DeviceProps
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#endif
inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{ {
if (cudaSuccess != err) public:
cv::gpu::error(cudaGetErrorString(err), file, line, func); DeviceProps();
~DeviceProps();
cudaDeviceProp* get(int devID);
private:
std::vector<cudaDeviceProp*> props_;
};
DeviceProps::DeviceProps()
{
props_.resize(10, 0);
} }
inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") DeviceProps::~DeviceProps()
{ {
if (err < 0) for (size_t i = 0; i < props_.size(); ++i)
{ {
std::ostringstream msg; if (props_[i])
msg << "NPP API Call Error: " << err; delete props_[i];
cv::gpu::error(msg.str().c_str(), file, line, func);
} }
props_.clear();
} }
cudaDeviceProp* DeviceProps::get(int devID)
{
if (devID >= (int) props_.size())
props_.resize(devID + 5, 0);
if (!props_[devID])
{
props_[devID] = new cudaDeviceProp;
cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
}
return props_[devID];
}
DeviceProps deviceProps;
} }
int cv::gpu::getCudaEnabledDeviceCount() size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const
{ {
int count; return deviceProps.get(device_id_)->sharedMemPerBlock;
cudaError_t error = cudaGetDeviceCount( &count );
if (error == cudaErrorInsufficientDriver)
return -1;
if (error == cudaErrorNoDevice)
return 0;
cudaSafeCall(error);
return count;
} }
void cv::gpu::setDevice(int device) void cv::gpu::DeviceInfo::queryMemory(size_t& totalMemory, size_t& freeMemory) const
{ {
cudaSafeCall( cudaSetDevice( device ) ); int prevDeviceID = getDevice();
} if (prevDeviceID != device_id_)
setDevice(device_id_);
int cv::gpu::getDevice() cudaSafeCall( cudaMemGetInfo(&freeMemory, &totalMemory) );
{
int device;
cudaSafeCall( cudaGetDevice( &device ) );
return device;
}
void cv::gpu::resetDevice() if (prevDeviceID != device_id_)
{ setDevice(prevDeviceID);
cudaSafeCall( cudaDeviceReset() );
} }
size_t cv::gpu::DeviceInfo::freeMemory() const size_t cv::gpu::DeviceInfo::freeMemory() const
{ {
size_t free_memory, total_memory; size_t totalMemory, freeMemory;
queryMemory(free_memory, total_memory); queryMemory(totalMemory, freeMemory);
return free_memory; return freeMemory;
} }
size_t cv::gpu::DeviceInfo::totalMemory() const size_t cv::gpu::DeviceInfo::totalMemory() const
{ {
size_t free_memory, total_memory; size_t totalMemory, freeMemory;
queryMemory(free_memory, total_memory); queryMemory(totalMemory, freeMemory);
return total_memory; return totalMemory;
} }
bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet feature_set) const bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const
{ {
int version = majorVersion() * 10 + minorVersion(); int version = majorVersion() * 10 + minorVersion();
return version >= feature_set; return version >= feature_set;
@ -370,24 +409,12 @@ bool cv::gpu::DeviceInfo::isCompatible() const
void cv::gpu::DeviceInfo::query() void cv::gpu::DeviceInfo::query()
{ {
cudaDeviceProp prop; const cudaDeviceProp* prop = deviceProps.get(device_id_);
cudaSafeCall(cudaGetDeviceProperties(&prop, device_id_));
name_ = prop.name;
multi_processor_count_ = prop.multiProcessorCount;
majorVersion_ = prop.major;
minorVersion_ = prop.minor;
}
void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) const name_ = prop->name;
{ multi_processor_count_ = prop->multiProcessorCount;
int prev_device_id = getDevice(); majorVersion_ = prop->major;
if (prev_device_id != device_id_) minorVersion_ = prop->minor;
setDevice(device_id_);
cudaSafeCall(cudaMemGetInfo(&free_memory, &total_memory));
if (prev_device_id != device_id_)
setDevice(prev_device_id);
} }
namespace namespace
@ -764,6 +791,50 @@ cv::Mat::Mat(const GpuMat& m) : flags(0), dims(0), rows(0), cols(0), data(0), re
m.download(*this); m.download(*this);
} }
void cv::gpu::createContinuous(int rows, int cols, int type, GpuMat& m)
{
int area = rows * cols;
if (m.empty() || m.type() != type || !m.isContinuous() || m.size().area() < area)
m.create(1, area, type);
m.cols = cols;
m.rows = rows;
m.step = m.elemSize() * cols;
m.flags |= Mat::CONTINUOUS_FLAG;
}
void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
{
if (m.empty() || m.type() != type || m.data != m.datastart)
m.create(rows, cols, type);
else
{
const size_t esz = m.elemSize();
const ptrdiff_t delta2 = m.dataend - m.datastart;
const size_t minstep = m.cols * esz;
Size wholeSize;
wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / m.step + 1), m.rows);
wholeSize.width = std::max(static_cast<int>((delta2 - m.step * (wholeSize.height - 1)) / esz), m.cols);
if (wholeSize.height < rows || wholeSize.width < cols)
m.create(rows, cols, type);
else
{
m.cols = cols;
m.rows = rows;
}
}
}
GpuMat cv::gpu::allocMatFromBuf(int rows, int cols, int type, GpuMat &mat)
{
if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
return mat(Rect(0, 0, cols, rows));
return mat = GpuMat(rows, cols, type);
}
namespace namespace
{ {
class GpuFuncTable class GpuFuncTable
@ -787,25 +858,25 @@ namespace
}; };
} }
#if !defined HAVE_CUDA || defined(CUDA_DISABLER_) #ifndef HAVE_CUDA
namespace namespace
{ {
class EmptyFuncTable : public GpuFuncTable class EmptyFuncTable : public GpuFuncTable
{ {
public: public:
void copy(const Mat&, GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } void copy(const Mat&, GpuMat&) const { throw_nogpu; }
void copy(const GpuMat&, Mat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } void copy(const GpuMat&, Mat&) const { throw_nogpu; }
void copy(const GpuMat&, GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } void copy(const GpuMat&, GpuMat&) const { throw_nogpu; }
void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_nogpu; }
void convert(const GpuMat&, GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } void convert(const GpuMat&, GpuMat&) const { throw_nogpu; }
void convert(const GpuMat&, GpuMat&, double, double) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } void convert(const GpuMat&, GpuMat&, double, double) const { throw_nogpu; }
void setTo(GpuMat&, Scalar, const GpuMat&) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_nogpu; }
void mallocPitch(void**, size_t*, size_t, size_t) const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); } void mallocPitch(void**, size_t*, size_t, size_t) const { throw_nogpu; }
void free(void*) const {} void free(void*) const {}
}; };