updated gpu initialization functions, added compile-time error on CC 1.0

This commit is contained in:
Alexey Spizhevoy 2011-01-20 14:13:07 +00:00
parent 6187b97199
commit 574b3f94a1
6 changed files with 70 additions and 146 deletions

View File

@ -708,47 +708,36 @@ if(WITH_CUDA)
message(STATUS "CUDA detected: " ${CUDA_VERSION}) message(STATUS "CUDA detected: " ${CUDA_VERSION})
set(CUDA_ARCH_GPU "1.3 2.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for") set(CUDA_ARCH_GPU "1.3 2.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for")
set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
# Architectures to be searched for in user's input # These variables are used in config templates
set (CUDA_ARCH_ALL 1.0 1.1 1.2 1.3 2.0 2.1) string(REGEX REPLACE "\\." "" ARCH_GPU_NO_POINTS "${CUDA_ARCH_GPU}")
string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
# Parse user's input # Check if user specified 1.0 compute capability
foreach(ARCH IN LISTS CUDA_ARCH_ALL) string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_GPU} ${CUDA_ARCH_PTX}")
string(REGEX MATCH ${ARCH} ARCH_GPU_MATCH "${CUDA_ARCH_GPU}") if(NOT ${HAS_ARCH_10} STREQUAL "")
string(REGEX MATCH ${ARCH} ARCH_PTX_MATCH "${CUDA_ARCH_PTX}") set(OPENCV_ARCH_GPU_OR_PTX_10 1)
string(REGEX REPLACE "\\." "" ARCH_GPU_AS_NUM "${ARCH_GPU_MATCH}") endif()
string(REGEX REPLACE "\\." "" ARCH_PTX_AS_NUM "${ARCH_PTX_MATCH}")
# Define variables indicating the architectures specified by user
if(NOT ${ARCH_GPU_AS_NUM} STREQUAL "")
set(OPENCV_ARCH_GPU_${ARCH_GPU_AS_NUM} 1)
endif()
if(NOT ${ARCH_PTX_AS_NUM} STREQUAL "")
set(OPENCV_ARCH_PTX_${ARCH_PTX_AS_NUM} 1)
endif()
endforeach()
set(NVCC_FLAGS_EXTRA "") set(NVCC_FLAGS_EXTRA "")
# Tell nvcc to add binaries for the specified GPUs # Tell nvcc to add binaries for the specified GPUs
string(REGEX REPLACE "\\." "" CUDA_ARCH_GPU "${CUDA_ARCH_GPU}") string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_GPU_NO_POINTS}")
string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_GPU_LIST "${CUDA_ARCH_GPU}") foreach(ARCH IN LISTS ARCH_LIST)
foreach(ARCH_GPU IN LISTS CUDA_ARCH_GPU_LIST) set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH})
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_GPU},code=sm_${ARCH_GPU})
endforeach() endforeach()
# Tell nvcc to add PTX intermediate code for the specified architectures # Tell nvcc to add PTX intermediate code for the specified architectures
string(REGEX REPLACE "\\." "" CUDA_ARCH_PTX "${CUDA_ARCH_PTX}") string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_PTX_LIST "${CUDA_ARCH_PTX}") foreach(ARCH IN LISTS ARCH_LIST)
foreach(ARCH_PTX IN LISTS CUDA_ARCH_PTX_LIST) set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_PTX},code=compute_${ARCH_PTX})
endforeach() endforeach()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
message(STATUS "CUDA NVCC flags: ${CUDA_NVCC_FLAGS}")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}") set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")
message(STATUS "CUDA NVCC flags: ${CUDA_NVCC_FLAGS}")
endif() endif()
endif() endif()

View File

@ -163,21 +163,14 @@
/* NVidia Cuda Runtime API*/ /* NVidia Cuda Runtime API*/
#cmakedefine HAVE_CUDA #cmakedefine HAVE_CUDA
/* Compile for 'real' NVIDIA GPU architecture */ /* Compile for 'real' NVIDIA GPU architectures */
#cmakedefine OPENCV_ARCH_GPU_10 #define OPENCV_ARCH_GPU "${ARCH_GPU_NO_POINTS}"
#cmakedefine OPENCV_ARCH_GPU_11
#cmakedefine OPENCV_ARCH_GPU_12
#cmakedefine OPENCV_ARCH_GPU_13
#cmakedefine OPENCV_ARCH_GPU_20
#cmakedefine OPENCV_ARCH_GPU_21
/* Compile for 'virtual' NVIDIA PTX architecture */ /* Compile for 'virtual' NVIDIA PTX architectures */
#cmakedefine OPENCV_ARCH_PTX_10 #define OPENCV_ARCH_PTX "${ARCH_PTX_NO_POINTS}"
#cmakedefine OPENCV_ARCH_PTX_11
#cmakedefine OPENCV_ARCH_PTX_12 /* Create PTX or CUBIN for 1.0 compute capability */
#cmakedefine OPENCV_ARCH_PTX_13 #cmakedefine OPENCV_ARCH_GPU_OR_PTX_10
#cmakedefine OPENCV_ARCH_PTX_20
#cmakedefine OPENCV_ARCH_PTX_21
/* VideoInput library */ /* VideoInput library */
#cmakedefine HAVE_VIDEOINPUT #cmakedefine HAVE_VIDEOINPUT

View File

@ -232,10 +232,10 @@ private:
\cvCppFunc{gpu::ConvolveBuf::ConvolveBuf} \cvCppFunc{gpu::ConvolveBuf::ConvolveBuf}
\cvdefCpp{ConvolveBuf();} \cvdefCpp{ConvolveBuf::ConvolveBuf();}
Constructs an empty buffer which will be properly resized after first call of the convolve function. Constructs an empty buffer which will be properly resized after first call of the convolve function.
\cvdefCpp{ConvolveBuf(Size image\_size, Size templ\_size);} \cvdefCpp{ConvolveBuf::ConvolveBuf(Size image\_size, Size templ\_size);}
Constructs a buffer for the convolve function with the respective arguments. Constructs a buffer for the convolve function with the respective arguments.

View File

@ -82,13 +82,13 @@ Creates HOG descriptor and detector.
\cvCppFunc{gpu::HOGDescriptor::getDescriptorSize} \cvCppFunc{gpu::HOGDescriptor::getDescriptorSize}
Returns number of coefficients required for the classification. Returns number of coefficients required for the classification.
\cvdefCpp{size\_t getDescriptorSize() const;} \cvdefCpp{size\_t HOGDescriptor::getDescriptorSize() const;}
\cvCppFunc{gpu::HOGDescriptor::getBlockHistogramSize} \cvCppFunc{gpu::HOGDescriptor::getBlockHistogramSize}
Returns block histogram size. Returns block histogram size.
\cvdefCpp{size\_t getBlockHistogramSize() const;} \cvdefCpp{size\_t HOGDescriptor::getBlockHistogramSize() const;}
\cvCppFunc{gpu::HOGDescriptor::setSVMDetector} \cvCppFunc{gpu::HOGDescriptor::setSVMDetector}
@ -100,25 +100,25 @@ Sets coefficients for the linear SVM classifier.
\cvCppFunc{gpu::HOGDescriptor::getDefaultPeopleDetector} \cvCppFunc{gpu::HOGDescriptor::getDefaultPeopleDetector}
Returns coefficients of the classifier trained for people detection (for default window size). Returns coefficients of the classifier trained for people detection (for default window size).
\cvdefCpp{static vector<float> getDefaultPeopleDetector();} \cvdefCpp{static vector<float> HOGDescriptor::getDefaultPeopleDetector();}
\cvCppFunc{gpu::HOGDescriptor::getPeopleDetector48x96} \cvCppFunc{gpu::HOGDescriptor::getPeopleDetector48x96}
Returns coefficients of the classifier trained for people detection (for 48x96 windows). Returns coefficients of the classifier trained for people detection (for 48x96 windows).
\cvdefCpp{static vector<float> getPeopleDetector48x96();} \cvdefCpp{static vector<float> HOGDescriptor::getPeopleDetector48x96();}
\cvCppFunc{gpu::HOGDescriptor::getPeopleDetector64x128} \cvCppFunc{gpu::HOGDescriptor::getPeopleDetector64x128}
Returns coefficients of the classifier trained for people detection (for 64x128 windows). Returns coefficients of the classifier trained for people detection (for 64x128 windows).
\cvdefCpp{static vector<float> getPeopleDetector64x128();} \cvdefCpp{static vector<float> HOGDescriptor::getPeopleDetector64x128();}
\cvCppFunc{gpu::HOGDescriptor::detect} \cvCppFunc{gpu::HOGDescriptor::detect}
Performs object detection without multiscale window. Performs object detection without multiscale window.
\cvdefCpp{void detect(const GpuMat\& img, vector<Point>\& found\_locations,\par \cvdefCpp{void HOGDescriptor::detect(const GpuMat\& img, vector<Point>\& found\_locations,\par
double hit\_threshold=0, Size win\_stride=Size(),\par double hit\_threshold=0, Size win\_stride=Size(),\par
Size padding=Size());} Size padding=Size());}
@ -134,10 +134,10 @@ Performs object detection without multiscale window.
\cvCppFunc{gpu::HOGDescriptor::detectMultiScale} \cvCppFunc{gpu::HOGDescriptor::detectMultiScale}
Performs object detection with multiscale window. Performs object detection with multiscale window.
\cvdefCpp{void detectMultiScale(const GpuMat\& img, vector<Rect>\& found\_locations,\par \cvdefCpp{void HOGDescriptor::detectMultiScale(const GpuMat\& img,\par
double hit\_threshold=0, Size win\_stride=Size(),\par vector<Rect>\& found\_locations, double hit\_threshold=0,\par
Size padding=Size(), double scale0=1.05,\par Size win\_stride=Size(), Size padding=Size(),\par
int group\_threshold=2);} double scale0=1.05, int group\_threshold=2);}
\begin{description} \begin{description}
\cvarg{img}{Source image. See \cvCppCross{gpu::HOGDescriptor::detect} for type limitations.} \cvarg{img}{Source image. See \cvCppCross{gpu::HOGDescriptor::detect} for type limitations.}
@ -154,9 +154,9 @@ See \cvCppCross{groupRectangles}.}
\cvCppFunc{gpu::HOGDescriptor::getDescriptors} \cvCppFunc{gpu::HOGDescriptor::getDescriptors}
Returns block descriptors computed for the whole image. It's mainly used for classifier learning purposes. Returns block descriptors computed for the whole image. It's mainly used for classifier learning purposes.
\cvdefCpp{void getDescriptors(const GpuMat\& img, Size win\_stride,\par \cvdefCpp{void HOGDescriptor::getDescriptors(const GpuMat\& img,\par
GpuMat\& descriptors,\par Size win\_stride, GpuMat\& descriptors,\par
int descr\_format=DESCR\_FORMAT\_COL\_BY\_COL);} int descr\_format=DESCR\_FORMAT\_COL\_BY\_COL);}
\begin{description} \begin{description}
\cvarg{img}{Source image. See \cvCppCross{gpu::HOGDescriptor::detect} for type limitations.} \cvarg{img}{Source image. See \cvCppCross{gpu::HOGDescriptor::detect} for type limitations.}

View File

@ -41,6 +41,7 @@
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
#include <functional>
using namespace cv; using namespace cv;
using namespace cv::gpu; using namespace cv::gpu;
@ -58,12 +59,12 @@ CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& /*free*/, size_t& /*total*/) { t
CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int /*device*/) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int /*device*/) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int /*device*/) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int /*device*/) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { return false; }
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { return false; }
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { return false; }
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { return false; }
CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor) { return false; }
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor) { return false; }
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) { throw_nogpu(); return false; }
@ -142,118 +143,55 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
namespace namespace
{ {
template <typename Comparer> template <typename Comparer>
bool checkPtxVersion(int major, int minor, Comparer cmp) bool compare(const std::string& str, int x, Comparer cmp)
{ {
#ifdef OPENCV_ARCH_PTX_10 std::stringstream stream(str);
if (cmp(1, 0, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_PTX_11 int val;
if (cmp(1, 1, major, minor)) return true; stream >> val;
#endif
#ifdef OPENCV_ARCH_PTX_12 while (!stream.eof() && !stream.fail())
if (cmp(1, 2, major, minor)) return true; {
#endif if (cmp(val, x))
return true;
#ifdef OPENCV_ARCH_PTX_13 stream >> val;
if (cmp(1, 3, major, minor)) return true; }
#endif
#ifdef OPENCV_ARCH_PTX_20
if (cmp(2, 0, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_PTX_21
if (cmp(2, 1, major, minor)) return true;
#endif
return false; return false;
} }
template <typename Comparer>
bool checkCubinVersion(int major, int minor, Comparer cmp)
{
#ifdef OPENCV_ARCH_GPU_10
if (cmp(1, 0, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_11
if (cmp(1, 1, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_12
if (cmp(1, 2, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_13
if (cmp(1, 3, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_20
if (cmp(2, 0, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_21
if (cmp(2, 1, major, minor)) return true;
#endif
return false;
}
struct ComparerEqual
{
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
{
return lhs1 == rhs1 && lhs2 == rhs2;
}
};
struct ComparerLessOrEqual
{
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
{
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
}
};
struct ComparerGreaterOrEqual
{
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
{
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);
}
};
} }
CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor)
{ {
return checkPtxVersion(major, minor, ComparerEqual()); return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, std::equal_to<int>());
} }
CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor)
{ {
return checkPtxVersion(major, minor, ComparerLessOrEqual()); return ::compare(OPENCV_ARCH_PTX, major * 10 + minor,
std::less_equal<int>());
} }
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor)
{ {
return checkPtxVersion(major, minor, ComparerGreaterOrEqual()); return ::compare(OPENCV_ARCH_PTX, major * 10 + minor,
std::greater_equal<int>());
} }
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor)
{ {
return checkCubinVersion(major, minor, ComparerEqual()); return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, std::equal_to<int>());
} }
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor)
{ {
return checkCubinVersion(major, minor, ComparerGreaterOrEqual()); return ::compare(OPENCV_ARCH_GPU, major * 10 + minor,
std::greater_equal<int>());
} }
@ -284,7 +222,7 @@ CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
return true; return true;
// Check CUBIN compatibility // Check CUBIN compatibility
for (int i = 0; i <= minor; ++i) for (int i = minor; i >= 0; --i)
if (hasCubinVersion(major, i)) if (hasCubinVersion(major, i))
return true; return true;

View File

@ -85,6 +85,10 @@
#error "Insufficient NPP version, please update it." #error "Insufficient NPP version, please update it."
#endif #endif
#if defined(OPENCV_ARCH_GPU_OR_PTX_10)
#error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
#endif
static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform"); } static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform"); }
#else /* defined(HAVE_CUDA) */ #else /* defined(HAVE_CUDA) */