added checkPtxVersion into gpu module
This commit is contained in:
parent
cbb132ccb1
commit
1a0d41fb53
@ -709,19 +709,19 @@ if (WITH_CUDA)
|
||||
set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability")
|
||||
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
|
||||
|
||||
string(REGEX MATCH "1\\.0" NVIDIA_CC_10 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.1" NVIDIA_CC_11 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.2" NVIDIA_CC_12 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.3" NVIDIA_CC_13 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "2\\.0" NVIDIA_CC_20 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "2\\.1" NVIDIA_CC_21 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.0" STR_OPENCV_GPU_CUDA_ARCH_10 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.1" STR_OPENCV_GPU_CUDA_ARCH_11 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.2" STR_OPENCV_GPU_CUDA_ARCH_12 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "1\\.3" STR_OPENCV_GPU_CUDA_ARCH_13 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "2\\.0" STR_OPENCV_GPU_CUDA_ARCH_20 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
string(REGEX MATCH "2\\.1" STR_OPENCV_GPU_CUDA_ARCH_21 ${CUDA_COMPUTE_CAPABILITIES})
|
||||
|
||||
string(COMPARE EQUAL "1.0" "${NVIDIA_CC_10}" HAVE_PTX_FOR_NVIDIA_CC_10)
|
||||
string(COMPARE EQUAL "1.1" "${NVIDIA_CC_11}" HAVE_PTX_FOR_NVIDIA_CC_11)
|
||||
string(COMPARE EQUAL "1.2" "${NVIDIA_CC_12}" HAVE_PTX_FOR_NVIDIA_CC_12)
|
||||
string(COMPARE EQUAL "1.3" "${NVIDIA_CC_13}" HAVE_PTX_FOR_NVIDIA_CC_13)
|
||||
string(COMPARE EQUAL "2.0" "${NVIDIA_CC_20}" HAVE_PTX_FOR_NVIDIA_CC_20)
|
||||
string(COMPARE EQUAL "2.1" "${NVIDIA_CC_21}" HAVE_PTX_FOR_NVIDIA_CC_21)
|
||||
string(COMPARE EQUAL "1.0" "${STR_OPENCV_GPU_CUDA_ARCH_10}" OPENCV_GPU_CUDA_ARCH_10)
|
||||
string(COMPARE EQUAL "1.1" "${STR_OPENCV_GPU_CUDA_ARCH_11}" OPENCV_GPU_CUDA_ARCH_11)
|
||||
string(COMPARE EQUAL "1.2" "${STR_OPENCV_GPU_CUDA_ARCH_12}" OPENCV_GPU_CUDA_ARCH_12)
|
||||
string(COMPARE EQUAL "1.3" "${STR_OPENCV_GPU_CUDA_ARCH_13}" OPENCV_GPU_CUDA_ARCH_13)
|
||||
string(COMPARE EQUAL "2.0" "${STR_OPENCV_GPU_CUDA_ARCH_20}" OPENCV_GPU_CUDA_ARCH_20)
|
||||
string(COMPARE EQUAL "2.1" "${STR_OPENCV_GPU_CUDA_ARCH_21}" OPENCV_GPU_CUDA_ARCH_21)
|
||||
|
||||
set(CUDA_NVCC_FLAGS_NUM "")
|
||||
|
||||
|
@ -164,22 +164,22 @@
|
||||
#cmakedefine HAVE_CUDA
|
||||
|
||||
/* The project was generated with 1.0 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_10
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_10
|
||||
|
||||
/* The project was generated with 1.1 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_11
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_11
|
||||
|
||||
/* The project was generated with 1.2 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_12
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_12
|
||||
|
||||
/* The project was generated with 1.3 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_13
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_13
|
||||
|
||||
/* The project was generated with 2.0 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_20
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_20
|
||||
|
||||
/* The project was generated with 2.1 NVIDIA device arch support */
|
||||
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_21
|
||||
#cmakedefine OPENCV_GPU_CUDA_ARCH_21
|
||||
|
||||
/* VideoInput library */
|
||||
#cmakedefine HAVE_VIDEOINPUT
|
||||
|
@ -69,11 +69,20 @@ Returns true, if the specified GPU has atomics support, otherwise false.
|
||||
\end{description}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::hasPtxFor}
|
||||
\cvCppFunc{gpu::checkPtxVersion}
|
||||
Returns true, if the GPU module was built with PTX support of the given compute capability, otherwise false.
|
||||
|
||||
\cvdefCpp{bool hasPtxFor(int major, int minor);}
|
||||
\cvdefCpp{template $<$unsigned int cmp\_op$>$\newline
|
||||
bool checkPtxVersion(int major, int minor);}
|
||||
\begin{description}
|
||||
\cvarg{cmp\_op}{Comparison operation:
|
||||
\begin{description}
|
||||
\cvarg{CMP\_EQ}{Return true, if at least one of GPU module PTX versions matches the given one, otherwise false}
|
||||
\cvarg{CMP\_LT}{Return true, if at least one of GPU module PTX versions is less than the given one, otherwise false}
|
||||
\cvarg{CMP\_LE}{Return true, if at least one of GPU module PTX versions is less than or equal to the given one, otherwise false}
|
||||
\cvarg{CMP\_GT}{Return true, if at least one of GPU module PTX versions is greater than the given one, otherwise false}
|
||||
\cvarg{CMP\_GE}{Return true, if at least one of GPU module PTX versions is greater than or equal to the given one, otherwise false}
|
||||
\end{description}}
|
||||
\cvarg{major}{Major CC version.}
|
||||
\cvarg{minor}{Minor CC version.}
|
||||
\end{description}
|
||||
|
@ -72,8 +72,8 @@ namespace cv
|
||||
CV_EXPORTS bool hasNativeDoubleSupport(int device);
|
||||
CV_EXPORTS bool hasAtomicsSupport(int device);
|
||||
|
||||
//! Checks if the GPU module was built with PTX support (-arch) of the given CC
|
||||
CV_EXPORTS bool hasPtxFor(int major, int minor);
|
||||
template <unsigned int cmp_op>
|
||||
CV_EXPORTS bool checkPtxVersion(int major, int minor);
|
||||
|
||||
//! Checks if the GPU module is PTX compatible with the given NVIDIA device
|
||||
CV_EXPORTS bool isCompatibleWith(int device);
|
||||
|
@ -68,6 +68,7 @@ CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount()
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS string cv::gpu::getDeviceName(int device)
|
||||
{
|
||||
cudaDeviceProp prop;
|
||||
@ -75,10 +76,13 @@ CV_EXPORTS string cv::gpu::getDeviceName(int device)
|
||||
return prop.name;
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS void cv::gpu::setDevice(int device)
|
||||
{
|
||||
cudaSafeCall( cudaSetDevice( device ) );
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS int cv::gpu::getDevice()
|
||||
{
|
||||
int device;
|
||||
@ -86,6 +90,7 @@ CV_EXPORTS int cv::gpu::getDevice()
|
||||
return device;
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS void cv::gpu::getComputeCapability(int device, int& major, int& minor)
|
||||
{
|
||||
cudaDeviceProp prop;
|
||||
@ -95,6 +100,7 @@ CV_EXPORTS void cv::gpu::getComputeCapability(int device, int& major, int& minor
|
||||
minor = prop.minor;
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS int cv::gpu::getNumberOfSMs(int device)
|
||||
{
|
||||
cudaDeviceProp prop;
|
||||
@ -108,6 +114,7 @@ CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& free, size_t& total)
|
||||
cudaSafeCall( cudaMemGetInfo( &free, &total ) );
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int device)
|
||||
{
|
||||
int major, minor;
|
||||
@ -115,6 +122,7 @@ CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int device)
|
||||
return major > 1 || (major == 1 && minor >= 3);
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
|
||||
{
|
||||
int major, minor;
|
||||
@ -122,36 +130,90 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
|
||||
return major > 1 || (major == 1 && minor >= 1);
|
||||
}
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasPtxFor(int major, int minor)
|
||||
|
||||
namespace
|
||||
{
|
||||
#ifdef HAVE_PTX_FOR_NVIDIA_CC_10
|
||||
if (major == 1 && minor == 0) return true;
|
||||
template <unsigned int cmp_op>
|
||||
bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2);
|
||||
|
||||
template <>
|
||||
bool comparePairs<CMP_EQ>(int lhs1, int lhs2, int rhs1, int rhs2)
|
||||
{
|
||||
return lhs1 == rhs1 && lhs2 == rhs2;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool comparePairs<CMP_GT>(int lhs1, int lhs2, int rhs1, int rhs2)
|
||||
{
|
||||
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 > rhs2);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool comparePairs<CMP_GE>(int lhs1, int lhs2, int rhs1, int rhs2)
|
||||
{
|
||||
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool comparePairs<CMP_LT>(int lhs1, int lhs2, int rhs1, int rhs2)
|
||||
{
|
||||
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 < rhs2);
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
bool comparePairs<CMP_LE>(int lhs1, int lhs2, int rhs1, int rhs2)
|
||||
{
|
||||
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool comparePairs<CMP_NE>(int lhs1, int lhs2, int rhs1, int rhs2)
|
||||
{
|
||||
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int cmp_op>
|
||||
CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor)
|
||||
{
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_10
|
||||
if (comparePairs<cmp_op>(1, 0, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PTX_FOR_NVIDIA_CC_11
|
||||
if (major == 1 && minor == 1) return true;
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_11
|
||||
if (comparePairs<cmp_op>(1, 1, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PTX_FOR_NVIDIA_CC_12
|
||||
if (major == 1 && minor == 2) return true;
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_12
|
||||
if (comparePairs<cmp_op>(1, 2, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PTX_FOR_NVIDIA_CC_13
|
||||
if (major == 1 && minor == 3) return true;
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_13
|
||||
if (comparePairs<cmp_op>(1, 3, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PTX_FOR_NVIDIA_CC_20
|
||||
if (major == 2 && minor == 0) return true;
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_20
|
||||
if (comparePairs<cmp_op>(2, 0, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PTX_FOR_NVIDIA_CC_21
|
||||
if (major == 2 && minor == 1) return true;
|
||||
#ifdef OPENCV_GPU_CUDA_ARCH_21
|
||||
if (comparePairs<cmp_op>(2, 1, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_EQ>(int major, int minor);
|
||||
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GT>(int major, int minor);
|
||||
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GE>(int major, int minor);
|
||||
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LT>(int major, int minor);
|
||||
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LE>(int major, int minor);
|
||||
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_NE>(int major, int minor);
|
||||
|
||||
|
||||
CV_EXPORTS bool isCompatibleWith(int device)
|
||||
{
|
||||
// According to the CUDA C Programming Guide Version 3.2: "PTX code
|
||||
@ -161,17 +223,7 @@ CV_EXPORTS bool isCompatibleWith(int device)
|
||||
int major, minor;
|
||||
getComputeCapability(device, major, minor);
|
||||
|
||||
for (; major >= 1; --major)
|
||||
{
|
||||
for (; minor >= 0; --minor)
|
||||
{
|
||||
if (hasPtxFor(major, minor))
|
||||
return true;
|
||||
}
|
||||
minor = 9;
|
||||
}
|
||||
|
||||
return false;
|
||||
return checkPtxVersion<CMP_LE>(major, minor);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user