added checkPtxVersion into gpu module

This commit is contained in:
Alexey Spizhevoy 2011-01-18 14:52:35 +00:00
parent cbb132ccb1
commit 1a0d41fb53
5 changed files with 107 additions and 46 deletions

View File

@ -709,19 +709,19 @@ if (WITH_CUDA)
set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability") set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability")
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES}) set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "1\\.0" NVIDIA_CC_10 ${CUDA_COMPUTE_CAPABILITIES}) string(REGEX MATCH "1\\.0" STR_OPENCV_GPU_CUDA_ARCH_10 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "1\\.1" NVIDIA_CC_11 ${CUDA_COMPUTE_CAPABILITIES}) string(REGEX MATCH "1\\.1" STR_OPENCV_GPU_CUDA_ARCH_11 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "1\\.2" NVIDIA_CC_12 ${CUDA_COMPUTE_CAPABILITIES}) string(REGEX MATCH "1\\.2" STR_OPENCV_GPU_CUDA_ARCH_12 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "1\\.3" NVIDIA_CC_13 ${CUDA_COMPUTE_CAPABILITIES}) string(REGEX MATCH "1\\.3" STR_OPENCV_GPU_CUDA_ARCH_13 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "2\\.0" NVIDIA_CC_20 ${CUDA_COMPUTE_CAPABILITIES}) string(REGEX MATCH "2\\.0" STR_OPENCV_GPU_CUDA_ARCH_20 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "2\\.1" NVIDIA_CC_21 ${CUDA_COMPUTE_CAPABILITIES}) string(REGEX MATCH "2\\.1" STR_OPENCV_GPU_CUDA_ARCH_21 ${CUDA_COMPUTE_CAPABILITIES})
string(COMPARE EQUAL "1.0" "${NVIDIA_CC_10}" HAVE_PTX_FOR_NVIDIA_CC_10) string(COMPARE EQUAL "1.0" "${STR_OPENCV_GPU_CUDA_ARCH_10}" OPENCV_GPU_CUDA_ARCH_10)
string(COMPARE EQUAL "1.1" "${NVIDIA_CC_11}" HAVE_PTX_FOR_NVIDIA_CC_11) string(COMPARE EQUAL "1.1" "${STR_OPENCV_GPU_CUDA_ARCH_11}" OPENCV_GPU_CUDA_ARCH_11)
string(COMPARE EQUAL "1.2" "${NVIDIA_CC_12}" HAVE_PTX_FOR_NVIDIA_CC_12) string(COMPARE EQUAL "1.2" "${STR_OPENCV_GPU_CUDA_ARCH_12}" OPENCV_GPU_CUDA_ARCH_12)
string(COMPARE EQUAL "1.3" "${NVIDIA_CC_13}" HAVE_PTX_FOR_NVIDIA_CC_13) string(COMPARE EQUAL "1.3" "${STR_OPENCV_GPU_CUDA_ARCH_13}" OPENCV_GPU_CUDA_ARCH_13)
string(COMPARE EQUAL "2.0" "${NVIDIA_CC_20}" HAVE_PTX_FOR_NVIDIA_CC_20) string(COMPARE EQUAL "2.0" "${STR_OPENCV_GPU_CUDA_ARCH_20}" OPENCV_GPU_CUDA_ARCH_20)
string(COMPARE EQUAL "2.1" "${NVIDIA_CC_21}" HAVE_PTX_FOR_NVIDIA_CC_21) string(COMPARE EQUAL "2.1" "${STR_OPENCV_GPU_CUDA_ARCH_21}" OPENCV_GPU_CUDA_ARCH_21)
set(CUDA_NVCC_FLAGS_NUM "") set(CUDA_NVCC_FLAGS_NUM "")

View File

@ -164,22 +164,22 @@
#cmakedefine HAVE_CUDA #cmakedefine HAVE_CUDA
/* The project was generated with 1.0 NVIDIA device arch support */ /* The project was generated with 1.0 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_10 #cmakedefine OPENCV_GPU_CUDA_ARCH_10
/* The project was generated with 1.1 NVIDIA device arch support */ /* The project was generated with 1.1 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_11 #cmakedefine OPENCV_GPU_CUDA_ARCH_11
/* The project was generated with 1.2 NVIDIA device arch support */ /* The project was generated with 1.2 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_12 #cmakedefine OPENCV_GPU_CUDA_ARCH_12
/* The project was generated with 1.3 NVIDIA device arch support */ /* The project was generated with 1.3 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_13 #cmakedefine OPENCV_GPU_CUDA_ARCH_13
/* The project was generated with 2.0 NVIDIA device arch support */ /* The project was generated with 2.0 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_20 #cmakedefine OPENCV_GPU_CUDA_ARCH_20
/* The project was generated with 2.1 NVIDIA device arch support */ /* The project was generated with 2.1 NVIDIA device arch support */
#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_21 #cmakedefine OPENCV_GPU_CUDA_ARCH_21
/* VideoInput library */ /* VideoInput library */
#cmakedefine HAVE_VIDEOINPUT #cmakedefine HAVE_VIDEOINPUT

View File

@ -69,11 +69,20 @@ Returns true, if the specified GPU has atomics support, otherwise false.
\end{description} \end{description}
\cvCppFunc{gpu::hasPtxFor} \cvCppFunc{gpu::checkPtxVersion}
Returns true, if the GPU module was built with PTX support of the given compute capability, otherwise false. Returns true, if the GPU module was built with PTX support of the given compute capability, otherwise false.
\cvdefCpp{bool hasPtxFor(int major, int minor);} \cvdefCpp{template $<$unsigned int cmp\_op$>$\newline
bool checkPtxVersion(int major, int minor);}
\begin{description} \begin{description}
\cvarg{cmp\_op}{Comparison operation:
\begin{description}
\cvarg{CMP\_EQ}{Return true if at least one of the GPU module's PTX versions matches the given one, otherwise false}
\cvarg{CMP\_LT}{Return true if at least one of the GPU module's PTX versions is less than the given one, otherwise false}
\cvarg{CMP\_LE}{Return true if at least one of the GPU module's PTX versions is less than or equal to the given one, otherwise false}
\cvarg{CMP\_GT}{Return true if at least one of the GPU module's PTX versions is greater than the given one, otherwise false}
\cvarg{CMP\_GE}{Return true if at least one of the GPU module's PTX versions is greater than or equal to the given one, otherwise false}
\end{description}}
\cvarg{major}{Major CC version.} \cvarg{major}{Major CC version.}
\cvarg{minor}{Minor CC version.} \cvarg{minor}{Minor CC version.}
\end{description} \end{description}

View File

@ -72,8 +72,8 @@ namespace cv
CV_EXPORTS bool hasNativeDoubleSupport(int device); CV_EXPORTS bool hasNativeDoubleSupport(int device);
CV_EXPORTS bool hasAtomicsSupport(int device); CV_EXPORTS bool hasAtomicsSupport(int device);
//! Checks if the GPU module was built with PTX support (-arch) of the given CC template <unsigned int cmp_op>
CV_EXPORTS bool hasPtxFor(int major, int minor); CV_EXPORTS bool checkPtxVersion(int major, int minor);
//! Checks if the GPU module is PTX compatible with the given NVIDIA device //! Checks if the GPU module is PTX compatible with the given NVIDIA device
CV_EXPORTS bool isCompatibleWith(int device); CV_EXPORTS bool isCompatibleWith(int device);

View File

@ -68,6 +68,7 @@ CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount()
return count; return count;
} }
CV_EXPORTS string cv::gpu::getDeviceName(int device) CV_EXPORTS string cv::gpu::getDeviceName(int device)
{ {
cudaDeviceProp prop; cudaDeviceProp prop;
@ -75,10 +76,13 @@ CV_EXPORTS string cv::gpu::getDeviceName(int device)
return prop.name; return prop.name;
} }
CV_EXPORTS void cv::gpu::setDevice(int device) CV_EXPORTS void cv::gpu::setDevice(int device)
{ {
cudaSafeCall( cudaSetDevice( device ) ); cudaSafeCall( cudaSetDevice( device ) );
} }
CV_EXPORTS int cv::gpu::getDevice() CV_EXPORTS int cv::gpu::getDevice()
{ {
int device; int device;
@ -86,6 +90,7 @@ CV_EXPORTS int cv::gpu::getDevice()
return device; return device;
} }
CV_EXPORTS void cv::gpu::getComputeCapability(int device, int& major, int& minor) CV_EXPORTS void cv::gpu::getComputeCapability(int device, int& major, int& minor)
{ {
cudaDeviceProp prop; cudaDeviceProp prop;
@ -95,6 +100,7 @@ CV_EXPORTS void cv::gpu::getComputeCapability(int device, int& major, int& minor
minor = prop.minor; minor = prop.minor;
} }
CV_EXPORTS int cv::gpu::getNumberOfSMs(int device) CV_EXPORTS int cv::gpu::getNumberOfSMs(int device)
{ {
cudaDeviceProp prop; cudaDeviceProp prop;
@ -108,6 +114,7 @@ CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& free, size_t& total)
cudaSafeCall( cudaMemGetInfo( &free, &total ) ); cudaSafeCall( cudaMemGetInfo( &free, &total ) );
} }
CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int device) CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int device)
{ {
int major, minor; int major, minor;
@ -115,6 +122,7 @@ CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int device)
return major > 1 || (major == 1 && minor >= 3); return major > 1 || (major == 1 && minor >= 3);
} }
CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device) CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
{ {
int major, minor; int major, minor;
@ -122,36 +130,90 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
return major > 1 || (major == 1 && minor >= 1); return major > 1 || (major == 1 && minor >= 1);
} }
CV_EXPORTS bool cv::gpu::hasPtxFor(int major, int minor)
namespace
{ {
#ifdef HAVE_PTX_FOR_NVIDIA_CC_10 template <unsigned int cmp_op>
if (major == 1 && minor == 0) return true; bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2);
// Equality of two (first, second) pairs: true only when both components match.
template <>
bool comparePairs<CMP_EQ>(int lhs1, int lhs2, int rhs1, int rhs2)
{
    const bool sameFirst = (lhs1 == rhs1);
    const bool sameSecond = (lhs2 == rhs2);
    return sameFirst && sameSecond;
}
// Lexicographic "greater than": the first components decide unless they tie,
// in which case the second components are compared strictly.
template <>
bool comparePairs<CMP_GT>(int lhs1, int lhs2, int rhs1, int rhs2)
{
    if (lhs1 != rhs1)
        return lhs1 > rhs1;
    return lhs2 > rhs2;
}
// Lexicographic "greater than or equal": the first components decide unless
// they tie, in which case the second components are compared non-strictly.
template <>
bool comparePairs<CMP_GE>(int lhs1, int lhs2, int rhs1, int rhs2)
{
    if (lhs1 != rhs1)
        return lhs1 > rhs1;
    return lhs2 >= rhs2;
}
// Lexicographic "less than": on a tie of the first components the second
// components are compared strictly; otherwise the first components decide.
template <>
bool comparePairs<CMP_LT>(int lhs1, int lhs2, int rhs1, int rhs2)
{
    return (lhs1 == rhs1) ? (lhs2 < rhs2) : (lhs1 < rhs1);
}
// Lexicographic "less than or equal": on a tie of the first components the
// second components are compared non-strictly; otherwise the first decide.
template <>
bool comparePairs<CMP_LE>(int lhs1, int lhs2, int rhs1, int rhs2)
{
    return (lhs1 == rhs1) ? (lhs2 <= rhs2) : (lhs1 < rhs1);
}
// Inequality of two (first, second) pairs: true when the pairs differ in
// either component, i.e. the exact negation of the CMP_EQ specialization.
// The previous body duplicated the CMP_LE ordering test, which made
// checkPtxVersion<CMP_NE> answer "less than or equal" instead of "not equal".
template <>
bool comparePairs<CMP_NE>(int lhs1, int lhs2, int rhs1, int rhs2)
{
    return lhs1 != rhs1 || lhs2 != rhs2;
}
}
//! Tests the compute capabilities selected at build time against (major, minor).
//! For every OPENCV_GPU_CUDA_ARCH_xy macro that is defined, the built version
//! (x, y) is compared with the requested (major, minor) through
//! comparePairs<cmp_op>, with the built version on the left-hand side.
//! Returns true as soon as one built version satisfies the comparison and
//! false when none does (including when no arch macro is defined).
template <unsigned int cmp_op>
CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor)
{
#ifdef OPENCV_GPU_CUDA_ARCH_10
if (comparePairs<cmp_op>(1, 0, major, minor)) return true;
#endif #endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_11 #ifdef OPENCV_GPU_CUDA_ARCH_11
if (major == 1 && minor == 1) return true; if (comparePairs<cmp_op>(1, 1, major, minor)) return true;
#endif #endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_12 #ifdef OPENCV_GPU_CUDA_ARCH_12
if (major == 1 && minor == 2) return true; if (comparePairs<cmp_op>(1, 2, major, minor)) return true;
#endif #endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_13 #ifdef OPENCV_GPU_CUDA_ARCH_13
if (major == 1 && minor == 3) return true; if (comparePairs<cmp_op>(1, 3, major, minor)) return true;
#endif #endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_20 #ifdef OPENCV_GPU_CUDA_ARCH_20
if (major == 2 && minor == 0) return true; if (comparePairs<cmp_op>(2, 0, major, minor)) return true;
#endif #endif
#ifdef HAVE_PTX_FOR_NVIDIA_CC_21 #ifdef OPENCV_GPU_CUDA_ARCH_21
if (major == 2 && minor == 1) return true; if (comparePairs<cmp_op>(2, 1, major, minor)) return true;
#endif #endif
// No enabled arch macro produced a match for the requested comparison.
return false; return false;
}
// Explicit instantiations of checkPtxVersion for each comparison operation
// that has a comparePairs specialization in this file. The template body is
// defined in this translation unit, so these instantiations are what make
// the individual cmp_op variants available to code outside it.
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_EQ>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GT>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GE>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LT>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LE>(int major, int minor);
template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_NE>(int major, int minor);
CV_EXPORTS bool isCompatibleWith(int device) CV_EXPORTS bool isCompatibleWith(int device)
{ {
// According to the CUDA C Programming Guide Version 3.2: "PTX code // According to the CUDA C Programming Guide Version 3.2: "PTX code
@ -161,17 +223,7 @@ CV_EXPORTS bool isCompatibleWith(int device)
int major, minor; int major, minor;
getComputeCapability(device, major, minor); getComputeCapability(device, major, minor);
for (; major >= 1; --major) return checkPtxVersion<CMP_LE>(major, minor);
{
for (; minor >= 0; --minor)
{
if (hasPtxFor(major, minor))
return true;
}
minor = 9;
}
return false;
} }
#endif #endif